SIMD implementation of vdev_raidz generate and reconstruct routines
This is a new implementation of RAIDZ1/2/3 routines using x86_64 scalar, SSE, and AVX2 instruction sets. Included are 3 parity generation routines (P, PQ, and PQR) and 7 reconstruction routines, for all RAIDZ levels. On module load, a quick benchmark of supported routines will select the fastest for each operation and they will be used at runtime. The original implementation is still present and can be selected via a module parameter. Patch contains: - specialized gen/rec routines for all RAIDZ levels, - new scalar raidz implementation (unrolled), - two x86_64 SIMD implementations (SSE and AVX2 instruction sets), - fastest routines selected on module load (benchmark). - cmd/raidz_test - verify and benchmark all implementations - added raidz_test to the ZFS Test Suite New zfs module parameters: - zfs_vdev_raidz_impl (str): selects the implementation to use. On module load, the parameter will only accept the first 3 options, and the other implementations can be set once the module has finished loading. Possible values for this option are: "fastest" - use the fastest math available "original" - use the original raidz code "scalar" - new scalar impl "sse" - new SSE impl if available "avx2" - new AVX2 impl if available See contents of `/sys/module/zfs/parameters/zfs_vdev_raidz_impl` to get the list of supported values. If an implementation is not supported on the system, it will not be shown. The currently selected option is enclosed in `[]`. Signed-off-by: Gvozden Neskovic <neskovic@gmail.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #4328
This commit is contained in:
parent
09fb30e5e9
commit
ab9f4b0b82
|
@ -1,3 +1,3 @@
|
|||
SUBDIRS = zfs zpool zdb zhack zinject zstreamdump ztest zpios
|
||||
SUBDIRS += mount_zfs fsck_zfs zvol_id vdev_id arcstat dbufstat zed
|
||||
SUBDIRS += arc_summary
|
||||
SUBDIRS += arc_summary raidz_test
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
/raidz_test
|
|
@ -0,0 +1,21 @@
|
|||
include $(top_srcdir)/config/Rules.am
|
||||
|
||||
AM_CFLAGS += $(DEBUG_STACKFLAGS) $(FRAME_LARGER_THAN)
|
||||
AM_CPPFLAGS += -DDEBUG
|
||||
|
||||
DEFAULT_INCLUDES += \
|
||||
-I$(top_srcdir)/include \
|
||||
-I$(top_srcdir)/lib/libspl/include
|
||||
|
||||
bin_PROGRAMS = raidz_test
|
||||
|
||||
raidz_test_SOURCES = \
|
||||
raidz_test.h \
|
||||
raidz_test.c \
|
||||
raidz_bench.c
|
||||
|
||||
raidz_test_LDADD = \
|
||||
$(top_builddir)/lib/libuutil/libuutil.la \
|
||||
$(top_builddir)/lib/libzpool/libzpool.la
|
||||
|
||||
raidz_test_LDADD += -lm -ldl
|
|
@ -0,0 +1,241 @@
|
|||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 2016 Gvozden Nešković. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/vdev_raidz.h>
|
||||
#include <sys/vdev_raidz_impl.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <sys/time.h>
|
||||
#include <sys/resource.h>
|
||||
|
||||
#include "raidz_test.h"
|
||||
|
||||
#define GEN_BENCH_MEMORY (((uint64_t)1ULL)<<32)
|
||||
#define REC_BENCH_MEMORY (((uint64_t)1ULL)<<29)
|
||||
#define BENCH_ASHIFT 12
|
||||
#define MIN_CS_SHIFT BENCH_ASHIFT
|
||||
#define MAX_CS_SHIFT SPA_MAXBLOCKSHIFT
|
||||
|
||||
|
||||
static zio_t zio_bench;
|
||||
static raidz_map_t *rm_bench;
|
||||
static size_t max_data_size = SPA_MAXBLOCKSIZE;
|
||||
|
||||
static void
|
||||
bench_init_raidz_map(void)
|
||||
{
|
||||
zio_bench.io_offset = 0;
|
||||
zio_bench.io_size = max_data_size;
|
||||
|
||||
/*
|
||||
* To permit larger column sizes these have to be done
|
||||
* allocated using aligned alloc instead of zio_data_buf_alloc
|
||||
*/
|
||||
zio_bench.io_data = raidz_alloc(max_data_size);
|
||||
|
||||
init_zio_data(&zio_bench);
|
||||
}
|
||||
|
||||
static void
|
||||
bench_fini_raidz_maps(void)
|
||||
{
|
||||
/* tear down golden zio */
|
||||
raidz_free(zio_bench.io_data, max_data_size);
|
||||
bzero(&zio_bench, sizeof (zio_t));
|
||||
}
|
||||
|
||||
static double
|
||||
get_time_diff(struct rusage *start, struct rusage *stop)
|
||||
{
|
||||
return (((double)stop->ru_utime.tv_sec * (double)MICROSEC +
|
||||
(double)stop->ru_utime.tv_usec) -
|
||||
((double)start->ru_utime.tv_sec * (double)MICROSEC +
|
||||
(double)start->ru_utime.tv_usec)) / (double)MICROSEC;
|
||||
}
|
||||
|
||||
static inline void
|
||||
run_gen_bench_impl(const char *impl)
|
||||
{
|
||||
int fn, ncols;
|
||||
uint64_t ds, iter_cnt, iter, disksize;
|
||||
struct rusage start, stop;
|
||||
double elapsed, d_bw;
|
||||
|
||||
/* Benchmark generate functions */
|
||||
for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
|
||||
|
||||
for (ds = MIN_CS_SHIFT; ds <= MAX_CS_SHIFT; ds++) {
|
||||
|
||||
/* create suitable raidz_map */
|
||||
ncols = rto_opts.rto_dcols + fn + 1;
|
||||
zio_bench.io_size = 1ULL << ds;
|
||||
rm_bench = vdev_raidz_map_alloc(&zio_bench,
|
||||
BENCH_ASHIFT, ncols, fn+1);
|
||||
|
||||
/* estimate iteration count */
|
||||
iter_cnt = GEN_BENCH_MEMORY;
|
||||
iter_cnt /= zio_bench.io_size;
|
||||
|
||||
getrusage(RUSAGE_THREAD, &start);
|
||||
for (iter = 0; iter < iter_cnt; iter++)
|
||||
vdev_raidz_generate_parity(rm_bench);
|
||||
getrusage(RUSAGE_THREAD, &stop);
|
||||
|
||||
elapsed = get_time_diff(&start, &stop);
|
||||
disksize = (1ULL << ds) / rto_opts.rto_dcols;
|
||||
d_bw = (double)iter_cnt * (double)disksize;
|
||||
d_bw /= (1024.0 * 1024.0 * elapsed);
|
||||
|
||||
LOG(D_ALL, "%10s, %8s, %zu, %10llu, %lf, %lf, %u\n",
|
||||
impl,
|
||||
raidz_gen_name[fn],
|
||||
rto_opts.rto_dcols,
|
||||
(1ULL<<ds),
|
||||
d_bw,
|
||||
d_bw * (double)(ncols),
|
||||
(unsigned) iter_cnt);
|
||||
|
||||
vdev_raidz_map_free(rm_bench);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
run_gen_bench(void)
|
||||
{
|
||||
char **impl_name;
|
||||
|
||||
LOG(D_INFO, DBLSEP "\nBenchmarking parity generation...\n\n");
|
||||
LOG(D_ALL, "impl, math, dcols, iosize, disk_bw, total_bw, iter\n");
|
||||
|
||||
for (impl_name = (char **)raidz_impl_names; *impl_name != NULL;
|
||||
impl_name++) {
|
||||
|
||||
if (vdev_raidz_impl_set(*impl_name) != 0)
|
||||
continue;
|
||||
|
||||
run_gen_bench_impl(*impl_name);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
run_rec_bench_impl(const char *impl)
|
||||
{
|
||||
struct rusage start, stop;
|
||||
int fn, ncols, nbad;
|
||||
uint64_t ds, iter_cnt, iter, disksize;
|
||||
double elapsed, d_bw;
|
||||
static const int tgt[7][3] = {
|
||||
{1, 2, 3}, /* rec_p: bad QR & D[0] */
|
||||
{0, 2, 3}, /* rec_q: bad PR & D[0] */
|
||||
{0, 1, 3}, /* rec_r: bad PQ & D[0] */
|
||||
{2, 3, 4}, /* rec_pq: bad R & D[0][1] */
|
||||
{1, 3, 4}, /* rec_pr: bad Q & D[0][1] */
|
||||
{0, 3, 4}, /* rec_qr: bad P & D[0][1] */
|
||||
{3, 4, 5} /* rec_pqr: bad & D[0][1][2] */
|
||||
};
|
||||
|
||||
for (fn = 0; fn < RAIDZ_REC_NUM; fn++) {
|
||||
for (ds = MIN_CS_SHIFT; ds <= MAX_CS_SHIFT; ds++) {
|
||||
|
||||
/* create suitable raidz_map */
|
||||
ncols = rto_opts.rto_dcols + PARITY_PQR;
|
||||
zio_bench.io_size = 1ULL << ds;
|
||||
|
||||
/*
|
||||
* raidz block is too short to test
|
||||
* the requested method
|
||||
*/
|
||||
if (zio_bench.io_size / rto_opts.rto_dcols <
|
||||
(1ULL << BENCH_ASHIFT))
|
||||
continue;
|
||||
|
||||
rm_bench = vdev_raidz_map_alloc(&zio_bench,
|
||||
BENCH_ASHIFT, ncols, PARITY_PQR);
|
||||
|
||||
/* estimate iteration count */
|
||||
iter_cnt = (REC_BENCH_MEMORY);
|
||||
iter_cnt /= zio_bench.io_size;
|
||||
|
||||
/* calculate how many bad columns there are */
|
||||
nbad = MIN(3, raidz_ncols(rm_bench) -
|
||||
raidz_parity(rm_bench));
|
||||
|
||||
getrusage(RUSAGE_THREAD, &start);
|
||||
for (iter = 0; iter < iter_cnt; iter++)
|
||||
vdev_raidz_reconstruct(rm_bench, tgt[fn], nbad);
|
||||
getrusage(RUSAGE_THREAD, &stop);
|
||||
|
||||
elapsed = get_time_diff(&start, &stop);
|
||||
disksize = (1ULL << ds) / rto_opts.rto_dcols;
|
||||
d_bw = (double)iter_cnt * (double)(disksize);
|
||||
d_bw /= (1024.0 * 1024.0 * elapsed);
|
||||
|
||||
LOG(D_ALL, "%10s, %8s, %zu, %10llu, %lf, %lf, %u\n",
|
||||
impl,
|
||||
raidz_rec_name[fn],
|
||||
rto_opts.rto_dcols,
|
||||
(1ULL<<ds),
|
||||
d_bw,
|
||||
d_bw * (double)ncols,
|
||||
(unsigned) iter_cnt);
|
||||
|
||||
vdev_raidz_map_free(rm_bench);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
run_rec_bench(void)
|
||||
{
|
||||
char **impl_name;
|
||||
|
||||
LOG(D_INFO, DBLSEP "\nBenchmarking data reconstruction...\n\n");
|
||||
LOG(D_ALL, "impl, math, dcols, iosize, disk_bw, total_bw, iter\n");
|
||||
|
||||
for (impl_name = (char **)raidz_impl_names; *impl_name != NULL;
|
||||
impl_name++) {
|
||||
|
||||
if (vdev_raidz_impl_set(*impl_name) != 0)
|
||||
continue;
|
||||
|
||||
run_rec_bench_impl(*impl_name);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Benchmark entry point: set up the shared benchmark zio, run the
 * generation and reconstruction benchmarks, then tear the zio down.
 */
void
run_raidz_benchmark(void)
{
	bench_init_raidz_map();

	run_gen_bench();	/* parity generation */
	run_rec_bench();	/* data reconstruction */

	bench_fini_raidz_maps();
}
|
|
@ -0,0 +1,770 @@
|
|||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 2016 Gvozden Nešković. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/zio.h>
|
||||
#include <umem.h>
|
||||
#include <sys/vdev_raidz.h>
|
||||
#include <sys/vdev_raidz_impl.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include "raidz_test.h"
|
||||
|
||||
static int *rand_data;
|
||||
raidz_test_opts_t rto_opts;
|
||||
|
||||
static char gdb[256];
|
||||
static const char gdb_tmpl[] = "gdb -ex \"set pagination 0\" -p %d";
|
||||
|
||||
static void sig_handler(int signo)
|
||||
{
|
||||
struct sigaction action;
|
||||
/*
|
||||
* Restore default action and re-raise signal so SIGSEGV and
|
||||
* SIGABRT can trigger a core dump.
|
||||
*/
|
||||
action.sa_handler = SIG_DFL;
|
||||
sigemptyset(&action.sa_mask);
|
||||
action.sa_flags = 0;
|
||||
(void) sigaction(signo, &action, NULL);
|
||||
|
||||
if (rto_opts.rto_gdb)
|
||||
if (system(gdb));
|
||||
|
||||
raise(signo);
|
||||
}
|
||||
|
||||
/*
 * Print the effective test options to stdout.
 *
 * opts:  options to print.
 * force: print even when the verbosity in opts is below D_INFO.
 */
static void print_opts(raidz_test_opts_t *opts, boolean_t force)
{
	char *level;

	/* map the numeric verbosity to a label */
	if (opts->rto_v == 0)
		level = "no";
	else if (opts->rto_v == 1)
		level = "info";
	else
		level = "debug";

	/* stay quiet unless forced or verbose enough */
	if (!force && !(opts->rto_v >= D_INFO))
		return;

	(void) fprintf(stdout, DBLSEP "Running with options:\n"
	    " (-a) zio ashift : %zu\n"
	    " (-o) zio offset : 1 << %zu\n"
	    " (-d) number of raidz data columns : %zu\n"
	    " (-s) size of DATA : 1 << %zu\n"
	    " (-S) sweep parameters : %s \n"
	    " (-v) verbose : %s \n\n",
	    opts->rto_ashift,			/* -a */
	    ilog2(opts->rto_offset),		/* -o */
	    opts->rto_dcols,			/* -d */
	    ilog2(opts->rto_dsize),		/* -s */
	    opts->rto_sweep ? "yes" : "no",	/* -S */
	    level);				/* -v */
}
|
||||
|
||||
/*
 * Print the command line help and terminate the process.
 *
 * requested: B_TRUE when help was explicitly asked for (-h); output then
 *            goes to stdout and the exit status is 0.  Otherwise the text
 *            goes to stderr and the exit status is 1.
 *
 * All "default" values shown are taken from rto_opts_defaults, never from
 * the options parsed so far, so that e.g. "-S -h" still reports the real
 * default for -S.
 */
static void usage(boolean_t requested)
{
	const raidz_test_opts_t *o = &rto_opts_defaults;

	FILE *fp = requested ? stdout : stderr;

	(void) fprintf(fp, "Usage:\n"
	    "\t[-a zio ashift (default: %zu)]\n"
	    "\t[-o zio offset, exponent radix 2 (default: %zu)]\n"
	    "\t[-d number of raidz data columns (default: %zu)]\n"
	    "\t[-s zio size, exponent radix 2 (default: %zu)]\n"
	    "\t[-S parameter sweep (default: %s)]\n"
	    "\t[-t timeout for parameter sweep test]\n"
	    "\t[-B benchmark all raidz implementations]\n"
	    "\t[-v increase verbosity (default: %zu)]\n"
	    "\t[-h (print help)]\n"
	    "\t[-T test the test, see if failure would be detected]\n"
	    "\t[-D debug (attach gdb on SIGSEGV)]\n"
	    "",
	    o->rto_ashift,			/* -a */
	    ilog2(o->rto_offset),		/* -o */
	    o->rto_dcols,			/* -d */
	    ilog2(o->rto_dsize),		/* -s */
	    o->rto_sweep ? "yes" : "no",	/* -S */
	    o->rto_v);				/* -v */

	exit(requested ? 0 : 1);
}
|
||||
|
||||
static void process_options(int argc, char **argv)
|
||||
{
|
||||
size_t value;
|
||||
int opt;
|
||||
|
||||
raidz_test_opts_t *o = &rto_opts;
|
||||
|
||||
bcopy(&rto_opts_defaults, o, sizeof (*o));
|
||||
|
||||
while ((opt = getopt(argc, argv, "TDBSvha:o:d:s:t:")) != -1) {
|
||||
value = 0;
|
||||
|
||||
switch (opt) {
|
||||
case 'a':
|
||||
value = strtoull(optarg, NULL, 0);
|
||||
o->rto_ashift = MIN(13, MAX(9, value));
|
||||
break;
|
||||
case 'o':
|
||||
value = strtoull(optarg, NULL, 0);
|
||||
o->rto_offset = ((1ULL << MIN(12, value)) >> 9) << 9;
|
||||
break;
|
||||
case 'd':
|
||||
value = strtoull(optarg, NULL, 0);
|
||||
o->rto_dcols = MIN(255, MAX(1, value));
|
||||
break;
|
||||
case 's':
|
||||
value = strtoull(optarg, NULL, 0);
|
||||
o->rto_dsize = 1ULL << MIN(SPA_MAXBLOCKSHIFT,
|
||||
MAX(SPA_MINBLOCKSHIFT, value));
|
||||
break;
|
||||
case 't':
|
||||
value = strtoull(optarg, NULL, 0);
|
||||
o->rto_sweep_timeout = value;
|
||||
break;
|
||||
case 'v':
|
||||
o->rto_v++;
|
||||
break;
|
||||
case 'S':
|
||||
o->rto_sweep = 1;
|
||||
break;
|
||||
case 'B':
|
||||
o->rto_benchmark = 1;
|
||||
break;
|
||||
case 'D':
|
||||
o->rto_gdb = 1;
|
||||
break;
|
||||
case 'T':
|
||||
o->rto_sanity = 1;
|
||||
break;
|
||||
case 'h':
|
||||
usage(B_TRUE);
|
||||
break;
|
||||
case '?':
|
||||
default:
|
||||
usage(B_FALSE);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#define DATA_COL(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_data)
|
||||
#define DATA_COL_SIZE(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_size)
|
||||
|
||||
#define CODE_COL(rm, i) ((rm)->rm_col[(i)].rc_data)
|
||||
#define CODE_COL_SIZE(rm, i) ((rm)->rm_col[(i)].rc_size)
|
||||
|
||||
static int
|
||||
cmp_code(raidz_test_opts_t *opts, const raidz_map_t *rm, const int parity)
|
||||
{
|
||||
int i, ret = 0;
|
||||
|
||||
VERIFY(parity >= 1 && parity <= 3);
|
||||
|
||||
for (i = 0; i < parity; i++) {
|
||||
if (0 != memcmp(CODE_COL(rm, i), CODE_COL(opts->rm_golden, i),
|
||||
CODE_COL_SIZE(rm, i))) {
|
||||
ret++;
|
||||
|
||||
LOG_OPT(D_DEBUG, opts,
|
||||
"\nParity block [%d] different!\n", i);
|
||||
}
|
||||
}
|
||||
return (ret);
|
||||
}
|
||||
|
||||
static int
|
||||
cmp_data(raidz_test_opts_t *opts, raidz_map_t *rm)
|
||||
{
|
||||
int i, ret = 0;
|
||||
int dcols = opts->rm_golden->rm_cols - raidz_parity(opts->rm_golden);
|
||||
|
||||
for (i = 0; i < dcols; i++) {
|
||||
if (0 != memcmp(DATA_COL(opts->rm_golden, i), DATA_COL(rm, i),
|
||||
DATA_COL_SIZE(opts->rm_golden, i))) {
|
||||
ret++;
|
||||
|
||||
LOG_OPT(D_DEBUG, opts,
|
||||
"\nData block [%d] different!\n", i);
|
||||
}
|
||||
}
|
||||
return (ret);
|
||||
}
|
||||
|
||||
static void
|
||||
corrupt_colums(raidz_map_t *rm, const int *tgts, const int cnt)
|
||||
{
|
||||
int i;
|
||||
int *dst;
|
||||
raidz_col_t *col;
|
||||
|
||||
for (i = 0; i < cnt; i++) {
|
||||
col = &rm->rm_col[tgts[i]];
|
||||
dst = col->rc_data;
|
||||
for (i = 0; i < col->rc_size / sizeof (int); i++)
|
||||
dst[i] = rand();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
init_zio_data(zio_t *zio)
|
||||
{
|
||||
int i;
|
||||
int *dst = (int *) zio->io_data;
|
||||
|
||||
for (i = 0; i < zio->io_size / sizeof (int); i++) {
|
||||
dst[i] = rand_data[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
fini_raidz_map(zio_t **zio, raidz_map_t **rm)
|
||||
{
|
||||
vdev_raidz_map_free(*rm);
|
||||
raidz_free((*zio)->io_data, (*zio)->io_size);
|
||||
umem_free(*zio, sizeof (zio_t));
|
||||
|
||||
*zio = NULL;
|
||||
*rm = NULL;
|
||||
}
|
||||
|
||||
static int
|
||||
init_raidz_golden_map(raidz_test_opts_t *opts, const int parity)
|
||||
{
|
||||
int err = 0;
|
||||
zio_t *zio_test;
|
||||
raidz_map_t *rm_test;
|
||||
const size_t total_ncols = opts->rto_dcols + parity;
|
||||
|
||||
if (opts->rm_golden) {
|
||||
fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
|
||||
}
|
||||
|
||||
opts->zio_golden = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
|
||||
zio_test = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
|
||||
|
||||
opts->zio_golden->io_offset = zio_test->io_offset = opts->rto_offset;
|
||||
opts->zio_golden->io_size = zio_test->io_size = opts->rto_dsize;
|
||||
|
||||
opts->zio_golden->io_data = raidz_alloc(opts->rto_dsize);
|
||||
zio_test->io_data = raidz_alloc(opts->rto_dsize);
|
||||
|
||||
init_zio_data(opts->zio_golden);
|
||||
init_zio_data(zio_test);
|
||||
|
||||
VERIFY0(vdev_raidz_impl_set("original"));
|
||||
|
||||
opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden,
|
||||
opts->rto_ashift, total_ncols, parity);
|
||||
rm_test = vdev_raidz_map_alloc(zio_test,
|
||||
opts->rto_ashift, total_ncols, parity);
|
||||
|
||||
VERIFY(opts->zio_golden);
|
||||
VERIFY(opts->rm_golden);
|
||||
|
||||
vdev_raidz_generate_parity(opts->rm_golden);
|
||||
vdev_raidz_generate_parity(rm_test);
|
||||
|
||||
/* sanity check */
|
||||
err |= cmp_data(opts, rm_test);
|
||||
err |= cmp_code(opts, rm_test, parity);
|
||||
|
||||
if (err)
|
||||
ERR("initializing the golden copy ... [FAIL]!\n");
|
||||
|
||||
/* tear down raidz_map of test zio */
|
||||
fini_raidz_map(&zio_test, &rm_test);
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
static raidz_map_t *
|
||||
init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity)
|
||||
{
|
||||
raidz_map_t *rm = NULL;
|
||||
const size_t alloc_dsize = opts->rto_dsize;
|
||||
const size_t total_ncols = opts->rto_dcols + parity;
|
||||
const int ccols[] = { 0, 1, 2 };
|
||||
|
||||
VERIFY(zio);
|
||||
VERIFY(parity <= 3 && parity >= 1);
|
||||
|
||||
*zio = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
|
||||
|
||||
(*zio)->io_offset = 0;
|
||||
(*zio)->io_size = alloc_dsize;
|
||||
(*zio)->io_data = raidz_alloc(alloc_dsize);
|
||||
init_zio_data(*zio);
|
||||
|
||||
rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift,
|
||||
total_ncols, parity);
|
||||
VERIFY(rm);
|
||||
|
||||
/* Make sure code columns are destroyed */
|
||||
corrupt_colums(rm, ccols, parity);
|
||||
|
||||
return (rm);
|
||||
}
|
||||
|
||||
static int
|
||||
run_gen_check(raidz_test_opts_t *opts)
|
||||
{
|
||||
char **impl_name;
|
||||
int fn, err = 0;
|
||||
zio_t *zio_test;
|
||||
raidz_map_t *rm_test;
|
||||
|
||||
err = init_raidz_golden_map(opts, PARITY_PQR);
|
||||
if (0 != err)
|
||||
return (err);
|
||||
|
||||
LOG(D_INFO, DBLSEP);
|
||||
LOG(D_INFO, "Testing parity generation...\n");
|
||||
|
||||
for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL;
|
||||
impl_name++) {
|
||||
|
||||
LOG(D_INFO, SEP);
|
||||
LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name);
|
||||
|
||||
if (0 != vdev_raidz_impl_set(*impl_name)) {
|
||||
LOG(D_INFO, "[SKIP]\n");
|
||||
continue;
|
||||
} else {
|
||||
LOG(D_INFO, "[SUPPORTED]\n");
|
||||
}
|
||||
|
||||
for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
|
||||
|
||||
/* create suitable raidz_map */
|
||||
rm_test = init_raidz_map(opts, &zio_test, fn+1);
|
||||
VERIFY(rm_test);
|
||||
|
||||
LOG(D_INFO, "\t\tTesting method [%s] ...",
|
||||
raidz_gen_name[fn]);
|
||||
|
||||
if (!opts->rto_sanity)
|
||||
vdev_raidz_generate_parity(rm_test);
|
||||
|
||||
if (cmp_code(opts, rm_test, fn+1) != 0) {
|
||||
LOG(D_INFO, "[FAIL]\n");
|
||||
err++;
|
||||
} else
|
||||
LOG(D_INFO, "[PASS]\n");
|
||||
|
||||
fini_raidz_map(&zio_test, &rm_test);
|
||||
}
|
||||
}
|
||||
|
||||
fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
static int
|
||||
run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, const int fn)
|
||||
{
|
||||
int x0, x1, x2;
|
||||
int tgtidx[3];
|
||||
int err = 0;
|
||||
static const int rec_tgts[7][3] = {
|
||||
{1, 2, 3}, /* rec_p: bad QR & D[0] */
|
||||
{0, 2, 3}, /* rec_q: bad PR & D[0] */
|
||||
{0, 1, 3}, /* rec_r: bad PQ & D[0] */
|
||||
{2, 3, 4}, /* rec_pq: bad R & D[0][1] */
|
||||
{1, 3, 4}, /* rec_pr: bad Q & D[0][1] */
|
||||
{0, 3, 4}, /* rec_qr: bad P & D[0][1] */
|
||||
{3, 4, 5} /* rec_pqr: bad & D[0][1][2] */
|
||||
};
|
||||
|
||||
memcpy(tgtidx, rec_tgts[fn], sizeof (tgtidx));
|
||||
|
||||
if (fn < RAIDZ_REC_PQ) {
|
||||
/* can reconstruct 1 failed data disk */
|
||||
for (x0 = 0; x0 < opts->rto_dcols; x0++) {
|
||||
if (x0 >= rm->rm_cols - raidz_parity(rm))
|
||||
continue;
|
||||
|
||||
LOG(D_DEBUG, "[%d] ", x0);
|
||||
|
||||
tgtidx[2] = x0 + raidz_parity(rm);
|
||||
|
||||
corrupt_colums(rm, tgtidx+2, 1);
|
||||
|
||||
if (!opts->rto_sanity)
|
||||
vdev_raidz_reconstruct(rm, tgtidx, 3);
|
||||
|
||||
if (cmp_data(opts, rm) != 0) {
|
||||
err++;
|
||||
LOG(D_DEBUG, "\nREC D[%d]... [FAIL]\n", x0);
|
||||
}
|
||||
}
|
||||
|
||||
} else if (fn < RAIDZ_REC_PQR) {
|
||||
/* can reconstruct 2 failed data disk */
|
||||
for (x0 = 0; x0 < opts->rto_dcols; x0++) {
|
||||
if (x0 >= rm->rm_cols - raidz_parity(rm))
|
||||
continue;
|
||||
for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
|
||||
if (x1 >= rm->rm_cols - raidz_parity(rm))
|
||||
continue;
|
||||
|
||||
LOG(D_DEBUG, "[%d %d] ", x0, x1);
|
||||
|
||||
tgtidx[1] = x0 + raidz_parity(rm);
|
||||
tgtidx[2] = x1 + raidz_parity(rm);
|
||||
|
||||
corrupt_colums(rm, tgtidx+1, 2);
|
||||
|
||||
if (!opts->rto_sanity)
|
||||
vdev_raidz_reconstruct(rm, tgtidx, 3);
|
||||
|
||||
if (cmp_data(opts, rm) != 0) {
|
||||
err++;
|
||||
LOG(D_DEBUG, "\nREC D[%d %d]... "
|
||||
"[FAIL]\n", x0, x1);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* can reconstruct 3 failed data disk */
|
||||
for (x0 = 0;
|
||||
x0 < opts->rto_dcols; x0++) {
|
||||
if (x0 >= rm->rm_cols - raidz_parity(rm))
|
||||
continue;
|
||||
for (x1 = x0 + 1;
|
||||
x1 < opts->rto_dcols; x1++) {
|
||||
if (x1 >= rm->rm_cols - raidz_parity(rm))
|
||||
continue;
|
||||
for (x2 = x1 + 1;
|
||||
x2 < opts->rto_dcols; x2++) {
|
||||
if (x2 >=
|
||||
rm->rm_cols - raidz_parity(rm))
|
||||
continue;
|
||||
|
||||
LOG(D_DEBUG, "[%d %d %d]", x0, x1, x2);
|
||||
|
||||
tgtidx[0] = x0 + raidz_parity(rm);
|
||||
tgtidx[1] = x1 + raidz_parity(rm);
|
||||
tgtidx[2] = x2 + raidz_parity(rm);
|
||||
|
||||
corrupt_colums(rm, tgtidx, 3);
|
||||
|
||||
if (!opts->rto_sanity)
|
||||
vdev_raidz_reconstruct(rm,
|
||||
tgtidx, 3);
|
||||
|
||||
if (cmp_data(opts, rm) != 0) {
|
||||
err++;
|
||||
LOG(D_DEBUG,
|
||||
"\nREC D[%d %d %d]... "
|
||||
"[FAIL]\n", x0, x1, x2);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return (err);
|
||||
}
|
||||
|
||||
static int
|
||||
run_rec_check(raidz_test_opts_t *opts)
|
||||
{
|
||||
char **impl_name;
|
||||
unsigned fn, err = 0;
|
||||
zio_t *zio_test;
|
||||
raidz_map_t *rm_test;
|
||||
|
||||
err = init_raidz_golden_map(opts, PARITY_PQR);
|
||||
if (0 != err)
|
||||
return (err);
|
||||
|
||||
LOG(D_INFO, DBLSEP);
|
||||
LOG(D_INFO, "Testing data reconstruction...\n");
|
||||
|
||||
for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL;
|
||||
impl_name++) {
|
||||
|
||||
LOG(D_INFO, SEP);
|
||||
LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name);
|
||||
|
||||
if (vdev_raidz_impl_set(*impl_name) != 0) {
|
||||
LOG(D_INFO, "[SKIP]\n");
|
||||
continue;
|
||||
} else
|
||||
LOG(D_INFO, "[SUPPORTED]\n");
|
||||
|
||||
|
||||
/* create suitable raidz_map */
|
||||
rm_test = init_raidz_map(opts, &zio_test, PARITY_PQR);
|
||||
/* generate parity */
|
||||
vdev_raidz_generate_parity(rm_test);
|
||||
|
||||
for (fn = 0; fn < RAIDZ_REC_NUM; fn++) {
|
||||
|
||||
LOG(D_INFO, "\t\tTesting method [%s] ...",
|
||||
raidz_rec_name[fn]);
|
||||
|
||||
if (run_rec_check_impl(opts, rm_test, fn) != 0) {
|
||||
LOG(D_INFO, "[FAIL]\n");
|
||||
err++;
|
||||
|
||||
} else
|
||||
LOG(D_INFO, "[PASS]\n");
|
||||
|
||||
}
|
||||
/* tear down test raidz_map */
|
||||
fini_raidz_map(&zio_test, &rm_test);
|
||||
}
|
||||
|
||||
fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
static int
|
||||
run_test(raidz_test_opts_t *opts)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (opts == NULL)
|
||||
opts = &rto_opts;
|
||||
|
||||
print_opts(opts, B_FALSE);
|
||||
|
||||
err |= run_gen_check(opts);
|
||||
err |= run_rec_check(opts);
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
#define SWEEP_RUNNING 0
|
||||
#define SWEEP_FINISHED 1
|
||||
#define SWEEP_ERROR 2
|
||||
#define SWEEP_TIMEOUT 3
|
||||
|
||||
static int sweep_state = 0;
|
||||
static raidz_test_opts_t failed_opts;
|
||||
|
||||
static kmutex_t sem_mtx;
|
||||
static kcondvar_t sem_cv;
|
||||
static int max_free_slots;
|
||||
static int free_slots;
|
||||
|
||||
static void
|
||||
sweep_thread(void *arg)
|
||||
{
|
||||
int err = 0;
|
||||
raidz_test_opts_t *opts = (raidz_test_opts_t *) arg;
|
||||
VERIFY(opts != NULL);
|
||||
|
||||
err = run_test(opts);
|
||||
|
||||
if (rto_opts.rto_sanity) {
|
||||
/* 25% chance that a sweep test fails */
|
||||
if (rand() < (RAND_MAX/4))
|
||||
err = 1;
|
||||
}
|
||||
|
||||
if (0 != err) {
|
||||
mutex_enter(&sem_mtx);
|
||||
memcpy(&failed_opts, opts, sizeof (raidz_test_opts_t));
|
||||
sweep_state = SWEEP_ERROR;
|
||||
mutex_exit(&sem_mtx);
|
||||
}
|
||||
|
||||
umem_free(opts, sizeof (raidz_test_opts_t));
|
||||
|
||||
/* signal the next thread */
|
||||
mutex_enter(&sem_mtx);
|
||||
free_slots++;
|
||||
cv_signal(&sem_cv);
|
||||
mutex_exit(&sem_mtx);
|
||||
|
||||
thread_exit();
|
||||
}
|
||||
|
||||
static int
|
||||
run_sweep(void)
|
||||
{
|
||||
static const size_t dcols_v[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
|
||||
static const size_t ashift_v[] = { 9, 12 };
|
||||
static const size_t offset_cnt = 4;
|
||||
static const size_t size_v[] = { 1 << 9, 21 * (1 << 9), 13 * (1 << 12),
|
||||
1 << 17, (1 << 20) - (1 << 12), SPA_MAXBLOCKSIZE };
|
||||
|
||||
(void) setvbuf(stdout, NULL, _IONBF, 0);
|
||||
|
||||
ulong_t total_comb = ARRAY_SIZE(size_v) * ARRAY_SIZE(ashift_v) *
|
||||
ARRAY_SIZE(dcols_v) * offset_cnt;
|
||||
ulong_t tried_comb = 0;
|
||||
hrtime_t time_diff, start_time = gethrtime();
|
||||
raidz_test_opts_t *opts;
|
||||
int a, d, o, s;
|
||||
|
||||
max_free_slots = free_slots = MAX(2, boot_ncpus);
|
||||
|
||||
mutex_init(&sem_mtx, NULL, MUTEX_DEFAULT, NULL);
|
||||
cv_init(&sem_cv, NULL, CV_DEFAULT, NULL);
|
||||
|
||||
for (s = 0; s < ARRAY_SIZE(size_v); s++)
|
||||
for (a = 0; a < ARRAY_SIZE(ashift_v); a++)
|
||||
for (o = 0; o < offset_cnt; o++)
|
||||
for (d = 0; d < ARRAY_SIZE(dcols_v); d++) {
|
||||
|
||||
if ((size_v[s] < (1 << ashift_v[a]) * o) ||
|
||||
(size_v[s] < (1 << ashift_v[a]) * dcols_v[d])) {
|
||||
total_comb--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (++tried_comb % 20 == 0)
|
||||
LOG(D_ALL, "%lu/%lu... ", tried_comb, total_comb);
|
||||
|
||||
/* wait for signal to start new thread */
|
||||
mutex_enter(&sem_mtx);
|
||||
while (cv_timedwait_sig(&sem_cv, &sem_mtx,
|
||||
ddi_get_lbolt() + hz)) {
|
||||
|
||||
/* check if should stop the test (timeout) */
|
||||
time_diff = (gethrtime() - start_time) / NANOSEC;
|
||||
if (rto_opts.rto_sweep_timeout > 0 &&
|
||||
time_diff >= rto_opts.rto_sweep_timeout) {
|
||||
sweep_state = SWEEP_TIMEOUT;
|
||||
mutex_exit(&sem_mtx);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/* check if should stop the test (error) */
|
||||
if (sweep_state != SWEEP_RUNNING) {
|
||||
mutex_exit(&sem_mtx);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/* exit loop if a slot is available */
|
||||
if (free_slots > 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
free_slots--;
|
||||
mutex_exit(&sem_mtx);
|
||||
|
||||
opts = umem_zalloc(sizeof (raidz_test_opts_t), UMEM_NOFAIL);
|
||||
opts->rto_ashift = ashift_v[a];
|
||||
opts->rto_dcols = dcols_v[d];
|
||||
opts->rto_offset = (1 << ashift_v[a]) * o;
|
||||
opts->rto_dsize = size_v[s];
|
||||
opts->rto_v = 0; /* be quiet */
|
||||
|
||||
VERIFY3P(zk_thread_create(NULL, 0,
|
||||
(thread_func_t) sweep_thread,
|
||||
(void *) opts, TS_RUN, NULL, 0, 0,
|
||||
PTHREAD_CREATE_JOINABLE), !=, NULL);
|
||||
}
|
||||
|
||||
exit:
|
||||
LOG(D_ALL, "\nWaiting for test threads to finish...\n");
|
||||
mutex_enter(&sem_mtx);
|
||||
VERIFY(free_slots <= max_free_slots);
|
||||
while (free_slots < max_free_slots) {
|
||||
(void) cv_wait(&sem_cv, &sem_mtx);
|
||||
}
|
||||
mutex_exit(&sem_mtx);
|
||||
|
||||
if (sweep_state == SWEEP_ERROR) {
|
||||
ERR("Sweep test failed! Failed option: \n");
|
||||
print_opts(&failed_opts, B_TRUE);
|
||||
} else {
|
||||
if (sweep_state == SWEEP_TIMEOUT)
|
||||
LOG(D_ALL, "Test timeout (%lus). Stopping...\n",
|
||||
(ulong_t)rto_opts.rto_sweep_timeout);
|
||||
|
||||
LOG(D_ALL, "Sweep test succeeded on %lu raidz maps!\n",
|
||||
(ulong_t)tried_comb);
|
||||
}
|
||||
|
||||
return (sweep_state == SWEEP_ERROR ? SWEEP_ERROR : 0);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
size_t i;
|
||||
struct sigaction action;
|
||||
int err = 0;
|
||||
|
||||
/* init gdb string early */
|
||||
(void) sprintf(gdb, gdb_tmpl, getpid());
|
||||
|
||||
action.sa_handler = sig_handler;
|
||||
sigemptyset(&action.sa_mask);
|
||||
action.sa_flags = 0;
|
||||
|
||||
if (sigaction(SIGSEGV, &action, NULL) < 0) {
|
||||
ERR("raidz_test: cannot catch SIGSEGV: %s.\n", strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
(void) setvbuf(stdout, NULL, _IOLBF, 0);
|
||||
|
||||
dprintf_setup(&argc, argv);
|
||||
|
||||
process_options(argc, argv);
|
||||
|
||||
kernel_init(FREAD);
|
||||
|
||||
/* setup random data because rand() is not reentrant */
|
||||
rand_data = (int *) umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
|
||||
srand((unsigned)time(NULL) * getpid());
|
||||
for (i = 0; i < SPA_MAXBLOCKSIZE / sizeof (int); i++)
|
||||
rand_data[i] = rand();
|
||||
|
||||
mprotect(rand_data, SPA_MAXBLOCKSIZE, PROT_READ);
|
||||
|
||||
if (rto_opts.rto_benchmark) {
|
||||
run_raidz_benchmark();
|
||||
} else if (rto_opts.rto_sweep) {
|
||||
err = run_sweep();
|
||||
} else {
|
||||
err = run_test(NULL);
|
||||
}
|
||||
|
||||
umem_free(rand_data, SPA_MAXBLOCKSIZE);
|
||||
kernel_fini();
|
||||
|
||||
return (err);
|
||||
}
|
|
@ -0,0 +1,106 @@
|
|||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 2016 Gvozden Nešković. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef RAIDZ_TEST_H
|
||||
#define RAIDZ_TEST_H
|
||||
|
||||
#include <sys/spa.h>
|
||||
|
||||
static const char *raidz_impl_names[] = {
|
||||
"original",
|
||||
"scalar",
|
||||
"sse",
|
||||
"avx2",
|
||||
NULL
|
||||
};
|
||||
|
||||
typedef struct raidz_test_opts {
|
||||
size_t rto_ashift;
|
||||
size_t rto_offset;
|
||||
size_t rto_dcols;
|
||||
size_t rto_dsize;
|
||||
size_t rto_v;
|
||||
size_t rto_sweep;
|
||||
size_t rto_sweep_timeout;
|
||||
size_t rto_benchmark;
|
||||
size_t rto_sanity;
|
||||
size_t rto_gdb;
|
||||
|
||||
zio_t *zio_golden;
|
||||
raidz_map_t *rm_golden;
|
||||
} raidz_test_opts_t;
|
||||
|
||||
static const raidz_test_opts_t rto_opts_defaults = {
|
||||
.rto_ashift = 9,
|
||||
.rto_offset = 1ULL << 0,
|
||||
.rto_dcols = 8,
|
||||
.rto_dsize = 1<<19,
|
||||
.rto_v = 0,
|
||||
.rto_sweep = 0,
|
||||
.rto_benchmark = 0,
|
||||
.rto_sanity = 0,
|
||||
.rto_gdb = 0
|
||||
};
|
||||
|
||||
extern raidz_test_opts_t rto_opts;
|
||||
|
||||
/*
 * Integer base-2 logarithm, rounded down.
 *
 * Returns the number of times a can be halved before it drops to 1;
 * both 0 and 1 yield 0.
 */
static inline size_t
ilog2(size_t a)
{
	size_t shifts = 0;

	while (a > 1) {
		a >>= 1;
		shifts++;
	}

	return (shifts);
}
|
||||
|
||||
|
||||
#define D_ALL 0
|
||||
#define D_INFO 1
|
||||
#define D_DEBUG 2
|
||||
|
||||
/*
 * Print a printf-style message to stdout when the global verbosity
 * (rto_opts.rto_v) is at least lvl.
 *
 * The body is wrapped in do { } while (0) so the macro expands to a single
 * statement: "if (x) LOG(...); else ..." compiles and binds as written.
 * Callers must terminate the invocation with a semicolon.
 */
#define	LOG(lvl, a...)						\
	do {							\
		if (rto_opts.rto_v >= (lvl))			\
			(void) fprintf(stdout, a);		\
	} while (0)
|
||||
|
||||
/*
 * Same as LOG(), but the verbosity is read from the options structure
 * pointed to by opt instead of the global rto_opts.
 *
 * opt and lvl are parenthesized so any pointer/level expression may be
 * passed (e.g. &local_opts), and the do { } while (0) wrapper makes the
 * expansion a single statement that is safe in unbraced if/else bodies.
 * Callers must terminate the invocation with a semicolon.
 */
#define	LOG_OPT(lvl, opt, a...)					\
	do {							\
		if ((opt)->rto_v >= (lvl))			\
			(void) fprintf(stdout, a);		\
	} while (0)
|
||||
|
||||
#define ERR(a...) (void) fprintf(stderr, a)
|
||||
|
||||
|
||||
#define DBLSEP "================\n"
|
||||
#define SEP "----------------\n"
|
||||
|
||||
|
||||
#define raidz_alloc(size) zio_data_buf_alloc(size)
|
||||
#define raidz_free(p, size) zio_data_buf_free(p, size)
|
||||
|
||||
|
||||
void init_zio_data(zio_t *zio);
|
||||
|
||||
void run_raidz_benchmark(void);
|
||||
|
||||
#endif /* RAIDZ_TEST_H */
|
|
@ -111,6 +111,7 @@ AC_CONFIG_FILES([
|
|||
cmd/dbufstat/Makefile
|
||||
cmd/arc_summary/Makefile
|
||||
cmd/zed/Makefile
|
||||
cmd/raidz_test/Makefile
|
||||
contrib/Makefile
|
||||
contrib/bash_completion.d/Makefile
|
||||
contrib/dracut/Makefile
|
||||
|
@ -250,6 +251,7 @@ AC_CONFIG_FILES([
|
|||
tests/zfs-tests/tests/functional/poolversion/Makefile
|
||||
tests/zfs-tests/tests/functional/privilege/Makefile
|
||||
tests/zfs-tests/tests/functional/quota/Makefile
|
||||
tests/zfs-tests/tests/functional/raidz/Makefile
|
||||
tests/zfs-tests/tests/functional/redundancy/Makefile
|
||||
tests/zfs-tests/tests/functional/refquota/Makefile
|
||||
tests/zfs-tests/tests/functional/refreserv/Makefile
|
||||
|
|
|
@ -78,6 +78,8 @@ COMMON_H = \
|
|||
$(top_srcdir)/include/sys/vdev_file.h \
|
||||
$(top_srcdir)/include/sys/vdev.h \
|
||||
$(top_srcdir)/include/sys/vdev_impl.h \
|
||||
$(top_srcdir)/include/sys/vdev_raidz.h \
|
||||
$(top_srcdir)/include/sys/vdev_raidz_impl.h \
|
||||
$(top_srcdir)/include/sys/xvattr.h \
|
||||
$(top_srcdir)/include/sys/zap.h \
|
||||
$(top_srcdir)/include/sys/zap_impl.h \
|
||||
|
|
|
@ -0,0 +1,64 @@
|
|||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (C) 2016 Gvozden Neskovic <neskovic@compeng.uni-frankfurt.de>.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_VDEV_RAIDZ_H
|
||||
#define _SYS_VDEV_RAIDZ_H
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct zio;
|
||||
struct raidz_map;
|
||||
#if !defined(_KERNEL)
|
||||
struct kernel_param {};
|
||||
#endif
|
||||
|
||||
/*
|
||||
* vdev_raidz interface
|
||||
*/
|
||||
struct raidz_map * vdev_raidz_map_alloc(struct zio *, uint64_t, uint64_t,
|
||||
uint64_t);
|
||||
void vdev_raidz_map_free(struct raidz_map *);
|
||||
void vdev_raidz_generate_parity(struct raidz_map *);
|
||||
int vdev_raidz_reconstruct(struct raidz_map *, const int *, int);
|
||||
|
||||
/*
|
||||
* vdev_raidz_math interface
|
||||
*/
|
||||
void vdev_raidz_math_init(void);
|
||||
void vdev_raidz_math_fini(void);
|
||||
void vdev_raidz_math_get_ops(struct raidz_map *);
|
||||
void vdev_raidz_math_generate(struct raidz_map *);
|
||||
int vdev_raidz_math_reconstruct(struct raidz_map *, const int *,
|
||||
const int *, const int);
|
||||
int vdev_raidz_impl_set(const char *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_VDEV_RAIDZ_H */
|
|
@ -0,0 +1,344 @@
|
|||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (C) 2016 Gvozden Nešković. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _VDEV_RAIDZ_H
|
||||
#define _VDEV_RAIDZ_H
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/debug.h>
|
||||
#include <sys/kstat.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define CODE_P (0U)
|
||||
#define CODE_Q (1U)
|
||||
#define CODE_R (2U)
|
||||
|
||||
#define PARITY_P (1U)
|
||||
#define PARITY_PQ (2U)
|
||||
#define PARITY_PQR (3U)
|
||||
|
||||
#define TARGET_X (0U)
|
||||
#define TARGET_Y (1U)
|
||||
#define TARGET_Z (2U)
|
||||
|
||||
/*
|
||||
* Parity generation methods indexes
|
||||
*/
|
||||
enum raidz_math_gen_op {
|
||||
RAIDZ_GEN_P = 0,
|
||||
RAIDZ_GEN_PQ,
|
||||
RAIDZ_GEN_PQR,
|
||||
RAIDZ_GEN_NUM = 3
|
||||
};
|
||||
/*
|
||||
* Data reconstruction methods indexes
|
||||
*/
|
||||
enum raidz_rec_op {
|
||||
RAIDZ_REC_P = 0,
|
||||
RAIDZ_REC_Q,
|
||||
RAIDZ_REC_R,
|
||||
RAIDZ_REC_PQ,
|
||||
RAIDZ_REC_PR,
|
||||
RAIDZ_REC_QR,
|
||||
RAIDZ_REC_PQR,
|
||||
RAIDZ_REC_NUM = 7
|
||||
};
|
||||
|
||||
extern const char *raidz_gen_name[RAIDZ_GEN_NUM];
|
||||
extern const char *raidz_rec_name[RAIDZ_REC_NUM];
|
||||
|
||||
/*
|
||||
* Methods used to define raidz implementation
|
||||
*
|
||||
* @raidz_gen_f Parity generation function
|
||||
* @par1 pointer to raidz_map
|
||||
* @raidz_rec_f Data reconstruction function
|
||||
* @par1 pointer to raidz_map
|
||||
* @par2 array of reconstruction targets
|
||||
* @will_work_f Function returns TRUE if impl. is supported on the system
|
||||
* @init_impl_f Function is called once on init
|
||||
* @fini_impl_f Function is called once on fini
|
||||
*/
|
||||
typedef void (*raidz_gen_f)(void *);
|
||||
typedef int (*raidz_rec_f)(void *, const int *);
|
||||
typedef boolean_t (*will_work_f)(void);
|
||||
typedef void (*init_impl_f)(void);
|
||||
typedef void (*fini_impl_f)(void);
|
||||
|
||||
typedef struct raidz_impl_ops {
|
||||
init_impl_f init;
|
||||
fini_impl_f fini;
|
||||
raidz_gen_f gen[RAIDZ_GEN_NUM]; /* Parity generate functions */
|
||||
raidz_rec_f rec[RAIDZ_REC_NUM]; /* Data reconstruction functions */
|
||||
will_work_f is_supported; /* Support check function */
|
||||
char *name; /* Name of the implementation */
|
||||
} raidz_impl_ops_t;
|
||||
|
||||
typedef struct raidz_col {
|
||||
size_t rc_devidx; /* child device index for I/O */
|
||||
size_t rc_offset; /* device offset */
|
||||
size_t rc_size; /* I/O size */
|
||||
void *rc_data; /* I/O data */
|
||||
void *rc_gdata; /* used to store the "good" version */
|
||||
int rc_error; /* I/O error for this device */
|
||||
unsigned int rc_tried; /* Did we attempt this I/O column? */
|
||||
unsigned int rc_skipped; /* Did we skip this I/O column? */
|
||||
} raidz_col_t;
|
||||
|
||||
typedef struct raidz_map {
|
||||
size_t rm_cols; /* Regular column count */
|
||||
size_t rm_scols; /* Count including skipped columns */
|
||||
size_t rm_bigcols; /* Number of oversized columns */
|
||||
size_t rm_asize; /* Actual total I/O size */
|
||||
size_t rm_missingdata; /* Count of missing data devices */
|
||||
size_t rm_missingparity; /* Count of missing parity devices */
|
||||
size_t rm_firstdatacol; /* First data column/parity count */
|
||||
size_t rm_nskip; /* Skipped sectors for padding */
|
||||
size_t rm_skipstart; /* Column index of padding start */
|
||||
void *rm_datacopy; /* rm_asize-buffer of copied data */
|
||||
size_t rm_reports; /* # of referencing checksum reports */
|
||||
unsigned int rm_freed; /* map no longer has referencing ZIO */
|
||||
unsigned int rm_ecksuminjected; /* checksum error was injected */
|
||||
raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */
|
||||
raidz_col_t rm_col[1]; /* Flexible array of I/O columns */
|
||||
} raidz_map_t;
|
||||
|
||||
/*
|
||||
* Commonly used raidz_map helpers
|
||||
*
|
||||
* raidz_parity Returns parity of the RAIDZ block
|
||||
* raidz_ncols Returns number of columns the block spans
|
||||
* raidz_nbigcols Returns number of big columns
|
||||
* raidz_col_p Returns pointer to a column
|
||||
* raidz_col_size Returns size of a column
|
||||
* raidz_big_size Returns size of big columns
|
||||
* raidz_short_size Returns size of short columns
|
||||
*/
|
||||
#define raidz_parity(rm) ((rm)->rm_firstdatacol)
|
||||
#define raidz_ncols(rm) ((rm)->rm_cols)
|
||||
#define raidz_nbigcols(rm) ((rm)->rm_bigcols)
|
||||
#define raidz_col_p(rm, c) ((rm)->rm_col + (c))
|
||||
#define raidz_col_size(rm, c) ((rm)->rm_col[c].rc_size)
|
||||
#define raidz_big_size(rm) (raidz_col_size(rm, CODE_P))
|
||||
#define raidz_short_size(rm) (raidz_col_size(rm, raidz_ncols(rm)-1))
|
||||
|
||||
/*
|
||||
* Macro defines an RAIDZ parity generation method
|
||||
*
|
||||
* @code parity the function produces (p, pq or pqr)
|
||||
* @impl name of the implementation
|
||||
*/
|
||||
#define _RAIDZ_GEN_WRAP(code, impl) \
|
||||
static void \
|
||||
impl ## _gen_ ## code(void *rmp) \
|
||||
{ \
|
||||
raidz_map_t *rm = (raidz_map_t *) rmp; \
|
||||
raidz_generate_## code ## _impl(rm); \
|
||||
}
|
||||
|
||||
/*
|
||||
* Macro defines an RAIDZ data reconstruction method
|
||||
*
|
||||
* @code parity code selecting the reconstruction function (p, q, r, pq, pr, qr or pqr)
|
||||
* @impl name of the implementation
|
||||
*/
|
||||
#define _RAIDZ_REC_WRAP(code, impl) \
|
||||
static int \
|
||||
impl ## _rec_ ## code(void *rmp, const int *tgtidx) \
|
||||
{ \
|
||||
raidz_map_t *rm = (raidz_map_t *) rmp; \
|
||||
return (raidz_reconstruct_## code ## _impl(rm, tgtidx)); \
|
||||
}
|
||||
|
||||
/*
|
||||
* Define all gen methods for an implementation
|
||||
*
|
||||
* @impl name of the implementation
|
||||
*/
|
||||
#define DEFINE_GEN_METHODS(impl) \
|
||||
_RAIDZ_GEN_WRAP(p, impl); \
|
||||
_RAIDZ_GEN_WRAP(pq, impl); \
|
||||
_RAIDZ_GEN_WRAP(pqr, impl)
|
||||
|
||||
/*
|
||||
* Define all rec functions for an implementation
|
||||
*
|
||||
* @impl name of the implementation
|
||||
*/
|
||||
#define DEFINE_REC_METHODS(impl) \
|
||||
_RAIDZ_REC_WRAP(p, impl); \
|
||||
_RAIDZ_REC_WRAP(q, impl); \
|
||||
_RAIDZ_REC_WRAP(r, impl); \
|
||||
_RAIDZ_REC_WRAP(pq, impl); \
|
||||
_RAIDZ_REC_WRAP(pr, impl); \
|
||||
_RAIDZ_REC_WRAP(qr, impl); \
|
||||
_RAIDZ_REC_WRAP(pqr, impl)
|
||||
|
||||
#define RAIDZ_GEN_METHODS(impl) \
|
||||
{ \
|
||||
[RAIDZ_GEN_P] = & impl ## _gen_p, \
|
||||
[RAIDZ_GEN_PQ] = & impl ## _gen_pq, \
|
||||
[RAIDZ_GEN_PQR] = & impl ## _gen_pqr \
|
||||
}
|
||||
|
||||
#define RAIDZ_REC_METHODS(impl) \
|
||||
{ \
|
||||
[RAIDZ_REC_P] = & impl ## _rec_p, \
|
||||
[RAIDZ_REC_Q] = & impl ## _rec_q, \
|
||||
[RAIDZ_REC_R] = & impl ## _rec_r, \
|
||||
[RAIDZ_REC_PQ] = & impl ## _rec_pq, \
|
||||
[RAIDZ_REC_PR] = & impl ## _rec_pr, \
|
||||
[RAIDZ_REC_QR] = & impl ## _rec_qr, \
|
||||
[RAIDZ_REC_PQR] = & impl ## _rec_pqr \
|
||||
}
|
||||
|
||||
|
||||
typedef struct raidz_impl_kstat {
|
||||
kstat_named_t gen[RAIDZ_GEN_NUM]; /* gen method speed kiB/s */
|
||||
kstat_named_t rec[RAIDZ_REC_NUM]; /* rec method speed kiB/s */
|
||||
} raidz_impl_kstat_t;
|
||||
|
||||
/*
|
||||
* Enumerate various multiplication constants
|
||||
* used in reconstruction methods
|
||||
*/
|
||||
typedef enum raidz_mul_info {
|
||||
/* Reconstruct Q */
|
||||
MUL_Q_X = 0,
|
||||
/* Reconstruct R */
|
||||
MUL_R_X = 0,
|
||||
/* Reconstruct PQ */
|
||||
MUL_PQ_X = 0,
|
||||
MUL_PQ_Y = 1,
|
||||
/* Reconstruct PR */
|
||||
MUL_PR_X = 0,
|
||||
MUL_PR_Y = 1,
|
||||
/* Reconstruct QR */
|
||||
MUL_QR_XQ = 0,
|
||||
MUL_QR_X = 1,
|
||||
MUL_QR_YQ = 2,
|
||||
MUL_QR_Y = 3,
|
||||
/* Reconstruct PQR */
|
||||
MUL_PQR_XP = 0,
|
||||
MUL_PQR_XQ = 1,
|
||||
MUL_PQR_XR = 2,
|
||||
MUL_PQR_YU = 3,
|
||||
MUL_PQR_YP = 4,
|
||||
MUL_PQR_YQ = 5,
|
||||
|
||||
MUL_CNT = 6
|
||||
} raidz_mul_info_t;
|
||||
|
||||
/*
|
||||
* Powers of 2 in the Galois field.
|
||||
*/
|
||||
extern const uint8_t vdev_raidz_pow2[256] __attribute__((aligned(256)));
|
||||
/* Logs of 2 in the Galois field defined above. */
|
||||
extern const uint8_t vdev_raidz_log2[256] __attribute__((aligned(256)));
|
||||
|
||||
/*
|
||||
* Multiply a given number by 2 raised to the given power.
|
||||
*/
|
||||
static inline uint8_t
|
||||
vdev_raidz_exp2(const uint8_t a, const unsigned exp)
|
||||
{
|
||||
if (a == 0)
|
||||
return (0);
|
||||
|
||||
return (vdev_raidz_pow2[(exp + (unsigned) vdev_raidz_log2[a]) % 255]);
|
||||
}
|
||||
|
||||
/*
|
||||
* Galois Field operations.
|
||||
*
|
||||
* gf_exp2 - computes 2 raised to the given power
|
||||
* gf_exp4 - computes 4 raised to the given power
|
||||
* gf_mul - multiplication
|
||||
* gf_div - division
|
||||
* gf_inv - multiplicative inverse
|
||||
*/
|
||||
typedef unsigned gf_t;
|
||||
typedef unsigned gf_log_t;
|
||||
|
||||
static inline gf_t
|
||||
gf_mul(const gf_t a, const gf_t b)
|
||||
{
|
||||
gf_log_t logsum;
|
||||
|
||||
if (a == 0 || b == 0)
|
||||
return (0);
|
||||
|
||||
logsum = (gf_log_t) vdev_raidz_log2[a] + (gf_log_t) vdev_raidz_log2[b];
|
||||
|
||||
return ((gf_t) vdev_raidz_pow2[logsum % 255]);
|
||||
}
|
||||
|
||||
static inline gf_t
|
||||
gf_div(const gf_t a, const gf_t b)
|
||||
{
|
||||
gf_log_t logsum;
|
||||
|
||||
ASSERT3U(b, >, 0);
|
||||
if (a == 0)
|
||||
return (0);
|
||||
|
||||
logsum = (gf_log_t) 255 + (gf_log_t) vdev_raidz_log2[a] -
|
||||
(gf_log_t) vdev_raidz_log2[b];
|
||||
|
||||
return ((gf_t) vdev_raidz_pow2[logsum % 255]);
|
||||
}
|
||||
|
||||
static inline gf_t
|
||||
gf_inv(const gf_t a)
|
||||
{
|
||||
gf_log_t logsum;
|
||||
|
||||
ASSERT3U(a, >, 0);
|
||||
|
||||
logsum = (gf_log_t) 255 - (gf_log_t) vdev_raidz_log2[a];
|
||||
|
||||
return ((gf_t) vdev_raidz_pow2[logsum]);
|
||||
}
|
||||
|
||||
static inline gf_t
|
||||
gf_exp2(gf_log_t exp)
|
||||
{
|
||||
return (vdev_raidz_pow2[exp % 255]);
|
||||
}
|
||||
|
||||
static inline gf_t
|
||||
gf_exp4(gf_log_t exp)
|
||||
{
|
||||
ASSERT3U(exp, <=, 255);
|
||||
return ((gf_t) vdev_raidz_pow2[(2 * exp) % 255]);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _VDEV_RAIDZ_H */
|
|
@ -92,6 +92,10 @@ KERNEL_C = \
|
|||
vdev_missing.c \
|
||||
vdev_queue.c \
|
||||
vdev_raidz.c \
|
||||
vdev_raidz_math.c \
|
||||
vdev_raidz_math_scalar.c \
|
||||
vdev_raidz_math_sse.c \
|
||||
vdev_raidz_math_avx2.c \
|
||||
vdev_root.c \
|
||||
zap.c \
|
||||
zap_leaf.c \
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
dist_man_MANS = zhack.1 zpios.1 ztest.1
|
||||
dist_man_MANS = zhack.1 zpios.1 ztest.1 raidz_test.1
|
||||
EXTRA_DIST = cstyle.1
|
||||
|
||||
install-data-local:
|
||||
|
|
|
@ -0,0 +1,97 @@
|
|||
'\" t
|
||||
.\"
|
||||
.\" CDDL HEADER START
|
||||
.\"
|
||||
.\" The contents of this file are subject to the terms of the
|
||||
.\" Common Development and Distribution License (the "License").
|
||||
.\" You may not use this file except in compliance with the License.
|
||||
.\"
|
||||
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
.\" or http://www.opensolaris.org/os/licensing.
|
||||
.\" See the License for the specific language governing permissions
|
||||
.\" and limitations under the License.
|
||||
.\"
|
||||
.\" When distributing Covered Code, include this CDDL HEADER in each
|
||||
.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
.\" If applicable, add the following below this CDDL HEADER, with the
|
||||
.\" fields enclosed by brackets "[]" replaced with your own identifying
|
||||
.\" information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
.\"
|
||||
.\" CDDL HEADER END
|
||||
.\"
|
||||
.\"
|
||||
.\" Copyright (c) 2016 Gvozden Nešković. All rights reserved.
|
||||
.\"
|
||||
.TH raidz_test 1 "2016" "ZFS on Linux" "User Commands"
|
||||
|
||||
.SH NAME
|
||||
\fBraidz_test\fR \- raidz implementation verification and benchmarking tool
|
||||
.SH SYNOPSIS
|
||||
.LP
|
||||
.BI "raidz_test <options>"
|
||||
.SH DESCRIPTION
|
||||
.LP
|
||||
This manual page documents briefly the \fBraidz_test\fR command.
|
||||
.LP
|
||||
Purpose of this tool is to run all supported raidz implementations and verify
|
||||
results of all methods. Tool also contains a parameter sweep option where all
|
||||
parameters affecting RAIDZ block are verified (like ashift size, data offset,
|
||||
data size, etc...).
|
||||
The tool also supports a benchmarking mode using -B option.
|
||||
.SH OPTIONS
|
||||
.HP
|
||||
.BI "\-h" ""
|
||||
.IP
|
||||
Print a help summary.
|
||||
.HP
|
||||
.BI "\-a" " ashift (default: 9)"
|
||||
.IP
|
||||
Ashift value.
|
||||
.HP
|
||||
.BI "\-o" " zio_off_shift" " (default: 0)"
|
||||
.IP
|
||||
Zio offset for raidz block. Offset value is 1 << (zio_off_shift)
|
||||
.HP
|
||||
.BI "\-d" " raidz_data_disks" " (default: 8)"
|
||||
.IP
|
||||
Number of raidz data disks to use. Additional disks for parity will be used
|
||||
during testing.
|
||||
.HP
|
||||
.BI "\-s" " zio_size_shift" " (default: 19)"
|
||||
.IP
|
||||
Size of data for raidz block. Size is 1 << (zio_size_shift).
|
||||
.HP
|
||||
.BI "\-S(weep)"
|
||||
.IP
|
||||
Sweep parameter space while verifying the raidz implementations. This option
|
||||
will exhaust most of the valid values for the -a -o -d -s options. Runtime using
|
||||
this option will be long.
|
||||
.HP
|
||||
.BI "\-t(imeout)"
|
||||
.IP
|
||||
Wall time for sweep test in seconds. The actual runtime could be longer.
|
||||
.HP
|
||||
.BI "\-B(enchmark)"
|
||||
.IP
|
||||
This option starts the benchmark mode. All implementations are benchmarked
|
||||
using increasing per disk data size. Results are given as throughput per disk,
|
||||
measured in MiB/s.
|
||||
.HP
|
||||
.BI "\-v(erbose)"
|
||||
.IP
|
||||
Increase verbosity.
|
||||
.HP
|
||||
.BI "\-T(est the test)"
|
||||
.IP
|
||||
Debugging option. When this option is specified, the tool is supposed to fail
|
||||
all tests. This is to check if tests would properly verify bit-exactness.
|
||||
.HP
|
||||
.BI "\-D(ebug)"
|
||||
.IP
|
||||
Debugging option. Specify to attach gdb when SIGSEGV or SIGABRT are received.
|
||||
.HP
|
||||
|
||||
.SH "SEE ALSO"
|
||||
.BR "ztest (1)"
|
||||
.SH "AUTHORS"
|
||||
vdev_raidz, created for ZFS on Linux by Gvozden Nešković <neskovic@gmail.com>
|
|
@ -1674,6 +1674,33 @@ Aggregate write I/O over gap
|
|||
Default value: \fB4,096\fR.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fBzfs_vdev_raidz_impl\fR (string)
|
||||
.ad
|
||||
.RS 12n
|
||||
Parameter for selecting raidz implementation to use.
|
||||
|
||||
Options marked (always) below may be selected on module load as they are
|
||||
supported on all systems.
|
||||
The remaining options may only be set after the module is loaded, as they
|
||||
are available only if the implementations are compiled in and supported
|
||||
on the running system.
|
||||
|
||||
Once the module is loaded, the content of
|
||||
/sys/module/zfs/parameters/zfs_vdev_raidz_impl will show available options
|
||||
with the currently selected one enclosed in [].
|
||||
Possible options are:
|
||||
fastest - (always) implementation selected using built-in benchmark
|
||||
original - (always) original raidz implementation
|
||||
scalar - (always) scalar raidz implementation
|
||||
sse - implementation using SSE instruction set (64bit x86 only)
|
||||
avx2 - implementation using AVX2 instruction set (64bit x86 only)
|
||||
.sp
|
||||
Default value: \fBfastest\fR.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
|
|
|
@ -71,6 +71,8 @@ $(MODULE)-objs += vdev_mirror.o
|
|||
$(MODULE)-objs += vdev_missing.o
|
||||
$(MODULE)-objs += vdev_queue.o
|
||||
$(MODULE)-objs += vdev_raidz.o
|
||||
$(MODULE)-objs += vdev_raidz_math.o
|
||||
$(MODULE)-objs += vdev_raidz_math_scalar.o
|
||||
$(MODULE)-objs += vdev_root.o
|
||||
$(MODULE)-objs += zap.o
|
||||
$(MODULE)-objs += zap_leaf.o
|
||||
|
@ -109,3 +111,6 @@ $(MODULE)-objs += zrlock.o
|
|||
$(MODULE)-objs += zvol.o
|
||||
$(MODULE)-objs += dsl_destroy.o
|
||||
$(MODULE)-objs += dsl_userhold.o
|
||||
|
||||
$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_sse.o
|
||||
$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_avx2.o
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
#include <sys/zil.h>
|
||||
#include <sys/vdev_impl.h>
|
||||
#include <sys/vdev_file.h>
|
||||
#include <sys/vdev_raidz.h>
|
||||
#include <sys/metaslab.h>
|
||||
#include <sys/uberblock_impl.h>
|
||||
#include <sys/txg.h>
|
||||
|
@ -1831,6 +1832,7 @@ spa_init(int mode)
|
|||
dmu_init();
|
||||
zil_init();
|
||||
vdev_cache_stat_init();
|
||||
vdev_raidz_math_init();
|
||||
zfs_prop_init();
|
||||
zpool_prop_init();
|
||||
zpool_feature_init();
|
||||
|
@ -1846,6 +1848,7 @@ spa_fini(void)
|
|||
spa_evict_all();
|
||||
|
||||
vdev_cache_stat_fini();
|
||||
vdev_raidz_math_fini();
|
||||
zil_fini();
|
||||
dmu_fini();
|
||||
zio_fini();
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2016 Gvozden Nešković. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
|
@ -31,6 +32,8 @@
|
|||
#include <sys/zio_checksum.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/fm/fs/zfs.h>
|
||||
#include <sys/vdev_raidz.h>
|
||||
#include <sys/vdev_raidz_impl.h>
|
||||
|
||||
/*
|
||||
* Virtual device vector for RAID-Z.
|
||||
|
@ -99,34 +102,6 @@
|
|||
* or in concert to recover missing data columns.
|
||||
*/
|
||||
|
||||
typedef struct raidz_col {
|
||||
uint64_t rc_devidx; /* child device index for I/O */
|
||||
uint64_t rc_offset; /* device offset */
|
||||
uint64_t rc_size; /* I/O size */
|
||||
void *rc_data; /* I/O data */
|
||||
void *rc_gdata; /* used to store the "good" version */
|
||||
int rc_error; /* I/O error for this device */
|
||||
uint8_t rc_tried; /* Did we attempt this I/O column? */
|
||||
uint8_t rc_skipped; /* Did we skip this I/O column? */
|
||||
} raidz_col_t;
|
||||
|
||||
typedef struct raidz_map {
|
||||
uint64_t rm_cols; /* Regular column count */
|
||||
uint64_t rm_scols; /* Count including skipped columns */
|
||||
uint64_t rm_bigcols; /* Number of oversized columns */
|
||||
uint64_t rm_asize; /* Actual total I/O size */
|
||||
uint64_t rm_missingdata; /* Count of missing data devices */
|
||||
uint64_t rm_missingparity; /* Count of missing parity devices */
|
||||
uint64_t rm_firstdatacol; /* First data column/parity count */
|
||||
uint64_t rm_nskip; /* Skipped sectors for padding */
|
||||
uint64_t rm_skipstart; /* Column index of padding start */
|
||||
void *rm_datacopy; /* rm_asize-buffer of copied data */
|
||||
uintptr_t rm_reports; /* # of referencing checksum reports */
|
||||
uint8_t rm_freed; /* map no longer has referencing ZIO */
|
||||
uint8_t rm_ecksuminjected; /* checksum error was injected */
|
||||
raidz_col_t rm_col[1]; /* Flexible array of I/O columns */
|
||||
} raidz_map_t;
|
||||
|
||||
#define VDEV_RAIDZ_P 0
|
||||
#define VDEV_RAIDZ_Q 1
|
||||
#define VDEV_RAIDZ_R 2
|
||||
|
@ -154,104 +129,7 @@ typedef struct raidz_map {
|
|||
VDEV_RAIDZ_64MUL_2((x), mask); \
|
||||
}
|
||||
|
||||
/*
|
||||
* Force reconstruction to use the general purpose method.
|
||||
*/
|
||||
int vdev_raidz_default_to_general;
|
||||
|
||||
/* Powers of 2 in the Galois field defined above. */
|
||||
static const uint8_t vdev_raidz_pow2[256] = {
|
||||
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
|
||||
0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
|
||||
0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
|
||||
0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
|
||||
0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
|
||||
0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
|
||||
0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
|
||||
0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
|
||||
0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
|
||||
0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
|
||||
0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
|
||||
0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
|
||||
0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
|
||||
0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
|
||||
0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
|
||||
0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
|
||||
0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
|
||||
0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
|
||||
0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
|
||||
0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
|
||||
0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
|
||||
0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
|
||||
0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
|
||||
0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
|
||||
0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
|
||||
0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
|
||||
0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
|
||||
0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
|
||||
0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
|
||||
0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
|
||||
0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
|
||||
0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01
|
||||
};
|
||||
/* Logs of 2 in the Galois field defined above. */
|
||||
static const uint8_t vdev_raidz_log2[256] = {
|
||||
0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6,
|
||||
0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b,
|
||||
0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81,
|
||||
0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71,
|
||||
0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21,
|
||||
0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45,
|
||||
0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9,
|
||||
0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6,
|
||||
0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd,
|
||||
0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88,
|
||||
0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd,
|
||||
0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40,
|
||||
0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e,
|
||||
0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d,
|
||||
0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b,
|
||||
0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57,
|
||||
0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d,
|
||||
0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18,
|
||||
0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c,
|
||||
0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e,
|
||||
0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd,
|
||||
0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61,
|
||||
0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e,
|
||||
0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2,
|
||||
0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76,
|
||||
0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6,
|
||||
0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa,
|
||||
0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a,
|
||||
0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51,
|
||||
0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7,
|
||||
0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8,
|
||||
0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf,
|
||||
};
|
||||
|
||||
static void vdev_raidz_generate_parity(raidz_map_t *rm);
|
||||
|
||||
/*
|
||||
* Multiply a given number by 2 raised to the given power.
|
||||
*/
|
||||
static uint8_t
|
||||
vdev_raidz_exp2(uint_t a, int exp)
|
||||
{
|
||||
if (a == 0)
|
||||
return (0);
|
||||
|
||||
ASSERT(exp >= 0);
|
||||
ASSERT(vdev_raidz_log2[a] > 0 || a == 1);
|
||||
|
||||
exp += vdev_raidz_log2[a];
|
||||
if (exp > 255)
|
||||
exp -= 255;
|
||||
|
||||
return (vdev_raidz_pow2[exp]);
|
||||
}
|
||||
|
||||
static void
|
||||
void
|
||||
vdev_raidz_map_free(raidz_map_t *rm)
|
||||
{
|
||||
int c;
|
||||
|
@ -437,7 +315,7 @@ static const zio_vsd_ops_t vdev_raidz_vsd_ops = {
|
|||
* Avoid inlining the function to keep vdev_raidz_io_start(), which
|
||||
* is this functions only caller, as small as possible on the stack.
|
||||
*/
|
||||
noinline static raidz_map_t *
|
||||
noinline raidz_map_t *
|
||||
vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols,
|
||||
uint64_t nparity)
|
||||
{
|
||||
|
@ -579,6 +457,10 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols,
|
|||
|
||||
zio->io_vsd = rm;
|
||||
zio->io_vsd_ops = &vdev_raidz_vsd_ops;
|
||||
|
||||
/* RAIDZ ops init */
|
||||
vdev_raidz_math_get_ops(rm);
|
||||
|
||||
return (rm);
|
||||
}
|
||||
|
||||
|
@ -726,9 +608,14 @@ vdev_raidz_generate_parity_pqr(raidz_map_t *rm)
|
|||
* Generate RAID parity in the first virtual columns according to the number of
|
||||
* parity columns available.
|
||||
*/
|
||||
static void
|
||||
void
|
||||
vdev_raidz_generate_parity(raidz_map_t *rm)
|
||||
{
|
||||
if (rm->rm_ops) {
|
||||
vdev_raidz_math_generate(rm);
|
||||
return;
|
||||
}
|
||||
|
||||
switch (rm->rm_firstdatacol) {
|
||||
case 1:
|
||||
vdev_raidz_generate_parity_p(rm);
|
||||
|
@ -1392,8 +1279,8 @@ vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts)
|
|||
return (code);
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_raidz_reconstruct(raidz_map_t *rm, int *t, int nt)
|
||||
int
|
||||
vdev_raidz_reconstruct(raidz_map_t *rm, const int *t, int nt)
|
||||
{
|
||||
int tgts[VDEV_RAIDZ_MAXPARITY], *dt;
|
||||
int ntgts;
|
||||
|
@ -1435,34 +1322,41 @@ vdev_raidz_reconstruct(raidz_map_t *rm, int *t, int nt)
|
|||
|
||||
dt = &tgts[nbadparity];
|
||||
|
||||
/*
|
||||
* Reconstruct using the new math implementation if
|
||||
* rm_ops is set.
|
||||
*/
|
||||
if (rm->rm_ops) {
|
||||
return (vdev_raidz_math_reconstruct(rm, parity_valid, dt,
|
||||
nbaddata));
|
||||
}
|
||||
|
||||
/*
|
||||
* See if we can use any of our optimized reconstruction routines.
|
||||
*/
|
||||
if (!vdev_raidz_default_to_general) {
|
||||
switch (nbaddata) {
|
||||
case 1:
|
||||
if (parity_valid[VDEV_RAIDZ_P])
|
||||
return (vdev_raidz_reconstruct_p(rm, dt, 1));
|
||||
switch (nbaddata) {
|
||||
case 1:
|
||||
if (parity_valid[VDEV_RAIDZ_P])
|
||||
return (vdev_raidz_reconstruct_p(rm, dt, 1));
|
||||
|
||||
ASSERT(rm->rm_firstdatacol > 1);
|
||||
ASSERT(rm->rm_firstdatacol > 1);
|
||||
|
||||
if (parity_valid[VDEV_RAIDZ_Q])
|
||||
return (vdev_raidz_reconstruct_q(rm, dt, 1));
|
||||
if (parity_valid[VDEV_RAIDZ_Q])
|
||||
return (vdev_raidz_reconstruct_q(rm, dt, 1));
|
||||
|
||||
ASSERT(rm->rm_firstdatacol > 2);
|
||||
break;
|
||||
ASSERT(rm->rm_firstdatacol > 2);
|
||||
break;
|
||||
|
||||
case 2:
|
||||
ASSERT(rm->rm_firstdatacol > 1);
|
||||
case 2:
|
||||
ASSERT(rm->rm_firstdatacol > 1);
|
||||
|
||||
if (parity_valid[VDEV_RAIDZ_P] &&
|
||||
parity_valid[VDEV_RAIDZ_Q])
|
||||
return (vdev_raidz_reconstruct_pq(rm, dt, 2));
|
||||
if (parity_valid[VDEV_RAIDZ_P] &&
|
||||
parity_valid[VDEV_RAIDZ_Q])
|
||||
return (vdev_raidz_reconstruct_pq(rm, dt, 2));
|
||||
|
||||
ASSERT(rm->rm_firstdatacol > 2);
|
||||
ASSERT(rm->rm_firstdatacol > 2);
|
||||
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
code = vdev_raidz_reconstruct_general(rm, tgts, ntgts);
|
||||
|
@ -1739,11 +1633,6 @@ raidz_parity_verify(zio_t *zio, raidz_map_t *rm)
|
|||
return (ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* Keep statistics on all the ways that we used parity to correct data.
|
||||
*/
|
||||
static uint64_t raidz_corrected[1 << VDEV_RAIDZ_MAXPARITY];
|
||||
|
||||
static int
|
||||
vdev_raidz_worst_error(raidz_map_t *rm)
|
||||
{
|
||||
|
@ -1845,7 +1734,6 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
|
|||
*/
|
||||
code = vdev_raidz_reconstruct(rm, tgts, n);
|
||||
if (raidz_checksum_verify(zio) == 0) {
|
||||
atomic_inc_64(&raidz_corrected[code]);
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
c = tgts[i];
|
||||
|
@ -2058,8 +1946,6 @@ vdev_raidz_io_done(zio_t *zio)
|
|||
code = vdev_raidz_reconstruct(rm, tgts, n);
|
||||
|
||||
if (raidz_checksum_verify(zio) == 0) {
|
||||
atomic_inc_64(&raidz_corrected[code]);
|
||||
|
||||
/*
|
||||
* If we read more parity disks than were used
|
||||
* for reconstruction, confirm that the other
|
||||
|
|
|
@ -0,0 +1,571 @@
|
|||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (C) 2016 Gvozden Nešković. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/debug.h>
|
||||
#include <sys/zfs_debug.h>
|
||||
|
||||
#include <sys/vdev_raidz.h>
|
||||
#include <sys/vdev_raidz_impl.h>
|
||||
|
||||
extern const raidz_impl_ops_t vdev_raidz_scalar_impl;
|
||||
extern const raidz_impl_ops_t vdev_raidz_sse_impl;
|
||||
extern const raidz_impl_ops_t vdev_raidz_avx2_impl;
|
||||
|
||||
/* All compiled in implementations */
|
||||
const raidz_impl_ops_t *raidz_all_maths[] = {
|
||||
&vdev_raidz_scalar_impl,
|
||||
#if defined(__x86_64) && defined(HAVE_SSSE3) /* only x86_64 for now */
|
||||
&vdev_raidz_sse_impl,
|
||||
#endif
|
||||
#if defined(__x86_64) && defined(HAVE_AVX2) /* only x86_64 for now */
|
||||
&vdev_raidz_avx2_impl
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Indicate that benchmark has been completed */
|
||||
static boolean_t raidz_math_initialized = B_FALSE;
|
||||
|
||||
/* Select raidz implementation */
|
||||
static enum vdev_raidz_impl_sel {
|
||||
IMPL_FASTEST = -1,
|
||||
IMPL_ORIGINAL = -2,
|
||||
IMPL_CYCLE = -3,
|
||||
IMPL_SCALAR = 0,
|
||||
} zfs_vdev_raidz_impl = IMPL_SCALAR;
|
||||
|
||||
/* selected implementation and its lock */
|
||||
static krwlock_t vdev_raidz_impl_lock;
|
||||
static raidz_impl_ops_t *vdev_raidz_used_impl =
|
||||
(raidz_impl_ops_t *) &vdev_raidz_scalar_impl;
|
||||
static boolean_t vdev_raidz_impl_user_set = B_FALSE;
|
||||
|
||||
/* RAIDZ ops that contain the fastest routines */
|
||||
static raidz_impl_ops_t vdev_raidz_fastest_impl = {
|
||||
.name = "fastest"
|
||||
};
|
||||
|
||||
/* Hold all supported implementations */
|
||||
size_t raidz_supp_impl_cnt = 1;
|
||||
raidz_impl_ops_t *raidz_supp_impl[ARRAY_SIZE(raidz_all_maths) + 1] = {
|
||||
(raidz_impl_ops_t *) &vdev_raidz_scalar_impl, /* scalar is supported */
|
||||
NULL
|
||||
};
|
||||
|
||||
/*
|
||||
* kstat values for the supported implementations and the original methods
|
||||
* Values represent per disk throughput of 8 disk+parity raidz vdev (Bps)
|
||||
*/
|
||||
static raidz_impl_kstat_t raidz_impl_kstats[ARRAY_SIZE(raidz_all_maths) + 1];
|
||||
|
||||
/* kstat for benchmarked implementations */
|
||||
static kstat_t *raidz_math_kstat = NULL;
|
||||
|
||||
/*
|
||||
* Selects the raidz operation for raidz_map
|
||||
* If rm_ops is set to NULL original raidz implementation will be used
|
||||
*/
|
||||
void
|
||||
vdev_raidz_math_get_ops(raidz_map_t *rm)
|
||||
{
|
||||
rw_enter(&vdev_raidz_impl_lock, RW_READER);
|
||||
|
||||
rm->rm_ops = vdev_raidz_used_impl;
|
||||
|
||||
#if !defined(_KERNEL)
|
||||
if (zfs_vdev_raidz_impl == IMPL_CYCLE) {
|
||||
static size_t cycle_impl_idx = 0;
|
||||
size_t idx;
|
||||
/*
|
||||
* Cycle through all supported new implementations, and
|
||||
* when idx == raidz_supp_impl_cnt, use the original
|
||||
*/
|
||||
idx = (++cycle_impl_idx) % (raidz_supp_impl_cnt + 1);
|
||||
rm->rm_ops = raidz_supp_impl[idx];
|
||||
}
|
||||
#endif
|
||||
|
||||
rw_exit(&vdev_raidz_impl_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Select parity generation method for raidz_map
|
||||
*/
|
||||
void
|
||||
vdev_raidz_math_generate(raidz_map_t *rm)
|
||||
{
|
||||
raidz_gen_f gen_parity = NULL;
|
||||
|
||||
switch (raidz_parity(rm)) {
|
||||
case 1:
|
||||
gen_parity = rm->rm_ops->gen[RAIDZ_GEN_P];
|
||||
break;
|
||||
case 2:
|
||||
gen_parity = rm->rm_ops->gen[RAIDZ_GEN_PQ];
|
||||
break;
|
||||
case 3:
|
||||
gen_parity = rm->rm_ops->gen[RAIDZ_GEN_PQR];
|
||||
break;
|
||||
default:
|
||||
gen_parity = NULL;
|
||||
cmn_err(CE_PANIC, "invalid RAID-Z configuration %d",
|
||||
raidz_parity(rm));
|
||||
break;
|
||||
}
|
||||
|
||||
ASSERT(gen_parity != NULL);
|
||||
|
||||
gen_parity(rm);
|
||||
}
|
||||
|
||||
static raidz_rec_f
|
||||
_reconstruct_fun_raidz1(raidz_map_t *rm, const int *parity_valid,
|
||||
const int nbaddata)
|
||||
{
|
||||
if (nbaddata == 1 && parity_valid[CODE_P]) {
|
||||
return (rm->rm_ops->rec[RAIDZ_REC_P]);
|
||||
}
|
||||
return ((raidz_rec_f) NULL);
|
||||
}
|
||||
|
||||
static raidz_rec_f
|
||||
_reconstruct_fun_raidz2(raidz_map_t *rm, const int *parity_valid,
|
||||
const int nbaddata)
|
||||
{
|
||||
if (nbaddata == 1) {
|
||||
if (parity_valid[CODE_P]) {
|
||||
return (rm->rm_ops->rec[RAIDZ_REC_P]);
|
||||
} else if (parity_valid[CODE_Q]) {
|
||||
return (rm->rm_ops->rec[RAIDZ_REC_Q]);
|
||||
}
|
||||
} else if (nbaddata == 2 &&
|
||||
parity_valid[CODE_P] && parity_valid[CODE_Q]) {
|
||||
return (rm->rm_ops->rec[RAIDZ_REC_PQ]);
|
||||
}
|
||||
return ((raidz_rec_f) NULL);
|
||||
}
|
||||
|
||||
static raidz_rec_f
|
||||
_reconstruct_fun_raidz3(raidz_map_t *rm, const int *parity_valid,
|
||||
const int nbaddata)
|
||||
{
|
||||
if (nbaddata == 1) {
|
||||
if (parity_valid[CODE_P]) {
|
||||
return (rm->rm_ops->rec[RAIDZ_REC_P]);
|
||||
} else if (parity_valid[CODE_Q]) {
|
||||
return (rm->rm_ops->rec[RAIDZ_REC_Q]);
|
||||
} else if (parity_valid[CODE_R]) {
|
||||
return (rm->rm_ops->rec[RAIDZ_REC_R]);
|
||||
}
|
||||
} else if (nbaddata == 2) {
|
||||
if (parity_valid[CODE_P] && parity_valid[CODE_Q]) {
|
||||
return (rm->rm_ops->rec[RAIDZ_REC_PQ]);
|
||||
} else if (parity_valid[CODE_P] && parity_valid[CODE_R]) {
|
||||
return (rm->rm_ops->rec[RAIDZ_REC_PR]);
|
||||
} else if (parity_valid[CODE_Q] && parity_valid[CODE_R]) {
|
||||
return (rm->rm_ops->rec[RAIDZ_REC_QR]);
|
||||
}
|
||||
} else if (nbaddata == 3 &&
|
||||
parity_valid[CODE_P] && parity_valid[CODE_Q] &&
|
||||
parity_valid[CODE_R]) {
|
||||
return (rm->rm_ops->rec[RAIDZ_REC_PQR]);
|
||||
}
|
||||
return ((raidz_rec_f) NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Select data reconstruction method for raidz_map
|
||||
* @parity_valid - Parity validity flag
|
||||
* @dt - Failed data index array
|
||||
* @nbaddata - Number of failed data columns
|
||||
*/
|
||||
int
|
||||
vdev_raidz_math_reconstruct(raidz_map_t *rm, const int *parity_valid,
|
||||
const int *dt, const int nbaddata)
|
||||
{
|
||||
raidz_rec_f rec_data = NULL;
|
||||
|
||||
switch (raidz_parity(rm)) {
|
||||
case 1:
|
||||
rec_data = _reconstruct_fun_raidz1(rm, parity_valid,
|
||||
nbaddata);
|
||||
break;
|
||||
case 2:
|
||||
rec_data = _reconstruct_fun_raidz2(rm, parity_valid,
|
||||
nbaddata);
|
||||
break;
|
||||
case 3:
|
||||
rec_data = _reconstruct_fun_raidz3(rm, parity_valid,
|
||||
nbaddata);
|
||||
break;
|
||||
default:
|
||||
cmn_err(CE_PANIC, "invalid RAID-Z configuration %d",
|
||||
raidz_parity(rm));
|
||||
break;
|
||||
}
|
||||
|
||||
ASSERT(rec_data != NULL);
|
||||
|
||||
return (rec_data(rm, dt));
|
||||
}
|
||||
|
||||
/*
 * Per-operation name suffixes.  init_raidz_kstat() appends entry i of
 * this table to an implementation name ("<impl>_<op>") when it names the
 * kstat entry of parity-generation operation i.
 */
const char *raidz_gen_name[] = {
|
||||
"gen_p", "gen_pq", "gen_pqr"
|
||||
};
|
||||
/* Same role as raidz_gen_name, for reconstruction operation i. */
const char *raidz_rec_name[] = {
|
||||
"rec_p", "rec_q", "rec_r",
|
||||
"rec_pq", "rec_pr", "rec_qr", "rec_pqr"
|
||||
};
|
||||
|
||||
static void
|
||||
init_raidz_kstat(raidz_impl_kstat_t *rs, const char *name)
|
||||
{
|
||||
int i;
|
||||
const size_t impl_name_len = strnlen(name, KSTAT_STRLEN);
|
||||
const size_t op_name_max = (KSTAT_STRLEN - 2) > impl_name_len ?
|
||||
KSTAT_STRLEN - impl_name_len - 2 : 0;
|
||||
|
||||
for (i = 0; i < RAIDZ_GEN_NUM; i++) {
|
||||
strncpy(rs->gen[i].name, name, impl_name_len);
|
||||
strncpy(rs->gen[i].name + impl_name_len, "_", 1);
|
||||
strncpy(rs->gen[i].name + impl_name_len + 1,
|
||||
raidz_gen_name[i], op_name_max);
|
||||
|
||||
rs->gen[i].data_type = KSTAT_DATA_UINT64;
|
||||
rs->gen[i].value.ui64 = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < RAIDZ_REC_NUM; i++) {
|
||||
strncpy(rs->rec[i].name, name, impl_name_len);
|
||||
strncpy(rs->rec[i].name + impl_name_len, "_", 1);
|
||||
strncpy(rs->rec[i].name + impl_name_len + 1,
|
||||
raidz_rec_name[i], op_name_max);
|
||||
|
||||
rs->rec[i].data_type = KSTAT_DATA_UINT64;
|
||||
rs->rec[i].value.ui64 = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Benchmark geometry: BENCH_D_COLS data columns plus PARITY_PQR parity
 * columns, one zio of BENCH_ZIO_SIZE bytes, and a minimum measuring time
 * of BENCH_NS per routine.
 */
#define	BENCH_D_COLS	(8ULL)
#define	BENCH_COLS	(BENCH_D_COLS + PARITY_PQR)
#define	BENCH_ZIO_SIZE	(1ULL << 17)	/* 128 kiB */
#define	BENCH_NS	MSEC2NSEC(25)	/* 25ms */
|
||||
|
||||
typedef void (*benchmark_fn)(raidz_map_t *rm, const int fn);
|
||||
|
||||
static void
|
||||
benchmark_gen_impl(raidz_map_t *rm, const int fn)
|
||||
{
|
||||
(void) fn;
|
||||
vdev_raidz_generate_parity(rm);
|
||||
}
|
||||
|
||||
static void
|
||||
benchmark_rec_impl(raidz_map_t *rm, const int fn)
|
||||
{
|
||||
static const int rec_tgt[7][3] = {
|
||||
{1, 2, 3}, /* rec_p: bad QR & D[0] */
|
||||
{0, 2, 3}, /* rec_q: bad PR & D[0] */
|
||||
{0, 1, 3}, /* rec_r: bad PQ & D[0] */
|
||||
{2, 3, 4}, /* rec_pq: bad R & D[0][1] */
|
||||
{1, 3, 4}, /* rec_pr: bad Q & D[0][1] */
|
||||
{0, 3, 4}, /* rec_qr: bad P & D[0][1] */
|
||||
{3, 4, 5} /* rec_pqr: bad & D[0][1][2] */
|
||||
};
|
||||
|
||||
vdev_raidz_reconstruct(rm, rec_tgt[fn], 3);
|
||||
}
|
||||
|
||||
/*
|
||||
* Benchmarking of all supported implementations (raidz_supp_impl_cnt)
|
||||
* is performed by setting the rm_ops pointer and calling the top level
|
||||
* generate/reconstruct methods of bench_rm.
|
||||
*/
|
||||
static void
|
||||
benchmark_raidz_impl(raidz_map_t *bench_rm, const int fn, benchmark_fn bench_fn)
|
||||
{
|
||||
uint64_t run_cnt, speed, best_speed = 0;
|
||||
hrtime_t t_start, t_diff;
|
||||
raidz_impl_ops_t *curr_impl;
|
||||
int impl, i;
|
||||
|
||||
/*
|
||||
* Use the sentinel (NULL) from the end of raidz_supp_impl_cnt
|
||||
* to run "original" implementation (bench_rm->rm_ops = NULL)
|
||||
*/
|
||||
for (impl = 0; impl <= raidz_supp_impl_cnt; impl++) {
|
||||
/* set an implementation to benchmark */
|
||||
curr_impl = raidz_supp_impl[impl];
|
||||
bench_rm->rm_ops = curr_impl;
|
||||
|
||||
run_cnt = 0;
|
||||
t_start = gethrtime();
|
||||
|
||||
do {
|
||||
for (i = 0; i < 25; i++, run_cnt++)
|
||||
bench_fn(bench_rm, fn);
|
||||
|
||||
t_diff = gethrtime() - t_start;
|
||||
} while (t_diff < BENCH_NS);
|
||||
|
||||
speed = run_cnt * BENCH_ZIO_SIZE * NANOSEC;
|
||||
speed /= (t_diff * BENCH_COLS);
|
||||
|
||||
if (bench_fn == benchmark_gen_impl)
|
||||
raidz_impl_kstats[impl].gen[fn].value.ui64 = speed;
|
||||
else
|
||||
raidz_impl_kstats[impl].rec[fn].value.ui64 = speed;
|
||||
|
||||
/* if curr_impl==NULL the original impl is benchmarked */
|
||||
if (curr_impl != NULL && speed > best_speed) {
|
||||
best_speed = speed;
|
||||
|
||||
if (bench_fn == benchmark_gen_impl)
|
||||
vdev_raidz_fastest_impl.gen[fn] =
|
||||
curr_impl->gen[fn];
|
||||
else
|
||||
vdev_raidz_fastest_impl.rec[fn] =
|
||||
curr_impl->rec[fn];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
vdev_raidz_math_init(void)
|
||||
{
|
||||
raidz_impl_ops_t *curr_impl;
|
||||
zio_t *bench_zio = NULL;
|
||||
raidz_map_t *bench_rm = NULL;
|
||||
uint64_t bench_parity;
|
||||
int i, c, fn;
|
||||
|
||||
/* init & vdev_raidz_impl_lock */
|
||||
rw_init(&vdev_raidz_impl_lock, NULL, RW_DEFAULT, NULL);
|
||||
|
||||
/* move supported impl into raidz_supp_impl */
|
||||
for (i = 0, c = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
|
||||
curr_impl = (raidz_impl_ops_t *) raidz_all_maths[i];
|
||||
|
||||
/* initialize impl */
|
||||
if (curr_impl->init)
|
||||
curr_impl->init();
|
||||
|
||||
if (curr_impl->is_supported()) {
|
||||
/* init kstat */
|
||||
init_raidz_kstat(&raidz_impl_kstats[c],
|
||||
curr_impl->name);
|
||||
raidz_supp_impl[c++] = (raidz_impl_ops_t *) curr_impl;
|
||||
}
|
||||
}
|
||||
raidz_supp_impl_cnt = c; /* number of supported impl */
|
||||
raidz_supp_impl[c] = NULL; /* sentinel */
|
||||
|
||||
/* init kstat for original routines */
|
||||
init_raidz_kstat(&(raidz_impl_kstats[raidz_supp_impl_cnt]), "original");
|
||||
|
||||
#if !defined(_KERNEL)
|
||||
/*
|
||||
* Skip benchmarking and use last implementation as fastest
|
||||
*/
|
||||
memcpy(&vdev_raidz_fastest_impl, raidz_supp_impl[raidz_supp_impl_cnt-1],
|
||||
sizeof (vdev_raidz_fastest_impl));
|
||||
|
||||
vdev_raidz_fastest_impl.name = "fastest";
|
||||
|
||||
raidz_math_initialized = B_TRUE;
|
||||
|
||||
/* Use 'cycle' math selection method for userspace */
|
||||
VERIFY0(vdev_raidz_impl_set("cycle"));
|
||||
return;
|
||||
#endif
|
||||
|
||||
/* Fake an zio and run the benchmark on it */
|
||||
bench_zio = kmem_zalloc(sizeof (zio_t), KM_SLEEP);
|
||||
bench_zio->io_offset = 0;
|
||||
bench_zio->io_size = BENCH_ZIO_SIZE; /* only data columns */
|
||||
bench_zio->io_data = zio_data_buf_alloc(BENCH_ZIO_SIZE);
|
||||
VERIFY(bench_zio->io_data);
|
||||
|
||||
/* Benchmark parity generation methods */
|
||||
for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
|
||||
bench_parity = fn + 1;
|
||||
/* New raidz_map is needed for each generate_p/q/r */
|
||||
bench_rm = vdev_raidz_map_alloc(bench_zio, 9,
|
||||
BENCH_D_COLS + bench_parity, bench_parity);
|
||||
|
||||
benchmark_raidz_impl(bench_rm, fn, benchmark_gen_impl);
|
||||
|
||||
vdev_raidz_map_free(bench_rm);
|
||||
}
|
||||
|
||||
/* Benchmark data reconstruction methods */
|
||||
bench_rm = vdev_raidz_map_alloc(bench_zio, 9, BENCH_COLS, PARITY_PQR);
|
||||
|
||||
for (fn = 0; fn < RAIDZ_REC_NUM; fn++)
|
||||
benchmark_raidz_impl(bench_rm, fn, benchmark_rec_impl);
|
||||
|
||||
vdev_raidz_map_free(bench_rm);
|
||||
|
||||
/* cleanup the bench zio */
|
||||
zio_data_buf_free(bench_zio->io_data, BENCH_ZIO_SIZE);
|
||||
kmem_free(bench_zio, sizeof (zio_t));
|
||||
|
||||
/* install kstats for all impl */
|
||||
raidz_math_kstat = kstat_create("zfs", 0, "vdev_raidz_bench",
|
||||
"misc", KSTAT_TYPE_NAMED,
|
||||
sizeof (raidz_impl_kstat_t) / sizeof (kstat_named_t) *
|
||||
(raidz_supp_impl_cnt + 1), KSTAT_FLAG_VIRTUAL);
|
||||
|
||||
if (raidz_math_kstat != NULL) {
|
||||
raidz_math_kstat->ks_data = raidz_impl_kstats;
|
||||
kstat_install(raidz_math_kstat);
|
||||
}
|
||||
|
||||
/* Finish initialization */
|
||||
raidz_math_initialized = B_TRUE;
|
||||
if (!vdev_raidz_impl_user_set)
|
||||
VERIFY0(vdev_raidz_impl_set("fastest"));
|
||||
}
|
||||
|
||||
void
|
||||
vdev_raidz_math_fini(void)
|
||||
{
|
||||
raidz_impl_ops_t const *curr_impl;
|
||||
int i;
|
||||
|
||||
if (raidz_math_kstat != NULL) {
|
||||
kstat_delete(raidz_math_kstat);
|
||||
raidz_math_kstat = NULL;
|
||||
}
|
||||
|
||||
rw_destroy(&vdev_raidz_impl_lock);
|
||||
|
||||
/* fini impl */
|
||||
for (i = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
|
||||
curr_impl = raidz_all_maths[i];
|
||||
|
||||
if (curr_impl->fini)
|
||||
curr_impl->fini();
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Selection names that zfs_vdev_raidz_impl_set() checks before the list of
 * supported implementations.  For a matching name, impl is stored in
 * vdev_raidz_used_impl (NULL for "original" and "cycle") and sel is stored
 * in zfs_vdev_raidz_impl.  "cycle" exists in userspace builds only.
 */
static const
|
||||
struct {
|
||||
char *name;
|
||||
raidz_impl_ops_t *impl;
|
||||
enum vdev_raidz_impl_sel sel;
|
||||
} math_impl_opts[] = {
|
||||
{ "fastest", &vdev_raidz_fastest_impl, IMPL_FASTEST },
|
||||
{ "original", NULL, IMPL_ORIGINAL },
|
||||
#if !defined(_KERNEL)
|
||||
{ "cycle", NULL, IMPL_CYCLE },
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
|
||||
* Function sets desired raidz implementation.
|
||||
* If called after module_init(), vdev_raidz_impl_lock must be held for writing.
|
||||
*
|
||||
* @val Name of raidz implementation to use
|
||||
* @param Unused.
|
||||
*/
|
||||
static int
|
||||
zfs_vdev_raidz_impl_set(const char *val, struct kernel_param *kp)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
/* Check mandatory options */
|
||||
for (i = 0; i < ARRAY_SIZE(math_impl_opts); i++) {
|
||||
if (strcmp(val, math_impl_opts[i].name) == 0) {
|
||||
zfs_vdev_raidz_impl = math_impl_opts[i].sel;
|
||||
vdev_raidz_used_impl = math_impl_opts[i].impl;
|
||||
vdev_raidz_impl_user_set = B_TRUE;
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
|
||||
/* check all supported implementations */
|
||||
for (i = 0; i < raidz_supp_impl_cnt; i++) {
|
||||
if (strcmp(val, raidz_supp_impl[i]->name) == 0) {
|
||||
zfs_vdev_raidz_impl = i;
|
||||
vdev_raidz_used_impl = raidz_supp_impl[i];
|
||||
vdev_raidz_impl_user_set = B_TRUE;
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
|
||||
return (-EINVAL);
|
||||
}
|
||||
|
||||
int
|
||||
vdev_raidz_impl_set(const char *val)
|
||||
{
|
||||
int err;
|
||||
|
||||
ASSERT(raidz_math_initialized);
|
||||
|
||||
rw_enter(&vdev_raidz_impl_lock, RW_WRITER);
|
||||
err = zfs_vdev_raidz_impl_set(val, NULL);
|
||||
rw_exit(&vdev_raidz_impl_lock);
|
||||
return (err);
|
||||
}
|
||||
|
||||
#if defined(_KERNEL) && defined(HAVE_SPL)
|
||||
static int
|
||||
zfs_vdev_raidz_impl_get(char *buffer, struct kernel_param *kp)
|
||||
{
|
||||
int i, cnt = 0;
|
||||
char *fmt;
|
||||
|
||||
ASSERT(raidz_math_initialized);
|
||||
|
||||
rw_enter(&vdev_raidz_impl_lock, RW_READER);
|
||||
|
||||
/* list mandatory options */
|
||||
for (i = 0; i < ARRAY_SIZE(math_impl_opts); i++) {
|
||||
if (math_impl_opts[i].sel == zfs_vdev_raidz_impl)
|
||||
fmt = "[%s] ";
|
||||
else
|
||||
fmt = "%s ";
|
||||
|
||||
cnt += sprintf(buffer + cnt, fmt, math_impl_opts[i].name);
|
||||
}
|
||||
|
||||
/* list all supported implementations */
|
||||
for (i = 0; i < raidz_supp_impl_cnt; i++) {
|
||||
fmt = (i == zfs_vdev_raidz_impl) ? "[%s] " : "%s ";
|
||||
cnt += sprintf(buffer + cnt, fmt, raidz_supp_impl[i]->name);
|
||||
}
|
||||
|
||||
rw_exit(&vdev_raidz_impl_lock);
|
||||
|
||||
return (cnt);
|
||||
}
|
||||
|
||||
module_param_call(zfs_vdev_raidz_impl, zfs_vdev_raidz_impl_set,
|
||||
zfs_vdev_raidz_impl_get, NULL, 0644);
|
||||
MODULE_PARM_DESC(zfs_vdev_raidz_impl, "Select raidz implementation.");
|
||||
#endif
|
|
@ -0,0 +1,396 @@
|
|||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (C) 2016 Gvozden Nešković. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/isa_defs.h>
|
||||
|
||||
#if defined(__x86_64) && defined(HAVE_AVX2)
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <linux/simd_x86.h>
|
||||
|
||||
#define __asm __asm__ __volatile__
|
||||
|
||||
/*
 * Helpers for working with lists of register numbers.
 *
 * REG_CNT(r...) evaluates to the number of arguments in r (1 to 8): the
 * descending list appended after r shifts the matching count into the N
 * position of _REG_CNT.
 */
#define _REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N
|
||||
#define REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1)
|
||||
|
||||
/*
 * VRn_() skips n leading arguments and stringifies the next one behind a
 * "ymm" prefix, producing a register name for use in an asm template.
 */
#define VR0_(REG, ...) "ymm"#REG
|
||||
#define VR1_(_1, REG, ...) "ymm"#REG
|
||||
#define VR2_(_1, _2, REG, ...) "ymm"#REG
|
||||
#define VR3_(_1, _2, _3, REG, ...) "ymm"#REG
|
||||
#define VR4_(_1, _2, _3, _4, REG, ...) "ymm"#REG
|
||||
#define VR5_(_1, _2, _3, _4, _5, REG, ...) "ymm"#REG
|
||||
#define VR6_(_1, _2, _3, _4, _5, _6, REG, ...) "ymm"#REG
|
||||
#define VR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) "ymm"#REG
|
||||
|
||||
/*
 * VRn(r...) names register number n of the list r.  The extra constants
 * appended by VR2..VR7 pad the argument list, so the macro still expands
 * when r holds fewer registers than the selector reaches for.
 */
#define VR0(r...) VR0_(r)
|
||||
#define VR1(r...) VR1_(r)
|
||||
#define VR2(r...) VR2_(r, 1)
|
||||
#define VR3(r...) VR3_(r, 1, 2)
|
||||
#define VR4(r...) VR4_(r, 1)
|
||||
#define VR5(r...) VR5_(r, 1, 2)
|
||||
#define VR6(r...) VR6_(r, 1, 2, 3)
|
||||
#define VR7(r...) VR7_(r, 1, 2, 3, 4)
|
||||
|
||||
/*
 * R_01() yields the first two entries of a register list, R_23() the
 * third and fourth.
 */
#define R_01(REG1, REG2, ...) REG1, REG2
|
||||
#define _R_23(_0, _1, REG2, REG3, ...) REG2, REG3
|
||||
#define R_23(REG...) _R_23(REG, 1, 2, 3)
|
||||
|
||||
#define ASM_BUG() ASSERT(0)
|
||||
|
||||
extern const uint8_t gf_clmul_mod_lt[4*256][16];
|
||||
|
||||
#define ELEM_SIZE 32
|
||||
|
||||
typedef struct v {
|
||||
uint8_t b[ELEM_SIZE] __attribute__((aligned(ELEM_SIZE)));
|
||||
} v_t;
|
||||
|
||||
#define PREFETCHNTA(ptr, offset) \
|
||||
{ \
|
||||
__asm( \
|
||||
"prefetchnta " #offset "(%[MEM])\n" \
|
||||
: : [MEM] "r" (ptr)); \
|
||||
}
|
||||
|
||||
#define PREFETCH(ptr, offset) \
|
||||
{ \
|
||||
__asm( \
|
||||
"prefetcht0 " #offset "(%[MEM])\n" \
|
||||
: : [MEM] "r" (ptr)); \
|
||||
}
|
||||
|
||||
#define XOR_ACC(src, r...) \
|
||||
{ \
|
||||
switch (REG_CNT(r)) { \
|
||||
case 4: \
|
||||
__asm( \
|
||||
"vpxor 0x00(%[SRC]), %%" VR0(r)", %%" VR0(r) "\n" \
|
||||
"vpxor 0x20(%[SRC]), %%" VR1(r)", %%" VR1(r) "\n" \
|
||||
"vpxor 0x40(%[SRC]), %%" VR2(r)", %%" VR2(r) "\n" \
|
||||
"vpxor 0x60(%[SRC]), %%" VR3(r)", %%" VR3(r) "\n" \
|
||||
: : [SRC] "r" (src)); \
|
||||
break; \
|
||||
case 2: \
|
||||
__asm( \
|
||||
"vpxor 0x00(%[SRC]), %%" VR0(r)", %%" VR0(r) "\n" \
|
||||
"vpxor 0x20(%[SRC]), %%" VR1(r)", %%" VR1(r) "\n" \
|
||||
: : [SRC] "r" (src)); \
|
||||
break; \
|
||||
default: \
|
||||
ASM_BUG(); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define XOR(r...) \
|
||||
{ \
|
||||
switch (REG_CNT(r)) { \
|
||||
case 8: \
|
||||
__asm( \
|
||||
"vpxor %" VR0(r) ", %" VR4(r)", %" VR4(r) "\n" \
|
||||
"vpxor %" VR1(r) ", %" VR5(r)", %" VR5(r) "\n" \
|
||||
"vpxor %" VR2(r) ", %" VR6(r)", %" VR6(r) "\n" \
|
||||
"vpxor %" VR3(r) ", %" VR7(r)", %" VR7(r)); \
|
||||
break; \
|
||||
case 4: \
|
||||
__asm( \
|
||||
"vpxor %" VR0(r) ", %" VR2(r)", %" VR2(r) "\n" \
|
||||
"vpxor %" VR1(r) ", %" VR3(r)", %" VR3(r)); \
|
||||
break; \
|
||||
default: \
|
||||
ASM_BUG(); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define COPY(r...) \
|
||||
{ \
|
||||
switch (REG_CNT(r)) { \
|
||||
case 8: \
|
||||
__asm( \
|
||||
"vmovdqa %" VR0(r) ", %" VR4(r) "\n" \
|
||||
"vmovdqa %" VR1(r) ", %" VR5(r) "\n" \
|
||||
"vmovdqa %" VR2(r) ", %" VR6(r) "\n" \
|
||||
"vmovdqa %" VR3(r) ", %" VR7(r)); \
|
||||
break; \
|
||||
case 4: \
|
||||
__asm( \
|
||||
"vmovdqa %" VR0(r) ", %" VR2(r) "\n" \
|
||||
"vmovdqa %" VR1(r) ", %" VR3(r)); \
|
||||
break; \
|
||||
default: \
|
||||
ASM_BUG(); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define LOAD(src, r...) \
|
||||
{ \
|
||||
switch (REG_CNT(r)) { \
|
||||
case 4: \
|
||||
__asm( \
|
||||
"vmovdqa 0x00(%[SRC]), %%" VR0(r) "\n" \
|
||||
"vmovdqa 0x20(%[SRC]), %%" VR1(r) "\n" \
|
||||
"vmovdqa 0x40(%[SRC]), %%" VR2(r) "\n" \
|
||||
"vmovdqa 0x60(%[SRC]), %%" VR3(r) "\n" \
|
||||
: : [SRC] "r" (src)); \
|
||||
break; \
|
||||
case 2: \
|
||||
__asm( \
|
||||
"vmovdqa 0x00(%[SRC]), %%" VR0(r) "\n" \
|
||||
"vmovdqa 0x20(%[SRC]), %%" VR1(r) "\n" \
|
||||
: : [SRC] "r" (src)); \
|
||||
break; \
|
||||
default: \
|
||||
ASM_BUG(); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define STORE(dst, r...) \
|
||||
{ \
|
||||
switch (REG_CNT(r)) { \
|
||||
case 4: \
|
||||
__asm( \
|
||||
"vmovdqa %%" VR0(r) ", 0x00(%[DST])\n" \
|
||||
"vmovdqa %%" VR1(r) ", 0x20(%[DST])\n" \
|
||||
"vmovdqa %%" VR2(r) ", 0x40(%[DST])\n" \
|
||||
"vmovdqa %%" VR3(r) ", 0x60(%[DST])\n" \
|
||||
: : [DST] "r" (dst)); \
|
||||
break; \
|
||||
case 2: \
|
||||
__asm( \
|
||||
"vmovdqa %%" VR0(r) ", 0x00(%[DST])\n" \
|
||||
"vmovdqa %%" VR1(r) ", 0x20(%[DST])\n" \
|
||||
: : [DST] "r" (dst)); \
|
||||
break; \
|
||||
default: \
|
||||
ASM_BUG(); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define FLUSH() \
|
||||
{ \
|
||||
__asm("vzeroupper"); \
|
||||
}
|
||||
|
||||
/*
 * Load the constants used by _MUL2(): every byte of ymm14 is set to 0x1d
 * and ymm15 is cleared to zero.
 * NOTE(review): 0x1d is presumably the reduction constant of the RAID-Z
 * Galois field - confirm against the parity math documentation.
 */
#define MUL2_SETUP() \
|
||||
{ \
|
||||
__asm("vmovq %0, %%xmm14" :: "r"(0x1d1d1d1d1d1d1d1d)); \
|
||||
__asm("vpbroadcastq %xmm14, %ymm14"); \
|
||||
__asm("vpxor %ymm15, %ymm15 ,%ymm15"); \
|
||||
}
|
||||
|
||||
/*
 * Multiply-by-2 step for a pair of registers, applied to each byte:
 *  - vpcmpgtb against the zero register ymm15 builds, in ymm12/ymm13, a
 *    mask of the bytes that compare below zero as signed values (top bit
 *    set);
 *  - vpaddb adds each register to itself;
 *  - the mask is ANDed with the constant in ymm14 and XORed into the
 *    doubled value.
 * Relies on ymm14/ymm15 as loaded by MUL2_SETUP() and overwrites
 * ymm12/ymm13.  Only a two-register list is accepted.
 */
#define _MUL2(r...) \
|
||||
{ \
|
||||
switch (REG_CNT(r)) { \
|
||||
case 2: \
|
||||
__asm( \
|
||||
"vpcmpgtb %" VR0(r)", %ymm15, %ymm12\n" \
|
||||
"vpcmpgtb %" VR1(r)", %ymm15, %ymm13\n" \
|
||||
"vpaddb %" VR0(r)", %" VR0(r)", %" VR0(r) "\n" \
|
||||
"vpaddb %" VR1(r)", %" VR1(r)", %" VR1(r) "\n" \
|
||||
"vpand %ymm14, %ymm12, %ymm12\n" \
|
||||
"vpand %ymm14, %ymm13, %ymm13\n" \
|
||||
"vpxor %ymm12, %" VR0(r)", %" VR0(r) "\n" \
|
||||
"vpxor %ymm13, %" VR1(r)", %" VR1(r)); \
|
||||
break; \
|
||||
default: \
|
||||
ASM_BUG(); \
|
||||
} \
|
||||
}
|
||||
|
||||
/*
 * Apply _MUL2() to a list of two or four registers; a four-register list
 * is processed as two pairs (R_01, then R_23).
 */
#define MUL2(r...) \
|
||||
{ \
|
||||
switch (REG_CNT(r)) { \
|
||||
case 4: \
|
||||
_MUL2(R_01(r)); \
|
||||
_MUL2(R_23(r)); \
|
||||
break; \
|
||||
case 2: \
|
||||
_MUL2(r); \
|
||||
break; \
|
||||
default: \
|
||||
ASM_BUG(); \
|
||||
} \
|
||||
}
|
||||
|
||||
/* Multiply by 4, implemented as two consecutive MUL2() steps. */
#define MUL4(r...) \
|
||||
{ \
|
||||
MUL2(r); \
|
||||
MUL2(r); \
|
||||
}
|
||||
|
||||
/*
 * Register aliases used by _MULx2().
 * Note that _0f and _tb both name ymm15: inside _MULx2() the 0x0f mask is
 * consumed by the vpand instructions before _tb is first written.
 */
#define _0f "ymm15"
|
||||
#define _as "ymm14"
|
||||
#define _bs "ymm13"
|
||||
#define _ltmod "ymm12"
|
||||
#define _ltmul "ymm11"
|
||||
#define _ta "ymm10"
|
||||
#define _tb "ymm15"
|
||||
|
||||
/* Nibble mask, broadcast into _0f at the start of _MULx2(). */
static const uint8_t __attribute__((aligned(32))) _mul_mask = 0x0F;
|
||||
|
||||
/*
 * Multiply a pair of registers by the constant c using table lookups.
 *
 * The table rows for c start at gf_clmul_mod_lt[4*(c)]; the 16-byte
 * entries at offsets 0x00/0x10 are used with the upper nibble of every
 * byte and those at 0x20/0x30 with the lower nibble.  Each source byte is
 * split into its upper nibble (word shift right by 4, then masked with
 * 0x0f) and lower nibble (masked with 0x0f), each nibble is looked up in
 * both tables with vpshufb, and the four lookup results are XORed into
 * the source register.
 *
 * Overwrites ymm10..ymm15.  Only a two-register list is accepted.
 */
#define _MULx2(c, r...) \
|
||||
{ \
|
||||
switch (REG_CNT(r)) { \
|
||||
case 2: \
|
||||
__asm( \
|
||||
"vpbroadcastb (%[mask]), %%" _0f "\n" \
|
||||
/* upper bits */ \
|
||||
"vbroadcasti128 0x00(%[lt]), %%" _ltmod "\n" \
|
||||
"vbroadcasti128 0x10(%[lt]), %%" _ltmul "\n" \
|
||||
\
|
||||
"vpsraw $0x4, %%" VR0(r) ", %%"_as "\n" \
|
||||
"vpsraw $0x4, %%" VR1(r) ", %%"_bs "\n" \
|
||||
"vpand %%" _0f ", %%" VR0(r) ", %%" VR0(r) "\n" \
|
||||
"vpand %%" _0f ", %%" VR1(r) ", %%" VR1(r) "\n" \
|
||||
"vpand %%" _0f ", %%" _as ", %%" _as "\n" \
|
||||
"vpand %%" _0f ", %%" _bs ", %%" _bs "\n" \
|
||||
\
|
||||
"vpshufb %%" _as ", %%" _ltmod ", %%" _ta "\n" \
|
||||
"vpshufb %%" _bs ", %%" _ltmod ", %%" _tb "\n" \
|
||||
"vpshufb %%" _as ", %%" _ltmul ", %%" _as "\n" \
|
||||
"vpshufb %%" _bs ", %%" _ltmul ", %%" _bs "\n" \
|
||||
/* lower bits */ \
|
||||
"vbroadcasti128 0x20(%[lt]), %%" _ltmod "\n" \
|
||||
"vbroadcasti128 0x30(%[lt]), %%" _ltmul "\n" \
|
||||
\
|
||||
"vpxor %%" _ta ", %%" _as ", %%" _as "\n" \
|
||||
"vpxor %%" _tb ", %%" _bs ", %%" _bs "\n" \
|
||||
\
|
||||
"vpshufb %%" VR0(r) ", %%" _ltmod ", %%" _ta "\n" \
|
||||
"vpshufb %%" VR1(r) ", %%" _ltmod ", %%" _tb "\n" \
|
||||
"vpshufb %%" VR0(r) ", %%" _ltmul ", %%" VR0(r) "\n"\
|
||||
"vpshufb %%" VR1(r) ", %%" _ltmul ", %%" VR1(r) "\n"\
|
||||
\
|
||||
"vpxor %%" _ta ", %%" VR0(r) ", %%" VR0(r) "\n" \
|
||||
"vpxor %%" _as ", %%" VR0(r) ", %%" VR0(r) "\n" \
|
||||
"vpxor %%" _tb ", %%" VR1(r) ", %%" VR1(r) "\n" \
|
||||
"vpxor %%" _bs ", %%" VR1(r) ", %%" VR1(r) "\n" \
|
||||
: : [mask] "r" (&_mul_mask), \
|
||||
[lt] "r" (gf_clmul_mod_lt[4*(c)])); \
|
||||
break; \
|
||||
default: \
|
||||
ASM_BUG(); \
|
||||
} \
|
||||
}
|
||||
|
||||
/*
 * Multiply a list of two or four registers by the constant c; a
 * four-register list is handled as two _MULx2() calls on the pairs R_01
 * and R_23.
 */
#define MUL(c, r...) \
|
||||
{ \
|
||||
switch (REG_CNT(r)) { \
|
||||
case 4: \
|
||||
_MULx2(c, R_01(r)); \
|
||||
_MULx2(c, R_23(r)); \
|
||||
break; \
|
||||
case 2: \
|
||||
_MULx2(c, R_01(r)); \
|
||||
break; \
|
||||
default: \
|
||||
ASM_BUG(); \
|
||||
} \
|
||||
}
|
||||
|
||||
/*
 * Bracket a region of vector code: raidz_math_begin() calls kfpu_begin();
 * raidz_math_end() issues FLUSH() (vzeroupper) and then calls kfpu_end().
 */
#define raidz_math_begin() kfpu_begin()
|
||||
#define raidz_math_end() \
|
||||
{ \
|
||||
FLUSH(); \
|
||||
kfpu_end(); \
|
||||
}
|
||||
|
||||
#define GEN_P_DEFINE() {}
|
||||
#define GEN_P_STRIDE 4
|
||||
#define GEN_P_P 0, 1, 2, 3
|
||||
|
||||
#define GEN_PQ_DEFINE() {}
|
||||
#define GEN_PQ_STRIDE 4
|
||||
#define GEN_PQ_D 0, 1, 2, 3
|
||||
#define GEN_PQ_P 4, 5, 6, 7
|
||||
#define GEN_PQ_Q 8, 9, 10, 11
|
||||
|
||||
#define GEN_PQR_DEFINE() {}
|
||||
#define GEN_PQR_STRIDE 2
|
||||
#define GEN_PQR_D 0, 1
|
||||
#define GEN_PQR_P 2, 3
|
||||
#define GEN_PQR_Q 4, 5
|
||||
#define GEN_PQR_R 6, 7
|
||||
|
||||
#define REC_P_DEFINE() {}
|
||||
#define REC_P_STRIDE 4
|
||||
#define REC_P_X 0, 1, 2, 3
|
||||
|
||||
#define REC_Q_DEFINE() {}
|
||||
#define REC_Q_STRIDE 4
|
||||
#define REC_Q_X 0, 1, 2, 3
|
||||
|
||||
#define REC_R_DEFINE() {}
|
||||
#define REC_R_STRIDE 4
|
||||
#define REC_R_X 0, 1, 2, 3
|
||||
|
||||
#define REC_PQ_DEFINE() {}
|
||||
#define REC_PQ_STRIDE 2
|
||||
#define REC_PQ_X 0, 1
|
||||
#define REC_PQ_Y 2, 3
|
||||
#define REC_PQ_D 4, 5
|
||||
|
||||
#define REC_PR_DEFINE() {}
|
||||
#define REC_PR_STRIDE 2
|
||||
#define REC_PR_X 0, 1
|
||||
#define REC_PR_Y 2, 3
|
||||
#define REC_PR_D 4, 5
|
||||
|
||||
#define REC_QR_DEFINE() {}
|
||||
#define REC_QR_STRIDE 2
|
||||
#define REC_QR_X 0, 1
|
||||
#define REC_QR_Y 2, 3
|
||||
#define REC_QR_D 4, 5
|
||||
|
||||
#define REC_PQR_DEFINE() {}
|
||||
#define REC_PQR_STRIDE 2
|
||||
#define REC_PQR_X 0, 1
|
||||
#define REC_PQR_Y 2, 3
|
||||
#define REC_PQR_Z 4, 5
|
||||
#define REC_PQR_D 6, 7
|
||||
#define REC_PQR_XS 6, 7
|
||||
#define REC_PQR_YS 8, 9
|
||||
|
||||
|
||||
#include <sys/vdev_raidz_impl.h>
|
||||
#include "vdev_raidz_math_impl.h"
|
||||
|
||||
DEFINE_GEN_METHODS(avx2);
|
||||
DEFINE_REC_METHODS(avx2);
|
||||
|
||||
static boolean_t
|
||||
raidz_will_avx2_work(void)
|
||||
{
|
||||
return (zfs_avx_available() && zfs_avx2_available());
|
||||
}
|
||||
|
||||
const raidz_impl_ops_t vdev_raidz_avx2_impl = {
|
||||
.init = NULL,
|
||||
.fini = NULL,
|
||||
.gen = RAIDZ_GEN_METHODS(avx2),
|
||||
.rec = RAIDZ_REC_METHODS(avx2),
|
||||
.is_supported = &raidz_will_avx2_work,
|
||||
.name = "avx2"
|
||||
};
|
||||
|
||||
#endif /* defined(__x86_64) && defined(HAVE_AVX2) */
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,302 @@
|
|||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 2016 Gvozden Nešković. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/vdev_raidz_impl.h>
|
||||
|
||||
/*
|
||||
* Provide native CPU scalar routines.
|
||||
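* Support 32-bit and 64-bit CPUs.
* NOTE(review): #if arithmetic is evaluated in uintmax_t, so the first (32-bit) test below looks like it can never match and ELEM_SIZE is always 8 -- confirm.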
|
||||
*/
|
||||
#if ((~(0x0ULL)) >> 24) == 0xffULL
|
||||
#define ELEM_SIZE 4
|
||||
typedef uint32_t iv_t;
|
||||
#elif ((~(0x0ULL)) >> 56) == 0xffULL
|
||||
#define ELEM_SIZE 8
|
||||
typedef uint64_t iv_t;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Vector type used in scalar implementation
|
||||
*
|
||||
* The union is expected to be of native CPU register size. Since addition
|
||||
* uses XOR operation, it can be performed on all byte elements at once.
|
||||
* Multiplication requires per byte access.
|
||||
*/
|
||||
typedef union {
|
||||
iv_t e;
|
||||
uint8_t b[ELEM_SIZE];
|
||||
} v_t;
|
||||
|
||||
/*
|
||||
* Precomputed lookup tables for multiplication by a constant
|
||||
*
|
||||
* Reconstruction path requires multiplication by constant factors. Instead of
|
||||
* performing two step lookup (log & exp tables), a direct lookup can be used
|
||||
* instead. Multiplication of element 'a' by a constant 'c' is obtained as:
|
||||
*
|
||||
* r = vdev_raidz_mul_lt[c_log][a];
|
||||
*
|
||||
* where c_log = vdev_raidz_log2[c]. Log of coefficient factors is used because
|
||||
* they are faster to obtain while solving the syndrome equations.
|
||||
*
|
||||
* PERFORMANCE NOTE:
|
||||
* Even though the complete lookup table uses 64kiB, only relatively small
|
||||
* portion of it is used at the same time. Following shows number of accessed
|
||||
* bytes for different cases:
|
||||
* - 1 failed disk: 256B (1 mul. coefficient)
|
||||
* - 2 failed disks: 512B (2 mul. coefficients)
|
||||
* - 3 failed disks: 1536B (6 mul. coefficients)
|
||||
*
|
||||
* Size of actually accessed lookup table regions is only larger for
|
||||
* reconstruction of 3 failed disks, when compared to traditional log/exp
|
||||
* method. But since the result is obtained in one lookup step performance is
|
||||
* doubled.
|
||||
*/
|
||||
static uint8_t vdev_raidz_mul_lt[256][256] __attribute__((aligned(256)));
|
||||
|
||||
static void
|
||||
raidz_init_scalar(void)
|
||||
{
|
||||
int c, i;
|
||||
for (c = 0; c < 256; c++)
|
||||
for (i = 0; i < 256; i++)
|
||||
vdev_raidz_mul_lt[c][i] = gf_mul(c, i);
|
||||
|
||||
}
|
||||
|
||||
#define PREFETCHNTA(ptr, offset) {}
|
||||
#define PREFETCH(ptr, offset) {}
|
||||
|
||||
#define XOR_ACC(src, acc) acc.e ^= ((v_t *)src)[0].e
|
||||
#define XOR(src, acc) acc.e ^= src.e
|
||||
#define COPY(src, dst) dst = src
|
||||
#define LOAD(src, val) val = ((v_t *)src)[0]
|
||||
#define STORE(dst, val) ((v_t *)dst)[0] = val
|
||||
|
||||
/*
|
||||
* Constants used for optimized multiplication by 2.
|
||||
*/
|
||||
static const struct {
|
||||
iv_t mod;
|
||||
iv_t mask;
|
||||
iv_t msb;
|
||||
} scalar_mul2_consts = {
|
||||
#if ELEM_SIZE == 8
|
||||
.mod = 0x1d1d1d1d1d1d1d1dULL,
|
||||
.mask = 0xfefefefefefefefeULL,
|
||||
.msb = 0x8080808080808080ULL,
|
||||
#else
|
||||
.mod = 0x1d1d1d1dULL,
|
||||
.mask = 0xfefefefeULL,
|
||||
.msb = 0x80808080ULL,
|
||||
#endif
|
||||
};
|
||||
|
||||
#define MUL2_SETUP() {}
|
||||
|
||||
/*
 * Multiply each byte packed in (a).e by 2, reducing with the per-byte
 * constants from scalar_mul2_consts.
 *
 * The high bit of every byte is isolated with .msb and widened to a full
 * 0xff byte by (x << 1) - (x >> 7). The vector is then shifted left by one
 * with the cross-byte carry bits cleared via .mask, and .mod is XORed into
 * exactly those bytes whose high bit was set.
 */
#define	MUL2(a)								\
{									\
	const iv_t _hi = (a).e & scalar_mul2_consts.msb;		\
	const iv_t _fold = (_hi << 1) - (_hi >> 7);			\
									\
	(a).e <<= 1;							\
	(a).e &= scalar_mul2_consts.mask;				\
	(a).e ^= (_fold & scalar_mul2_consts.mod);			\
}
|
||||
|
||||
#define MUL4(a) \
|
||||
{ \
|
||||
MUL2(a); \
|
||||
MUL2(a); \
|
||||
}
|
||||
|
||||
/*
 * Multiply every byte of vector 'a' by the constant selected by 'c'.
 *
 * Each byte a.b[k] is replaced with vdev_raidz_mul_lt[c][a.b[k]]. With
 * ELEM_SIZE == 8 the upper four bytes are handled first and control then
 * falls through to the code shared with ELEM_SIZE == 4.
 *
 * 'a' is parenthesized on every use so that any lvalue expression of the
 * vector type (for example *ptr) can be passed, matching MUL2().
 */
#define	MUL(c, a)							\
{									\
	const uint8_t *mul_lt = vdev_raidz_mul_lt[c];			\
	switch (ELEM_SIZE) {						\
	case 8:								\
		(a).b[7] = mul_lt[(a).b[7]];				\
		(a).b[6] = mul_lt[(a).b[6]];				\
		(a).b[5] = mul_lt[(a).b[5]];				\
		(a).b[4] = mul_lt[(a).b[4]];				\
		/* fallthrough: low four bytes are common */		\
	case 4:								\
		(a).b[3] = mul_lt[(a).b[3]];				\
		(a).b[2] = mul_lt[(a).b[2]];				\
		(a).b[1] = mul_lt[(a).b[1]];				\
		(a).b[0] = mul_lt[(a).b[0]];				\
		break;							\
	default:							\
		break;							\
	}								\
}
|
||||
|
||||
#define raidz_math_begin() {}
|
||||
#define raidz_math_end() {}
|
||||
|
||||
#define GEN_P_DEFINE() v_t p0
|
||||
#define GEN_P_STRIDE 1
|
||||
#define GEN_P_P p0
|
||||
|
||||
#define GEN_PQ_DEFINE() v_t d0, p0, q0
|
||||
#define GEN_PQ_STRIDE 1
|
||||
#define GEN_PQ_D d0
|
||||
#define GEN_PQ_P p0
|
||||
#define GEN_PQ_Q q0
|
||||
|
||||
#define GEN_PQR_DEFINE() v_t d0, p0, q0, r0
|
||||
#define GEN_PQR_STRIDE 1
|
||||
#define GEN_PQR_D d0
|
||||
#define GEN_PQR_P p0
|
||||
#define GEN_PQR_Q q0
|
||||
#define GEN_PQR_R r0
|
||||
|
||||
#define REC_P_DEFINE() v_t x0
|
||||
#define REC_P_STRIDE 1
|
||||
#define REC_P_X x0
|
||||
|
||||
#define REC_Q_DEFINE() v_t x0
|
||||
#define REC_Q_STRIDE 1
|
||||
#define REC_Q_X x0
|
||||
|
||||
#define REC_R_DEFINE() v_t x0
|
||||
#define REC_R_STRIDE 1
|
||||
#define REC_R_X x0
|
||||
|
||||
#define REC_PQ_DEFINE() v_t x0, y0, d0
|
||||
#define REC_PQ_STRIDE 1
|
||||
#define REC_PQ_X x0
|
||||
#define REC_PQ_Y y0
|
||||
#define REC_PQ_D d0
|
||||
|
||||
#define REC_PR_DEFINE() v_t x0, y0, d0
|
||||
#define REC_PR_STRIDE 1
|
||||
#define REC_PR_X x0
|
||||
#define REC_PR_Y y0
|
||||
#define REC_PR_D d0
|
||||
|
||||
#define REC_QR_DEFINE() v_t x0, y0, d0
|
||||
#define REC_QR_STRIDE 1
|
||||
#define REC_QR_X x0
|
||||
#define REC_QR_Y y0
|
||||
#define REC_QR_D d0
|
||||
|
||||
#define REC_PQR_DEFINE() v_t x0, y0, z0, d0, t0
|
||||
#define REC_PQR_STRIDE 1
|
||||
#define REC_PQR_X x0
|
||||
#define REC_PQR_Y y0
|
||||
#define REC_PQR_Z z0
|
||||
#define REC_PQR_D d0
|
||||
#define REC_PQR_XS d0
|
||||
#define REC_PQR_YS t0
|
||||
|
||||
#include "vdev_raidz_math_impl.h"
|
||||
|
||||
DEFINE_GEN_METHODS(scalar);
|
||||
DEFINE_REC_METHODS(scalar);
|
||||
|
||||
static boolean_t
|
||||
raidz_will_scalar_work(void)
|
||||
{
|
||||
return (B_TRUE); /* always */
|
||||
}
|
||||
|
||||
const raidz_impl_ops_t vdev_raidz_scalar_impl = {
|
||||
.init = raidz_init_scalar,
|
||||
.fini = NULL,
|
||||
.gen = RAIDZ_GEN_METHODS(scalar),
|
||||
.rec = RAIDZ_REC_METHODS(scalar),
|
||||
.is_supported = &raidz_will_scalar_work,
|
||||
.name = "scalar"
|
||||
};
|
||||
|
||||
/* Powers of 2 in the RAID-Z Galois field. */
|
||||
const uint8_t vdev_raidz_pow2[256] __attribute__((aligned(256))) = {
|
||||
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
|
||||
0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
|
||||
0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
|
||||
0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
|
||||
0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
|
||||
0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
|
||||
0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
|
||||
0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
|
||||
0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
|
||||
0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
|
||||
0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
|
||||
0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
|
||||
0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
|
||||
0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
|
||||
0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
|
||||
0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
|
||||
0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
|
||||
0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
|
||||
0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
|
||||
0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
|
||||
0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
|
||||
0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
|
||||
0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
|
||||
0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
|
||||
0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
|
||||
0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
|
||||
0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
|
||||
0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
|
||||
0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
|
||||
0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
|
||||
0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
|
||||
0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01
|
||||
};
|
||||
|
||||
/* Logs of 2 in the RAID-Z Galois field. */
|
||||
const uint8_t vdev_raidz_log2[256] __attribute__((aligned(256))) = {
|
||||
0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6,
|
||||
0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b,
|
||||
0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81,
|
||||
0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71,
|
||||
0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21,
|
||||
0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45,
|
||||
0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9,
|
||||
0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6,
|
||||
0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd,
|
||||
0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88,
|
||||
0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd,
|
||||
0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40,
|
||||
0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e,
|
||||
0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d,
|
||||
0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b,
|
||||
0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57,
|
||||
0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d,
|
||||
0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18,
|
||||
0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c,
|
||||
0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e,
|
||||
0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd,
|
||||
0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61,
|
||||
0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e,
|
||||
0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2,
|
||||
0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76,
|
||||
0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6,
|
||||
0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa,
|
||||
0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a,
|
||||
0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51,
|
||||
0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7,
|
||||
0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8,
|
||||
0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf,
|
||||
};
|
File diff suppressed because it is too large
Load Diff
|
@ -518,6 +518,9 @@ tests = ['poolversion_001_pos', 'poolversion_002_pos']
|
|||
[tests/functional/quota]
|
||||
tests = ['quota_001_pos', 'quota_003_pos', 'quota_006_neg']
|
||||
|
||||
[tests/functional/raidz]
|
||||
tests = ['raidz_001_neg', 'raidz_002_pos']
|
||||
|
||||
[tests/functional/redundancy]
|
||||
tests = ['redundancy_001_pos', 'redundancy_002_pos', 'redundancy_003_pos']
|
||||
|
||||
|
|
|
@ -42,6 +42,7 @@ export ZINJECT=${ZINJECT:-${sbindir}/zinject}
|
|||
export ZPOOL=${ZPOOL:-${sbindir}/zpool}
|
||||
export ZTEST=${ZTEST:-${sbindir}/ztest}
|
||||
export ZPIOS=${ZPIOS:-${sbindir}/zpios}
|
||||
export RAIDZ_TEST=${RAIDZ_TEST:-${bindir}/raidz_test}
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
|
||||
|
|
|
@ -34,6 +34,7 @@ SUBDIRS = \
|
|||
poolversion \
|
||||
privilege \
|
||||
quota \
|
||||
raidz \
|
||||
redundancy \
|
||||
refquota \
|
||||
refreserv \
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/raidz
|
||||
dist_pkgdata_SCRIPTS = \
|
||||
setup.ksh \
|
||||
cleanup.ksh \
|
||||
raidz_001_neg.ksh \
|
||||
raidz_002_pos.ksh
|
|
@ -0,0 +1,30 @@
|
|||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or http://www.opensolaris.org/os/licensing.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2016 by Gvozden Neskovic. All rights reserved.
|
||||
# Use is subject to license terms.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
|
||||
# default_cleanup
|
|
@ -0,0 +1,38 @@
|
|||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or http://www.opensolaris.org/os/licensing.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2016 by Gvozden Neskovic. All rights reserved.
|
||||
# Use is subject to license terms.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Call the raidz_test tool with -T options to test the infrastructure.
|
||||
# This option should make raidz_test return a non-zero exit status.
|
||||
#
|
||||
|
||||
log_mustnot $RAIDZ_TEST -T
|
||||
|
||||
# Report success: raidz_test -T was expected to fail, and it did.
log_pass "raidz_test detects errors as expected."
|
|
@ -0,0 +1,41 @@
|
|||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or http://www.opensolaris.org/os/licensing.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2016 by Gvozden Neskovic. All rights reserved.
|
||||
# Use is subject to license terms.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Call the raidz_test tool with -S to test all supported raidz
|
||||
# implementations. This option will test several raidz block geometries
|
||||
# and several zio parameters that affect raidz block layout. Data
|
||||
# reconstruction performs all combinations of failed disks. Wall time
|
||||
# is set to 5min, but actual runtime might be longer.
|
||||
#
|
||||
|
||||
log_must $RAIDZ_TEST -S -t 300
|
||||
|
||||
log_pass "raidz_test parameter sweep test succeeded."
|
|
@ -0,0 +1,32 @@
|
|||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or http://www.opensolaris.org/os/licensing.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2016 by Gvozden Neskovic. All rights reserved.
|
||||
# Use is subject to license terms.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
|
||||
verify_runnable "global"
|
||||
|
||||
log_pass
|
|
@ -28,6 +28,7 @@ export ZINJECT=${CMDDIR}/zinject/zinject
|
|||
export ZPOOL=${CMDDIR}/zpool/zpool
|
||||
export ZTEST=${CMDDIR}/ztest/ztest
|
||||
export ZPIOS=${CMDDIR}/zpios/zpios
|
||||
# In-tree location of the raidz_test binary (no trailing brace in the path).
export RAIDZ_TEST=${CMDDIR}/raidz_test/raidz_test
|
||||
|
||||
export COMMON_SH=${SCRIPTDIR}/common.sh
|
||||
export ZFS_SH=${SCRIPTDIR}/zfs.sh
|
||||
|
|
Loading…
Reference in New Issue