ABD Vectorized raidz

Enable vectorized raidz code on ABD buffers.  The avx512f,
avx512bw, aarch64_neon and aarch64_neonx2 implementations are
disabled in this commit.  With the exception of avx512bw, these
implementations are updated for ABD in subsequent commits.

Signed-off-by: Gvozden Neskovic <neskovic@gmail.com>
This commit is contained in:
Gvozden Neskovic 2016-08-24 15:51:33 +02:00 committed by Brian Behlendorf
parent a206522c4f
commit cbf484f8ad
13 changed files with 1438 additions and 1051 deletions

View File

@ -23,8 +23,6 @@
* Copyright (C) 2016 Gvozden Nešković. All rights reserved.
*/
#ifdef _ABD_READY_
#include <sys/zfs_context.h>
#include <sys/time.h>
#include <sys/wait.h>
@ -55,18 +53,18 @@ bench_init_raidz_map(void)
/*
* To permit larger column sizes these have to be
* allocated using aligned alloc instead of zio_data_buf_alloc
* allocated using aligned alloc instead of zio_abd_buf_alloc
*/
zio_bench.io_data = raidz_alloc(max_data_size);
zio_bench.io_abd = raidz_alloc(max_data_size);
init_zio_data(&zio_bench);
init_zio_abd(&zio_bench);
}
static void
bench_fini_raidz_maps(void)
{
/* tear down golden zio */
raidz_free(zio_bench.io_data, max_data_size);
raidz_free(zio_bench.io_abd, max_data_size);
bzero(&zio_bench, sizeof (zio_t));
}
@ -227,4 +225,3 @@ run_raidz_benchmark(void)
bench_fini_raidz_maps();
}
#endif

View File

@ -32,16 +32,6 @@
#include <sys/vdev_raidz_impl.h>
#include <assert.h>
#include <stdio.h>
#ifndef _ABD_READY_
int
main(int argc, char **argv)
{
exit(0);
}
#else
#include "raidz_test.h"
static int *rand_data;
@ -191,10 +181,10 @@ static void process_options(int argc, char **argv)
}
}
#define DATA_COL(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_data)
#define DATA_COL(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_abd)
#define DATA_COL_SIZE(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_size)
#define CODE_COL(rm, i) ((rm)->rm_col[(i)].rc_data)
#define CODE_COL(rm, i) ((rm)->rm_col[(i)].rc_abd)
#define CODE_COL_SIZE(rm, i) ((rm)->rm_col[(i)].rc_size)
static int
@ -205,10 +195,9 @@ cmp_code(raidz_test_opts_t *opts, const raidz_map_t *rm, const int parity)
VERIFY(parity >= 1 && parity <= 3);
for (i = 0; i < parity; i++) {
if (0 != memcmp(CODE_COL(rm, i), CODE_COL(opts->rm_golden, i),
CODE_COL_SIZE(rm, i))) {
if (abd_cmp(CODE_COL(rm, i), CODE_COL(opts->rm_golden, i))
!= 0) {
ret++;
LOG_OPT(D_DEBUG, opts,
"\nParity block [%d] different!\n", i);
}
@ -223,8 +212,8 @@ cmp_data(raidz_test_opts_t *opts, raidz_map_t *rm)
int dcols = opts->rm_golden->rm_cols - raidz_parity(opts->rm_golden);
for (i = 0; i < dcols; i++) {
if (0 != memcmp(DATA_COL(opts->rm_golden, i), DATA_COL(rm, i),
DATA_COL_SIZE(opts->rm_golden, i))) {
if (abd_cmp(DATA_COL(opts->rm_golden, i), DATA_COL(rm, i))
!= 0) {
ret++;
LOG_OPT(D_DEBUG, opts,
@ -234,37 +223,55 @@ cmp_data(raidz_test_opts_t *opts, raidz_map_t *rm)
return (ret);
}
/*
 * Callback for abd_iterate_func(): fill one buffer chunk with the
 * reference pattern held in rand_data.
 *
 * data    - start of the chunk to fill
 * size    - chunk length in bytes; only whole int-sized words are written,
 *           any trailing bytes are left untouched
 * private - unused
 *
 * Every invocation copies from the beginning of rand_data, so the pattern
 * restarts on each chunk handed to this callback.
 * NOTE(review): assumes rand_data holds at least size / sizeof (int)
 * entries -- confirm against its allocation.
 *
 * Always returns 0.
 */
static int
init_rand(void *data, size_t size, void *private)
{
	size_t i;
	/* size_t index: the word count is derived from a size_t byte count */
	size_t nwords = size / sizeof (int);
	int *dst = (int *) data;

	for (i = 0; i < nwords; i++)
		dst[i] = rand_data[i];

	return (0);
}
/*
 * Callback for abd_iterate_func(): overwrite one buffer chunk with
 * values from rand().
 *
 * data    - start of the chunk to overwrite
 * size    - chunk length in bytes; only whole int-sized words are written,
 *           any trailing bytes are left untouched
 * private - unused
 *
 * Always returns 0.
 */
static int
corrupt_rand(void *data, size_t size, void *private)
{
	size_t i;
	/* size_t index: the word count is derived from a size_t byte count */
	size_t nwords = size / sizeof (int);
	int *dst = (int *) data;

	for (i = 0; i < nwords; i++)
		dst[i] = rand();

	return (0);
}
static void
corrupt_colums(raidz_map_t *rm, const int *tgts, const int cnt)
{
int i;
int *dst;
raidz_col_t *col;
for (i = 0; i < cnt; i++) {
col = &rm->rm_col[tgts[i]];
dst = col->rc_data;
for (i = 0; i < col->rc_size / sizeof (int); i++)
dst[i] = rand();
abd_iterate_func(col->rc_abd, 0, col->rc_size, corrupt_rand,
NULL);
}
}
void
init_zio_data(zio_t *zio)
init_zio_abd(zio_t *zio)
{
int i;
int *dst = (int *) zio->io_data;
for (i = 0; i < zio->io_size / sizeof (int); i++) {
dst[i] = rand_data[i];
}
abd_iterate_func(zio->io_abd, 0, zio->io_size, init_rand, NULL);
}
static void
fini_raidz_map(zio_t **zio, raidz_map_t **rm)
{
vdev_raidz_map_free(*rm);
raidz_free((*zio)->io_data, (*zio)->io_size);
raidz_free((*zio)->io_abd, (*zio)->io_size);
umem_free(*zio, sizeof (zio_t));
*zio = NULL;
@ -289,11 +296,11 @@ init_raidz_golden_map(raidz_test_opts_t *opts, const int parity)
opts->zio_golden->io_offset = zio_test->io_offset = opts->rto_offset;
opts->zio_golden->io_size = zio_test->io_size = opts->rto_dsize;
opts->zio_golden->io_data = raidz_alloc(opts->rto_dsize);
zio_test->io_data = raidz_alloc(opts->rto_dsize);
opts->zio_golden->io_abd = raidz_alloc(opts->rto_dsize);
zio_test->io_abd = raidz_alloc(opts->rto_dsize);
init_zio_data(opts->zio_golden);
init_zio_data(zio_test);
init_zio_abd(opts->zio_golden);
init_zio_abd(zio_test);
VERIFY0(vdev_raidz_impl_set("original"));
@ -336,8 +343,8 @@ init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity)
(*zio)->io_offset = 0;
(*zio)->io_size = alloc_dsize;
(*zio)->io_data = raidz_alloc(alloc_dsize);
init_zio_data(*zio);
(*zio)->io_abd = raidz_alloc(alloc_dsize);
init_zio_abd(*zio);
rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift,
total_ncols, parity);
@ -792,4 +799,3 @@ main(int argc, char **argv)
return (err);
}
#endif

View File

@ -104,11 +104,11 @@ static inline size_t ilog2(size_t a)
#define SEP "----------------\n"
#define raidz_alloc(size) zio_data_buf_alloc(size)
#define raidz_free(p, size) zio_data_buf_free(p, size)
#define raidz_alloc(size) abd_alloc(size, B_FALSE)
#define raidz_free(p, size) abd_free(p)
void init_zio_data(zio_t *zio);
void init_zio_abd(zio_t *zio);
void run_raidz_benchmark(void);

View File

@ -44,16 +44,6 @@ static raidz_impl_ops_t vdev_raidz_fastest_impl = {
.name = "fastest"
};
/* ABD BRINGUP -- not ready yet */
#if 1
#ifdef HAVE_SSSE3
#undef HAVE_SSSE3
#endif
#ifdef HAVE_AVX2
#undef HAVE_AVX2
#endif
#endif
/* All compiled in implementations */
const raidz_impl_ops_t *raidz_all_maths[] = {
&vdev_raidz_original_impl,
@ -68,14 +58,14 @@ const raidz_impl_ops_t *raidz_all_maths[] = {
&vdev_raidz_avx2_impl,
#endif
#if defined(__x86_64) && defined(HAVE_AVX512F) /* only x86_64 for now */
&vdev_raidz_avx512f_impl,
// &vdev_raidz_avx512f_impl,
#endif
#if defined(__x86_64) && defined(HAVE_AVX512BW) /* only x86_64 for now */
&vdev_raidz_avx512bw_impl,
// &vdev_raidz_avx512bw_impl,
#endif
#if defined(__aarch64__)
&vdev_raidz_aarch64_neon_impl,
&vdev_raidz_aarch64_neonx2_impl,
// &vdev_raidz_aarch64_neon_impl,
// &vdev_raidz_aarch64_neonx2_impl,
#endif
};
@ -159,8 +149,6 @@ vdev_raidz_math_generate(raidz_map_t *rm)
{
raidz_gen_f gen_parity = NULL;
/* ABD Bringup -- vector code not ready */
#if 0
switch (raidz_parity(rm)) {
case 1:
gen_parity = rm->rm_ops->gen[RAIDZ_GEN_P];
@ -177,7 +165,6 @@ vdev_raidz_math_generate(raidz_map_t *rm)
raidz_parity(rm));
break;
}
#endif
/* if method is NULL execute the original implementation */
if (gen_parity == NULL)
@ -188,8 +175,6 @@ vdev_raidz_math_generate(raidz_map_t *rm)
return (0);
}
/* ABD Bringup -- vector code not ready */
#if 0
static raidz_rec_f
reconstruct_fun_p_sel(raidz_map_t *rm, const int *parity_valid,
const int nbaddata)
@ -244,7 +229,6 @@ reconstruct_fun_pqr_sel(raidz_map_t *rm, const int *parity_valid,
}
return ((raidz_rec_f) NULL);
}
#endif
/*
* Select data reconstruction method for raidz_map
@ -256,31 +240,28 @@ int
vdev_raidz_math_reconstruct(raidz_map_t *rm, const int *parity_valid,
const int *dt, const int nbaddata)
{
raidz_rec_f rec_data = NULL;
raidz_rec_f rec_fn = NULL;
/* ABD Bringup -- vector code not ready */
#if 0
switch (raidz_parity(rm)) {
case PARITY_P:
rec_data = reconstruct_fun_p_sel(rm, parity_valid, nbaddata);
rec_fn = reconstruct_fun_p_sel(rm, parity_valid, nbaddata);
break;
case PARITY_PQ:
rec_data = reconstruct_fun_pq_sel(rm, parity_valid, nbaddata);
rec_fn = reconstruct_fun_pq_sel(rm, parity_valid, nbaddata);
break;
case PARITY_PQR:
rec_data = reconstruct_fun_pqr_sel(rm, parity_valid, nbaddata);
rec_fn = reconstruct_fun_pqr_sel(rm, parity_valid, nbaddata);
break;
default:
cmn_err(CE_PANIC, "invalid RAID-Z configuration %d",
raidz_parity(rm));
break;
}
#endif
if (rec_data == NULL)
if (rec_fn == NULL)
return (RAIDZ_ORIGINAL_IMPL);
else
return (rec_data(rm, dt));
return (rec_fn(rm, dt));
}
const char *raidz_gen_name[] = {

View File

@ -23,8 +23,9 @@
*/
#include <sys/isa_defs.h>
#include <sys/types.h>
#if defined(__aarch64__)
#if 0 // defined(__aarch64__)
#include "vdev_raidz_math_aarch64_neon_common.h"
@ -153,7 +154,7 @@ const raidz_impl_ops_t vdev_raidz_aarch64_neon_impl = {
#endif /* defined(__aarch64__) */
#if defined(__aarch64__)
#if 0 // defined(__aarch64__)
const uint8_t
__attribute__((aligned(256))) gf_clmul_mod_lt[4*256][16] = {

View File

@ -24,7 +24,7 @@
#include <sys/isa_defs.h>
#if defined(__aarch64__)
#if 0 // defined(__aarch64__)
#include "vdev_raidz_math_aarch64_neon_common.h"

View File

@ -334,59 +334,86 @@ static const uint8_t __attribute__((aligned(32))) _mul_mask = 0x0F;
kfpu_end(); \
}
#define GEN_P_DEFINE() {}
#define SYN_STRIDE 4
#define ZERO_STRIDE 4
#define ZERO_DEFINE() {}
#define ZERO_D 0, 1, 2, 3
#define COPY_STRIDE 4
#define COPY_DEFINE() {}
#define COPY_D 0, 1, 2, 3
#define ADD_STRIDE 4
#define ADD_DEFINE() {}
#define ADD_D 0, 1, 2, 3
#define MUL_STRIDE 4
#define MUL_DEFINE() {}
#define MUL_D 0, 1, 2, 3
#define GEN_P_STRIDE 4
#define GEN_P_DEFINE() {}
#define GEN_P_P 0, 1, 2, 3
#define GEN_PQ_DEFINE() {}
#define GEN_PQ_STRIDE 4
#define GEN_PQ_DEFINE() {}
#define GEN_PQ_D 0, 1, 2, 3
#define GEN_PQ_P 4, 5, 6, 7
#define GEN_PQ_Q 8, 9, 10, 11
#define GEN_PQ_C 4, 5, 6, 7
#define GEN_PQR_STRIDE 4
#define GEN_PQR_DEFINE() {}
#define GEN_PQR_STRIDE 2
#define GEN_PQR_D 0, 1
#define GEN_PQR_P 2, 3
#define GEN_PQR_Q 4, 5
#define GEN_PQR_R 6, 7
#define GEN_PQR_D 0, 1, 2, 3
#define GEN_PQR_C 4, 5, 6, 7
#define REC_P_DEFINE() {}
#define REC_P_STRIDE 4
#define REC_P_X 0, 1, 2, 3
#define SYN_Q_DEFINE() {}
#define SYN_Q_D 0, 1, 2, 3
#define SYN_Q_X 4, 5, 6, 7
#define REC_Q_DEFINE() {}
#define REC_Q_STRIDE 4
#define REC_Q_X 0, 1, 2, 3
#define SYN_R_DEFINE() {}
#define SYN_R_D 0, 1, 2, 3
#define SYN_R_X 4, 5, 6, 7
#define REC_R_DEFINE() {}
#define REC_R_STRIDE 4
#define REC_R_X 0, 1, 2, 3
#define SYN_PQ_DEFINE() {}
#define SYN_PQ_D 0, 1, 2, 3
#define SYN_PQ_X 4, 5, 6, 7
#define REC_PQ_DEFINE() {}
#define REC_PQ_STRIDE 2
#define REC_PQ_DEFINE() {}
#define REC_PQ_X 0, 1
#define REC_PQ_Y 2, 3
#define REC_PQ_D 4, 5
#define REC_PQ_T 4, 5
#define SYN_PR_DEFINE() {}
#define SYN_PR_D 0, 1, 2, 3
#define SYN_PR_X 4, 5, 6, 7
#define REC_PR_DEFINE() {}
#define REC_PR_STRIDE 2
#define REC_PR_DEFINE() {}
#define REC_PR_X 0, 1
#define REC_PR_Y 2, 3
#define REC_PR_D 4, 5
#define REC_PR_T 4, 5
#define SYN_QR_DEFINE() {}
#define SYN_QR_D 0, 1, 2, 3
#define SYN_QR_X 4, 5, 6, 7
#define REC_QR_DEFINE() {}
#define REC_QR_STRIDE 2
#define REC_QR_DEFINE() {}
#define REC_QR_X 0, 1
#define REC_QR_Y 2, 3
#define REC_QR_D 4, 5
#define REC_QR_T 4, 5
#define SYN_PQR_DEFINE() {}
#define SYN_PQR_D 0, 1, 2, 3
#define SYN_PQR_X 4, 5, 6, 7
#define REC_PQR_DEFINE() {}
#define REC_PQR_STRIDE 2
#define REC_PQR_DEFINE() {}
#define REC_PQR_X 0, 1
#define REC_PQR_Y 2, 3
#define REC_PQR_Z 4, 5
#define REC_PQR_D 6, 7
#define REC_PQR_XS 6, 7
#define REC_PQR_YS 8, 9
@ -400,12 +427,7 @@ DEFINE_REC_METHODS(avx2);
static boolean_t
raidz_will_avx2_work(void)
{
/* ABD Bringup -- vector code not ready */
#if 1
return (B_FALSE);
#else
return (zfs_avx_available() && zfs_avx2_available());
#endif
}
const raidz_impl_ops_t vdev_raidz_avx2_impl = {

View File

@ -24,7 +24,7 @@
#include <sys/isa_defs.h>
#if defined(__x86_64) && defined(HAVE_AVX512BW)
#if 0 // defined(__x86_64) && defined(HAVE_AVX512BW)
#include <sys/types.h>
#include <linux/simd_x86.h>
@ -345,6 +345,22 @@ static const uint8_t __attribute__((aligned(32))) _mul_mask = 0x0F;
kfpu_end(); \
}
#define ZERO_STRIDE 4
#define ZERO_DEFINE() {}
#define ZERO_D 0, 1, 2, 3
#define COPY_STRIDE 4
#define COPY_DEFINE() {}
#define COPY_D 0, 1, 2, 3
#define ADD_STRIDE 4
#define ADD_DEFINE() {}
#define ADD_D 0, 1, 2, 3
#define MUL_STRIDE 4
#define MUL_DEFINE() {}
#define MUL_D 0, 1, 2, 3
#define GEN_P_DEFINE() {}
#define GEN_P_STRIDE 4
#define GEN_P_P 0, 1, 2, 3

View File

@ -24,7 +24,7 @@
#include <sys/isa_defs.h>
#if defined(__x86_64) && defined(HAVE_AVX512F)
#if 0 // defined(__x86_64) && defined(HAVE_AVX512F)
#include <sys/types.h>
#include <linux/simd_x86.h>
@ -437,6 +437,21 @@ typedef struct v {
kfpu_end(); \
}
#define ZERO_STRIDE 4
#define ZERO_DEFINE() {}
#define ZERO_D 20, 21, 22, 23
#define COPY_STRIDE 4
#define COPY_DEFINE() {}
#define COPY_D 20, 21, 22, 23
#define ADD_STRIDE 4
#define ADD_DEFINE() {}
#define ADD_D 20, 21, 22, 23
#define MUL_STRIDE 4
#define MUL_DEFINE() {}
#define MUL_D 20, 21, 22, 23
/*
* This uses zmm16-zmm31 registers to free up zmm0-zmm15
* to use with the AVX2 pshufb, see above

File diff suppressed because it is too large Load Diff

View File

@ -24,6 +24,7 @@
*/
#include <sys/vdev_raidz_impl.h>
/*
* Provide native CPU scalar routines.
* Support 32bit and 64bit CPUs.
@ -153,72 +154,97 @@ static const struct {
#define raidz_math_begin() {}
#define raidz_math_end() {}
#define GEN_P_DEFINE() v_t p0
#define GEN_P_STRIDE 1
#define GEN_P_P p0
#define SYN_STRIDE 1
#define GEN_PQ_DEFINE() v_t d0, p0, q0
#define GEN_PQ_STRIDE 1
#define GEN_PQ_D d0
#define GEN_PQ_P p0
#define GEN_PQ_Q q0
#define ZERO_DEFINE() v_t d0
#define ZERO_STRIDE 1
#define ZERO_D d0
#define GEN_PQR_DEFINE() v_t d0, p0, q0, r0
#define GEN_PQR_STRIDE 1
#define GEN_PQR_D d0
#define GEN_PQR_P p0
#define GEN_PQR_Q q0
#define GEN_PQR_R r0
#define COPY_DEFINE() v_t d0
#define COPY_STRIDE 1
#define COPY_D d0
#define REC_P_DEFINE() v_t x0
#define REC_P_STRIDE 1
#define REC_P_X x0
#define ADD_DEFINE() v_t d0
#define ADD_STRIDE 1
#define ADD_D d0
#define REC_Q_DEFINE() v_t x0
#define REC_Q_STRIDE 1
#define REC_Q_X x0
#define MUL_DEFINE() v_t d0
#define MUL_STRIDE 1
#define MUL_D d0
#define REC_R_DEFINE() v_t x0
#define REC_R_STRIDE 1
#define REC_R_X x0
#define GEN_P_STRIDE 1
#define GEN_P_DEFINE() v_t p0
#define GEN_P_P p0
#define REC_PQ_DEFINE() v_t x0, y0, d0
#define REC_PQ_STRIDE 1
#define REC_PQ_X x0
#define REC_PQ_Y y0
#define REC_PQ_D d0
#define GEN_PQ_STRIDE 1
#define GEN_PQ_DEFINE() v_t d0, c0
#define GEN_PQ_D d0
#define GEN_PQ_C c0
#define REC_PR_DEFINE() v_t x0, y0, d0
#define REC_PR_STRIDE 1
#define REC_PR_X x0
#define REC_PR_Y y0
#define REC_PR_D d0
#define GEN_PQR_STRIDE 1
#define GEN_PQR_DEFINE() v_t d0, c0
#define GEN_PQR_D d0
#define GEN_PQR_C c0
#define REC_QR_DEFINE() v_t x0, y0, d0
#define REC_QR_STRIDE 1
#define REC_QR_X x0
#define REC_QR_Y y0
#define REC_QR_D d0
#define SYN_Q_DEFINE() v_t d0, x0
#define SYN_Q_D d0
#define SYN_Q_X x0
#define REC_PQR_DEFINE() v_t x0, y0, z0, d0, t0
#define REC_PQR_STRIDE 1
#define REC_PQR_X x0
#define REC_PQR_Y y0
#define REC_PQR_Z z0
#define REC_PQR_D d0
#define REC_PQR_XS d0
#define REC_PQR_YS t0
#define SYN_R_DEFINE() v_t d0, x0
#define SYN_R_D d0
#define SYN_R_X x0
#define SYN_PQ_DEFINE() v_t d0, x0
#define SYN_PQ_D d0
#define SYN_PQ_X x0
#define REC_PQ_STRIDE 1
#define REC_PQ_DEFINE() v_t x0, y0, t0
#define REC_PQ_X x0
#define REC_PQ_Y y0
#define REC_PQ_T t0
#define SYN_PR_DEFINE() v_t d0, x0
#define SYN_PR_D d0
#define SYN_PR_X x0
#define REC_PR_STRIDE 1
#define REC_PR_DEFINE() v_t x0, y0, t0
#define REC_PR_X x0
#define REC_PR_Y y0
#define REC_PR_T t0
#define SYN_QR_DEFINE() v_t d0, x0
#define SYN_QR_D d0
#define SYN_QR_X x0
#define REC_QR_STRIDE 1
#define REC_QR_DEFINE() v_t x0, y0, t0
#define REC_QR_X x0
#define REC_QR_Y y0
#define REC_QR_T t0
#define SYN_PQR_DEFINE() v_t d0, x0
#define SYN_PQR_D d0
#define SYN_PQR_X x0
#define REC_PQR_STRIDE 1
#define REC_PQR_DEFINE() v_t x0, y0, z0, xs0, ys0
#define REC_PQR_X x0
#define REC_PQR_Y y0
#define REC_PQR_Z z0
#define REC_PQR_XS xs0
#define REC_PQR_YS ys0
#include "vdev_raidz_math_impl.h"
/*
* If compiled with -O0, gcc doesn't do any stack frame coalescing
* and -Wframe-larger-than=1024 is triggered in debug mode.
* Starting with gcc 4.8, new opt level -Og is introduced for debugging, which
* does not trigger this warning.
*/
#pragma GCC diagnostic ignored "-Wframe-larger-than="
DEFINE_GEN_METHODS(scalar);
DEFINE_REC_METHODS(scalar);

View File

@ -236,6 +236,10 @@ typedef struct v {
#define MUL2(r...) \
{ \
switch (REG_CNT(r)) { \
case 4: \
_MUL2_x2(VR0(r), VR1(r)); \
_MUL2_x2(VR2(r), VR3(r)); \
break; \
case 2: \
_MUL2_x2(VR0(r), VR1(r)); \
break; \
@ -271,8 +275,8 @@ typedef struct v {
if (x & 0x80) { MUL2(in); XOR(in, acc); } \
}
#define _mul_x1_in 9
#define _mul_x1_acc 11
#define _mul_x1_in 11
#define _mul_x1_acc 12
#define MUL_x1_DEFINE(x) \
static void \
@ -533,61 +537,87 @@ gf_x2_mul_fns[256] = {
#define raidz_math_begin() kfpu_begin()
#define raidz_math_end() kfpu_end()
#define GEN_P_DEFINE() {}
#define SYN_STRIDE 4
#define ZERO_STRIDE 4
#define ZERO_DEFINE() {}
#define ZERO_D 0, 1, 2, 3
#define COPY_STRIDE 4
#define COPY_DEFINE() {}
#define COPY_D 0, 1, 2, 3
#define ADD_STRIDE 4
#define ADD_DEFINE() {}
#define ADD_D 0, 1, 2, 3
#define MUL_STRIDE 2
#define MUL_DEFINE() {}
#define MUL_D 0, 1
#define GEN_P_STRIDE 4
#define GEN_P_DEFINE() {}
#define GEN_P_P 0, 1, 2, 3
#define GEN_PQ_STRIDE 4
#define GEN_PQ_DEFINE() {}
#define GEN_PQ_STRIDE 2
#define GEN_PQ_D 0, 1
#define GEN_PQ_P 2, 3
#define GEN_PQ_Q 4, 5
#define GEN_PQ_D 0, 1, 2, 3
#define GEN_PQ_C 4, 5, 6, 7
#define GEN_PQR_STRIDE 4
#define GEN_PQR_DEFINE() {}
#define GEN_PQR_STRIDE 2
#define GEN_PQR_D 0, 1
#define GEN_PQR_P 2, 3
#define GEN_PQR_Q 4, 5
#define GEN_PQR_R 6, 7
#define GEN_PQR_D 0, 1, 2, 3
#define GEN_PQR_C 4, 5, 6, 7
#define REC_P_DEFINE() {}
#define REC_P_STRIDE 4
#define REC_P_X 0, 1, 2, 3
#define SYN_Q_DEFINE() {}
#define SYN_Q_D 0, 1, 2, 3
#define SYN_Q_X 4, 5, 6, 7
#define REC_Q_DEFINE() {}
#define REC_Q_STRIDE 2
#define REC_Q_X 0, 1
#define SYN_R_DEFINE() {}
#define SYN_R_D 0, 1, 2, 3
#define SYN_R_X 4, 5, 6, 7
#define REC_R_DEFINE() {}
#define REC_R_STRIDE 2
#define REC_R_X 0, 1
#define SYN_PQ_DEFINE() {}
#define SYN_PQ_D 0, 1, 2, 3
#define SYN_PQ_X 4, 5, 6, 7
#define REC_PQ_DEFINE() {}
#define REC_PQ_STRIDE 2
#define REC_PQ_DEFINE() {}
#define REC_PQ_X 0, 1
#define REC_PQ_Y 2, 3
#define REC_PQ_D 4, 5
#define REC_PQ_T 4, 5
#define SYN_PR_DEFINE() {}
#define SYN_PR_D 0, 1, 2, 3
#define SYN_PR_X 4, 5, 6, 7
#define REC_PR_DEFINE() {}
#define REC_PR_STRIDE 2
#define REC_PR_DEFINE() {}
#define REC_PR_X 0, 1
#define REC_PR_Y 2, 3
#define REC_PR_D 4, 5
#define REC_PR_T 4, 5
#define SYN_QR_DEFINE() {}
#define SYN_QR_D 0, 1, 2, 3
#define SYN_QR_X 4, 5, 6, 7
#define REC_QR_DEFINE() {}
#define REC_QR_STRIDE 2
#define REC_QR_DEFINE() {}
#define REC_QR_X 0, 1
#define REC_QR_Y 2, 3
#define REC_QR_D 4, 5
#define REC_QR_T 4, 5
#define SYN_PQR_DEFINE() {}
#define SYN_PQR_D 0, 1, 2, 3
#define SYN_PQR_X 4, 5, 6, 7
#define REC_PQR_DEFINE() {}
#define REC_PQR_STRIDE 1
#define REC_PQR_DEFINE() {}
#define REC_PQR_X 0
#define REC_PQR_Y 1
#define REC_PQR_Z 2
#define REC_PQR_D 3
#define REC_PQR_XS 4
#define REC_PQR_YS 5
#define REC_PQR_XS 3
#define REC_PQR_YS 4
#include <sys/vdev_raidz_impl.h>

View File

@ -337,59 +337,86 @@ typedef struct v {
#define raidz_math_begin() kfpu_begin()
#define raidz_math_end() kfpu_end()
#define GEN_P_DEFINE() {}
#define SYN_STRIDE 4
#define ZERO_STRIDE 4
#define ZERO_DEFINE() {}
#define ZERO_D 0, 1, 2, 3
#define COPY_STRIDE 4
#define COPY_DEFINE() {}
#define COPY_D 0, 1, 2, 3
#define ADD_STRIDE 4
#define ADD_DEFINE() {}
#define ADD_D 0, 1, 2, 3
#define MUL_STRIDE 4
#define MUL_DEFINE() {}
#define MUL_D 0, 1, 2, 3
#define GEN_P_STRIDE 4
#define GEN_P_DEFINE() {}
#define GEN_P_P 0, 1, 2, 3
#define GEN_PQ_DEFINE() {}
#define GEN_PQ_STRIDE 4
#define GEN_PQ_DEFINE() {}
#define GEN_PQ_D 0, 1, 2, 3
#define GEN_PQ_P 4, 5, 6, 7
#define GEN_PQ_Q 8, 9, 10, 11
#define GEN_PQ_C 4, 5, 6, 7
#define GEN_PQR_STRIDE 4
#define GEN_PQR_DEFINE() {}
#define GEN_PQR_STRIDE 2
#define GEN_PQR_D 0, 1
#define GEN_PQR_P 2, 3
#define GEN_PQR_Q 4, 5
#define GEN_PQR_R 6, 7
#define GEN_PQR_D 0, 1, 2, 3
#define GEN_PQR_C 4, 5, 6, 7
#define REC_P_DEFINE() {}
#define REC_P_STRIDE 4
#define REC_P_X 0, 1, 2, 3
#define SYN_Q_DEFINE() {}
#define SYN_Q_D 0, 1, 2, 3
#define SYN_Q_X 4, 5, 6, 7
#define REC_Q_DEFINE() {}
#define REC_Q_STRIDE 4
#define REC_Q_X 0, 1, 2, 3
#define SYN_R_DEFINE() {}
#define SYN_R_D 0, 1, 2, 3
#define SYN_R_X 4, 5, 6, 7
#define REC_R_DEFINE() {}
#define REC_R_STRIDE 4
#define REC_R_X 0, 1, 2, 3
#define SYN_PQ_DEFINE() {}
#define SYN_PQ_D 0, 1, 2, 3
#define SYN_PQ_X 4, 5, 6, 7
#define REC_PQ_DEFINE() {}
#define REC_PQ_STRIDE 2
#define REC_PQ_DEFINE() {}
#define REC_PQ_X 0, 1
#define REC_PQ_Y 2, 3
#define REC_PQ_D 4, 5
#define REC_PQ_T 4, 5
#define SYN_PR_DEFINE() {}
#define SYN_PR_D 0, 1, 2, 3
#define SYN_PR_X 4, 5, 6, 7
#define REC_PR_DEFINE() {}
#define REC_PR_STRIDE 2
#define REC_PR_DEFINE() {}
#define REC_PR_X 0, 1
#define REC_PR_Y 2, 3
#define REC_PR_D 4, 5
#define REC_PR_T 4, 5
#define SYN_QR_DEFINE() {}
#define SYN_QR_D 0, 1, 2, 3
#define SYN_QR_X 4, 5, 6, 7
#define REC_QR_DEFINE() {}
#define REC_QR_STRIDE 2
#define REC_QR_DEFINE() {}
#define REC_QR_X 0, 1
#define REC_QR_Y 2, 3
#define REC_QR_D 4, 5
#define REC_QR_T 4, 5
#define SYN_PQR_DEFINE() {}
#define SYN_PQR_D 0, 1, 2, 3
#define SYN_PQR_X 4, 5, 6, 7
#define REC_PQR_DEFINE() {}
#define REC_PQR_STRIDE 2
#define REC_PQR_DEFINE() {}
#define REC_PQR_X 0, 1
#define REC_PQR_Y 2, 3
#define REC_PQR_Z 4, 5
#define REC_PQR_D 6, 7
#define REC_PQR_XS 6, 7
#define REC_PQR_YS 8, 9
@ -403,13 +430,8 @@ DEFINE_REC_METHODS(ssse3);
static boolean_t
raidz_will_ssse3_work(void)
{
/* ABD Bringup -- vector code not ready */
#if 1
return (B_FALSE);
#else
return (zfs_sse_available() && zfs_sse2_available() &&
zfs_ssse3_available());
#endif
}
const raidz_impl_ops_t vdev_raidz_ssse3_impl = {