ABD Vectorized raidz

Enable the vectorized raidz code on ABD buffers.  The avx512f,
avx512bw, aarch64_neon and aarch64_neonx2 implementations are disabled
in this commit.  With the exception of avx512bw, these implementations
are updated for ABD in subsequent commits.

Signed-off-by: Gvozden Neskovic <neskovic@gmail.com>
This commit is contained in:
Gvozden Neskovic 2016-08-24 15:51:33 +02:00 committed by Brian Behlendorf
parent a206522c4f
commit cbf484f8ad
13 changed files with 1438 additions and 1051 deletions

View File

@ -23,8 +23,6 @@
* Copyright (C) 2016 Gvozden Nešković. All rights reserved. * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
*/ */
#ifdef _ABD_READY_
#include <sys/zfs_context.h> #include <sys/zfs_context.h>
#include <sys/time.h> #include <sys/time.h>
#include <sys/wait.h> #include <sys/wait.h>
@ -55,18 +53,18 @@ bench_init_raidz_map(void)
/* /*
* To permit larger column sizes these have to be done * To permit larger column sizes these have to be done
* allocated using aligned alloc instead of zio_data_buf_alloc * allocated using aligned alloc instead of zio_abd_buf_alloc
*/ */
zio_bench.io_data = raidz_alloc(max_data_size); zio_bench.io_abd = raidz_alloc(max_data_size);
init_zio_data(&zio_bench); init_zio_abd(&zio_bench);
} }
static void static void
bench_fini_raidz_maps(void) bench_fini_raidz_maps(void)
{ {
/* tear down golden zio */ /* tear down golden zio */
raidz_free(zio_bench.io_data, max_data_size); raidz_free(zio_bench.io_abd, max_data_size);
bzero(&zio_bench, sizeof (zio_t)); bzero(&zio_bench, sizeof (zio_t));
} }
@ -227,4 +225,3 @@ run_raidz_benchmark(void)
bench_fini_raidz_maps(); bench_fini_raidz_maps();
} }
#endif

View File

@ -32,16 +32,6 @@
#include <sys/vdev_raidz_impl.h> #include <sys/vdev_raidz_impl.h>
#include <assert.h> #include <assert.h>
#include <stdio.h> #include <stdio.h>
#ifndef _ABD_READY_
int
main(int argc, char **argv)
{
exit(0);
}
#else
#include "raidz_test.h" #include "raidz_test.h"
static int *rand_data; static int *rand_data;
@ -191,10 +181,10 @@ static void process_options(int argc, char **argv)
} }
} }
#define DATA_COL(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_data) #define DATA_COL(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_abd)
#define DATA_COL_SIZE(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_size) #define DATA_COL_SIZE(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_size)
#define CODE_COL(rm, i) ((rm)->rm_col[(i)].rc_data) #define CODE_COL(rm, i) ((rm)->rm_col[(i)].rc_abd)
#define CODE_COL_SIZE(rm, i) ((rm)->rm_col[(i)].rc_size) #define CODE_COL_SIZE(rm, i) ((rm)->rm_col[(i)].rc_size)
static int static int
@ -205,10 +195,9 @@ cmp_code(raidz_test_opts_t *opts, const raidz_map_t *rm, const int parity)
VERIFY(parity >= 1 && parity <= 3); VERIFY(parity >= 1 && parity <= 3);
for (i = 0; i < parity; i++) { for (i = 0; i < parity; i++) {
if (0 != memcmp(CODE_COL(rm, i), CODE_COL(opts->rm_golden, i), if (abd_cmp(CODE_COL(rm, i), CODE_COL(opts->rm_golden, i))
CODE_COL_SIZE(rm, i))) { != 0) {
ret++; ret++;
LOG_OPT(D_DEBUG, opts, LOG_OPT(D_DEBUG, opts,
"\nParity block [%d] different!\n", i); "\nParity block [%d] different!\n", i);
} }
@ -223,8 +212,8 @@ cmp_data(raidz_test_opts_t *opts, raidz_map_t *rm)
int dcols = opts->rm_golden->rm_cols - raidz_parity(opts->rm_golden); int dcols = opts->rm_golden->rm_cols - raidz_parity(opts->rm_golden);
for (i = 0; i < dcols; i++) { for (i = 0; i < dcols; i++) {
if (0 != memcmp(DATA_COL(opts->rm_golden, i), DATA_COL(rm, i), if (abd_cmp(DATA_COL(opts->rm_golden, i), DATA_COL(rm, i))
DATA_COL_SIZE(opts->rm_golden, i))) { != 0) {
ret++; ret++;
LOG_OPT(D_DEBUG, opts, LOG_OPT(D_DEBUG, opts,
@ -234,37 +223,55 @@ cmp_data(raidz_test_opts_t *opts, raidz_map_t *rm)
return (ret); return (ret);
} }
/*
 * Buffer-fill callback (passed to abd_iterate_func() by init_zio_abd()).
 * Copies ints from the file-scope rand_data table into the given chunk.
 *
 * data    - start of the chunk to fill
 * size    - chunk length in bytes; only whole ints are written, so up to
 *           sizeof (int) - 1 trailing bytes are left untouched
 * private - not used
 *
 * Every call copies starting at rand_data[0].
 * NOTE(review): assumes rand_data holds at least size / sizeof (int)
 * entries -- confirm against the allocation of rand_data.
 *
 * Returns 0.
 */
static int
init_rand(void *data, size_t size, void *private)
{
	size_t i;
	/* size_t index: an int counter would mismatch (and could overflow) */
	const size_t cnt = size / sizeof (int);
	int *dst = (int *) data;

	for (i = 0; i < cnt; i++)
		dst[i] = rand_data[i];

	return (0);
}
/*
 * Buffer-fill callback (passed to abd_iterate_func() by corrupt_colums()).
 * Overwrites the given chunk with values from rand().
 *
 * data    - start of the chunk to overwrite
 * size    - chunk length in bytes; only whole ints are written, so up to
 *           sizeof (int) - 1 trailing bytes are left untouched
 * private - not used
 *
 * Returns 0.
 */
static int
corrupt_rand(void *data, size_t size, void *private)
{
	size_t i;
	/* size_t index: an int counter would mismatch (and could overflow) */
	const size_t cnt = size / sizeof (int);
	int *dst = (int *) data;

	for (i = 0; i < cnt; i++)
		dst[i] = rand();

	return (0);
}
static void static void
corrupt_colums(raidz_map_t *rm, const int *tgts, const int cnt) corrupt_colums(raidz_map_t *rm, const int *tgts, const int cnt)
{ {
int i; int i;
int *dst;
raidz_col_t *col; raidz_col_t *col;
for (i = 0; i < cnt; i++) { for (i = 0; i < cnt; i++) {
col = &rm->rm_col[tgts[i]]; col = &rm->rm_col[tgts[i]];
dst = col->rc_data; abd_iterate_func(col->rc_abd, 0, col->rc_size, corrupt_rand,
for (i = 0; i < col->rc_size / sizeof (int); i++) NULL);
dst[i] = rand();
} }
} }
void void
init_zio_data(zio_t *zio) init_zio_abd(zio_t *zio)
{ {
int i; abd_iterate_func(zio->io_abd, 0, zio->io_size, init_rand, NULL);
int *dst = (int *) zio->io_data;
for (i = 0; i < zio->io_size / sizeof (int); i++) {
dst[i] = rand_data[i];
}
} }
static void static void
fini_raidz_map(zio_t **zio, raidz_map_t **rm) fini_raidz_map(zio_t **zio, raidz_map_t **rm)
{ {
vdev_raidz_map_free(*rm); vdev_raidz_map_free(*rm);
raidz_free((*zio)->io_data, (*zio)->io_size); raidz_free((*zio)->io_abd, (*zio)->io_size);
umem_free(*zio, sizeof (zio_t)); umem_free(*zio, sizeof (zio_t));
*zio = NULL; *zio = NULL;
@ -289,11 +296,11 @@ init_raidz_golden_map(raidz_test_opts_t *opts, const int parity)
opts->zio_golden->io_offset = zio_test->io_offset = opts->rto_offset; opts->zio_golden->io_offset = zio_test->io_offset = opts->rto_offset;
opts->zio_golden->io_size = zio_test->io_size = opts->rto_dsize; opts->zio_golden->io_size = zio_test->io_size = opts->rto_dsize;
opts->zio_golden->io_data = raidz_alloc(opts->rto_dsize); opts->zio_golden->io_abd = raidz_alloc(opts->rto_dsize);
zio_test->io_data = raidz_alloc(opts->rto_dsize); zio_test->io_abd = raidz_alloc(opts->rto_dsize);
init_zio_data(opts->zio_golden); init_zio_abd(opts->zio_golden);
init_zio_data(zio_test); init_zio_abd(zio_test);
VERIFY0(vdev_raidz_impl_set("original")); VERIFY0(vdev_raidz_impl_set("original"));
@ -336,8 +343,8 @@ init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity)
(*zio)->io_offset = 0; (*zio)->io_offset = 0;
(*zio)->io_size = alloc_dsize; (*zio)->io_size = alloc_dsize;
(*zio)->io_data = raidz_alloc(alloc_dsize); (*zio)->io_abd = raidz_alloc(alloc_dsize);
init_zio_data(*zio); init_zio_abd(*zio);
rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift, rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift,
total_ncols, parity); total_ncols, parity);
@ -792,4 +799,3 @@ main(int argc, char **argv)
return (err); return (err);
} }
#endif

View File

@ -104,11 +104,11 @@ static inline size_t ilog2(size_t a)
#define SEP "----------------\n" #define SEP "----------------\n"
#define raidz_alloc(size) zio_data_buf_alloc(size) #define raidz_alloc(size) abd_alloc(size, B_FALSE)
#define raidz_free(p, size) zio_data_buf_free(p, size) #define raidz_free(p, size) abd_free(p)
void init_zio_data(zio_t *zio); void init_zio_abd(zio_t *zio);
void run_raidz_benchmark(void); void run_raidz_benchmark(void);

View File

@ -44,16 +44,6 @@ static raidz_impl_ops_t vdev_raidz_fastest_impl = {
.name = "fastest" .name = "fastest"
}; };
/* ABD BRINGUP -- not ready yet */
#if 1
#ifdef HAVE_SSSE3
#undef HAVE_SSSE3
#endif
#ifdef HAVE_AVX2
#undef HAVE_AVX2
#endif
#endif
/* All compiled in implementations */ /* All compiled in implementations */
const raidz_impl_ops_t *raidz_all_maths[] = { const raidz_impl_ops_t *raidz_all_maths[] = {
&vdev_raidz_original_impl, &vdev_raidz_original_impl,
@ -68,14 +58,14 @@ const raidz_impl_ops_t *raidz_all_maths[] = {
&vdev_raidz_avx2_impl, &vdev_raidz_avx2_impl,
#endif #endif
#if defined(__x86_64) && defined(HAVE_AVX512F) /* only x86_64 for now */ #if defined(__x86_64) && defined(HAVE_AVX512F) /* only x86_64 for now */
&vdev_raidz_avx512f_impl, // &vdev_raidz_avx512f_impl,
#endif #endif
#if defined(__x86_64) && defined(HAVE_AVX512BW) /* only x86_64 for now */ #if defined(__x86_64) && defined(HAVE_AVX512BW) /* only x86_64 for now */
&vdev_raidz_avx512bw_impl, // &vdev_raidz_avx512bw_impl,
#endif #endif
#if defined(__aarch64__) #if defined(__aarch64__)
&vdev_raidz_aarch64_neon_impl, // &vdev_raidz_aarch64_neon_impl,
&vdev_raidz_aarch64_neonx2_impl, // &vdev_raidz_aarch64_neonx2_impl,
#endif #endif
}; };
@ -159,8 +149,6 @@ vdev_raidz_math_generate(raidz_map_t *rm)
{ {
raidz_gen_f gen_parity = NULL; raidz_gen_f gen_parity = NULL;
/* ABD Bringup -- vector code not ready */
#if 0
switch (raidz_parity(rm)) { switch (raidz_parity(rm)) {
case 1: case 1:
gen_parity = rm->rm_ops->gen[RAIDZ_GEN_P]; gen_parity = rm->rm_ops->gen[RAIDZ_GEN_P];
@ -177,7 +165,6 @@ vdev_raidz_math_generate(raidz_map_t *rm)
raidz_parity(rm)); raidz_parity(rm));
break; break;
} }
#endif
/* if method is NULL execute the original implementation */ /* if method is NULL execute the original implementation */
if (gen_parity == NULL) if (gen_parity == NULL)
@ -188,8 +175,6 @@ vdev_raidz_math_generate(raidz_map_t *rm)
return (0); return (0);
} }
/* ABD Bringup -- vector code not ready */
#if 0
static raidz_rec_f static raidz_rec_f
reconstruct_fun_p_sel(raidz_map_t *rm, const int *parity_valid, reconstruct_fun_p_sel(raidz_map_t *rm, const int *parity_valid,
const int nbaddata) const int nbaddata)
@ -244,7 +229,6 @@ reconstruct_fun_pqr_sel(raidz_map_t *rm, const int *parity_valid,
} }
return ((raidz_rec_f) NULL); return ((raidz_rec_f) NULL);
} }
#endif
/* /*
* Select data reconstruction method for raidz_map * Select data reconstruction method for raidz_map
@ -256,31 +240,28 @@ int
vdev_raidz_math_reconstruct(raidz_map_t *rm, const int *parity_valid, vdev_raidz_math_reconstruct(raidz_map_t *rm, const int *parity_valid,
const int *dt, const int nbaddata) const int *dt, const int nbaddata)
{ {
raidz_rec_f rec_data = NULL; raidz_rec_f rec_fn = NULL;
/* ABD Bringup -- vector code not ready */
#if 0
switch (raidz_parity(rm)) { switch (raidz_parity(rm)) {
case PARITY_P: case PARITY_P:
rec_data = reconstruct_fun_p_sel(rm, parity_valid, nbaddata); rec_fn = reconstruct_fun_p_sel(rm, parity_valid, nbaddata);
break; break;
case PARITY_PQ: case PARITY_PQ:
rec_data = reconstruct_fun_pq_sel(rm, parity_valid, nbaddata); rec_fn = reconstruct_fun_pq_sel(rm, parity_valid, nbaddata);
break; break;
case PARITY_PQR: case PARITY_PQR:
rec_data = reconstruct_fun_pqr_sel(rm, parity_valid, nbaddata); rec_fn = reconstruct_fun_pqr_sel(rm, parity_valid, nbaddata);
break; break;
default: default:
cmn_err(CE_PANIC, "invalid RAID-Z configuration %d", cmn_err(CE_PANIC, "invalid RAID-Z configuration %d",
raidz_parity(rm)); raidz_parity(rm));
break; break;
} }
#endif
if (rec_data == NULL) if (rec_fn == NULL)
return (RAIDZ_ORIGINAL_IMPL); return (RAIDZ_ORIGINAL_IMPL);
else else
return (rec_data(rm, dt)); return (rec_fn(rm, dt));
} }
const char *raidz_gen_name[] = { const char *raidz_gen_name[] = {

View File

@ -23,8 +23,9 @@
*/ */
#include <sys/isa_defs.h> #include <sys/isa_defs.h>
#include <sys/types.h>
#if defined(__aarch64__) #if 0 // defined(__aarch64__)
#include "vdev_raidz_math_aarch64_neon_common.h" #include "vdev_raidz_math_aarch64_neon_common.h"
@ -153,7 +154,7 @@ const raidz_impl_ops_t vdev_raidz_aarch64_neon_impl = {
#endif /* defined(__aarch64__) */ #endif /* defined(__aarch64__) */
#if defined(__aarch64__) #if 0 // defined(__aarch64__)
const uint8_t const uint8_t
__attribute__((aligned(256))) gf_clmul_mod_lt[4*256][16] = { __attribute__((aligned(256))) gf_clmul_mod_lt[4*256][16] = {

View File

@ -24,7 +24,7 @@
#include <sys/isa_defs.h> #include <sys/isa_defs.h>
#if defined(__aarch64__) #if 0 // defined(__aarch64__)
#include "vdev_raidz_math_aarch64_neon_common.h" #include "vdev_raidz_math_aarch64_neon_common.h"

View File

@ -334,59 +334,86 @@ static const uint8_t __attribute__((aligned(32))) _mul_mask = 0x0F;
kfpu_end(); \ kfpu_end(); \
} }
#define GEN_P_DEFINE() {}
#define SYN_STRIDE 4
#define ZERO_STRIDE 4
#define ZERO_DEFINE() {}
#define ZERO_D 0, 1, 2, 3
#define COPY_STRIDE 4
#define COPY_DEFINE() {}
#define COPY_D 0, 1, 2, 3
#define ADD_STRIDE 4
#define ADD_DEFINE() {}
#define ADD_D 0, 1, 2, 3
#define MUL_STRIDE 4
#define MUL_DEFINE() {}
#define MUL_D 0, 1, 2, 3
#define GEN_P_STRIDE 4 #define GEN_P_STRIDE 4
#define GEN_P_DEFINE() {}
#define GEN_P_P 0, 1, 2, 3 #define GEN_P_P 0, 1, 2, 3
#define GEN_PQ_DEFINE() {}
#define GEN_PQ_STRIDE 4 #define GEN_PQ_STRIDE 4
#define GEN_PQ_DEFINE() {}
#define GEN_PQ_D 0, 1, 2, 3 #define GEN_PQ_D 0, 1, 2, 3
#define GEN_PQ_P 4, 5, 6, 7 #define GEN_PQ_C 4, 5, 6, 7
#define GEN_PQ_Q 8, 9, 10, 11
#define GEN_PQR_STRIDE 4
#define GEN_PQR_DEFINE() {} #define GEN_PQR_DEFINE() {}
#define GEN_PQR_STRIDE 2 #define GEN_PQR_D 0, 1, 2, 3
#define GEN_PQR_D 0, 1 #define GEN_PQR_C 4, 5, 6, 7
#define GEN_PQR_P 2, 3
#define GEN_PQR_Q 4, 5
#define GEN_PQR_R 6, 7
#define REC_P_DEFINE() {} #define SYN_Q_DEFINE() {}
#define REC_P_STRIDE 4 #define SYN_Q_D 0, 1, 2, 3
#define REC_P_X 0, 1, 2, 3 #define SYN_Q_X 4, 5, 6, 7
#define REC_Q_DEFINE() {} #define SYN_R_DEFINE() {}
#define REC_Q_STRIDE 4 #define SYN_R_D 0, 1, 2, 3
#define REC_Q_X 0, 1, 2, 3 #define SYN_R_X 4, 5, 6, 7
#define REC_R_DEFINE() {} #define SYN_PQ_DEFINE() {}
#define REC_R_STRIDE 4 #define SYN_PQ_D 0, 1, 2, 3
#define REC_R_X 0, 1, 2, 3 #define SYN_PQ_X 4, 5, 6, 7
#define REC_PQ_DEFINE() {}
#define REC_PQ_STRIDE 2 #define REC_PQ_STRIDE 2
#define REC_PQ_DEFINE() {}
#define REC_PQ_X 0, 1 #define REC_PQ_X 0, 1
#define REC_PQ_Y 2, 3 #define REC_PQ_Y 2, 3
#define REC_PQ_D 4, 5 #define REC_PQ_T 4, 5
#define SYN_PR_DEFINE() {}
#define SYN_PR_D 0, 1, 2, 3
#define SYN_PR_X 4, 5, 6, 7
#define REC_PR_DEFINE() {}
#define REC_PR_STRIDE 2 #define REC_PR_STRIDE 2
#define REC_PR_DEFINE() {}
#define REC_PR_X 0, 1 #define REC_PR_X 0, 1
#define REC_PR_Y 2, 3 #define REC_PR_Y 2, 3
#define REC_PR_D 4, 5 #define REC_PR_T 4, 5
#define SYN_QR_DEFINE() {}
#define SYN_QR_D 0, 1, 2, 3
#define SYN_QR_X 4, 5, 6, 7
#define REC_QR_DEFINE() {}
#define REC_QR_STRIDE 2 #define REC_QR_STRIDE 2
#define REC_QR_DEFINE() {}
#define REC_QR_X 0, 1 #define REC_QR_X 0, 1
#define REC_QR_Y 2, 3 #define REC_QR_Y 2, 3
#define REC_QR_D 4, 5 #define REC_QR_T 4, 5
#define SYN_PQR_DEFINE() {}
#define SYN_PQR_D 0, 1, 2, 3
#define SYN_PQR_X 4, 5, 6, 7
#define REC_PQR_DEFINE() {}
#define REC_PQR_STRIDE 2 #define REC_PQR_STRIDE 2
#define REC_PQR_DEFINE() {}
#define REC_PQR_X 0, 1 #define REC_PQR_X 0, 1
#define REC_PQR_Y 2, 3 #define REC_PQR_Y 2, 3
#define REC_PQR_Z 4, 5 #define REC_PQR_Z 4, 5
#define REC_PQR_D 6, 7
#define REC_PQR_XS 6, 7 #define REC_PQR_XS 6, 7
#define REC_PQR_YS 8, 9 #define REC_PQR_YS 8, 9
@ -400,12 +427,7 @@ DEFINE_REC_METHODS(avx2);
static boolean_t static boolean_t
raidz_will_avx2_work(void) raidz_will_avx2_work(void)
{ {
/* ABD Bringup -- vector code not ready */
#if 1
return (B_FALSE);
#else
return (zfs_avx_available() && zfs_avx2_available()); return (zfs_avx_available() && zfs_avx2_available());
#endif
} }
const raidz_impl_ops_t vdev_raidz_avx2_impl = { const raidz_impl_ops_t vdev_raidz_avx2_impl = {

View File

@ -24,7 +24,7 @@
#include <sys/isa_defs.h> #include <sys/isa_defs.h>
#if defined(__x86_64) && defined(HAVE_AVX512BW) #if 0 // defined(__x86_64) && defined(HAVE_AVX512BW)
#include <sys/types.h> #include <sys/types.h>
#include <linux/simd_x86.h> #include <linux/simd_x86.h>
@ -345,6 +345,22 @@ static const uint8_t __attribute__((aligned(32))) _mul_mask = 0x0F;
kfpu_end(); \ kfpu_end(); \
} }
#define ZERO_STRIDE 4
#define ZERO_DEFINE() {}
#define ZERO_D 0, 1, 2, 3
#define COPY_STRIDE 4
#define COPY_DEFINE() {}
#define COPY_D 0, 1, 2, 3
#define ADD_STRIDE 4
#define ADD_DEFINE() {}
#define ADD_D 0, 1, 2, 3
#define MUL_STRIDE 4
#define MUL_DEFINE() {}
#define MUL_D 0, 1, 2, 3
#define GEN_P_DEFINE() {} #define GEN_P_DEFINE() {}
#define GEN_P_STRIDE 4 #define GEN_P_STRIDE 4
#define GEN_P_P 0, 1, 2, 3 #define GEN_P_P 0, 1, 2, 3

View File

@ -24,7 +24,7 @@
#include <sys/isa_defs.h> #include <sys/isa_defs.h>
#if defined(__x86_64) && defined(HAVE_AVX512F) #if 0 // defined(__x86_64) && defined(HAVE_AVX512F)
#include <sys/types.h> #include <sys/types.h>
#include <linux/simd_x86.h> #include <linux/simd_x86.h>
@ -437,6 +437,21 @@ typedef struct v {
kfpu_end(); \ kfpu_end(); \
} }
#define ZERO_STRIDE 4
#define ZERO_DEFINE() {}
#define ZERO_D 20, 21, 22, 23
#define COPY_STRIDE 4
#define COPY_DEFINE() {}
#define COPY_D 20, 21, 22, 23
#define ADD_STRIDE 4
#define ADD_DEFINE() {}
#define ADD_D 20, 21, 22, 23
#define MUL_STRIDE 4
#define MUL_DEFINE() {}
#define MUL_D 20, 21, 22, 23
/* /*
* This use zmm16-zmm31 registers to free up zmm0-zmm15 * This use zmm16-zmm31 registers to free up zmm0-zmm15
* to use with the AVX2 pshufb, see above * to use with the AVX2 pshufb, see above

File diff suppressed because it is too large Load Diff

View File

@ -24,6 +24,7 @@
*/ */
#include <sys/vdev_raidz_impl.h> #include <sys/vdev_raidz_impl.h>
/* /*
* Provide native CPU scalar routines. * Provide native CPU scalar routines.
* Support 32bit and 64bit CPUs. * Support 32bit and 64bit CPUs.
@ -153,72 +154,97 @@ static const struct {
#define raidz_math_begin() {} #define raidz_math_begin() {}
#define raidz_math_end() {} #define raidz_math_end() {}
#define GEN_P_DEFINE() v_t p0 #define SYN_STRIDE 1
#define ZERO_DEFINE() v_t d0
#define ZERO_STRIDE 1
#define ZERO_D d0
#define COPY_DEFINE() v_t d0
#define COPY_STRIDE 1
#define COPY_D d0
#define ADD_DEFINE() v_t d0
#define ADD_STRIDE 1
#define ADD_D d0
#define MUL_DEFINE() v_t d0
#define MUL_STRIDE 1
#define MUL_D d0
#define GEN_P_STRIDE 1 #define GEN_P_STRIDE 1
#define GEN_P_DEFINE() v_t p0
#define GEN_P_P p0 #define GEN_P_P p0
#define GEN_PQ_DEFINE() v_t d0, p0, q0
#define GEN_PQ_STRIDE 1 #define GEN_PQ_STRIDE 1
#define GEN_PQ_DEFINE() v_t d0, c0
#define GEN_PQ_D d0 #define GEN_PQ_D d0
#define GEN_PQ_P p0 #define GEN_PQ_C c0
#define GEN_PQ_Q q0
#define GEN_PQR_DEFINE() v_t d0, p0, q0, r0
#define GEN_PQR_STRIDE 1 #define GEN_PQR_STRIDE 1
#define GEN_PQR_DEFINE() v_t d0, c0
#define GEN_PQR_D d0 #define GEN_PQR_D d0
#define GEN_PQR_P p0 #define GEN_PQR_C c0
#define GEN_PQR_Q q0
#define GEN_PQR_R r0
#define REC_P_DEFINE() v_t x0 #define SYN_Q_DEFINE() v_t d0, x0
#define REC_P_STRIDE 1 #define SYN_Q_D d0
#define REC_P_X x0 #define SYN_Q_X x0
#define REC_Q_DEFINE() v_t x0
#define REC_Q_STRIDE 1
#define REC_Q_X x0
#define REC_R_DEFINE() v_t x0 #define SYN_R_DEFINE() v_t d0, x0
#define REC_R_STRIDE 1 #define SYN_R_D d0
#define REC_R_X x0 #define SYN_R_X x0
#define SYN_PQ_DEFINE() v_t d0, x0
#define SYN_PQ_D d0
#define SYN_PQ_X x0
#define REC_PQ_DEFINE() v_t x0, y0, d0
#define REC_PQ_STRIDE 1 #define REC_PQ_STRIDE 1
#define REC_PQ_DEFINE() v_t x0, y0, t0
#define REC_PQ_X x0 #define REC_PQ_X x0
#define REC_PQ_Y y0 #define REC_PQ_Y y0
#define REC_PQ_D d0 #define REC_PQ_T t0
#define SYN_PR_DEFINE() v_t d0, x0
#define SYN_PR_D d0
#define SYN_PR_X x0
#define REC_PR_DEFINE() v_t x0, y0, d0
#define REC_PR_STRIDE 1 #define REC_PR_STRIDE 1
#define REC_PR_DEFINE() v_t x0, y0, t0
#define REC_PR_X x0 #define REC_PR_X x0
#define REC_PR_Y y0 #define REC_PR_Y y0
#define REC_PR_D d0 #define REC_PR_T t0
#define SYN_QR_DEFINE() v_t d0, x0
#define SYN_QR_D d0
#define SYN_QR_X x0
#define REC_QR_DEFINE() v_t x0, y0, d0
#define REC_QR_STRIDE 1 #define REC_QR_STRIDE 1
#define REC_QR_DEFINE() v_t x0, y0, t0
#define REC_QR_X x0 #define REC_QR_X x0
#define REC_QR_Y y0 #define REC_QR_Y y0
#define REC_QR_D d0 #define REC_QR_T t0
#define SYN_PQR_DEFINE() v_t d0, x0
#define SYN_PQR_D d0
#define SYN_PQR_X x0
#define REC_PQR_DEFINE() v_t x0, y0, z0, d0, t0
#define REC_PQR_STRIDE 1 #define REC_PQR_STRIDE 1
#define REC_PQR_DEFINE() v_t x0, y0, z0, xs0, ys0
#define REC_PQR_X x0 #define REC_PQR_X x0
#define REC_PQR_Y y0 #define REC_PQR_Y y0
#define REC_PQR_Z z0 #define REC_PQR_Z z0
#define REC_PQR_D d0 #define REC_PQR_XS xs0
#define REC_PQR_XS d0 #define REC_PQR_YS ys0
#define REC_PQR_YS t0
#include "vdev_raidz_math_impl.h" #include "vdev_raidz_math_impl.h"
/*
* When compiled with -O0, gcc does not coalesce stack frames, so
* -Wframe-larger-than=1024 is triggered in debug builds.
* gcc 4.8 introduced the -Og optimization level for debugging, which
* does not trigger this warning.
*/
#pragma GCC diagnostic ignored "-Wframe-larger-than="
DEFINE_GEN_METHODS(scalar); DEFINE_GEN_METHODS(scalar);
DEFINE_REC_METHODS(scalar); DEFINE_REC_METHODS(scalar);

View File

@ -236,6 +236,10 @@ typedef struct v {
#define MUL2(r...) \ #define MUL2(r...) \
{ \ { \
switch (REG_CNT(r)) { \ switch (REG_CNT(r)) { \
case 4: \
_MUL2_x2(VR0(r), VR1(r)); \
_MUL2_x2(VR2(r), VR3(r)); \
break; \
case 2: \ case 2: \
_MUL2_x2(VR0(r), VR1(r)); \ _MUL2_x2(VR0(r), VR1(r)); \
break; \ break; \
@ -271,8 +275,8 @@ typedef struct v {
if (x & 0x80) { MUL2(in); XOR(in, acc); } \ if (x & 0x80) { MUL2(in); XOR(in, acc); } \
} }
#define _mul_x1_in 9 #define _mul_x1_in 11
#define _mul_x1_acc 11 #define _mul_x1_acc 12
#define MUL_x1_DEFINE(x) \ #define MUL_x1_DEFINE(x) \
static void \ static void \
@ -533,61 +537,87 @@ gf_x2_mul_fns[256] = {
#define raidz_math_begin() kfpu_begin() #define raidz_math_begin() kfpu_begin()
#define raidz_math_end() kfpu_end() #define raidz_math_end() kfpu_end()
#define GEN_P_DEFINE() {} #define SYN_STRIDE 4
#define ZERO_STRIDE 4
#define ZERO_DEFINE() {}
#define ZERO_D 0, 1, 2, 3
#define COPY_STRIDE 4
#define COPY_DEFINE() {}
#define COPY_D 0, 1, 2, 3
#define ADD_STRIDE 4
#define ADD_DEFINE() {}
#define ADD_D 0, 1, 2, 3
#define MUL_STRIDE 2
#define MUL_DEFINE() {}
#define MUL_D 0, 1
#define GEN_P_STRIDE 4 #define GEN_P_STRIDE 4
#define GEN_P_DEFINE() {}
#define GEN_P_P 0, 1, 2, 3 #define GEN_P_P 0, 1, 2, 3
#define GEN_PQ_STRIDE 4
#define GEN_PQ_DEFINE() {} #define GEN_PQ_DEFINE() {}
#define GEN_PQ_STRIDE 2 #define GEN_PQ_D 0, 1, 2, 3
#define GEN_PQ_D 0, 1 #define GEN_PQ_C 4, 5, 6, 7
#define GEN_PQ_P 2, 3
#define GEN_PQ_Q 4, 5
#define GEN_PQR_STRIDE 4
#define GEN_PQR_DEFINE() {} #define GEN_PQR_DEFINE() {}
#define GEN_PQR_STRIDE 2 #define GEN_PQR_D 0, 1, 2, 3
#define GEN_PQR_D 0, 1 #define GEN_PQR_C 4, 5, 6, 7
#define GEN_PQR_P 2, 3
#define GEN_PQR_Q 4, 5
#define GEN_PQR_R 6, 7
#define REC_P_DEFINE() {} #define SYN_Q_DEFINE() {}
#define REC_P_STRIDE 4 #define SYN_Q_D 0, 1, 2, 3
#define REC_P_X 0, 1, 2, 3 #define SYN_Q_X 4, 5, 6, 7
#define REC_Q_DEFINE() {} #define SYN_R_DEFINE() {}
#define REC_Q_STRIDE 2 #define SYN_R_D 0, 1, 2, 3
#define REC_Q_X 0, 1 #define SYN_R_X 4, 5, 6, 7
#define REC_R_DEFINE() {} #define SYN_PQ_DEFINE() {}
#define REC_R_STRIDE 2 #define SYN_PQ_D 0, 1, 2, 3
#define REC_R_X 0, 1 #define SYN_PQ_X 4, 5, 6, 7
#define REC_PQ_DEFINE() {}
#define REC_PQ_STRIDE 2 #define REC_PQ_STRIDE 2
#define REC_PQ_DEFINE() {}
#define REC_PQ_X 0, 1 #define REC_PQ_X 0, 1
#define REC_PQ_Y 2, 3 #define REC_PQ_Y 2, 3
#define REC_PQ_D 4, 5 #define REC_PQ_T 4, 5
#define SYN_PR_DEFINE() {}
#define SYN_PR_D 0, 1, 2, 3
#define SYN_PR_X 4, 5, 6, 7
#define REC_PR_DEFINE() {}
#define REC_PR_STRIDE 2 #define REC_PR_STRIDE 2
#define REC_PR_DEFINE() {}
#define REC_PR_X 0, 1 #define REC_PR_X 0, 1
#define REC_PR_Y 2, 3 #define REC_PR_Y 2, 3
#define REC_PR_D 4, 5 #define REC_PR_T 4, 5
#define SYN_QR_DEFINE() {}
#define SYN_QR_D 0, 1, 2, 3
#define SYN_QR_X 4, 5, 6, 7
#define REC_QR_DEFINE() {}
#define REC_QR_STRIDE 2 #define REC_QR_STRIDE 2
#define REC_QR_DEFINE() {}
#define REC_QR_X 0, 1 #define REC_QR_X 0, 1
#define REC_QR_Y 2, 3 #define REC_QR_Y 2, 3
#define REC_QR_D 4, 5 #define REC_QR_T 4, 5
#define SYN_PQR_DEFINE() {}
#define SYN_PQR_D 0, 1, 2, 3
#define SYN_PQR_X 4, 5, 6, 7
#define REC_PQR_DEFINE() {}
#define REC_PQR_STRIDE 1 #define REC_PQR_STRIDE 1
#define REC_PQR_DEFINE() {}
#define REC_PQR_X 0 #define REC_PQR_X 0
#define REC_PQR_Y 1 #define REC_PQR_Y 1
#define REC_PQR_Z 2 #define REC_PQR_Z 2
#define REC_PQR_D 3 #define REC_PQR_XS 3
#define REC_PQR_XS 4 #define REC_PQR_YS 4
#define REC_PQR_YS 5
#include <sys/vdev_raidz_impl.h> #include <sys/vdev_raidz_impl.h>

View File

@ -337,59 +337,86 @@ typedef struct v {
#define raidz_math_begin() kfpu_begin() #define raidz_math_begin() kfpu_begin()
#define raidz_math_end() kfpu_end() #define raidz_math_end() kfpu_end()
#define GEN_P_DEFINE() {}
#define SYN_STRIDE 4
#define ZERO_STRIDE 4
#define ZERO_DEFINE() {}
#define ZERO_D 0, 1, 2, 3
#define COPY_STRIDE 4
#define COPY_DEFINE() {}
#define COPY_D 0, 1, 2, 3
#define ADD_STRIDE 4
#define ADD_DEFINE() {}
#define ADD_D 0, 1, 2, 3
#define MUL_STRIDE 4
#define MUL_DEFINE() {}
#define MUL_D 0, 1, 2, 3
#define GEN_P_STRIDE 4 #define GEN_P_STRIDE 4
#define GEN_P_DEFINE() {}
#define GEN_P_P 0, 1, 2, 3 #define GEN_P_P 0, 1, 2, 3
#define GEN_PQ_DEFINE() {}
#define GEN_PQ_STRIDE 4 #define GEN_PQ_STRIDE 4
#define GEN_PQ_DEFINE() {}
#define GEN_PQ_D 0, 1, 2, 3 #define GEN_PQ_D 0, 1, 2, 3
#define GEN_PQ_P 4, 5, 6, 7 #define GEN_PQ_C 4, 5, 6, 7
#define GEN_PQ_Q 8, 9, 10, 11
#define GEN_PQR_STRIDE 4
#define GEN_PQR_DEFINE() {} #define GEN_PQR_DEFINE() {}
#define GEN_PQR_STRIDE 2 #define GEN_PQR_D 0, 1, 2, 3
#define GEN_PQR_D 0, 1 #define GEN_PQR_C 4, 5, 6, 7
#define GEN_PQR_P 2, 3
#define GEN_PQR_Q 4, 5
#define GEN_PQR_R 6, 7
#define REC_P_DEFINE() {} #define SYN_Q_DEFINE() {}
#define REC_P_STRIDE 4 #define SYN_Q_D 0, 1, 2, 3
#define REC_P_X 0, 1, 2, 3 #define SYN_Q_X 4, 5, 6, 7
#define REC_Q_DEFINE() {} #define SYN_R_DEFINE() {}
#define REC_Q_STRIDE 4 #define SYN_R_D 0, 1, 2, 3
#define REC_Q_X 0, 1, 2, 3 #define SYN_R_X 4, 5, 6, 7
#define REC_R_DEFINE() {} #define SYN_PQ_DEFINE() {}
#define REC_R_STRIDE 4 #define SYN_PQ_D 0, 1, 2, 3
#define REC_R_X 0, 1, 2, 3 #define SYN_PQ_X 4, 5, 6, 7
#define REC_PQ_DEFINE() {}
#define REC_PQ_STRIDE 2 #define REC_PQ_STRIDE 2
#define REC_PQ_DEFINE() {}
#define REC_PQ_X 0, 1 #define REC_PQ_X 0, 1
#define REC_PQ_Y 2, 3 #define REC_PQ_Y 2, 3
#define REC_PQ_D 4, 5 #define REC_PQ_T 4, 5
#define SYN_PR_DEFINE() {}
#define SYN_PR_D 0, 1, 2, 3
#define SYN_PR_X 4, 5, 6, 7
#define REC_PR_DEFINE() {}
#define REC_PR_STRIDE 2 #define REC_PR_STRIDE 2
#define REC_PR_DEFINE() {}
#define REC_PR_X 0, 1 #define REC_PR_X 0, 1
#define REC_PR_Y 2, 3 #define REC_PR_Y 2, 3
#define REC_PR_D 4, 5 #define REC_PR_T 4, 5
#define SYN_QR_DEFINE() {}
#define SYN_QR_D 0, 1, 2, 3
#define SYN_QR_X 4, 5, 6, 7
#define REC_QR_DEFINE() {}
#define REC_QR_STRIDE 2 #define REC_QR_STRIDE 2
#define REC_QR_DEFINE() {}
#define REC_QR_X 0, 1 #define REC_QR_X 0, 1
#define REC_QR_Y 2, 3 #define REC_QR_Y 2, 3
#define REC_QR_D 4, 5 #define REC_QR_T 4, 5
#define SYN_PQR_DEFINE() {}
#define SYN_PQR_D 0, 1, 2, 3
#define SYN_PQR_X 4, 5, 6, 7
#define REC_PQR_DEFINE() {}
#define REC_PQR_STRIDE 2 #define REC_PQR_STRIDE 2
#define REC_PQR_DEFINE() {}
#define REC_PQR_X 0, 1 #define REC_PQR_X 0, 1
#define REC_PQR_Y 2, 3 #define REC_PQR_Y 2, 3
#define REC_PQR_Z 4, 5 #define REC_PQR_Z 4, 5
#define REC_PQR_D 6, 7
#define REC_PQR_XS 6, 7 #define REC_PQR_XS 6, 7
#define REC_PQR_YS 8, 9 #define REC_PQR_YS 8, 9
@ -403,13 +430,8 @@ DEFINE_REC_METHODS(ssse3);
static boolean_t static boolean_t
raidz_will_ssse3_work(void) raidz_will_ssse3_work(void)
{ {
/* ABD Bringup -- vector code not ready */
#if 1
return (B_FALSE);
#else
return (zfs_sse_available() && zfs_sse2_available() && return (zfs_sse_available() && zfs_sse2_available() &&
zfs_ssse3_available()); zfs_ssse3_available());
#endif
} }
const raidz_impl_ops_t vdev_raidz_ssse3_impl = { const raidz_impl_ops_t vdev_raidz_ssse3_impl = {