Fix BLAKE3 tunable and module loading on Linux and FreeBSD

Give BLAKE3 the same kind of tunable options that zfs_fletcher_4_impl already has.

The zfs module parameter on Linux changes from icp_blake3_impl to
zfs_blake3_impl.

You can check and set it on Linux via sysfs like this:
```
[bash]# cat /sys/module/zfs/parameters/zfs_blake3_impl
cycle [fastest] generic sse2 sse41 avx2

[bash]# echo sse2 > /sys/module/zfs/parameters/zfs_blake3_impl
[bash]# cat /sys/module/zfs/parameters/zfs_blake3_impl
cycle fastest generic [sse2] sse41 avx2
```

The parameter can now also be set at module load time via modprobe:
```
[bash]# modprobe zfs zfs_blake3_impl=sse41
[bash]# cat /sys/module/zfs/parameters/zfs_blake3_impl
cycle fastest generic sse2 [sse41] avx2
```

On FreeBSD the BLAKE3 implementation can be set via sysctl like this:
```
[bsd]# sysctl vfs.zfs.blake3_impl
vfs.zfs.blake3_impl: cycle [fastest] generic sse2 sse41 avx2
[bsd]# sysctl vfs.zfs.blake3_impl=sse2
vfs.zfs.blake3_impl: cycle [fastest] generic sse2 sse41 avx2 \
  -> cycle fastest generic [sse2] sse41 avx2
```

This commit also changes some BLAKE3 internals:
- blake3_impl_ops_t was renamed to blake3_ops_t
- all functions are named blake3_impl_NAME() now

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Co-authored-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Closes #13725
This commit is contained in:
Tino Reichardt 2022-08-03 18:36:41 +02:00 committed by Brian Behlendorf
parent 7dee043af5
commit 75e8b5ad84
10 changed files with 273 additions and 197 deletions

View File

@ -6413,7 +6413,7 @@ ztest_blake3(ztest_ds_t *zd, uint64_t id)
void *res2 = &zc_res2; void *res2 = &zc_res2;
/* BLAKE3_KEY_LEN = 32 */ /* BLAKE3_KEY_LEN = 32 */
VERIFY0(blake3_set_impl_name("generic")); VERIFY0(blake3_impl_setname("generic"));
templ = abd_checksum_blake3_tmpl_init(&salt); templ = abd_checksum_blake3_tmpl_init(&salt);
Blake3_InitKeyed(&ctx, salt_ptr); Blake3_InitKeyed(&ctx, salt_ptr);
Blake3_Update(&ctx, buf, size); Blake3_Update(&ctx, buf, size);
@ -6422,7 +6422,7 @@ ztest_blake3(ztest_ds_t *zd, uint64_t id)
ZIO_CHECKSUM_BSWAP(&zc_ref2); ZIO_CHECKSUM_BSWAP(&zc_ref2);
abd_checksum_blake3_tmpl_free(templ); abd_checksum_blake3_tmpl_free(templ);
VERIFY0(blake3_set_impl_name("cycle")); VERIFY0(blake3_impl_setname("cycle"));
while (run_count-- > 0) { while (run_count-- > 0) {
/* Test current implementation */ /* Test current implementation */

View File

@ -31,10 +31,6 @@
#include <sys/sysctl.h> #include <sys/sysctl.h>
#define EXPORT_SYMBOL(x)
#define module_param(a, b, c)
#define MODULE_PARM_DESC(a, b)
#define ZMOD_RW CTLFLAG_RWTUN #define ZMOD_RW CTLFLAG_RWTUN
#define ZMOD_RD CTLFLAG_RDTUN #define ZMOD_RD CTLFLAG_RDTUN
@ -98,6 +94,9 @@
#define fletcher_4_param_set_args(var) \ #define fletcher_4_param_set_args(var) \
CTLTYPE_STRING, NULL, 0, fletcher_4_param, "A" CTLTYPE_STRING, NULL, 0, fletcher_4_param, "A"
#define blake3_param_set_args(var) \
CTLTYPE_STRING, NULL, 0, blake3_param, "A"
#include <sys/kernel.h> #include <sys/kernel.h>
#define module_init(fn) \ #define module_init(fn) \
static void \ static void \

View File

@ -72,7 +72,7 @@ typedef struct {
*/ */
uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN]; uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
/* const blake3_impl_ops_t *ops */ /* const blake3_ops_t *ops */
const void *ops; const void *ops;
} BLAKE3_CTX; } BLAKE3_CTX;
@ -97,26 +97,23 @@ extern void **blake3_per_cpu_ctx;
extern void blake3_per_cpu_ctx_init(void); extern void blake3_per_cpu_ctx_init(void);
extern void blake3_per_cpu_ctx_fini(void); extern void blake3_per_cpu_ctx_fini(void);
/* return number of supported implementations */ /* get count of supported implementations */
extern int blake3_get_impl_count(void); extern uint32_t blake3_impl_getcnt(void);
/* return id of selected implementation */ /* get id of selected implementation */
extern int blake3_get_impl_id(void); extern uint32_t blake3_impl_getid(void);
/* return name of selected implementation */ /* get name of selected implementation */
extern const char *blake3_get_impl_name(void); extern const char *blake3_impl_getname(void);
/* setup id as fastest implementation */ /* setup id as fastest implementation */
extern void blake3_set_impl_fastest(uint32_t id); extern void blake3_impl_set_fastest(uint32_t id);
/* set implementation by id */ /* set implementation by id */
extern void blake3_set_impl_id(uint32_t id); extern void blake3_impl_setid(uint32_t id);
/* set implementation by name */ /* set implementation by name */
extern int blake3_set_impl_name(const char *name); extern int blake3_impl_setname(const char *name);
/* set startup implementation */
extern void blake3_setup_impl(void);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -129,7 +129,7 @@ static output_t make_output(const uint32_t input_cv[8],
* bytes. For that reason, chaining values in the CV stack are represented as * bytes. For that reason, chaining values in the CV stack are represented as
* bytes. * bytes.
*/ */
static void output_chaining_value(const blake3_impl_ops_t *ops, static void output_chaining_value(const blake3_ops_t *ops,
const output_t *ctx, uint8_t cv[32]) const output_t *ctx, uint8_t cv[32])
{ {
uint32_t cv_words[8]; uint32_t cv_words[8];
@ -139,7 +139,7 @@ static void output_chaining_value(const blake3_impl_ops_t *ops,
store_cv_words(cv, cv_words); store_cv_words(cv, cv_words);
} }
static void output_root_bytes(const blake3_impl_ops_t *ops, const output_t *ctx, static void output_root_bytes(const blake3_ops_t *ops, const output_t *ctx,
uint64_t seek, uint8_t *out, size_t out_len) uint64_t seek, uint8_t *out, size_t out_len)
{ {
uint64_t output_block_counter = seek / 64; uint64_t output_block_counter = seek / 64;
@ -163,7 +163,7 @@ static void output_root_bytes(const blake3_impl_ops_t *ops, const output_t *ctx,
} }
} }
static void chunk_state_update(const blake3_impl_ops_t *ops, static void chunk_state_update(const blake3_ops_t *ops,
blake3_chunk_state_t *ctx, const uint8_t *input, size_t input_len) blake3_chunk_state_t *ctx, const uint8_t *input, size_t input_len)
{ {
if (ctx->buf_len > 0) { if (ctx->buf_len > 0) {
@ -230,7 +230,7 @@ static size_t left_len(size_t content_len)
* number of chunks hashed. These chunks are never the root and never empty; * number of chunks hashed. These chunks are never the root and never empty;
* those cases use a different codepath. * those cases use a different codepath.
*/ */
static size_t compress_chunks_parallel(const blake3_impl_ops_t *ops, static size_t compress_chunks_parallel(const blake3_ops_t *ops,
const uint8_t *input, size_t input_len, const uint32_t key[8], const uint8_t *input, size_t input_len, const uint32_t key[8],
uint64_t chunk_counter, uint8_t flags, uint8_t *out) uint64_t chunk_counter, uint8_t flags, uint8_t *out)
{ {
@ -274,7 +274,7 @@ static size_t compress_chunks_parallel(const blake3_impl_ops_t *ops,
* return it as an additional output.) These parents are never the root and * return it as an additional output.) These parents are never the root and
* never empty; those cases use a different codepath. * never empty; those cases use a different codepath.
*/ */
static size_t compress_parents_parallel(const blake3_impl_ops_t *ops, static size_t compress_parents_parallel(const blake3_ops_t *ops,
const uint8_t *child_chaining_values, size_t num_chaining_values, const uint8_t *child_chaining_values, size_t num_chaining_values,
const uint32_t key[8], uint8_t flags, uint8_t *out) const uint32_t key[8], uint8_t flags, uint8_t *out)
{ {
@ -320,7 +320,7 @@ static size_t compress_parents_parallel(const blake3_impl_ops_t *ops,
* of implementing this special rule? Because we don't want to limit SIMD or * of implementing this special rule? Because we don't want to limit SIMD or
* multi-threading parallelism for that update(). * multi-threading parallelism for that update().
*/ */
static size_t blake3_compress_subtree_wide(const blake3_impl_ops_t *ops, static size_t blake3_compress_subtree_wide(const blake3_ops_t *ops,
const uint8_t *input, size_t input_len, const uint32_t key[8], const uint8_t *input, size_t input_len, const uint32_t key[8],
uint64_t chunk_counter, uint8_t flags, uint8_t *out) uint64_t chunk_counter, uint8_t flags, uint8_t *out)
{ {
@ -406,7 +406,7 @@ static size_t blake3_compress_subtree_wide(const blake3_impl_ops_t *ops,
* As with compress_subtree_wide(), this function is not used on inputs of 1 * As with compress_subtree_wide(), this function is not used on inputs of 1
* chunk or less. That's a different codepath. * chunk or less. That's a different codepath.
*/ */
static void compress_subtree_to_parent_node(const blake3_impl_ops_t *ops, static void compress_subtree_to_parent_node(const blake3_ops_t *ops,
const uint8_t *input, size_t input_len, const uint32_t key[8], const uint8_t *input, size_t input_len, const uint32_t key[8],
uint64_t chunk_counter, uint8_t flags, uint8_t out[2 * BLAKE3_OUT_LEN]) uint64_t chunk_counter, uint8_t flags, uint8_t out[2 * BLAKE3_OUT_LEN])
{ {

View File

@ -192,7 +192,7 @@ static inline boolean_t blake3_is_generic_supported(void)
return (B_TRUE); return (B_TRUE);
} }
const blake3_impl_ops_t blake3_generic_impl = { const blake3_ops_t blake3_generic_impl = {
.compress_in_place = blake3_compress_in_place_generic, .compress_in_place = blake3_compress_in_place_generic,
.compress_xof = blake3_compress_xof_generic, .compress_xof = blake3_compress_xof_generic,
.hash_many = blake3_hash_many_generic, .hash_many = blake3_hash_many_generic,

View File

@ -28,7 +28,7 @@
#include "blake3_impl.h" #include "blake3_impl.h"
static const blake3_impl_ops_t *const blake3_impls[] = { static const blake3_ops_t *const blake3_impls[] = {
&blake3_generic_impl, &blake3_generic_impl,
#if defined(__aarch64__) || \ #if defined(__aarch64__) || \
(defined(__x86_64) && defined(HAVE_SSE2)) || \ (defined(__x86_64) && defined(HAVE_SSE2)) || \
@ -48,160 +48,199 @@ static const blake3_impl_ops_t *const blake3_impls[] = {
#endif #endif
}; };
/* this pointer holds current ops for implementation */ /* Select BLAKE3 implementation */
static const blake3_impl_ops_t *blake3_selected_impl = &blake3_generic_impl;
/* special implementation selections */
#define IMPL_FASTEST (UINT32_MAX) #define IMPL_FASTEST (UINT32_MAX)
#define IMPL_CYCLE (UINT32_MAX-1) #define IMPL_CYCLE (UINT32_MAX - 1)
#define IMPL_USER (UINT32_MAX-2)
#define IMPL_PARAM (UINT32_MAX-3)
#define IMPL_READ(i) (*(volatile uint32_t *) &(i)) #define IMPL_READ(i) (*(volatile uint32_t *) &(i))
static uint32_t icp_blake3_impl = IMPL_FASTEST;
#define BLAKE3_IMPL_NAME_MAX 16 /* Indicate that benchmark has been done */
static boolean_t blake3_initialized = B_FALSE;
/* id of fastest implementation */ /* Implementation that contains the fastest methods */
static uint32_t blake3_fastest_id = 0; static blake3_ops_t blake3_fastest_impl = {
.name = "fastest"
};
/* currently used id */ /* Hold all supported implementations */
static uint32_t blake3_current_id = 0; static const blake3_ops_t *blake3_supp_impls[ARRAY_SIZE(blake3_impls)];
static uint32_t blake3_supp_impls_cnt = 0;
/* id of module parameter (-1 == unused) */ /* Currently selected implementation */
static int blake3_param_id = -1; static uint32_t blake3_impl_chosen = IMPL_FASTEST;
/* return number of supported implementations */ static struct blake3_impl_selector {
int const char *name;
blake3_get_impl_count(void) uint32_t sel;
} blake3_impl_selectors[] = {
{ "cycle", IMPL_CYCLE },
{ "fastest", IMPL_FASTEST }
};
/* check the supported implementations */
static void blake3_impl_init(void)
{ {
static int impls = 0; int i, c;
int i;
if (impls) /* init only once */
return (impls); if (likely(blake3_initialized))
return;
for (i = 0; i < ARRAY_SIZE(blake3_impls); i++) { /* move supported implementations into blake3_supp_impls */
if (!blake3_impls[i]->is_supported()) continue; for (i = 0, c = 0; i < ARRAY_SIZE(blake3_impls); i++) {
impls++; const blake3_ops_t *impl = blake3_impls[i];
if (impl->is_supported && impl->is_supported())
blake3_supp_impls[c++] = impl;
} }
blake3_supp_impls_cnt = c;
return (impls); /* first init generic impl, may be changed via set_fastest() */
memcpy(&blake3_fastest_impl, blake3_impls[0],
sizeof (blake3_fastest_impl));
blake3_initialized = B_TRUE;
} }
/* return id of selected implementation */ /* get number of supported implementations */
int uint32_t
blake3_get_impl_id(void) blake3_impl_getcnt(void)
{ {
return (blake3_current_id); blake3_impl_init();
return (blake3_supp_impls_cnt);
} }
/* return name of selected implementation */ /* get id of selected implementation */
uint32_t
blake3_impl_getid(void)
{
return (IMPL_READ(blake3_impl_chosen));
}
/* get name of selected implementation */
const char * const char *
blake3_get_impl_name(void) blake3_impl_getname(void)
{ {
return (blake3_selected_impl->name); uint32_t impl = IMPL_READ(blake3_impl_chosen);
blake3_impl_init();
switch (impl) {
case IMPL_FASTEST:
return ("fastest");
case IMPL_CYCLE:
return ("cycle");
default:
return (blake3_supp_impls[impl]->name);
}
} }
/* setup id as fastest implementation */ /* setup id as fastest implementation */
void void
blake3_set_impl_fastest(uint32_t id) blake3_impl_set_fastest(uint32_t id)
{ {
blake3_fastest_id = id; /* setup fastest impl */
memcpy(&blake3_fastest_impl, blake3_supp_impls[id],
sizeof (blake3_fastest_impl));
} }
/* set implementation by id */ /* set implementation by id */
void void
blake3_set_impl_id(uint32_t id) blake3_impl_setid(uint32_t id)
{ {
int i, cid; blake3_impl_init();
switch (id) {
/* select fastest */ case IMPL_FASTEST:
if (id == IMPL_FASTEST) atomic_swap_32(&blake3_impl_chosen, IMPL_FASTEST);
id = blake3_fastest_id; break;
case IMPL_CYCLE:
/* select next or first */ atomic_swap_32(&blake3_impl_chosen, IMPL_CYCLE);
if (id == IMPL_CYCLE) break;
id = (++blake3_current_id) % blake3_get_impl_count(); default:
ASSERT3U(id, >=, 0);
/* 0..N for the real impl */ ASSERT3U(id, <, blake3_supp_impls_cnt);
for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) { atomic_swap_32(&blake3_impl_chosen, id);
if (!blake3_impls[i]->is_supported()) continue; break;
if (cid == id) {
blake3_current_id = cid;
blake3_selected_impl = blake3_impls[i];
return;
}
cid++;
} }
} }
/* set implementation by name */ /* set implementation by name */
int int
blake3_set_impl_name(const char *name) blake3_impl_setname(const char *val)
{ {
int i, cid; uint32_t impl = IMPL_READ(blake3_impl_chosen);
size_t val_len;
int i, err = -EINVAL;
if (strcmp(name, "fastest") == 0) { blake3_impl_init();
atomic_swap_32(&icp_blake3_impl, IMPL_FASTEST); val_len = strlen(val);
blake3_set_impl_id(IMPL_FASTEST); while ((val_len > 0) && !!isspace(val[val_len-1])) /* trim '\n' */
return (0); val_len--;
} else if (strcmp(name, "cycle") == 0) {
atomic_swap_32(&icp_blake3_impl, IMPL_CYCLE); /* check mandatory implementations */
blake3_set_impl_id(IMPL_CYCLE); for (i = 0; i < ARRAY_SIZE(blake3_impl_selectors); i++) {
return (0); const char *name = blake3_impl_selectors[i].name;
if (val_len == strlen(name) &&
strncmp(val, name, val_len) == 0) {
impl = blake3_impl_selectors[i].sel;
err = 0;
break;
}
} }
for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) { if (err != 0 && blake3_initialized) {
if (!blake3_impls[i]->is_supported()) continue; /* check all supported implementations */
if (strcmp(name, blake3_impls[i]->name) == 0) { for (i = 0; i < blake3_supp_impls_cnt; i++) {
if (icp_blake3_impl == IMPL_PARAM) { const char *name = blake3_supp_impls[i]->name;
blake3_param_id = cid;
return (0); if (val_len == strlen(name) &&
strncmp(val, name, val_len) == 0) {
impl = i;
err = 0;
break;
} }
blake3_selected_impl = blake3_impls[i];
blake3_current_id = cid;
return (0);
} }
cid++;
} }
return (-EINVAL); if (err == 0) {
atomic_swap_32(&blake3_impl_chosen, impl);
}
return (err);
} }
/* setup implementation */ const blake3_ops_t *
void
blake3_setup_impl(void)
{
switch (IMPL_READ(icp_blake3_impl)) {
case IMPL_PARAM:
blake3_set_impl_id(blake3_param_id);
atomic_swap_32(&icp_blake3_impl, IMPL_USER);
break;
case IMPL_FASTEST:
blake3_set_impl_id(IMPL_FASTEST);
break;
case IMPL_CYCLE:
blake3_set_impl_id(IMPL_CYCLE);
break;
default:
blake3_set_impl_id(blake3_current_id);
break;
}
}
/* return selected implementation */
const blake3_impl_ops_t *
blake3_impl_get_ops(void) blake3_impl_get_ops(void)
{ {
/* each call to ops will cycle */ const blake3_ops_t *ops = NULL;
if (icp_blake3_impl == IMPL_CYCLE) uint32_t impl = IMPL_READ(blake3_impl_chosen);
blake3_set_impl_id(IMPL_CYCLE);
return (blake3_selected_impl); blake3_impl_init();
switch (impl) {
case IMPL_FASTEST:
ASSERT(blake3_initialized);
ops = &blake3_fastest_impl;
break;
case IMPL_CYCLE:
/* Cycle through supported implementations */
ASSERT(blake3_initialized);
ASSERT3U(blake3_supp_impls_cnt, >, 0);
static uint32_t cycle_count = 0;
uint32_t idx = (++cycle_count) % blake3_supp_impls_cnt;
ops = blake3_supp_impls[idx];
break;
default:
ASSERT3U(blake3_supp_impls_cnt, >, 0);
ASSERT3U(impl, <, blake3_supp_impls_cnt);
ops = blake3_supp_impls[impl];
break;
}
ASSERT3P(ops, !=, NULL);
return (ops);
} }
#if defined(_KERNEL) #if defined(_KERNEL)
void **blake3_per_cpu_ctx; void **blake3_per_cpu_ctx;
void void
@ -215,6 +254,9 @@ blake3_per_cpu_ctx_init(void)
blake3_per_cpu_ctx[i] = kmem_alloc(sizeof (BLAKE3_CTX), blake3_per_cpu_ctx[i] = kmem_alloc(sizeof (BLAKE3_CTX),
KM_SLEEP); KM_SLEEP);
} }
/* init once in kernel mode */
blake3_impl_init();
} }
void void
@ -227,58 +269,94 @@ blake3_per_cpu_ctx_fini(void)
memset(blake3_per_cpu_ctx, 0, max_ncpus * sizeof (void *)); memset(blake3_per_cpu_ctx, 0, max_ncpus * sizeof (void *));
kmem_free(blake3_per_cpu_ctx, max_ncpus * sizeof (void *)); kmem_free(blake3_per_cpu_ctx, max_ncpus * sizeof (void *));
} }
#endif
#if defined(_KERNEL) && defined(__linux__) #define IMPL_FMT(impl, i) (((impl) == (i)) ? "[%s] " : "%s ")
static int
icp_blake3_impl_set(const char *name, zfs_kernel_param_t *kp)
{
char req_name[BLAKE3_IMPL_NAME_MAX];
size_t i;
/* sanitize input */ #if defined(__linux__)
i = strnlen(name, BLAKE3_IMPL_NAME_MAX);
if (i == 0 || i >= BLAKE3_IMPL_NAME_MAX)
return (-EINVAL);
strlcpy(req_name, name, BLAKE3_IMPL_NAME_MAX);
while (i > 0 && isspace(req_name[i-1]))
i--;
req_name[i] = '\0';
atomic_swap_32(&icp_blake3_impl, IMPL_PARAM);
return (blake3_set_impl_name(req_name));
}
static int static int
icp_blake3_impl_get(char *buffer, zfs_kernel_param_t *kp) blake3_param_get(char *buffer, zfs_kernel_param_t *unused)
{ {
int i, cid, cnt = 0; const uint32_t impl = IMPL_READ(blake3_impl_chosen);
char *fmt; char *fmt;
int cnt = 0;
/* cycling */ /* cycling */
fmt = (icp_blake3_impl == IMPL_CYCLE) ? "[cycle] " : "cycle "; fmt = IMPL_FMT(impl, IMPL_CYCLE);
cnt += sprintf(buffer + cnt, fmt); cnt += sprintf(buffer + cnt, fmt, "cycle");
/* fastest one */ /* list fastest */
fmt = (icp_blake3_impl == IMPL_FASTEST) ? "[fastest] " : "fastest "; fmt = IMPL_FMT(impl, IMPL_FASTEST);
cnt += sprintf(buffer + cnt, fmt); cnt += sprintf(buffer + cnt, fmt, "fastest");
/* user selected */ /* list all supported implementations */
for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) { for (uint32_t i = 0; i < blake3_supp_impls_cnt; ++i) {
if (!blake3_impls[i]->is_supported()) continue; fmt = IMPL_FMT(impl, i);
fmt = (icp_blake3_impl == IMPL_USER && cnt += sprintf(buffer + cnt, fmt,
cid == blake3_current_id) ? "[%s] " : "%s "; blake3_supp_impls[i]->name);
cnt += sprintf(buffer + cnt, fmt, blake3_impls[i]->name);
cid++;
} }
buffer[cnt] = 0;
return (cnt); return (cnt);
} }
module_param_call(icp_blake3_impl, icp_blake3_impl_set, icp_blake3_impl_get, static int
NULL, 0644); blake3_param_set(const char *val, zfs_kernel_param_t *unused)
MODULE_PARM_DESC(icp_blake3_impl, "Select BLAKE3 implementation."); {
(void) unused;
return (blake3_impl_setname(val));
}
#elif defined(__FreeBSD__)
#include <sys/sbuf.h>
static int
blake3_param(ZFS_MODULE_PARAM_ARGS)
{
int err;
if (req->newptr == NULL) {
const uint32_t impl = IMPL_READ(blake3_impl_chosen);
const int init_buflen = 64;
const char *fmt;
struct sbuf *s;
s = sbuf_new_for_sysctl(NULL, NULL, init_buflen, req);
/* cycling */
fmt = IMPL_FMT(impl, IMPL_CYCLE);
(void) sbuf_printf(s, fmt, "cycle");
/* list fastest */
fmt = IMPL_FMT(impl, IMPL_FASTEST);
(void) sbuf_printf(s, fmt, "fastest");
/* list all supported implementations */
for (uint32_t i = 0; i < blake3_supp_impls_cnt; ++i) {
fmt = IMPL_FMT(impl, i);
(void) sbuf_printf(s, fmt, blake3_supp_impls[i]->name);
}
err = sbuf_finish(s);
sbuf_delete(s);
return (err);
}
char buf[16];
err = sysctl_handle_string(oidp, buf, sizeof (buf), req);
if (err) {
return (err);
}
return (-blake3_impl_setname(buf));
}
#endif
#undef IMPL_FMT
ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs, zfs_, blake3_impl,
blake3_param_set, blake3_param_get, ZMOD_RW, \
"Select BLAKE3 implementation.");
#endif #endif

View File

@ -62,31 +62,31 @@ typedef struct blake3_impl_ops {
blake3_is_supported_f is_supported; blake3_is_supported_f is_supported;
int degree; int degree;
const char *name; const char *name;
} blake3_impl_ops_t; } blake3_ops_t;
/* Return selected BLAKE3 implementation ops */ /* Return selected BLAKE3 implementation ops */
extern const blake3_impl_ops_t *blake3_impl_get_ops(void); extern const blake3_ops_t *blake3_impl_get_ops(void);
extern const blake3_impl_ops_t blake3_generic_impl; extern const blake3_ops_t blake3_generic_impl;
#if defined(__aarch64__) || \ #if defined(__aarch64__) || \
(defined(__x86_64) && defined(HAVE_SSE2)) || \ (defined(__x86_64) && defined(HAVE_SSE2)) || \
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
extern const blake3_impl_ops_t blake3_sse2_impl; extern const blake3_ops_t blake3_sse2_impl;
#endif #endif
#if defined(__aarch64__) || \ #if defined(__aarch64__) || \
(defined(__x86_64) && defined(HAVE_SSE4_1)) || \ (defined(__x86_64) && defined(HAVE_SSE4_1)) || \
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
extern const blake3_impl_ops_t blake3_sse41_impl; extern const blake3_ops_t blake3_sse41_impl;
#endif #endif
#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2) #if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
extern const blake3_impl_ops_t blake3_avx2_impl; extern const blake3_ops_t blake3_avx2_impl;
#endif #endif
#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL) #if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
extern const blake3_impl_ops_t blake3_avx512_impl; extern const blake3_ops_t blake3_avx512_impl;
#endif #endif
#if defined(__x86_64) #if defined(__x86_64)

View File

@ -81,7 +81,7 @@ static boolean_t blake3_is_sse2_supported(void)
#endif #endif
} }
const blake3_impl_ops_t blake3_sse2_impl = { const blake3_ops_t blake3_sse2_impl = {
.compress_in_place = blake3_compress_in_place_sse2, .compress_in_place = blake3_compress_in_place_sse2,
.compress_xof = blake3_compress_xof_sse2, .compress_xof = blake3_compress_xof_sse2,
.hash_many = blake3_hash_many_sse2, .hash_many = blake3_hash_many_sse2,
@ -147,7 +147,7 @@ static boolean_t blake3_is_sse41_supported(void)
#endif #endif
} }
const blake3_impl_ops_t blake3_sse41_impl = { const blake3_ops_t blake3_sse41_impl = {
.compress_in_place = blake3_compress_in_place_sse41, .compress_in_place = blake3_compress_in_place_sse41,
.compress_xof = blake3_compress_xof_sse41, .compress_xof = blake3_compress_xof_sse41,
.hash_many = blake3_hash_many_sse41, .hash_many = blake3_hash_many_sse41,
@ -179,7 +179,7 @@ static boolean_t blake3_is_avx2_supported(void)
zfs_avx2_available()); zfs_avx2_available());
} }
const blake3_impl_ops_t blake3_avx2_impl = { const blake3_ops_t blake3_avx2_impl = {
.compress_in_place = blake3_compress_in_place_sse41, .compress_in_place = blake3_compress_in_place_sse41,
.compress_xof = blake3_compress_xof_sse41, .compress_xof = blake3_compress_xof_sse41,
.hash_many = blake3_hash_many_avx2, .hash_many = blake3_hash_many_avx2,
@ -237,7 +237,7 @@ static boolean_t blake3_is_avx512_supported(void)
zfs_avx512vl_available()); zfs_avx512vl_available());
} }
const blake3_impl_ops_t blake3_avx512_impl = { const blake3_ops_t blake3_avx512_impl = {
.compress_in_place = blake3_compress_in_place_avx512, .compress_in_place = blake3_compress_in_place_avx512,
.compress_xof = blake3_compress_xof_avx512, .compress_xof = blake3_compress_xof_avx512,
.hash_many = blake3_hash_many_avx512, .hash_many = blake3_hash_many_avx512,

View File

@ -244,12 +244,13 @@ chksum_benchmark(void)
#endif #endif
chksum_stat_t *cs; chksum_stat_t *cs;
int cbid = 0, id; int cbid = 0;
uint64_t max = 0; uint64_t max = 0;
uint32_t id, id_save;
/* space for the benchmark times */ /* space for the benchmark times */
chksum_stat_cnt = 4; chksum_stat_cnt = 4;
chksum_stat_cnt += blake3_get_impl_count(); chksum_stat_cnt += blake3_impl_getcnt();
chksum_stat_data = (chksum_stat_t *)kmem_zalloc( chksum_stat_data = (chksum_stat_t *)kmem_zalloc(
sizeof (chksum_stat_t) * chksum_stat_cnt, KM_SLEEP); sizeof (chksum_stat_t) * chksum_stat_cnt, KM_SLEEP);
@ -290,20 +291,24 @@ chksum_benchmark(void)
chksum_benchit(cs); chksum_benchit(cs);
/* blake3 */ /* blake3 */
for (id = 0; id < blake3_get_impl_count(); id++) { id_save = blake3_impl_getid();
blake3_set_impl_id(id); for (id = 0; id < blake3_impl_getcnt(); id++) {
blake3_impl_setid(id);
cs = &chksum_stat_data[cbid++]; cs = &chksum_stat_data[cbid++];
cs->init = abd_checksum_blake3_tmpl_init; cs->init = abd_checksum_blake3_tmpl_init;
cs->func = abd_checksum_blake3_native; cs->func = abd_checksum_blake3_native;
cs->free = abd_checksum_blake3_tmpl_free; cs->free = abd_checksum_blake3_tmpl_free;
cs->name = "blake3"; cs->name = "blake3";
cs->impl = blake3_get_impl_name(); cs->impl = blake3_impl_getname();
chksum_benchit(cs); chksum_benchit(cs);
if (cs->bs256k > max) { if (cs->bs256k > max) {
max = cs->bs256k; max = cs->bs256k;
blake3_set_impl_fastest(id); blake3_impl_set_fastest(id);
} }
} }
/* restore initial value */
blake3_impl_setid(id_save);
} }
void void
@ -329,9 +334,6 @@ chksum_init(void)
chksum_kstat_addr); chksum_kstat_addr);
kstat_install(chksum_kstat); kstat_install(chksum_kstat);
} }
/* setup implementations */
blake3_setup_impl();
} }
void void

View File

@ -497,9 +497,9 @@ main(int argc, char *argv[])
} }
(void) printf("Running algorithm correctness tests:\n"); (void) printf("Running algorithm correctness tests:\n");
for (id = 0; id < blake3_get_impl_count(); id++) { for (id = 0; id < blake3_impl_getcnt(); id++) {
blake3_set_impl_id(id); blake3_impl_setid(id);
const char *name = blake3_get_impl_name(); const char *name = blake3_impl_getname();
dprintf("Result for BLAKE3-%s:\n", name); dprintf("Result for BLAKE3-%s:\n", name);
for (i = 0; TestArray[i].hash; i++) { for (i = 0; TestArray[i].hash; i++) {
blake3_test_t *cur = &TestArray[i]; blake3_test_t *cur = &TestArray[i];
@ -565,9 +565,9 @@ main(int argc, char *argv[])
} while (0) } while (0)
printf("Running performance tests (hashing 1024 MiB of data):\n"); printf("Running performance tests (hashing 1024 MiB of data):\n");
for (id = 0; id < blake3_get_impl_count(); id++) { for (id = 0; id < blake3_impl_getcnt(); id++) {
blake3_set_impl_id(id); blake3_impl_setid(id);
const char *name = blake3_get_impl_name(); const char *name = blake3_impl_getname();
BLAKE3_PERF_TEST(name, 256); BLAKE3_PERF_TEST(name, 256);
} }