Fix BLAKE3 tunable and module loading on Linux and FreeBSD
Apply options to BLAKE3 similar to those already used for zfs_fletcher_4_impl.

The zfs module parameter on Linux changes from icp_blake3_impl to zfs_blake3_impl.

You can check and set it on Linux via sysfs like this:
```
[bash]# cat /sys/module/zfs/parameters/zfs_blake3_impl
cycle [fastest] generic sse2 sse41 avx2

[bash]# echo sse2 > /sys/module/zfs/parameters/zfs_blake3_impl
[bash]# cat /sys/module/zfs/parameters/zfs_blake3_impl
cycle fastest generic [sse2] sse41 avx2
```

The modprobe module parameters may also be used now:
```
[bash]# modprobe zfs zfs_blake3_impl=sse41
[bash]# cat /sys/module/zfs/parameters/zfs_blake3_impl
cycle fastest generic sse2 [sse41] avx2
```

On FreeBSD the BLAKE3 implementation can be set via sysctl like this:
```
[bsd]# sysctl vfs.zfs.blake3_impl
vfs.zfs.blake3_impl: cycle [fastest] generic sse2 sse41 avx2

[bsd]# sysctl vfs.zfs.blake3_impl=sse2
vfs.zfs.blake3_impl: cycle [fastest] generic sse2 sse41 avx2 \
  -> cycle fastest generic [sse2] sse41 avx2
```

This commit also changes some BLAKE3 internals:
- blake3_impl_ops_t was renamed to blake3_ops_t
- all functions are named blake3_impl_NAME() now

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Co-authored-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Closes #13725
This commit is contained in:
parent
7dee043af5
commit
75e8b5ad84
|
@ -6413,7 +6413,7 @@ ztest_blake3(ztest_ds_t *zd, uint64_t id)
|
||||||
void *res2 = &zc_res2;
|
void *res2 = &zc_res2;
|
||||||
|
|
||||||
/* BLAKE3_KEY_LEN = 32 */
|
/* BLAKE3_KEY_LEN = 32 */
|
||||||
VERIFY0(blake3_set_impl_name("generic"));
|
VERIFY0(blake3_impl_setname("generic"));
|
||||||
templ = abd_checksum_blake3_tmpl_init(&salt);
|
templ = abd_checksum_blake3_tmpl_init(&salt);
|
||||||
Blake3_InitKeyed(&ctx, salt_ptr);
|
Blake3_InitKeyed(&ctx, salt_ptr);
|
||||||
Blake3_Update(&ctx, buf, size);
|
Blake3_Update(&ctx, buf, size);
|
||||||
|
@ -6422,7 +6422,7 @@ ztest_blake3(ztest_ds_t *zd, uint64_t id)
|
||||||
ZIO_CHECKSUM_BSWAP(&zc_ref2);
|
ZIO_CHECKSUM_BSWAP(&zc_ref2);
|
||||||
abd_checksum_blake3_tmpl_free(templ);
|
abd_checksum_blake3_tmpl_free(templ);
|
||||||
|
|
||||||
VERIFY0(blake3_set_impl_name("cycle"));
|
VERIFY0(blake3_impl_setname("cycle"));
|
||||||
while (run_count-- > 0) {
|
while (run_count-- > 0) {
|
||||||
|
|
||||||
/* Test current implementation */
|
/* Test current implementation */
|
||||||
|
|
|
@ -31,10 +31,6 @@
|
||||||
|
|
||||||
#include <sys/sysctl.h>
|
#include <sys/sysctl.h>
|
||||||
|
|
||||||
#define EXPORT_SYMBOL(x)
|
|
||||||
#define module_param(a, b, c)
|
|
||||||
#define MODULE_PARM_DESC(a, b)
|
|
||||||
|
|
||||||
#define ZMOD_RW CTLFLAG_RWTUN
|
#define ZMOD_RW CTLFLAG_RWTUN
|
||||||
#define ZMOD_RD CTLFLAG_RDTUN
|
#define ZMOD_RD CTLFLAG_RDTUN
|
||||||
|
|
||||||
|
@ -98,6 +94,9 @@
|
||||||
#define fletcher_4_param_set_args(var) \
|
#define fletcher_4_param_set_args(var) \
|
||||||
CTLTYPE_STRING, NULL, 0, fletcher_4_param, "A"
|
CTLTYPE_STRING, NULL, 0, fletcher_4_param, "A"
|
||||||
|
|
||||||
|
#define blake3_param_set_args(var) \
|
||||||
|
CTLTYPE_STRING, NULL, 0, blake3_param, "A"
|
||||||
|
|
||||||
#include <sys/kernel.h>
|
#include <sys/kernel.h>
|
||||||
#define module_init(fn) \
|
#define module_init(fn) \
|
||||||
static void \
|
static void \
|
||||||
|
|
|
@ -72,7 +72,7 @@ typedef struct {
|
||||||
*/
|
*/
|
||||||
uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
|
uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
|
||||||
|
|
||||||
/* const blake3_impl_ops_t *ops */
|
/* const blake3_ops_t *ops */
|
||||||
const void *ops;
|
const void *ops;
|
||||||
} BLAKE3_CTX;
|
} BLAKE3_CTX;
|
||||||
|
|
||||||
|
@ -97,26 +97,23 @@ extern void **blake3_per_cpu_ctx;
|
||||||
extern void blake3_per_cpu_ctx_init(void);
|
extern void blake3_per_cpu_ctx_init(void);
|
||||||
extern void blake3_per_cpu_ctx_fini(void);
|
extern void blake3_per_cpu_ctx_fini(void);
|
||||||
|
|
||||||
/* return number of supported implementations */
|
/* get count of supported implementations */
|
||||||
extern int blake3_get_impl_count(void);
|
extern uint32_t blake3_impl_getcnt(void);
|
||||||
|
|
||||||
/* return id of selected implementation */
|
/* get id of selected implementation */
|
||||||
extern int blake3_get_impl_id(void);
|
extern uint32_t blake3_impl_getid(void);
|
||||||
|
|
||||||
/* return name of selected implementation */
|
/* get name of selected implementation */
|
||||||
extern const char *blake3_get_impl_name(void);
|
extern const char *blake3_impl_getname(void);
|
||||||
|
|
||||||
/* setup id as fastest implementation */
|
/* setup id as fastest implementation */
|
||||||
extern void blake3_set_impl_fastest(uint32_t id);
|
extern void blake3_impl_set_fastest(uint32_t id);
|
||||||
|
|
||||||
/* set implementation by id */
|
/* set implementation by id */
|
||||||
extern void blake3_set_impl_id(uint32_t id);
|
extern void blake3_impl_setid(uint32_t id);
|
||||||
|
|
||||||
/* set implementation by name */
|
/* set implementation by name */
|
||||||
extern int blake3_set_impl_name(const char *name);
|
extern int blake3_impl_setname(const char *name);
|
||||||
|
|
||||||
/* set startup implementation */
|
|
||||||
extern void blake3_setup_impl(void);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
|
@ -129,7 +129,7 @@ static output_t make_output(const uint32_t input_cv[8],
|
||||||
* bytes. For that reason, chaining values in the CV stack are represented as
|
* bytes. For that reason, chaining values in the CV stack are represented as
|
||||||
* bytes.
|
* bytes.
|
||||||
*/
|
*/
|
||||||
static void output_chaining_value(const blake3_impl_ops_t *ops,
|
static void output_chaining_value(const blake3_ops_t *ops,
|
||||||
const output_t *ctx, uint8_t cv[32])
|
const output_t *ctx, uint8_t cv[32])
|
||||||
{
|
{
|
||||||
uint32_t cv_words[8];
|
uint32_t cv_words[8];
|
||||||
|
@ -139,7 +139,7 @@ static void output_chaining_value(const blake3_impl_ops_t *ops,
|
||||||
store_cv_words(cv, cv_words);
|
store_cv_words(cv, cv_words);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void output_root_bytes(const blake3_impl_ops_t *ops, const output_t *ctx,
|
static void output_root_bytes(const blake3_ops_t *ops, const output_t *ctx,
|
||||||
uint64_t seek, uint8_t *out, size_t out_len)
|
uint64_t seek, uint8_t *out, size_t out_len)
|
||||||
{
|
{
|
||||||
uint64_t output_block_counter = seek / 64;
|
uint64_t output_block_counter = seek / 64;
|
||||||
|
@ -163,7 +163,7 @@ static void output_root_bytes(const blake3_impl_ops_t *ops, const output_t *ctx,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void chunk_state_update(const blake3_impl_ops_t *ops,
|
static void chunk_state_update(const blake3_ops_t *ops,
|
||||||
blake3_chunk_state_t *ctx, const uint8_t *input, size_t input_len)
|
blake3_chunk_state_t *ctx, const uint8_t *input, size_t input_len)
|
||||||
{
|
{
|
||||||
if (ctx->buf_len > 0) {
|
if (ctx->buf_len > 0) {
|
||||||
|
@ -230,7 +230,7 @@ static size_t left_len(size_t content_len)
|
||||||
* number of chunks hashed. These chunks are never the root and never empty;
|
* number of chunks hashed. These chunks are never the root and never empty;
|
||||||
* those cases use a different codepath.
|
* those cases use a different codepath.
|
||||||
*/
|
*/
|
||||||
static size_t compress_chunks_parallel(const blake3_impl_ops_t *ops,
|
static size_t compress_chunks_parallel(const blake3_ops_t *ops,
|
||||||
const uint8_t *input, size_t input_len, const uint32_t key[8],
|
const uint8_t *input, size_t input_len, const uint32_t key[8],
|
||||||
uint64_t chunk_counter, uint8_t flags, uint8_t *out)
|
uint64_t chunk_counter, uint8_t flags, uint8_t *out)
|
||||||
{
|
{
|
||||||
|
@ -274,7 +274,7 @@ static size_t compress_chunks_parallel(const blake3_impl_ops_t *ops,
|
||||||
* return it as an additional output.) These parents are never the root and
|
* return it as an additional output.) These parents are never the root and
|
||||||
* never empty; those cases use a different codepath.
|
* never empty; those cases use a different codepath.
|
||||||
*/
|
*/
|
||||||
static size_t compress_parents_parallel(const blake3_impl_ops_t *ops,
|
static size_t compress_parents_parallel(const blake3_ops_t *ops,
|
||||||
const uint8_t *child_chaining_values, size_t num_chaining_values,
|
const uint8_t *child_chaining_values, size_t num_chaining_values,
|
||||||
const uint32_t key[8], uint8_t flags, uint8_t *out)
|
const uint32_t key[8], uint8_t flags, uint8_t *out)
|
||||||
{
|
{
|
||||||
|
@ -320,7 +320,7 @@ static size_t compress_parents_parallel(const blake3_impl_ops_t *ops,
|
||||||
* of implementing this special rule? Because we don't want to limit SIMD or
|
* of implementing this special rule? Because we don't want to limit SIMD or
|
||||||
* multi-threading parallelism for that update().
|
* multi-threading parallelism for that update().
|
||||||
*/
|
*/
|
||||||
static size_t blake3_compress_subtree_wide(const blake3_impl_ops_t *ops,
|
static size_t blake3_compress_subtree_wide(const blake3_ops_t *ops,
|
||||||
const uint8_t *input, size_t input_len, const uint32_t key[8],
|
const uint8_t *input, size_t input_len, const uint32_t key[8],
|
||||||
uint64_t chunk_counter, uint8_t flags, uint8_t *out)
|
uint64_t chunk_counter, uint8_t flags, uint8_t *out)
|
||||||
{
|
{
|
||||||
|
@ -406,7 +406,7 @@ static size_t blake3_compress_subtree_wide(const blake3_impl_ops_t *ops,
|
||||||
* As with compress_subtree_wide(), this function is not used on inputs of 1
|
* As with compress_subtree_wide(), this function is not used on inputs of 1
|
||||||
* chunk or less. That's a different codepath.
|
* chunk or less. That's a different codepath.
|
||||||
*/
|
*/
|
||||||
static void compress_subtree_to_parent_node(const blake3_impl_ops_t *ops,
|
static void compress_subtree_to_parent_node(const blake3_ops_t *ops,
|
||||||
const uint8_t *input, size_t input_len, const uint32_t key[8],
|
const uint8_t *input, size_t input_len, const uint32_t key[8],
|
||||||
uint64_t chunk_counter, uint8_t flags, uint8_t out[2 * BLAKE3_OUT_LEN])
|
uint64_t chunk_counter, uint8_t flags, uint8_t out[2 * BLAKE3_OUT_LEN])
|
||||||
{
|
{
|
||||||
|
|
|
@ -192,7 +192,7 @@ static inline boolean_t blake3_is_generic_supported(void)
|
||||||
return (B_TRUE);
|
return (B_TRUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
const blake3_impl_ops_t blake3_generic_impl = {
|
const blake3_ops_t blake3_generic_impl = {
|
||||||
.compress_in_place = blake3_compress_in_place_generic,
|
.compress_in_place = blake3_compress_in_place_generic,
|
||||||
.compress_xof = blake3_compress_xof_generic,
|
.compress_xof = blake3_compress_xof_generic,
|
||||||
.hash_many = blake3_hash_many_generic,
|
.hash_many = blake3_hash_many_generic,
|
||||||
|
|
|
@ -28,7 +28,7 @@
|
||||||
|
|
||||||
#include "blake3_impl.h"
|
#include "blake3_impl.h"
|
||||||
|
|
||||||
static const blake3_impl_ops_t *const blake3_impls[] = {
|
static const blake3_ops_t *const blake3_impls[] = {
|
||||||
&blake3_generic_impl,
|
&blake3_generic_impl,
|
||||||
#if defined(__aarch64__) || \
|
#if defined(__aarch64__) || \
|
||||||
(defined(__x86_64) && defined(HAVE_SSE2)) || \
|
(defined(__x86_64) && defined(HAVE_SSE2)) || \
|
||||||
|
@ -48,160 +48,199 @@ static const blake3_impl_ops_t *const blake3_impls[] = {
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
/* this pointer holds current ops for implementation */
|
/* Select BLAKE3 implementation */
|
||||||
static const blake3_impl_ops_t *blake3_selected_impl = &blake3_generic_impl;
|
|
||||||
|
|
||||||
/* special implementation selections */
|
|
||||||
#define IMPL_FASTEST (UINT32_MAX)
|
#define IMPL_FASTEST (UINT32_MAX)
|
||||||
#define IMPL_CYCLE (UINT32_MAX-1)
|
#define IMPL_CYCLE (UINT32_MAX - 1)
|
||||||
#define IMPL_USER (UINT32_MAX-2)
|
|
||||||
#define IMPL_PARAM (UINT32_MAX-3)
|
|
||||||
|
|
||||||
#define IMPL_READ(i) (*(volatile uint32_t *) &(i))
|
#define IMPL_READ(i) (*(volatile uint32_t *) &(i))
|
||||||
static uint32_t icp_blake3_impl = IMPL_FASTEST;
|
|
||||||
|
|
||||||
#define BLAKE3_IMPL_NAME_MAX 16
|
/* Indicate that benchmark has been done */
|
||||||
|
static boolean_t blake3_initialized = B_FALSE;
|
||||||
|
|
||||||
/* id of fastest implementation */
|
/* Implementation that contains the fastest methods */
|
||||||
static uint32_t blake3_fastest_id = 0;
|
static blake3_ops_t blake3_fastest_impl = {
|
||||||
|
.name = "fastest"
|
||||||
|
};
|
||||||
|
|
||||||
/* currently used id */
|
/* Hold all supported implementations */
|
||||||
static uint32_t blake3_current_id = 0;
|
static const blake3_ops_t *blake3_supp_impls[ARRAY_SIZE(blake3_impls)];
|
||||||
|
static uint32_t blake3_supp_impls_cnt = 0;
|
||||||
|
|
||||||
/* id of module parameter (-1 == unused) */
|
/* Currently selected implementation */
|
||||||
static int blake3_param_id = -1;
|
static uint32_t blake3_impl_chosen = IMPL_FASTEST;
|
||||||
|
|
||||||
/* return number of supported implementations */
|
static struct blake3_impl_selector {
|
||||||
int
|
const char *name;
|
||||||
blake3_get_impl_count(void)
|
uint32_t sel;
|
||||||
|
} blake3_impl_selectors[] = {
|
||||||
|
{ "cycle", IMPL_CYCLE },
|
||||||
|
{ "fastest", IMPL_FASTEST }
|
||||||
|
};
|
||||||
|
|
||||||
|
/* check the supported implementations */
|
||||||
|
static void blake3_impl_init(void)
|
||||||
{
|
{
|
||||||
static int impls = 0;
|
int i, c;
|
||||||
int i;
|
|
||||||
|
|
||||||
if (impls)
|
/* init only once */
|
||||||
return (impls);
|
if (likely(blake3_initialized))
|
||||||
|
return;
|
||||||
|
|
||||||
for (i = 0; i < ARRAY_SIZE(blake3_impls); i++) {
|
/* move supported implementations into blake3_supp_impls */
|
||||||
if (!blake3_impls[i]->is_supported()) continue;
|
for (i = 0, c = 0; i < ARRAY_SIZE(blake3_impls); i++) {
|
||||||
impls++;
|
const blake3_ops_t *impl = blake3_impls[i];
|
||||||
|
|
||||||
|
if (impl->is_supported && impl->is_supported())
|
||||||
|
blake3_supp_impls[c++] = impl;
|
||||||
}
|
}
|
||||||
|
blake3_supp_impls_cnt = c;
|
||||||
|
|
||||||
return (impls);
|
/* first init generic impl, may be changed via set_fastest() */
|
||||||
|
memcpy(&blake3_fastest_impl, blake3_impls[0],
|
||||||
|
sizeof (blake3_fastest_impl));
|
||||||
|
blake3_initialized = B_TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* return id of selected implementation */
|
/* get number of supported implementations */
|
||||||
int
|
uint32_t
|
||||||
blake3_get_impl_id(void)
|
blake3_impl_getcnt(void)
|
||||||
{
|
{
|
||||||
return (blake3_current_id);
|
blake3_impl_init();
|
||||||
|
return (blake3_supp_impls_cnt);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* return name of selected implementation */
|
/* get id of selected implementation */
|
||||||
|
uint32_t
|
||||||
|
blake3_impl_getid(void)
|
||||||
|
{
|
||||||
|
return (IMPL_READ(blake3_impl_chosen));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* get name of selected implementation */
|
||||||
const char *
|
const char *
|
||||||
blake3_get_impl_name(void)
|
blake3_impl_getname(void)
|
||||||
{
|
{
|
||||||
return (blake3_selected_impl->name);
|
uint32_t impl = IMPL_READ(blake3_impl_chosen);
|
||||||
|
|
||||||
|
blake3_impl_init();
|
||||||
|
switch (impl) {
|
||||||
|
case IMPL_FASTEST:
|
||||||
|
return ("fastest");
|
||||||
|
case IMPL_CYCLE:
|
||||||
|
return ("cycle");
|
||||||
|
default:
|
||||||
|
return (blake3_supp_impls[impl]->name);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* setup id as fastest implementation */
|
/* setup id as fastest implementation */
|
||||||
void
|
void
|
||||||
blake3_set_impl_fastest(uint32_t id)
|
blake3_impl_set_fastest(uint32_t id)
|
||||||
{
|
{
|
||||||
blake3_fastest_id = id;
|
/* setup fastest impl */
|
||||||
|
memcpy(&blake3_fastest_impl, blake3_supp_impls[id],
|
||||||
|
sizeof (blake3_fastest_impl));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* set implementation by id */
|
/* set implementation by id */
|
||||||
void
|
void
|
||||||
blake3_set_impl_id(uint32_t id)
|
blake3_impl_setid(uint32_t id)
|
||||||
{
|
{
|
||||||
int i, cid;
|
blake3_impl_init();
|
||||||
|
switch (id) {
|
||||||
/* select fastest */
|
case IMPL_FASTEST:
|
||||||
if (id == IMPL_FASTEST)
|
atomic_swap_32(&blake3_impl_chosen, IMPL_FASTEST);
|
||||||
id = blake3_fastest_id;
|
break;
|
||||||
|
case IMPL_CYCLE:
|
||||||
/* select next or first */
|
atomic_swap_32(&blake3_impl_chosen, IMPL_CYCLE);
|
||||||
if (id == IMPL_CYCLE)
|
break;
|
||||||
id = (++blake3_current_id) % blake3_get_impl_count();
|
default:
|
||||||
|
ASSERT3U(id, >=, 0);
|
||||||
/* 0..N for the real impl */
|
ASSERT3U(id, <, blake3_supp_impls_cnt);
|
||||||
for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) {
|
atomic_swap_32(&blake3_impl_chosen, id);
|
||||||
if (!blake3_impls[i]->is_supported()) continue;
|
break;
|
||||||
if (cid == id) {
|
|
||||||
blake3_current_id = cid;
|
|
||||||
blake3_selected_impl = blake3_impls[i];
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
cid++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* set implementation by name */
|
/* set implementation by name */
|
||||||
int
|
int
|
||||||
blake3_set_impl_name(const char *name)
|
blake3_impl_setname(const char *val)
|
||||||
{
|
{
|
||||||
int i, cid;
|
uint32_t impl = IMPL_READ(blake3_impl_chosen);
|
||||||
|
size_t val_len;
|
||||||
|
int i, err = -EINVAL;
|
||||||
|
|
||||||
if (strcmp(name, "fastest") == 0) {
|
blake3_impl_init();
|
||||||
atomic_swap_32(&icp_blake3_impl, IMPL_FASTEST);
|
val_len = strlen(val);
|
||||||
blake3_set_impl_id(IMPL_FASTEST);
|
while ((val_len > 0) && !!isspace(val[val_len-1])) /* trim '\n' */
|
||||||
return (0);
|
val_len--;
|
||||||
} else if (strcmp(name, "cycle") == 0) {
|
|
||||||
atomic_swap_32(&icp_blake3_impl, IMPL_CYCLE);
|
/* check mandatory implementations */
|
||||||
blake3_set_impl_id(IMPL_CYCLE);
|
for (i = 0; i < ARRAY_SIZE(blake3_impl_selectors); i++) {
|
||||||
return (0);
|
const char *name = blake3_impl_selectors[i].name;
|
||||||
|
|
||||||
|
if (val_len == strlen(name) &&
|
||||||
|
strncmp(val, name, val_len) == 0) {
|
||||||
|
impl = blake3_impl_selectors[i].sel;
|
||||||
|
err = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) {
|
if (err != 0 && blake3_initialized) {
|
||||||
if (!blake3_impls[i]->is_supported()) continue;
|
/* check all supported implementations */
|
||||||
if (strcmp(name, blake3_impls[i]->name) == 0) {
|
for (i = 0; i < blake3_supp_impls_cnt; i++) {
|
||||||
if (icp_blake3_impl == IMPL_PARAM) {
|
const char *name = blake3_supp_impls[i]->name;
|
||||||
blake3_param_id = cid;
|
|
||||||
return (0);
|
if (val_len == strlen(name) &&
|
||||||
|
strncmp(val, name, val_len) == 0) {
|
||||||
|
impl = i;
|
||||||
|
err = 0;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
blake3_selected_impl = blake3_impls[i];
|
|
||||||
blake3_current_id = cid;
|
|
||||||
return (0);
|
|
||||||
}
|
}
|
||||||
cid++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return (-EINVAL);
|
if (err == 0) {
|
||||||
|
atomic_swap_32(&blake3_impl_chosen, impl);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (err);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* setup implementation */
|
const blake3_ops_t *
|
||||||
void
|
|
||||||
blake3_setup_impl(void)
|
|
||||||
{
|
|
||||||
switch (IMPL_READ(icp_blake3_impl)) {
|
|
||||||
case IMPL_PARAM:
|
|
||||||
blake3_set_impl_id(blake3_param_id);
|
|
||||||
atomic_swap_32(&icp_blake3_impl, IMPL_USER);
|
|
||||||
break;
|
|
||||||
case IMPL_FASTEST:
|
|
||||||
blake3_set_impl_id(IMPL_FASTEST);
|
|
||||||
break;
|
|
||||||
case IMPL_CYCLE:
|
|
||||||
blake3_set_impl_id(IMPL_CYCLE);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
blake3_set_impl_id(blake3_current_id);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* return selected implementation */
|
|
||||||
const blake3_impl_ops_t *
|
|
||||||
blake3_impl_get_ops(void)
|
blake3_impl_get_ops(void)
|
||||||
{
|
{
|
||||||
/* each call to ops will cycle */
|
const blake3_ops_t *ops = NULL;
|
||||||
if (icp_blake3_impl == IMPL_CYCLE)
|
uint32_t impl = IMPL_READ(blake3_impl_chosen);
|
||||||
blake3_set_impl_id(IMPL_CYCLE);
|
|
||||||
|
|
||||||
return (blake3_selected_impl);
|
blake3_impl_init();
|
||||||
|
switch (impl) {
|
||||||
|
case IMPL_FASTEST:
|
||||||
|
ASSERT(blake3_initialized);
|
||||||
|
ops = &blake3_fastest_impl;
|
||||||
|
break;
|
||||||
|
case IMPL_CYCLE:
|
||||||
|
/* Cycle through supported implementations */
|
||||||
|
ASSERT(blake3_initialized);
|
||||||
|
ASSERT3U(blake3_supp_impls_cnt, >, 0);
|
||||||
|
static uint32_t cycle_count = 0;
|
||||||
|
uint32_t idx = (++cycle_count) % blake3_supp_impls_cnt;
|
||||||
|
ops = blake3_supp_impls[idx];
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
ASSERT3U(blake3_supp_impls_cnt, >, 0);
|
||||||
|
ASSERT3U(impl, <, blake3_supp_impls_cnt);
|
||||||
|
ops = blake3_supp_impls[impl];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT3P(ops, !=, NULL);
|
||||||
|
return (ops);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
|
|
||||||
void **blake3_per_cpu_ctx;
|
void **blake3_per_cpu_ctx;
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -215,6 +254,9 @@ blake3_per_cpu_ctx_init(void)
|
||||||
blake3_per_cpu_ctx[i] = kmem_alloc(sizeof (BLAKE3_CTX),
|
blake3_per_cpu_ctx[i] = kmem_alloc(sizeof (BLAKE3_CTX),
|
||||||
KM_SLEEP);
|
KM_SLEEP);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* init once in kernel mode */
|
||||||
|
blake3_impl_init();
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -227,58 +269,94 @@ blake3_per_cpu_ctx_fini(void)
|
||||||
memset(blake3_per_cpu_ctx, 0, max_ncpus * sizeof (void *));
|
memset(blake3_per_cpu_ctx, 0, max_ncpus * sizeof (void *));
|
||||||
kmem_free(blake3_per_cpu_ctx, max_ncpus * sizeof (void *));
|
kmem_free(blake3_per_cpu_ctx, max_ncpus * sizeof (void *));
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(_KERNEL) && defined(__linux__)
|
#define IMPL_FMT(impl, i) (((impl) == (i)) ? "[%s] " : "%s ")
|
||||||
static int
|
|
||||||
icp_blake3_impl_set(const char *name, zfs_kernel_param_t *kp)
|
|
||||||
{
|
|
||||||
char req_name[BLAKE3_IMPL_NAME_MAX];
|
|
||||||
size_t i;
|
|
||||||
|
|
||||||
/* sanitize input */
|
#if defined(__linux__)
|
||||||
i = strnlen(name, BLAKE3_IMPL_NAME_MAX);
|
|
||||||
if (i == 0 || i >= BLAKE3_IMPL_NAME_MAX)
|
|
||||||
return (-EINVAL);
|
|
||||||
|
|
||||||
strlcpy(req_name, name, BLAKE3_IMPL_NAME_MAX);
|
|
||||||
while (i > 0 && isspace(req_name[i-1]))
|
|
||||||
i--;
|
|
||||||
req_name[i] = '\0';
|
|
||||||
|
|
||||||
atomic_swap_32(&icp_blake3_impl, IMPL_PARAM);
|
|
||||||
return (blake3_set_impl_name(req_name));
|
|
||||||
}
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
icp_blake3_impl_get(char *buffer, zfs_kernel_param_t *kp)
|
blake3_param_get(char *buffer, zfs_kernel_param_t *unused)
|
||||||
{
|
{
|
||||||
int i, cid, cnt = 0;
|
const uint32_t impl = IMPL_READ(blake3_impl_chosen);
|
||||||
char *fmt;
|
char *fmt;
|
||||||
|
int cnt = 0;
|
||||||
|
|
||||||
/* cycling */
|
/* cycling */
|
||||||
fmt = (icp_blake3_impl == IMPL_CYCLE) ? "[cycle] " : "cycle ";
|
fmt = IMPL_FMT(impl, IMPL_CYCLE);
|
||||||
cnt += sprintf(buffer + cnt, fmt);
|
cnt += sprintf(buffer + cnt, fmt, "cycle");
|
||||||
|
|
||||||
/* fastest one */
|
/* list fastest */
|
||||||
fmt = (icp_blake3_impl == IMPL_FASTEST) ? "[fastest] " : "fastest ";
|
fmt = IMPL_FMT(impl, IMPL_FASTEST);
|
||||||
cnt += sprintf(buffer + cnt, fmt);
|
cnt += sprintf(buffer + cnt, fmt, "fastest");
|
||||||
|
|
||||||
/* user selected */
|
/* list all supported implementations */
|
||||||
for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) {
|
for (uint32_t i = 0; i < blake3_supp_impls_cnt; ++i) {
|
||||||
if (!blake3_impls[i]->is_supported()) continue;
|
fmt = IMPL_FMT(impl, i);
|
||||||
fmt = (icp_blake3_impl == IMPL_USER &&
|
cnt += sprintf(buffer + cnt, fmt,
|
||||||
cid == blake3_current_id) ? "[%s] " : "%s ";
|
blake3_supp_impls[i]->name);
|
||||||
cnt += sprintf(buffer + cnt, fmt, blake3_impls[i]->name);
|
|
||||||
cid++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
buffer[cnt] = 0;
|
|
||||||
|
|
||||||
return (cnt);
|
return (cnt);
|
||||||
}
|
}
|
||||||
|
|
||||||
module_param_call(icp_blake3_impl, icp_blake3_impl_set, icp_blake3_impl_get,
|
static int
|
||||||
NULL, 0644);
|
blake3_param_set(const char *val, zfs_kernel_param_t *unused)
|
||||||
MODULE_PARM_DESC(icp_blake3_impl, "Select BLAKE3 implementation.");
|
{
|
||||||
|
(void) unused;
|
||||||
|
return (blake3_impl_setname(val));
|
||||||
|
}
|
||||||
|
|
||||||
|
#elif defined(__FreeBSD__)
|
||||||
|
|
||||||
|
#include <sys/sbuf.h>
|
||||||
|
|
||||||
|
static int
|
||||||
|
blake3_param(ZFS_MODULE_PARAM_ARGS)
|
||||||
|
{
|
||||||
|
int err;
|
||||||
|
|
||||||
|
if (req->newptr == NULL) {
|
||||||
|
const uint32_t impl = IMPL_READ(blake3_impl_chosen);
|
||||||
|
const int init_buflen = 64;
|
||||||
|
const char *fmt;
|
||||||
|
struct sbuf *s;
|
||||||
|
|
||||||
|
s = sbuf_new_for_sysctl(NULL, NULL, init_buflen, req);
|
||||||
|
|
||||||
|
/* cycling */
|
||||||
|
fmt = IMPL_FMT(impl, IMPL_CYCLE);
|
||||||
|
(void) sbuf_printf(s, fmt, "cycle");
|
||||||
|
|
||||||
|
/* list fastest */
|
||||||
|
fmt = IMPL_FMT(impl, IMPL_FASTEST);
|
||||||
|
(void) sbuf_printf(s, fmt, "fastest");
|
||||||
|
|
||||||
|
/* list all supported implementations */
|
||||||
|
for (uint32_t i = 0; i < blake3_supp_impls_cnt; ++i) {
|
||||||
|
fmt = IMPL_FMT(impl, i);
|
||||||
|
(void) sbuf_printf(s, fmt, blake3_supp_impls[i]->name);
|
||||||
|
}
|
||||||
|
|
||||||
|
err = sbuf_finish(s);
|
||||||
|
sbuf_delete(s);
|
||||||
|
|
||||||
|
return (err);
|
||||||
|
}
|
||||||
|
|
||||||
|
char buf[16];
|
||||||
|
|
||||||
|
err = sysctl_handle_string(oidp, buf, sizeof (buf), req);
|
||||||
|
if (err) {
|
||||||
|
return (err);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (-blake3_impl_setname(buf));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#undef IMPL_FMT
|
||||||
|
|
||||||
|
ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs, zfs_, blake3_impl,
|
||||||
|
blake3_param_set, blake3_param_get, ZMOD_RW, \
|
||||||
|
"Select BLAKE3 implementation.");
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -62,31 +62,31 @@ typedef struct blake3_impl_ops {
|
||||||
blake3_is_supported_f is_supported;
|
blake3_is_supported_f is_supported;
|
||||||
int degree;
|
int degree;
|
||||||
const char *name;
|
const char *name;
|
||||||
} blake3_impl_ops_t;
|
} blake3_ops_t;
|
||||||
|
|
||||||
/* Return selected BLAKE3 implementation ops */
|
/* Return selected BLAKE3 implementation ops */
|
||||||
extern const blake3_impl_ops_t *blake3_impl_get_ops(void);
|
extern const blake3_ops_t *blake3_impl_get_ops(void);
|
||||||
|
|
||||||
extern const blake3_impl_ops_t blake3_generic_impl;
|
extern const blake3_ops_t blake3_generic_impl;
|
||||||
|
|
||||||
#if defined(__aarch64__) || \
|
#if defined(__aarch64__) || \
|
||||||
(defined(__x86_64) && defined(HAVE_SSE2)) || \
|
(defined(__x86_64) && defined(HAVE_SSE2)) || \
|
||||||
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
|
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
|
||||||
extern const blake3_impl_ops_t blake3_sse2_impl;
|
extern const blake3_ops_t blake3_sse2_impl;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__aarch64__) || \
|
#if defined(__aarch64__) || \
|
||||||
(defined(__x86_64) && defined(HAVE_SSE4_1)) || \
|
(defined(__x86_64) && defined(HAVE_SSE4_1)) || \
|
||||||
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
|
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
|
||||||
extern const blake3_impl_ops_t blake3_sse41_impl;
|
extern const blake3_ops_t blake3_sse41_impl;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
|
#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
|
||||||
extern const blake3_impl_ops_t blake3_avx2_impl;
|
extern const blake3_ops_t blake3_avx2_impl;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
|
#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
|
||||||
extern const blake3_impl_ops_t blake3_avx512_impl;
|
extern const blake3_ops_t blake3_avx512_impl;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__x86_64)
|
#if defined(__x86_64)
|
||||||
|
|
|
@ -81,7 +81,7 @@ static boolean_t blake3_is_sse2_supported(void)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
const blake3_impl_ops_t blake3_sse2_impl = {
|
const blake3_ops_t blake3_sse2_impl = {
|
||||||
.compress_in_place = blake3_compress_in_place_sse2,
|
.compress_in_place = blake3_compress_in_place_sse2,
|
||||||
.compress_xof = blake3_compress_xof_sse2,
|
.compress_xof = blake3_compress_xof_sse2,
|
||||||
.hash_many = blake3_hash_many_sse2,
|
.hash_many = blake3_hash_many_sse2,
|
||||||
|
@ -147,7 +147,7 @@ static boolean_t blake3_is_sse41_supported(void)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
const blake3_impl_ops_t blake3_sse41_impl = {
|
const blake3_ops_t blake3_sse41_impl = {
|
||||||
.compress_in_place = blake3_compress_in_place_sse41,
|
.compress_in_place = blake3_compress_in_place_sse41,
|
||||||
.compress_xof = blake3_compress_xof_sse41,
|
.compress_xof = blake3_compress_xof_sse41,
|
||||||
.hash_many = blake3_hash_many_sse41,
|
.hash_many = blake3_hash_many_sse41,
|
||||||
|
@ -179,7 +179,7 @@ static boolean_t blake3_is_avx2_supported(void)
|
||||||
zfs_avx2_available());
|
zfs_avx2_available());
|
||||||
}
|
}
|
||||||
|
|
||||||
const blake3_impl_ops_t blake3_avx2_impl = {
|
const blake3_ops_t blake3_avx2_impl = {
|
||||||
.compress_in_place = blake3_compress_in_place_sse41,
|
.compress_in_place = blake3_compress_in_place_sse41,
|
||||||
.compress_xof = blake3_compress_xof_sse41,
|
.compress_xof = blake3_compress_xof_sse41,
|
||||||
.hash_many = blake3_hash_many_avx2,
|
.hash_many = blake3_hash_many_avx2,
|
||||||
|
@ -237,7 +237,7 @@ static boolean_t blake3_is_avx512_supported(void)
|
||||||
zfs_avx512vl_available());
|
zfs_avx512vl_available());
|
||||||
}
|
}
|
||||||
|
|
||||||
const blake3_impl_ops_t blake3_avx512_impl = {
|
const blake3_ops_t blake3_avx512_impl = {
|
||||||
.compress_in_place = blake3_compress_in_place_avx512,
|
.compress_in_place = blake3_compress_in_place_avx512,
|
||||||
.compress_xof = blake3_compress_xof_avx512,
|
.compress_xof = blake3_compress_xof_avx512,
|
||||||
.hash_many = blake3_hash_many_avx512,
|
.hash_many = blake3_hash_many_avx512,
|
||||||
|
|
|
@ -244,12 +244,13 @@ chksum_benchmark(void)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
chksum_stat_t *cs;
|
chksum_stat_t *cs;
|
||||||
int cbid = 0, id;
|
int cbid = 0;
|
||||||
uint64_t max = 0;
|
uint64_t max = 0;
|
||||||
|
uint32_t id, id_save;
|
||||||
|
|
||||||
/* space for the benchmark times */
|
/* space for the benchmark times */
|
||||||
chksum_stat_cnt = 4;
|
chksum_stat_cnt = 4;
|
||||||
chksum_stat_cnt += blake3_get_impl_count();
|
chksum_stat_cnt += blake3_impl_getcnt();
|
||||||
chksum_stat_data = (chksum_stat_t *)kmem_zalloc(
|
chksum_stat_data = (chksum_stat_t *)kmem_zalloc(
|
||||||
sizeof (chksum_stat_t) * chksum_stat_cnt, KM_SLEEP);
|
sizeof (chksum_stat_t) * chksum_stat_cnt, KM_SLEEP);
|
||||||
|
|
||||||
|
@ -290,20 +291,24 @@ chksum_benchmark(void)
|
||||||
chksum_benchit(cs);
|
chksum_benchit(cs);
|
||||||
|
|
||||||
/* blake3 */
|
/* blake3 */
|
||||||
for (id = 0; id < blake3_get_impl_count(); id++) {
|
id_save = blake3_impl_getid();
|
||||||
blake3_set_impl_id(id);
|
for (id = 0; id < blake3_impl_getcnt(); id++) {
|
||||||
|
blake3_impl_setid(id);
|
||||||
cs = &chksum_stat_data[cbid++];
|
cs = &chksum_stat_data[cbid++];
|
||||||
cs->init = abd_checksum_blake3_tmpl_init;
|
cs->init = abd_checksum_blake3_tmpl_init;
|
||||||
cs->func = abd_checksum_blake3_native;
|
cs->func = abd_checksum_blake3_native;
|
||||||
cs->free = abd_checksum_blake3_tmpl_free;
|
cs->free = abd_checksum_blake3_tmpl_free;
|
||||||
cs->name = "blake3";
|
cs->name = "blake3";
|
||||||
cs->impl = blake3_get_impl_name();
|
cs->impl = blake3_impl_getname();
|
||||||
chksum_benchit(cs);
|
chksum_benchit(cs);
|
||||||
if (cs->bs256k > max) {
|
if (cs->bs256k > max) {
|
||||||
max = cs->bs256k;
|
max = cs->bs256k;
|
||||||
blake3_set_impl_fastest(id);
|
blake3_impl_set_fastest(id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* restore initial value */
|
||||||
|
blake3_impl_setid(id_save);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -329,9 +334,6 @@ chksum_init(void)
|
||||||
chksum_kstat_addr);
|
chksum_kstat_addr);
|
||||||
kstat_install(chksum_kstat);
|
kstat_install(chksum_kstat);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* setup implementations */
|
|
||||||
blake3_setup_impl();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
|
@ -497,9 +497,9 @@ main(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
|
|
||||||
(void) printf("Running algorithm correctness tests:\n");
|
(void) printf("Running algorithm correctness tests:\n");
|
||||||
for (id = 0; id < blake3_get_impl_count(); id++) {
|
for (id = 0; id < blake3_impl_getcnt(); id++) {
|
||||||
blake3_set_impl_id(id);
|
blake3_impl_setid(id);
|
||||||
const char *name = blake3_get_impl_name();
|
const char *name = blake3_impl_getname();
|
||||||
dprintf("Result for BLAKE3-%s:\n", name);
|
dprintf("Result for BLAKE3-%s:\n", name);
|
||||||
for (i = 0; TestArray[i].hash; i++) {
|
for (i = 0; TestArray[i].hash; i++) {
|
||||||
blake3_test_t *cur = &TestArray[i];
|
blake3_test_t *cur = &TestArray[i];
|
||||||
|
@ -565,9 +565,9 @@ main(int argc, char *argv[])
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
printf("Running performance tests (hashing 1024 MiB of data):\n");
|
printf("Running performance tests (hashing 1024 MiB of data):\n");
|
||||||
for (id = 0; id < blake3_get_impl_count(); id++) {
|
for (id = 0; id < blake3_impl_getcnt(); id++) {
|
||||||
blake3_set_impl_id(id);
|
blake3_impl_setid(id);
|
||||||
const char *name = blake3_get_impl_name();
|
const char *name = blake3_impl_getname();
|
||||||
BLAKE3_PERF_TEST(name, 256);
|
BLAKE3_PERF_TEST(name, 256);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue