Update BLAKE3 for using the new impl handling
This commit changes the BLAKE3 implementation handling and also the calls to it from the ztest command. Tested-by: Rich Ercolani <rincebrain@gmail.com> Tested-by: Sebastian Gottschall <s.gottschall@dd-wrt.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de> Closes #13741
This commit is contained in:
parent
4c5fec01a4
commit
f9f9bef22f
|
@ -135,6 +135,7 @@
|
|||
#include <libnvpair.h>
|
||||
#include <libzutil.h>
|
||||
#include <sys/crypto/icp.h>
|
||||
#include <sys/zfs_impl.h>
|
||||
#if (__GLIBC__ && !__UCLIBC__)
|
||||
#include <execinfo.h> /* for backtrace() */
|
||||
#endif
|
||||
|
@ -6410,6 +6411,7 @@ ztest_blake3(ztest_ds_t *zd, uint64_t id)
|
|||
int i, *ptr;
|
||||
uint32_t size;
|
||||
BLAKE3_CTX ctx;
|
||||
const zfs_impl_t *blake3 = zfs_impl_get_ops("blake3");
|
||||
|
||||
size = ztest_random_blocksize();
|
||||
buf = umem_alloc(size, UMEM_NOFAIL);
|
||||
|
@ -6434,7 +6436,7 @@ ztest_blake3(ztest_ds_t *zd, uint64_t id)
|
|||
void *res2 = &zc_res2;
|
||||
|
||||
/* BLAKE3_KEY_LEN = 32 */
|
||||
VERIFY0(blake3_impl_setname("generic"));
|
||||
VERIFY0(blake3->setname("generic"));
|
||||
templ = abd_checksum_blake3_tmpl_init(&salt);
|
||||
Blake3_InitKeyed(&ctx, salt_ptr);
|
||||
Blake3_Update(&ctx, buf, size);
|
||||
|
@ -6443,7 +6445,7 @@ ztest_blake3(ztest_ds_t *zd, uint64_t id)
|
|||
ZIO_CHECKSUM_BSWAP(&zc_ref2);
|
||||
abd_checksum_blake3_tmpl_free(templ);
|
||||
|
||||
VERIFY0(blake3_impl_setname("cycle"));
|
||||
VERIFY0(blake3->setname("cycle"));
|
||||
while (run_count-- > 0) {
|
||||
|
||||
/* Test current implementation */
|
||||
|
|
|
@ -22,11 +22,11 @@
|
|||
/*
|
||||
* Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
|
||||
* Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor
|
||||
* Copyright (c) 2021 Tino Reichardt <milky-zfs@mcmilk.de>
|
||||
* Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
|
||||
*/
|
||||
|
||||
#ifndef BLAKE3_H
|
||||
#define BLAKE3_H
|
||||
#ifndef _SYS_BLAKE3_H
|
||||
#define _SYS_BLAKE3_H
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/types.h>
|
||||
|
@ -97,26 +97,8 @@ extern void **blake3_per_cpu_ctx;
|
|||
extern void blake3_per_cpu_ctx_init(void);
|
||||
extern void blake3_per_cpu_ctx_fini(void);
|
||||
|
||||
/* get count of supported implementations */
|
||||
extern uint32_t blake3_impl_getcnt(void);
|
||||
|
||||
/* get id of selected implementation */
|
||||
extern uint32_t blake3_impl_getid(void);
|
||||
|
||||
/* get name of selected implementation */
|
||||
extern const char *blake3_impl_getname(void);
|
||||
|
||||
/* setup id as fastest implementation */
|
||||
extern void blake3_impl_set_fastest(uint32_t id);
|
||||
|
||||
/* set implementation by id */
|
||||
extern void blake3_impl_setid(uint32_t id);
|
||||
|
||||
/* set implementation by name */
|
||||
extern int blake3_impl_setname(const char *name);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* BLAKE3_H */
|
||||
#endif /* _SYS_BLAKE3_H */
|
||||
|
|
|
@ -432,7 +432,7 @@ static void hasher_init_base(BLAKE3_CTX *ctx, const uint32_t key[8],
|
|||
memcpy(ctx->key, key, BLAKE3_KEY_LEN);
|
||||
chunk_state_init(&ctx->chunk, key, flags);
|
||||
ctx->cv_stack_len = 0;
|
||||
ctx->ops = blake3_impl_get_ops();
|
||||
ctx->ops = blake3_get_ops();
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -187,7 +187,8 @@ static inline void blake3_hash_many_generic(const uint8_t * const *inputs,
|
|||
}
|
||||
}
|
||||
|
||||
static inline boolean_t blake3_is_generic_supported(void)
|
||||
/* the generic implementation is always okay */
|
||||
static boolean_t blake3_is_supported(void)
|
||||
{
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
@ -196,7 +197,7 @@ const blake3_ops_t blake3_generic_impl = {
|
|||
.compress_in_place = blake3_compress_in_place_generic,
|
||||
.compress_xof = blake3_compress_xof_generic,
|
||||
.hash_many = blake3_hash_many_generic,
|
||||
.is_supported = blake3_is_generic_supported,
|
||||
.is_supported = blake3_is_supported,
|
||||
.degree = 4,
|
||||
.name = "generic"
|
||||
};
|
||||
|
|
|
@ -24,10 +24,237 @@
|
|||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zio_checksum.h>
|
||||
#include <sys/zfs_impl.h>
|
||||
#include <sys/blake3.h>
|
||||
#include <sys/simd.h>
|
||||
|
||||
#include "blake3_impl.h"
|
||||
|
||||
#if defined(__aarch64__) || \
|
||||
(defined(__x86_64) && defined(HAVE_SSE2)) || \
|
||||
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
|
||||
|
||||
extern void zfs_blake3_compress_in_place_sse2(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags);
|
||||
|
||||
extern void zfs_blake3_compress_xof_sse2(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags, uint8_t out[64]);
|
||||
|
||||
extern void zfs_blake3_hash_many_sse2(const uint8_t * const *inputs,
|
||||
size_t num_inputs, size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, boolean_t increment_counter, uint8_t flags,
|
||||
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
|
||||
|
||||
static void blake3_compress_in_place_sse2(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags) {
|
||||
kfpu_begin();
|
||||
zfs_blake3_compress_in_place_sse2(cv, block, block_len, counter,
|
||||
flags);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
static void blake3_compress_xof_sse2(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags, uint8_t out[64]) {
|
||||
kfpu_begin();
|
||||
zfs_blake3_compress_xof_sse2(cv, block, block_len, counter, flags,
|
||||
out);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
static void blake3_hash_many_sse2(const uint8_t * const *inputs,
|
||||
size_t num_inputs, size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, boolean_t increment_counter, uint8_t flags,
|
||||
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
|
||||
kfpu_begin();
|
||||
zfs_blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter,
|
||||
increment_counter, flags, flags_start, flags_end, out);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
static boolean_t blake3_is_sse2_supported(void)
|
||||
{
|
||||
#if defined(__x86_64)
|
||||
return (kfpu_allowed() && zfs_sse2_available());
|
||||
#elif defined(__PPC64__)
|
||||
return (kfpu_allowed() && zfs_vsx_available());
|
||||
#else
|
||||
return (kfpu_allowed());
|
||||
#endif
|
||||
}
|
||||
|
||||
const blake3_ops_t blake3_sse2_impl = {
|
||||
.compress_in_place = blake3_compress_in_place_sse2,
|
||||
.compress_xof = blake3_compress_xof_sse2,
|
||||
.hash_many = blake3_hash_many_sse2,
|
||||
.is_supported = blake3_is_sse2_supported,
|
||||
.degree = 4,
|
||||
.name = "sse2"
|
||||
};
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__) || \
|
||||
(defined(__x86_64) && defined(HAVE_SSE2)) || \
|
||||
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
|
||||
|
||||
extern void zfs_blake3_compress_in_place_sse41(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags);
|
||||
|
||||
extern void zfs_blake3_compress_xof_sse41(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags, uint8_t out[64]);
|
||||
|
||||
extern void zfs_blake3_hash_many_sse41(const uint8_t * const *inputs,
|
||||
size_t num_inputs, size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, boolean_t increment_counter, uint8_t flags,
|
||||
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
|
||||
|
||||
static void blake3_compress_in_place_sse41(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags) {
|
||||
kfpu_begin();
|
||||
zfs_blake3_compress_in_place_sse41(cv, block, block_len, counter,
|
||||
flags);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
static void blake3_compress_xof_sse41(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags, uint8_t out[64]) {
|
||||
kfpu_begin();
|
||||
zfs_blake3_compress_xof_sse41(cv, block, block_len, counter, flags,
|
||||
out);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
static void blake3_hash_many_sse41(const uint8_t * const *inputs,
|
||||
size_t num_inputs, size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, boolean_t increment_counter, uint8_t flags,
|
||||
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
|
||||
kfpu_begin();
|
||||
zfs_blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
|
||||
increment_counter, flags, flags_start, flags_end, out);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
static boolean_t blake3_is_sse41_supported(void)
|
||||
{
|
||||
#if defined(__x86_64)
|
||||
return (kfpu_allowed() && zfs_sse4_1_available());
|
||||
#elif defined(__PPC64__)
|
||||
return (kfpu_allowed() && zfs_vsx_available());
|
||||
#else
|
||||
return (kfpu_allowed());
|
||||
#endif
|
||||
}
|
||||
|
||||
const blake3_ops_t blake3_sse41_impl = {
|
||||
.compress_in_place = blake3_compress_in_place_sse41,
|
||||
.compress_xof = blake3_compress_xof_sse41,
|
||||
.hash_many = blake3_hash_many_sse41,
|
||||
.is_supported = blake3_is_sse41_supported,
|
||||
.degree = 4,
|
||||
.name = "sse41"
|
||||
};
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
|
||||
extern void zfs_blake3_hash_many_avx2(const uint8_t * const *inputs,
|
||||
size_t num_inputs, size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, boolean_t increment_counter, uint8_t flags,
|
||||
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
|
||||
|
||||
static void blake3_hash_many_avx2(const uint8_t * const *inputs,
|
||||
size_t num_inputs, size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, boolean_t increment_counter, uint8_t flags,
|
||||
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
|
||||
kfpu_begin();
|
||||
zfs_blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,
|
||||
increment_counter, flags, flags_start, flags_end, out);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
static boolean_t blake3_is_avx2_supported(void)
|
||||
{
|
||||
return (kfpu_allowed() && zfs_sse4_1_available() &&
|
||||
zfs_avx2_available());
|
||||
}
|
||||
|
||||
const blake3_ops_t
|
||||
blake3_avx2_impl = {
|
||||
.compress_in_place = blake3_compress_in_place_sse41,
|
||||
.compress_xof = blake3_compress_xof_sse41,
|
||||
.hash_many = blake3_hash_many_avx2,
|
||||
.is_supported = blake3_is_avx2_supported,
|
||||
.degree = 8,
|
||||
.name = "avx2"
|
||||
};
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
|
||||
extern void zfs_blake3_compress_in_place_avx512(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags);
|
||||
|
||||
extern void zfs_blake3_compress_xof_avx512(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags, uint8_t out[64]);
|
||||
|
||||
extern void zfs_blake3_hash_many_avx512(const uint8_t * const *inputs,
|
||||
size_t num_inputs, size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, boolean_t increment_counter, uint8_t flags,
|
||||
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
|
||||
|
||||
static void blake3_compress_in_place_avx512(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags) {
|
||||
kfpu_begin();
|
||||
zfs_blake3_compress_in_place_avx512(cv, block, block_len, counter,
|
||||
flags);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
static void blake3_compress_xof_avx512(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags, uint8_t out[64]) {
|
||||
kfpu_begin();
|
||||
zfs_blake3_compress_xof_avx512(cv, block, block_len, counter, flags,
|
||||
out);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
static void blake3_hash_many_avx512(const uint8_t * const *inputs,
|
||||
size_t num_inputs, size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, boolean_t increment_counter, uint8_t flags,
|
||||
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
|
||||
kfpu_begin();
|
||||
zfs_blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
|
||||
increment_counter, flags, flags_start, flags_end, out);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
static boolean_t blake3_is_avx512_supported(void)
|
||||
{
|
||||
return (kfpu_allowed() && zfs_avx512f_available() &&
|
||||
zfs_avx512vl_available());
|
||||
}
|
||||
|
||||
const blake3_ops_t blake3_avx512_impl = {
|
||||
.compress_in_place = blake3_compress_in_place_avx512,
|
||||
.compress_xof = blake3_compress_xof_avx512,
|
||||
.hash_many = blake3_hash_many_avx512,
|
||||
.is_supported = blake3_is_avx512_supported,
|
||||
.degree = 16,
|
||||
.name = "avx512"
|
||||
};
|
||||
#endif
|
||||
|
||||
extern const blake3_ops_t blake3_generic_impl;
|
||||
|
||||
static const blake3_ops_t *const blake3_impls[] = {
|
||||
&blake3_generic_impl,
|
||||
#if defined(__aarch64__) || \
|
||||
|
@ -48,198 +275,15 @@ static const blake3_ops_t *const blake3_impls[] = {
|
|||
#endif
|
||||
};
|
||||
|
||||
/* Select BLAKE3 implementation */
|
||||
#define IMPL_FASTEST (UINT32_MAX)
|
||||
#define IMPL_CYCLE (UINT32_MAX - 1)
|
||||
|
||||
#define IMPL_READ(i) (*(volatile uint32_t *) &(i))
|
||||
|
||||
/* Indicate that benchmark has been done */
|
||||
static boolean_t blake3_initialized = B_FALSE;
|
||||
|
||||
/* Implementation that contains the fastest methods */
|
||||
static blake3_ops_t blake3_fastest_impl = {
|
||||
.name = "fastest"
|
||||
};
|
||||
|
||||
/* Hold all supported implementations */
|
||||
static const blake3_ops_t *blake3_supp_impls[ARRAY_SIZE(blake3_impls)];
|
||||
static uint32_t blake3_supp_impls_cnt = 0;
|
||||
|
||||
/* Currently selected implementation */
|
||||
static uint32_t blake3_impl_chosen = IMPL_FASTEST;
|
||||
|
||||
static struct blake3_impl_selector {
|
||||
const char *name;
|
||||
uint32_t sel;
|
||||
} blake3_impl_selectors[] = {
|
||||
{ "cycle", IMPL_CYCLE },
|
||||
{ "fastest", IMPL_FASTEST }
|
||||
};
|
||||
|
||||
/* check the supported implementations */
|
||||
static void blake3_impl_init(void)
|
||||
{
|
||||
int i, c;
|
||||
|
||||
/* init only once */
|
||||
if (likely(blake3_initialized))
|
||||
return;
|
||||
|
||||
/* move supported implementations into blake3_supp_impls */
|
||||
for (i = 0, c = 0; i < ARRAY_SIZE(blake3_impls); i++) {
|
||||
const blake3_ops_t *impl = blake3_impls[i];
|
||||
|
||||
if (impl->is_supported && impl->is_supported())
|
||||
blake3_supp_impls[c++] = impl;
|
||||
}
|
||||
blake3_supp_impls_cnt = c;
|
||||
|
||||
/* first init generic impl, may be changed via set_fastest() */
|
||||
memcpy(&blake3_fastest_impl, blake3_impls[0],
|
||||
sizeof (blake3_fastest_impl));
|
||||
blake3_initialized = B_TRUE;
|
||||
}
|
||||
|
||||
/* get number of supported implementations */
|
||||
uint32_t
|
||||
blake3_impl_getcnt(void)
|
||||
{
|
||||
blake3_impl_init();
|
||||
return (blake3_supp_impls_cnt);
|
||||
}
|
||||
|
||||
/* get id of selected implementation */
|
||||
uint32_t
|
||||
blake3_impl_getid(void)
|
||||
{
|
||||
return (IMPL_READ(blake3_impl_chosen));
|
||||
}
|
||||
|
||||
/* get name of selected implementation */
|
||||
const char *
|
||||
blake3_impl_getname(void)
|
||||
{
|
||||
uint32_t impl = IMPL_READ(blake3_impl_chosen);
|
||||
|
||||
blake3_impl_init();
|
||||
switch (impl) {
|
||||
case IMPL_FASTEST:
|
||||
return ("fastest");
|
||||
case IMPL_CYCLE:
|
||||
return ("cycle");
|
||||
default:
|
||||
return (blake3_supp_impls[impl]->name);
|
||||
}
|
||||
}
|
||||
|
||||
/* setup id as fastest implementation */
|
||||
void
|
||||
blake3_impl_set_fastest(uint32_t id)
|
||||
{
|
||||
/* setup fastest impl */
|
||||
memcpy(&blake3_fastest_impl, blake3_supp_impls[id],
|
||||
sizeof (blake3_fastest_impl));
|
||||
}
|
||||
|
||||
/* set implementation by id */
|
||||
void
|
||||
blake3_impl_setid(uint32_t id)
|
||||
{
|
||||
blake3_impl_init();
|
||||
switch (id) {
|
||||
case IMPL_FASTEST:
|
||||
atomic_swap_32(&blake3_impl_chosen, IMPL_FASTEST);
|
||||
break;
|
||||
case IMPL_CYCLE:
|
||||
atomic_swap_32(&blake3_impl_chosen, IMPL_CYCLE);
|
||||
break;
|
||||
default:
|
||||
ASSERT3U(id, <, blake3_supp_impls_cnt);
|
||||
atomic_swap_32(&blake3_impl_chosen, id);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* set implementation by name */
|
||||
int
|
||||
blake3_impl_setname(const char *val)
|
||||
{
|
||||
uint32_t impl = IMPL_READ(blake3_impl_chosen);
|
||||
size_t val_len;
|
||||
int i, err = -EINVAL;
|
||||
|
||||
blake3_impl_init();
|
||||
val_len = strlen(val);
|
||||
while ((val_len > 0) && !!isspace(val[val_len-1])) /* trim '\n' */
|
||||
val_len--;
|
||||
|
||||
/* check mandatory implementations */
|
||||
for (i = 0; i < ARRAY_SIZE(blake3_impl_selectors); i++) {
|
||||
const char *name = blake3_impl_selectors[i].name;
|
||||
|
||||
if (val_len == strlen(name) &&
|
||||
strncmp(val, name, val_len) == 0) {
|
||||
impl = blake3_impl_selectors[i].sel;
|
||||
err = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (err != 0 && blake3_initialized) {
|
||||
/* check all supported implementations */
|
||||
for (i = 0; i < blake3_supp_impls_cnt; i++) {
|
||||
const char *name = blake3_supp_impls[i]->name;
|
||||
|
||||
if (val_len == strlen(name) &&
|
||||
strncmp(val, name, val_len) == 0) {
|
||||
impl = i;
|
||||
err = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (err == 0) {
|
||||
atomic_swap_32(&blake3_impl_chosen, impl);
|
||||
}
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
const blake3_ops_t *
|
||||
blake3_impl_get_ops(void)
|
||||
{
|
||||
const blake3_ops_t *ops = NULL;
|
||||
uint32_t impl = IMPL_READ(blake3_impl_chosen);
|
||||
|
||||
blake3_impl_init();
|
||||
switch (impl) {
|
||||
case IMPL_FASTEST:
|
||||
ASSERT(blake3_initialized);
|
||||
ops = &blake3_fastest_impl;
|
||||
break;
|
||||
case IMPL_CYCLE:
|
||||
/* Cycle through supported implementations */
|
||||
ASSERT(blake3_initialized);
|
||||
ASSERT3U(blake3_supp_impls_cnt, >, 0);
|
||||
static uint32_t cycle_count = 0;
|
||||
uint32_t idx = (++cycle_count) % blake3_supp_impls_cnt;
|
||||
ops = blake3_supp_impls[idx];
|
||||
break;
|
||||
default:
|
||||
ASSERT3U(blake3_supp_impls_cnt, >, 0);
|
||||
ASSERT3U(impl, <, blake3_supp_impls_cnt);
|
||||
ops = blake3_supp_impls[impl];
|
||||
break;
|
||||
}
|
||||
|
||||
ASSERT3P(ops, !=, NULL);
|
||||
return (ops);
|
||||
}
|
||||
|
||||
#if defined(_KERNEL)
|
||||
/* use the generic implementation functions */
|
||||
#define IMPL_NAME "blake3"
|
||||
#define IMPL_OPS_T blake3_ops_t
|
||||
#define IMPL_ARRAY blake3_impls
|
||||
#define IMPL_GET_OPS blake3_get_ops
|
||||
#define ZFS_IMPL_OPS zfs_blake3_ops
|
||||
#include <generic_impl.c>
|
||||
|
||||
#ifdef _KERNEL
|
||||
void **blake3_per_cpu_ctx;
|
||||
|
||||
void
|
||||
|
@ -253,9 +297,6 @@ blake3_per_cpu_ctx_init(void)
|
|||
blake3_per_cpu_ctx[i] = kmem_alloc(sizeof (BLAKE3_CTX),
|
||||
KM_SLEEP);
|
||||
}
|
||||
|
||||
/* init once in kernel mode */
|
||||
blake3_impl_init();
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -276,7 +317,7 @@ blake3_per_cpu_ctx_fini(void)
|
|||
static int
|
||||
blake3_param_get(char *buffer, zfs_kernel_param_t *unused)
|
||||
{
|
||||
const uint32_t impl = IMPL_READ(blake3_impl_chosen);
|
||||
const uint32_t impl = IMPL_READ(generic_impl_chosen);
|
||||
char *fmt;
|
||||
int cnt = 0;
|
||||
|
||||
|
@ -289,10 +330,11 @@ blake3_param_get(char *buffer, zfs_kernel_param_t *unused)
|
|||
cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, "fastest");
|
||||
|
||||
/* list all supported implementations */
|
||||
for (uint32_t i = 0; i < blake3_supp_impls_cnt; ++i) {
|
||||
generic_impl_init();
|
||||
for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) {
|
||||
fmt = IMPL_FMT(impl, i);
|
||||
cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
|
||||
blake3_supp_impls[i]->name);
|
||||
blake3_impls[i]->name);
|
||||
}
|
||||
|
||||
return (cnt);
|
||||
|
@ -302,7 +344,7 @@ static int
|
|||
blake3_param_set(const char *val, zfs_kernel_param_t *unused)
|
||||
{
|
||||
(void) unused;
|
||||
return (blake3_impl_setname(val));
|
||||
return (generic_impl_setname(val));
|
||||
}
|
||||
|
||||
#elif defined(__FreeBSD__)
|
||||
|
@ -314,8 +356,9 @@ blake3_param(ZFS_MODULE_PARAM_ARGS)
|
|||
{
|
||||
int err;
|
||||
|
||||
generic_impl_init();
|
||||
if (req->newptr == NULL) {
|
||||
const uint32_t impl = IMPL_READ(blake3_impl_chosen);
|
||||
const uint32_t impl = IMPL_READ(generic_impl_chosen);
|
||||
const int init_buflen = 64;
|
||||
const char *fmt;
|
||||
struct sbuf *s;
|
||||
|
@ -331,9 +374,9 @@ blake3_param(ZFS_MODULE_PARAM_ARGS)
|
|||
(void) sbuf_printf(s, fmt, "fastest");
|
||||
|
||||
/* list all supported implementations */
|
||||
for (uint32_t i = 0; i < blake3_supp_impls_cnt; ++i) {
|
||||
for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) {
|
||||
fmt = IMPL_FMT(impl, i);
|
||||
(void) sbuf_printf(s, fmt, blake3_supp_impls[i]->name);
|
||||
(void) sbuf_printf(s, fmt, generic_supp_impls[i]->name);
|
||||
}
|
||||
|
||||
err = sbuf_finish(s);
|
||||
|
@ -349,7 +392,7 @@ blake3_param(ZFS_MODULE_PARAM_ARGS)
|
|||
return (err);
|
||||
}
|
||||
|
||||
return (-blake3_impl_setname(buf));
|
||||
return (-generic_impl_setname(buf));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -25,14 +25,13 @@
|
|||
* Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
|
||||
*/
|
||||
|
||||
#ifndef BLAKE3_IMPL_H
|
||||
#ifndef BLAKE3_IMPL_H
|
||||
#define BLAKE3_IMPL_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/blake3.h>
|
||||
#include <sys/simd.h>
|
||||
#include <sys/asm_linkage.h>
|
||||
|
@ -56,7 +55,7 @@ typedef void (*blake3_hash_many_f)(const uint8_t * const *inputs,
|
|||
|
||||
typedef boolean_t (*blake3_is_supported_f)(void);
|
||||
|
||||
typedef struct blake3_impl_ops {
|
||||
typedef struct {
|
||||
blake3_compress_in_place_f compress_in_place;
|
||||
blake3_compress_xof_f compress_xof;
|
||||
blake3_hash_many_f hash_many;
|
||||
|
@ -65,30 +64,8 @@ typedef struct blake3_impl_ops {
|
|||
const char *name;
|
||||
} blake3_ops_t;
|
||||
|
||||
/* Return selected BLAKE3 implementation ops */
|
||||
extern const blake3_ops_t *blake3_impl_get_ops(void);
|
||||
|
||||
extern const blake3_ops_t blake3_generic_impl;
|
||||
|
||||
#if defined(__aarch64__) || \
|
||||
(defined(__x86_64) && defined(HAVE_SSE2)) || \
|
||||
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
|
||||
extern const blake3_ops_t blake3_sse2_impl;
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__) || \
|
||||
(defined(__x86_64) && defined(HAVE_SSE4_1)) || \
|
||||
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
|
||||
extern const blake3_ops_t blake3_sse41_impl;
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
|
||||
extern const blake3_ops_t blake3_avx2_impl;
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
|
||||
extern const blake3_ops_t blake3_avx512_impl;
|
||||
#endif
|
||||
/* return selected BLAKE3 implementation ops */
|
||||
extern const blake3_ops_t *blake3_get_ops(void);
|
||||
|
||||
#if defined(__x86_64)
|
||||
#define MAX_SIMD_DEGREE 16
|
||||
|
|
|
@ -1,248 +0,0 @@
|
|||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or https://opensource.org/licenses/CDDL-1.0.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
|
||||
*/
|
||||
|
||||
#include "blake3_impl.h"
|
||||
|
||||
#if defined(__aarch64__) || \
|
||||
(defined(__x86_64) && defined(HAVE_SSE2)) || \
|
||||
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
|
||||
|
||||
extern void ASMABI zfs_blake3_compress_in_place_sse2(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags);
|
||||
|
||||
extern void ASMABI zfs_blake3_compress_xof_sse2(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags, uint8_t out[64]);
|
||||
|
||||
extern void ASMABI zfs_blake3_hash_many_sse2(const uint8_t * const *inputs,
|
||||
size_t num_inputs, size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, boolean_t increment_counter, uint8_t flags,
|
||||
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
|
||||
|
||||
static void blake3_compress_in_place_sse2(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags) {
|
||||
kfpu_begin();
|
||||
zfs_blake3_compress_in_place_sse2(cv, block, block_len, counter,
|
||||
flags);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
static void blake3_compress_xof_sse2(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags, uint8_t out[64]) {
|
||||
kfpu_begin();
|
||||
zfs_blake3_compress_xof_sse2(cv, block, block_len, counter, flags,
|
||||
out);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
static void blake3_hash_many_sse2(const uint8_t * const *inputs,
|
||||
size_t num_inputs, size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, boolean_t increment_counter, uint8_t flags,
|
||||
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
|
||||
kfpu_begin();
|
||||
zfs_blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter,
|
||||
increment_counter, flags, flags_start, flags_end, out);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
static boolean_t blake3_is_sse2_supported(void)
|
||||
{
|
||||
#if defined(__x86_64)
|
||||
return (kfpu_allowed() && zfs_sse2_available());
|
||||
#elif defined(__PPC64__) && defined(__linux__)
|
||||
return (kfpu_allowed() && zfs_vsx_available());
|
||||
#else
|
||||
return (kfpu_allowed());
|
||||
#endif
|
||||
}
|
||||
|
||||
const blake3_ops_t blake3_sse2_impl = {
|
||||
.compress_in_place = blake3_compress_in_place_sse2,
|
||||
.compress_xof = blake3_compress_xof_sse2,
|
||||
.hash_many = blake3_hash_many_sse2,
|
||||
.is_supported = blake3_is_sse2_supported,
|
||||
.degree = 4,
|
||||
.name = "sse2"
|
||||
};
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__) || \
|
||||
(defined(__x86_64) && defined(HAVE_SSE2)) || \
|
||||
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
|
||||
|
||||
extern void ASMABI zfs_blake3_compress_in_place_sse41(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags);
|
||||
|
||||
extern void ASMABI zfs_blake3_compress_xof_sse41(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags, uint8_t out[64]);
|
||||
|
||||
extern void ASMABI zfs_blake3_hash_many_sse41(const uint8_t * const *inputs,
|
||||
size_t num_inputs, size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, boolean_t increment_counter, uint8_t flags,
|
||||
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
|
||||
|
||||
static void blake3_compress_in_place_sse41(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags) {
|
||||
kfpu_begin();
|
||||
zfs_blake3_compress_in_place_sse41(cv, block, block_len, counter,
|
||||
flags);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
static void blake3_compress_xof_sse41(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags, uint8_t out[64]) {
|
||||
kfpu_begin();
|
||||
zfs_blake3_compress_xof_sse41(cv, block, block_len, counter, flags,
|
||||
out);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
static void blake3_hash_many_sse41(const uint8_t * const *inputs,
|
||||
size_t num_inputs, size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, boolean_t increment_counter, uint8_t flags,
|
||||
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
|
||||
kfpu_begin();
|
||||
zfs_blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
|
||||
increment_counter, flags, flags_start, flags_end, out);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
static boolean_t blake3_is_sse41_supported(void)
|
||||
{
|
||||
#if defined(__x86_64)
|
||||
return (kfpu_allowed() && zfs_sse4_1_available());
|
||||
#elif defined(__PPC64__) && defined(__linux__)
|
||||
return (kfpu_allowed() && zfs_vsx_available());
|
||||
#else
|
||||
return (kfpu_allowed());
|
||||
#endif
|
||||
}
|
||||
|
||||
const blake3_ops_t blake3_sse41_impl = {
|
||||
.compress_in_place = blake3_compress_in_place_sse41,
|
||||
.compress_xof = blake3_compress_xof_sse41,
|
||||
.hash_many = blake3_hash_many_sse41,
|
||||
.is_supported = blake3_is_sse41_supported,
|
||||
.degree = 4,
|
||||
.name = "sse41"
|
||||
};
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
|
||||
extern void ASMABI zfs_blake3_hash_many_avx2(const uint8_t * const *inputs,
|
||||
size_t num_inputs, size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, boolean_t increment_counter, uint8_t flags,
|
||||
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
|
||||
|
||||
static void blake3_hash_many_avx2(const uint8_t * const *inputs,
|
||||
size_t num_inputs, size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, boolean_t increment_counter, uint8_t flags,
|
||||
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
|
||||
kfpu_begin();
|
||||
zfs_blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,
|
||||
increment_counter, flags, flags_start, flags_end, out);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
static boolean_t blake3_is_avx2_supported(void)
|
||||
{
|
||||
return (kfpu_allowed() && zfs_sse4_1_available() &&
|
||||
zfs_avx2_available());
|
||||
}
|
||||
|
||||
const blake3_ops_t blake3_avx2_impl = {
|
||||
.compress_in_place = blake3_compress_in_place_sse41,
|
||||
.compress_xof = blake3_compress_xof_sse41,
|
||||
.hash_many = blake3_hash_many_avx2,
|
||||
.is_supported = blake3_is_avx2_supported,
|
||||
.degree = 8,
|
||||
.name = "avx2"
|
||||
};
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
|
||||
extern void ASMABI zfs_blake3_compress_in_place_avx512(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags);
|
||||
|
||||
extern void ASMABI zfs_blake3_compress_xof_avx512(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags, uint8_t out[64]);
|
||||
|
||||
extern void ASMABI zfs_blake3_hash_many_avx512(const uint8_t * const *inputs,
|
||||
size_t num_inputs, size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, boolean_t increment_counter, uint8_t flags,
|
||||
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
|
||||
|
||||
static void blake3_compress_in_place_avx512(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags) {
|
||||
kfpu_begin();
|
||||
zfs_blake3_compress_in_place_avx512(cv, block, block_len, counter,
|
||||
flags);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
static void blake3_compress_xof_avx512(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags, uint8_t out[64]) {
|
||||
kfpu_begin();
|
||||
zfs_blake3_compress_xof_avx512(cv, block, block_len, counter, flags,
|
||||
out);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
static void blake3_hash_many_avx512(const uint8_t * const *inputs,
|
||||
size_t num_inputs, size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, boolean_t increment_counter, uint8_t flags,
|
||||
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
|
||||
kfpu_begin();
|
||||
zfs_blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
|
||||
increment_counter, flags, flags_start, flags_end, out);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
static boolean_t blake3_is_avx512_supported(void)
|
||||
{
|
||||
return (kfpu_allowed() && zfs_avx512f_available() &&
|
||||
zfs_avx512vl_available());
|
||||
}
|
||||
|
||||
const blake3_ops_t blake3_avx512_impl = {
|
||||
.compress_in_place = blake3_compress_in_place_avx512,
|
||||
.compress_xof = blake3_compress_xof_avx512,
|
||||
.hash_many = blake3_hash_many_avx512,
|
||||
.is_supported = blake3_is_avx512_supported,
|
||||
.degree = 16,
|
||||
.name = "avx512"
|
||||
};
|
||||
#endif
|
|
@ -31,6 +31,8 @@
|
|||
#include <sys/time.h>
|
||||
#include <sys/blake3.h>
|
||||
|
||||
#include <sys/zfs_impl.h>
|
||||
|
||||
/*
|
||||
* set it to a define for debugging
|
||||
*/
|
||||
|
@ -485,10 +487,14 @@ main(int argc, char *argv[])
|
|||
uint8_t buffer[102400];
|
||||
uint64_t cpu_mhz = 0;
|
||||
int id, i, j;
|
||||
const zfs_impl_t *blake3 = zfs_impl_get_ops("blake3");
|
||||
|
||||
if (argc == 2)
|
||||
cpu_mhz = atoi(argv[1]);
|
||||
|
||||
if (!blake3)
|
||||
return (1);
|
||||
|
||||
/* fill test message */
|
||||
for (i = 0, j = 0; i < sizeof (buffer); i++, j++) {
|
||||
if (j == 251)
|
||||
|
@ -497,9 +503,9 @@ main(int argc, char *argv[])
|
|||
}
|
||||
|
||||
(void) printf("Running algorithm correctness tests:\n");
|
||||
for (id = 0; id < blake3_impl_getcnt(); id++) {
|
||||
blake3_impl_setid(id);
|
||||
const char *name = blake3_impl_getname();
|
||||
for (id = 0; id < blake3->getcnt(); id++) {
|
||||
blake3->setid(id);
|
||||
const char *name = blake3->getname();
|
||||
dprintf("Result for BLAKE3-%s:\n", name);
|
||||
for (i = 0; TestArray[i].hash; i++) {
|
||||
blake3_test_t *cur = &TestArray[i];
|
||||
|
@ -565,9 +571,9 @@ main(int argc, char *argv[])
|
|||
} while (0)
|
||||
|
||||
printf("Running performance tests (hashing 1024 MiB of data):\n");
|
||||
for (id = 0; id < blake3_impl_getcnt(); id++) {
|
||||
blake3_impl_setid(id);
|
||||
const char *name = blake3_impl_getname();
|
||||
for (id = 0; id < blake3->getcnt(); id++) {
|
||||
blake3->setid(id);
|
||||
const char *name = blake3->getname();
|
||||
BLAKE3_PERF_TEST(name, 256);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue