Skip checksum benchmarks on systems with a slow CPU
The checksum benchmarking on module load may take a really long time on embedded systems with a slow CPU. Avoid all benchmarks >= 1MiB on systems where EdonR is slower than 300 MiB/s. This limit is currently hardcoded via the define LIMIT_PERF_MBS.

This is the new benchmark output of a slow Intel Atom:

```
implementation 1k 4k 16k 64k 256k 1m 4m 16m
edonr-generic 209 257 268 259 262 0 0 0
skein-generic 129 150 151 150 150 0 0 0
sha256-generic 50 55 56 56 56 0 0 0
sha512-generic 76 86 88 89 88 0 0 0
blake3-generic 63 62 62 62 61 0 0 0
blake3-sse2 114 292 301 307 309 0 0 0
```

Reviewed-by: Sebastian Gottschall <s.gottschall@dd-wrt.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Closes #13695
This commit is contained in:
parent
51946eda70
commit
68aa3379ec
|
@ -31,7 +31,8 @@
|
||||||
|
|
||||||
#include <sys/blake3.h>
|
#include <sys/blake3.h>
|
||||||
|
|
||||||
static kstat_t *chksum_kstat = NULL;
|
/* limit benchmarking to max 256KiB when EdonR is slower than this (MiB/s): */
|
||||||
|
#define LIMIT_PERF_MBS 300
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
const char *name;
|
const char *name;
|
||||||
|
@ -50,8 +51,9 @@ typedef struct {
|
||||||
zio_checksum_tmpl_free_t *(free);
|
zio_checksum_tmpl_free_t *(free);
|
||||||
} chksum_stat_t;
|
} chksum_stat_t;
|
||||||
|
|
||||||
static int chksum_stat_cnt = 0;
|
|
||||||
static chksum_stat_t *chksum_stat_data = 0;
|
static chksum_stat_t *chksum_stat_data = 0;
|
||||||
|
static int chksum_stat_cnt = 0;
|
||||||
|
static kstat_t *chksum_kstat = NULL;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* i3-1005G1 test output:
|
* i3-1005G1 test output:
|
||||||
|
@ -75,7 +77,7 @@ static chksum_stat_t *chksum_stat_data = 0;
|
||||||
* blake3-avx512 473 2687 4905 5836 5844 5643 5374
|
* blake3-avx512 473 2687 4905 5836 5844 5643 5374
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
chksum_stat_kstat_headers(char *buf, size_t size)
|
chksum_kstat_headers(char *buf, size_t size)
|
||||||
{
|
{
|
||||||
ssize_t off = 0;
|
ssize_t off = 0;
|
||||||
|
|
||||||
|
@ -93,7 +95,7 @@ chksum_stat_kstat_headers(char *buf, size_t size)
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
chksum_stat_kstat_data(char *buf, size_t size, void *data)
|
chksum_kstat_data(char *buf, size_t size, void *data)
|
||||||
{
|
{
|
||||||
chksum_stat_t *cs;
|
chksum_stat_t *cs;
|
||||||
ssize_t off = 0;
|
ssize_t off = 0;
|
||||||
|
@ -123,7 +125,7 @@ chksum_stat_kstat_data(char *buf, size_t size, void *data)
|
||||||
}
|
}
|
||||||
|
|
||||||
static void *
|
static void *
|
||||||
chksum_stat_kstat_addr(kstat_t *ksp, loff_t n)
|
chksum_kstat_addr(kstat_t *ksp, loff_t n)
|
||||||
{
|
{
|
||||||
if (n < chksum_stat_cnt)
|
if (n < chksum_stat_cnt)
|
||||||
ksp->ks_private = (void *)(chksum_stat_data + n);
|
ksp->ks_private = (void *)(chksum_stat_data + n);
|
||||||
|
@ -176,17 +178,21 @@ chksum_run(chksum_stat_t *cs, abd_t *abd, void *ctx, int round,
|
||||||
*result = run_bw/1024/1024; /* MiB/s */
|
*result = run_bw/1024/1024; /* MiB/s */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define LIMIT_INIT 0
|
||||||
|
#define LIMIT_NEEDED 1
|
||||||
|
#define LIMIT_NOLIMIT 2
|
||||||
|
|
||||||
static void
|
static void
|
||||||
chksum_benchit(chksum_stat_t *cs)
|
chksum_benchit(chksum_stat_t *cs)
|
||||||
{
|
{
|
||||||
abd_t *abd;
|
abd_t *abd;
|
||||||
void *ctx = 0;
|
void *ctx = 0;
|
||||||
void *salt = &cs->salt.zcs_bytes;
|
void *salt = &cs->salt.zcs_bytes;
|
||||||
|
static int chksum_stat_limit = LIMIT_INIT;
|
||||||
|
|
||||||
memset(salt, 0, sizeof (cs->salt.zcs_bytes));
|
memset(salt, 0, sizeof (cs->salt.zcs_bytes));
|
||||||
if (cs->init) {
|
if (cs->init)
|
||||||
ctx = cs->init(&cs->salt);
|
ctx = cs->init(&cs->salt);
|
||||||
}
|
|
||||||
|
|
||||||
/* allocate test memory via abd linear interface */
|
/* allocate test memory via abd linear interface */
|
||||||
abd = abd_alloc_linear(1<<20, B_FALSE);
|
abd = abd_alloc_linear(1<<20, B_FALSE);
|
||||||
|
@ -195,6 +201,20 @@ chksum_benchit(chksum_stat_t *cs)
|
||||||
chksum_run(cs, abd, ctx, 3, &cs->bs16k);
|
chksum_run(cs, abd, ctx, 3, &cs->bs16k);
|
||||||
chksum_run(cs, abd, ctx, 4, &cs->bs64k);
|
chksum_run(cs, abd, ctx, 4, &cs->bs64k);
|
||||||
chksum_run(cs, abd, ctx, 5, &cs->bs256k);
|
chksum_run(cs, abd, ctx, 5, &cs->bs256k);
|
||||||
|
|
||||||
|
/* check if we ran on a slow CPU (decided only once, on the first call) */
|
||||||
|
if (chksum_stat_limit == LIMIT_INIT) {
|
||||||
|
if (cs->bs1k < LIMIT_PERF_MBS) {
|
||||||
|
chksum_stat_limit = LIMIT_NEEDED;
|
||||||
|
} else {
|
||||||
|
chksum_stat_limit = LIMIT_NOLIMIT;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* skip benchmarks >= 1MiB when the CPU is too slow */
|
||||||
|
if (chksum_stat_limit == LIMIT_NEEDED)
|
||||||
|
goto abort;
|
||||||
|
|
||||||
chksum_run(cs, abd, ctx, 6, &cs->bs1m);
|
chksum_run(cs, abd, ctx, 6, &cs->bs1m);
|
||||||
abd_free(abd);
|
abd_free(abd);
|
||||||
|
|
||||||
|
@ -202,13 +222,14 @@ chksum_benchit(chksum_stat_t *cs)
|
||||||
abd = abd_alloc(1<<24, B_FALSE);
|
abd = abd_alloc(1<<24, B_FALSE);
|
||||||
chksum_run(cs, abd, ctx, 7, &cs->bs4m);
|
chksum_run(cs, abd, ctx, 7, &cs->bs4m);
|
||||||
chksum_run(cs, abd, ctx, 8, &cs->bs16m);
|
chksum_run(cs, abd, ctx, 8, &cs->bs16m);
|
||||||
|
|
||||||
|
abort:
|
||||||
abd_free(abd);
|
abd_free(abd);
|
||||||
|
|
||||||
/* free up temp memory */
|
/* free up temp memory */
|
||||||
if (cs->free) {
|
if (cs->free)
|
||||||
cs->free(ctx);
|
cs->free(ctx);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initialize and benchmark all supported implementations.
|
* Initialize and benchmark all supported implementations.
|
||||||
|
@ -232,7 +253,7 @@ chksum_benchmark(void)
|
||||||
chksum_stat_data = (chksum_stat_t *)kmem_zalloc(
|
chksum_stat_data = (chksum_stat_t *)kmem_zalloc(
|
||||||
sizeof (chksum_stat_t) * chksum_stat_cnt, KM_SLEEP);
|
sizeof (chksum_stat_t) * chksum_stat_cnt, KM_SLEEP);
|
||||||
|
|
||||||
/* edonr */
|
/* edonr - needs to be the first one here (slow CPU check) */
|
||||||
cs = &chksum_stat_data[cbid++];
|
cs = &chksum_stat_data[cbid++];
|
||||||
cs->init = abd_checksum_edonr_tmpl_init;
|
cs->init = abd_checksum_edonr_tmpl_init;
|
||||||
cs->func = abd_checksum_edonr_native;
|
cs->func = abd_checksum_edonr_native;
|
||||||
|
@ -303,9 +324,9 @@ chksum_init(void)
|
||||||
chksum_kstat->ks_data = NULL;
|
chksum_kstat->ks_data = NULL;
|
||||||
chksum_kstat->ks_ndata = UINT32_MAX;
|
chksum_kstat->ks_ndata = UINT32_MAX;
|
||||||
kstat_set_raw_ops(chksum_kstat,
|
kstat_set_raw_ops(chksum_kstat,
|
||||||
chksum_stat_kstat_headers,
|
chksum_kstat_headers,
|
||||||
chksum_stat_kstat_data,
|
chksum_kstat_data,
|
||||||
chksum_stat_kstat_addr);
|
chksum_kstat_addr);
|
||||||
kstat_install(chksum_kstat);
|
kstat_install(chksum_kstat);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue