Skip checksum benchmarks on systems with slow cpu
The checksum benchmarking on module load may take a really long time on embedded systems with a slow cpu. Avoid all benchmarks >= 1MiB on systems, where EdonR is slower then 300 MiB/s. This limit is currently hardcoded via the define LIMIT_PERF_MBS. This is the new benchmark output of a slow Intel Atom: ``` implementation 1k 4k 16k 64k 256k 1m 4m 16m edonr-generic 209 257 268 259 262 0 0 0 skein-generic 129 150 151 150 150 0 0 0 sha256-generic 50 55 56 56 56 0 0 0 sha512-generic 76 86 88 89 88 0 0 0 blake3-generic 63 62 62 62 61 0 0 0 blake3-sse2 114 292 301 307 309 0 0 0 ``` Reviewed-by: Sebastian Gottschall <s.gottschall@dd-wrt.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de> Closes #13695
This commit is contained in:
parent
51946eda70
commit
68aa3379ec
|
@ -31,7 +31,8 @@
|
|||
|
||||
#include <sys/blake3.h>
|
||||
|
||||
static kstat_t *chksum_kstat = NULL;
|
||||
/* limit benchmarking to max 256KiB, when EdonR is slower then this: */
|
||||
#define LIMIT_PERF_MBS 300
|
||||
|
||||
typedef struct {
|
||||
const char *name;
|
||||
|
@ -50,8 +51,9 @@ typedef struct {
|
|||
zio_checksum_tmpl_free_t *(free);
|
||||
} chksum_stat_t;
|
||||
|
||||
static int chksum_stat_cnt = 0;
|
||||
static chksum_stat_t *chksum_stat_data = 0;
|
||||
static int chksum_stat_cnt = 0;
|
||||
static kstat_t *chksum_kstat = NULL;
|
||||
|
||||
/*
|
||||
* i3-1005G1 test output:
|
||||
|
@ -75,7 +77,7 @@ static chksum_stat_t *chksum_stat_data = 0;
|
|||
* blake3-avx512 473 2687 4905 5836 5844 5643 5374
|
||||
*/
|
||||
static int
|
||||
chksum_stat_kstat_headers(char *buf, size_t size)
|
||||
chksum_kstat_headers(char *buf, size_t size)
|
||||
{
|
||||
ssize_t off = 0;
|
||||
|
||||
|
@ -93,7 +95,7 @@ chksum_stat_kstat_headers(char *buf, size_t size)
|
|||
}
|
||||
|
||||
static int
|
||||
chksum_stat_kstat_data(char *buf, size_t size, void *data)
|
||||
chksum_kstat_data(char *buf, size_t size, void *data)
|
||||
{
|
||||
chksum_stat_t *cs;
|
||||
ssize_t off = 0;
|
||||
|
@ -123,7 +125,7 @@ chksum_stat_kstat_data(char *buf, size_t size, void *data)
|
|||
}
|
||||
|
||||
static void *
|
||||
chksum_stat_kstat_addr(kstat_t *ksp, loff_t n)
|
||||
chksum_kstat_addr(kstat_t *ksp, loff_t n)
|
||||
{
|
||||
if (n < chksum_stat_cnt)
|
||||
ksp->ks_private = (void *)(chksum_stat_data + n);
|
||||
|
@ -176,17 +178,21 @@ chksum_run(chksum_stat_t *cs, abd_t *abd, void *ctx, int round,
|
|||
*result = run_bw/1024/1024; /* MiB/s */
|
||||
}
|
||||
|
||||
#define LIMIT_INIT 0
|
||||
#define LIMIT_NEEDED 1
|
||||
#define LIMIT_NOLIMIT 2
|
||||
|
||||
static void
|
||||
chksum_benchit(chksum_stat_t *cs)
|
||||
{
|
||||
abd_t *abd;
|
||||
void *ctx = 0;
|
||||
void *salt = &cs->salt.zcs_bytes;
|
||||
static int chksum_stat_limit = LIMIT_INIT;
|
||||
|
||||
memset(salt, 0, sizeof (cs->salt.zcs_bytes));
|
||||
if (cs->init) {
|
||||
if (cs->init)
|
||||
ctx = cs->init(&cs->salt);
|
||||
}
|
||||
|
||||
/* allocate test memory via abd linear interface */
|
||||
abd = abd_alloc_linear(1<<20, B_FALSE);
|
||||
|
@ -195,6 +201,20 @@ chksum_benchit(chksum_stat_t *cs)
|
|||
chksum_run(cs, abd, ctx, 3, &cs->bs16k);
|
||||
chksum_run(cs, abd, ctx, 4, &cs->bs64k);
|
||||
chksum_run(cs, abd, ctx, 5, &cs->bs256k);
|
||||
|
||||
/* check if we ran on a slow cpu */
|
||||
if (chksum_stat_limit == LIMIT_INIT) {
|
||||
if (cs->bs1k < LIMIT_PERF_MBS) {
|
||||
chksum_stat_limit = LIMIT_NEEDED;
|
||||
} else {
|
||||
chksum_stat_limit = LIMIT_NOLIMIT;
|
||||
}
|
||||
}
|
||||
|
||||
/* skip benchmarks >= 1MiB when the CPU is to slow */
|
||||
if (chksum_stat_limit == LIMIT_NEEDED)
|
||||
goto abort;
|
||||
|
||||
chksum_run(cs, abd, ctx, 6, &cs->bs1m);
|
||||
abd_free(abd);
|
||||
|
||||
|
@ -202,13 +222,14 @@ chksum_benchit(chksum_stat_t *cs)
|
|||
abd = abd_alloc(1<<24, B_FALSE);
|
||||
chksum_run(cs, abd, ctx, 7, &cs->bs4m);
|
||||
chksum_run(cs, abd, ctx, 8, &cs->bs16m);
|
||||
|
||||
abort:
|
||||
abd_free(abd);
|
||||
|
||||
/* free up temp memory */
|
||||
if (cs->free) {
|
||||
if (cs->free)
|
||||
cs->free(ctx);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize and benchmark all supported implementations.
|
||||
|
@ -232,7 +253,7 @@ chksum_benchmark(void)
|
|||
chksum_stat_data = (chksum_stat_t *)kmem_zalloc(
|
||||
sizeof (chksum_stat_t) * chksum_stat_cnt, KM_SLEEP);
|
||||
|
||||
/* edonr */
|
||||
/* edonr - needs to be the first one here (slow CPU check) */
|
||||
cs = &chksum_stat_data[cbid++];
|
||||
cs->init = abd_checksum_edonr_tmpl_init;
|
||||
cs->func = abd_checksum_edonr_native;
|
||||
|
@ -303,9 +324,9 @@ chksum_init(void)
|
|||
chksum_kstat->ks_data = NULL;
|
||||
chksum_kstat->ks_ndata = UINT32_MAX;
|
||||
kstat_set_raw_ops(chksum_kstat,
|
||||
chksum_stat_kstat_headers,
|
||||
chksum_stat_kstat_data,
|
||||
chksum_stat_kstat_addr);
|
||||
chksum_kstat_headers,
|
||||
chksum_kstat_data,
|
||||
chksum_kstat_addr);
|
||||
kstat_install(chksum_kstat);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue