Skip checksum benchmarks on systems with slow cpu

The checksum benchmarking on module load may take a really long time
on embedded systems with a slow CPU. Avoid all benchmarks >= 1MiB on
systems where EdonR is slower than 300 MiB/s.

This limit is currently hardcoded via the define LIMIT_PERF_MBS.

This is the new benchmark output of a slow Intel Atom:

```
 implementation    1k    4k   16k   64k  256k    1m    4m   16m
 edonr-generic    209   257   268   259   262     0     0     0
 skein-generic    129   150   151   150   150     0     0     0
 sha256-generic    50    55    56    56    56     0     0     0
 sha512-generic    76    86    88    89    88     0     0     0
 blake3-generic    63    62    62    62    61     0     0     0
 blake3-sse2      114   292   301   307   309     0     0     0
```

Reviewed-by: Sebastian Gottschall <s.gottschall@dd-wrt.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Closes #13695
This commit is contained in:
Tino Reichardt 2022-08-01 18:51:45 +02:00 committed by GitHub
parent 51946eda70
commit 68aa3379ec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 34 additions and 13 deletions

View File

@ -31,7 +31,8 @@
#include <sys/blake3.h>
static kstat_t *chksum_kstat = NULL;
/* limit benchmarking to max 256KiB, when EdonR is slower than this: */
#define LIMIT_PERF_MBS 300
typedef struct {
const char *name;
@ -50,8 +51,9 @@ typedef struct {
zio_checksum_tmpl_free_t *(free);
} chksum_stat_t;
static int chksum_stat_cnt = 0;
static chksum_stat_t *chksum_stat_data = 0;
static int chksum_stat_cnt = 0;
static kstat_t *chksum_kstat = NULL;
/*
* i3-1005G1 test output:
@ -75,7 +77,7 @@ static chksum_stat_t *chksum_stat_data = 0;
* blake3-avx512 473 2687 4905 5836 5844 5643 5374
*/
static int
chksum_stat_kstat_headers(char *buf, size_t size)
chksum_kstat_headers(char *buf, size_t size)
{
ssize_t off = 0;
@ -93,7 +95,7 @@ chksum_stat_kstat_headers(char *buf, size_t size)
}
static int
chksum_stat_kstat_data(char *buf, size_t size, void *data)
chksum_kstat_data(char *buf, size_t size, void *data)
{
chksum_stat_t *cs;
ssize_t off = 0;
@ -123,7 +125,7 @@ chksum_stat_kstat_data(char *buf, size_t size, void *data)
}
static void *
chksum_stat_kstat_addr(kstat_t *ksp, loff_t n)
chksum_kstat_addr(kstat_t *ksp, loff_t n)
{
if (n < chksum_stat_cnt)
ksp->ks_private = (void *)(chksum_stat_data + n);
@ -176,17 +178,21 @@ chksum_run(chksum_stat_t *cs, abd_t *abd, void *ctx, int round,
*result = run_bw/1024/1024; /* MiB/s */
}
#define LIMIT_INIT 0
#define LIMIT_NEEDED 1
#define LIMIT_NOLIMIT 2
static void
chksum_benchit(chksum_stat_t *cs)
{
abd_t *abd;
void *ctx = 0;
void *salt = &cs->salt.zcs_bytes;
static int chksum_stat_limit = LIMIT_INIT;
memset(salt, 0, sizeof (cs->salt.zcs_bytes));
if (cs->init) {
if (cs->init)
ctx = cs->init(&cs->salt);
}
/* allocate test memory via abd linear interface */
abd = abd_alloc_linear(1<<20, B_FALSE);
@ -195,6 +201,20 @@ chksum_benchit(chksum_stat_t *cs)
chksum_run(cs, abd, ctx, 3, &cs->bs16k);
chksum_run(cs, abd, ctx, 4, &cs->bs64k);
chksum_run(cs, abd, ctx, 5, &cs->bs256k);
/* check if we ran on a slow cpu */
if (chksum_stat_limit == LIMIT_INIT) {
if (cs->bs1k < LIMIT_PERF_MBS) {
chksum_stat_limit = LIMIT_NEEDED;
} else {
chksum_stat_limit = LIMIT_NOLIMIT;
}
}
/* skip benchmarks >= 1MiB when the CPU is too slow */
if (chksum_stat_limit == LIMIT_NEEDED)
goto abort;
chksum_run(cs, abd, ctx, 6, &cs->bs1m);
abd_free(abd);
@ -202,13 +222,14 @@ chksum_benchit(chksum_stat_t *cs)
abd = abd_alloc(1<<24, B_FALSE);
chksum_run(cs, abd, ctx, 7, &cs->bs4m);
chksum_run(cs, abd, ctx, 8, &cs->bs16m);
abort:
abd_free(abd);
/* free up temp memory */
if (cs->free) {
if (cs->free)
cs->free(ctx);
}
}
/*
* Initialize and benchmark all supported implementations.
@ -232,7 +253,7 @@ chksum_benchmark(void)
chksum_stat_data = (chksum_stat_t *)kmem_zalloc(
sizeof (chksum_stat_t) * chksum_stat_cnt, KM_SLEEP);
/* edonr */
/* edonr - needs to be the first one here (slow CPU check) */
cs = &chksum_stat_data[cbid++];
cs->init = abd_checksum_edonr_tmpl_init;
cs->func = abd_checksum_edonr_native;
@ -303,9 +324,9 @@ chksum_init(void)
chksum_kstat->ks_data = NULL;
chksum_kstat->ks_ndata = UINT32_MAX;
kstat_set_raw_ops(chksum_kstat,
chksum_stat_kstat_headers,
chksum_stat_kstat_data,
chksum_stat_kstat_addr);
chksum_kstat_headers,
chksum_kstat_data,
chksum_kstat_addr);
kstat_install(chksum_kstat);
}