ddt: cleanup the stats & histogram code

Both the API and the code were kinda mangled and I was really struggling
to follow it. The worst offender was the old ddt_stat_add(); after
fixing it up the rest of the changes are mostly knock-on effects and
targets of opportunity.

Note that the old ddt_stat_add() was safe against overflows - it could
produce crazy numbers, but the compiler wouldn't do anything stupid. The
assertions in ddt_stat_sub() go a lot of the way to protecting against
this; getting in a position where overflows are a problem is definitely
a programming error.

Also expanding ddt_stat_add() and ddt_histogram_empty() produces less
efficient assembly. I'm not bothered about this right now though; these
should not be hot functions, and if they are we'll optimise them later.
If we have to go back to the old form, we'll comment it like crazy.

Finally, I've removed the assertion that the bucket will never be
negative, as it will soon be possible to have entries with zero
refcounts: an entry for a block that is no longer on the pool, but is on
the log waiting to be synced out. It might be better to have a separate
bucket for these, since they're still using real space on disk, but
ultimately these stats are driving UI, and for now I've chosen to keep
them matching how they've looked in the past, as well as match the
operators mental model - pool usage is managed elsewhere.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Sponsored-by: Klara, Inc.
Sponsored-by: iXsystems, Inc.
Closes #15895
This commit is contained in:
Rob Norris 2023-06-15 17:19:41 +10:00 committed by Brian Behlendorf
parent f4aeb23f52
commit 27e9cb5f80
5 changed files with 114 additions and 51 deletions

View File

@ -7357,29 +7357,27 @@ dump_simulated_ddt(spa_t *spa)
spa_config_exit(spa, SCL_CONFIG, FTAG); spa_config_exit(spa, SCL_CONFIG, FTAG);
while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) { while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) {
ddt_stat_t dds;
uint64_t refcnt = zdde->zdde_ref_blocks; uint64_t refcnt = zdde->zdde_ref_blocks;
ASSERT(refcnt != 0); ASSERT(refcnt != 0);
dds.dds_blocks = zdde->zdde_ref_blocks / refcnt; ddt_stat_t *dds = &ddh_total.ddh_stat[highbit64(refcnt) - 1];
dds.dds_lsize = zdde->zdde_ref_lsize / refcnt;
dds.dds_psize = zdde->zdde_ref_psize / refcnt;
dds.dds_dsize = zdde->zdde_ref_dsize / refcnt;
dds.dds_ref_blocks = zdde->zdde_ref_blocks; dds->dds_blocks += zdde->zdde_ref_blocks / refcnt;
dds.dds_ref_lsize = zdde->zdde_ref_lsize; dds->dds_lsize += zdde->zdde_ref_lsize / refcnt;
dds.dds_ref_psize = zdde->zdde_ref_psize; dds->dds_psize += zdde->zdde_ref_psize / refcnt;
dds.dds_ref_dsize = zdde->zdde_ref_dsize; dds->dds_dsize += zdde->zdde_ref_dsize / refcnt;
ddt_stat_add(&ddh_total.ddh_stat[highbit64(refcnt) - 1], dds->dds_ref_blocks += zdde->zdde_ref_blocks;
&dds, 0); dds->dds_ref_lsize += zdde->zdde_ref_lsize;
dds->dds_ref_psize += zdde->zdde_ref_psize;
dds->dds_ref_dsize += zdde->zdde_ref_dsize;
umem_free(zdde, sizeof (*zdde)); umem_free(zdde, sizeof (*zdde));
} }
avl_destroy(&t); avl_destroy(&t);
ddt_histogram_stat(&dds_total, &ddh_total); ddt_histogram_total(&dds_total, &ddh_total);
(void) printf("Simulated DDT histogram:\n"); (void) printf("Simulated DDT histogram:\n");

View File

@ -318,9 +318,15 @@ extern uint64_t ddt_phys_birth(const ddt_univ_phys_t *ddp,
extern int ddt_phys_dva_count(const ddt_univ_phys_t *ddp, ddt_phys_variant_t v, extern int ddt_phys_dva_count(const ddt_univ_phys_t *ddp, ddt_phys_variant_t v,
boolean_t encrypted); boolean_t encrypted);
extern void ddt_histogram_add_entry(ddt_t *ddt, ddt_histogram_t *ddh,
const ddt_lightweight_entry_t *ddlwe);
extern void ddt_histogram_sub_entry(ddt_t *ddt, ddt_histogram_t *ddh,
const ddt_lightweight_entry_t *ddlwe);
extern void ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src); extern void ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src);
extern void ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh); extern void ddt_histogram_total(ddt_stat_t *dds, const ddt_histogram_t *ddh);
extern boolean_t ddt_histogram_empty(const ddt_histogram_t *ddh); extern boolean_t ddt_histogram_empty(const ddt_histogram_t *ddh);
extern void ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo); extern void ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo);
extern uint64_t ddt_get_ddt_dsize(spa_t *spa); extern uint64_t ddt_get_ddt_dsize(spa_t *spa);
extern void ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh); extern void ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh);

View File

@ -77,8 +77,6 @@ typedef struct {
extern const ddt_ops_t ddt_zap_ops; extern const ddt_ops_t ddt_zap_ops;
extern void ddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg);
/* /*
* These are only exposed so that zdb can access them. Try not to use them * These are only exposed so that zdb can access them. Try not to use them
* outside of the DDT implementation proper, and if you do, consider moving * outside of the DDT implementation proper, and if you do, consider moving
@ -95,8 +93,6 @@ extern uint64_t ddt_phys_total_refcnt(const ddt_t *ddt, const ddt_entry_t *dde);
extern void ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp); extern void ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp);
extern void ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg);
extern void ddt_object_name(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz, extern void ddt_object_name(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
char *name); char *name);
extern int ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz, extern int ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,

View File

@ -992,7 +992,18 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
/* Flag cleanup required */ /* Flag cleanup required */
dde->dde_flags |= DDE_FLAG_OVERQUOTA; dde->dde_flags |= DDE_FLAG_OVERQUOTA;
} else if (error == 0) { } else if (error == 0) {
ddt_stat_update(ddt, dde, -1ULL); /*
* The histograms only track inactive (stored) blocks.
* We've just put an entry onto the live list, so we need to
* remove its counts. When its synced back, it'll be re-added
* to the right one.
*/
ddt_histogram_t *ddh =
&ddt->ddt_histogram[dde->dde_type][dde->dde_class];
ddt_lightweight_entry_t ddlwe;
DDT_ENTRY_TO_LIGHTWEIGHT(ddt, dde, &ddlwe);
ddt_histogram_sub_entry(ddt, ddh, &ddlwe);
} }
/* Entry loaded, everyone can proceed now */ /* Entry loaded, everyone can proceed now */
@ -1527,11 +1538,18 @@ ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg)
if (total_refcnt != 0) { if (total_refcnt != 0) {
dde->dde_type = ntype; dde->dde_type = ntype;
dde->dde_class = nclass; dde->dde_class = nclass;
ddt_stat_update(ddt, dde, 0);
if (!ddt_object_exists(ddt, ntype, nclass)) if (!ddt_object_exists(ddt, ntype, nclass))
ddt_object_create(ddt, ntype, nclass, tx); ddt_object_create(ddt, ntype, nclass, tx);
VERIFY0(ddt_object_update(ddt, ntype, nclass, dde, tx)); VERIFY0(ddt_object_update(ddt, ntype, nclass, dde, tx));
ddt_lightweight_entry_t ddlwe;
DDT_ENTRY_TO_LIGHTWEIGHT(ddt, dde, &ddlwe);
ddt_histogram_t *ddh =
&ddt->ddt_histogram[ntype][nclass];
ddt_histogram_add_entry(ddt, ddh, &ddlwe);
/* /*
* If the class changes, the order that we scan this bp * If the class changes, the order that we scan this bp
* changes. If it decreases, we could miss it, so * changes. If it decreases, we could miss it, so
@ -1540,8 +1558,6 @@ ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg)
* traversing.) * traversing.)
*/ */
if (nclass < oclass) { if (nclass < oclass) {
ddt_lightweight_entry_t ddlwe;
DDT_ENTRY_TO_LIGHTWEIGHT(ddt, dde, &ddlwe);
dsl_scan_ddt_entry(dp->dp_scan, dsl_scan_ddt_entry(dp->dp_scan,
ddt->ddt_checksum, ddt, &ddlwe, tx); ddt->ddt_checksum, ddt, &ddlwe, tx);
} }

View File

@ -33,24 +33,24 @@
#include <sys/ddt_impl.h> #include <sys/ddt_impl.h>
static void static void
ddt_stat_generate(ddt_t *ddt, ddt_entry_t *dde, ddt_stat_t *dds) ddt_stat_generate(ddt_t *ddt, const ddt_lightweight_entry_t *ddlwe,
ddt_stat_t *dds)
{ {
spa_t *spa = ddt->ddt_spa; spa_t *spa = ddt->ddt_spa;
ddt_key_t *ddk = &dde->dde_key; uint64_t lsize = DDK_GET_LSIZE(&ddlwe->ddlwe_key);
uint64_t lsize = DDK_GET_LSIZE(ddk); uint64_t psize = DDK_GET_PSIZE(&ddlwe->ddlwe_key);
uint64_t psize = DDK_GET_PSIZE(ddk);
memset(dds, 0, sizeof (*dds)); memset(dds, 0, sizeof (*dds));
for (int p = 0; p < DDT_NPHYS(ddt); p++) { for (int p = 0; p < ddlwe->ddlwe_nphys; p++) {
const ddt_univ_phys_t *ddp = dde->dde_phys; const ddt_univ_phys_t *ddp = &ddlwe->ddlwe_phys;
ddt_phys_variant_t v = DDT_PHYS_VARIANT(ddt, p); ddt_phys_variant_t v = DDT_PHYS_VARIANT(ddt, p);
if (ddt_phys_birth(ddp, v) == 0) if (ddt_phys_birth(ddp, v) == 0)
continue; continue;
int ndvas = ddt_phys_dva_count(ddp, v, int ndvas = ddt_phys_dva_count(ddp, v,
DDK_GET_CRYPT(&dde->dde_key)); DDK_GET_CRYPT(&ddlwe->ddlwe_key));
const dva_t *dvas = (ddt->ddt_flags & DDT_FLAG_FLAT) ? const dva_t *dvas = (ddt->ddt_flags & DDT_FLAG_FLAT) ?
ddp->ddp_flat.ddp_dva : ddp->ddp_trad[p].ddp_dva; ddp->ddp_flat.ddp_dva : ddp->ddp_trad[p].ddp_dva;
@ -72,61 +72,108 @@ ddt_stat_generate(ddt_t *ddt, ddt_entry_t *dde, ddt_stat_t *dds)
} }
} }
void static void
ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg) ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src)
{ {
const uint64_t *s = (const uint64_t *)src; dst->dds_blocks += src->dds_blocks;
uint64_t *d = (uint64_t *)dst; dst->dds_lsize += src->dds_lsize;
uint64_t *d_end = (uint64_t *)(dst + 1); dst->dds_psize += src->dds_psize;
dst->dds_dsize += src->dds_dsize;
dst->dds_ref_blocks += src->dds_ref_blocks;
dst->dds_ref_lsize += src->dds_ref_lsize;
dst->dds_ref_psize += src->dds_ref_psize;
dst->dds_ref_dsize += src->dds_ref_dsize;
}
ASSERT(neg == 0 || neg == -1ULL); /* add or subtract */ static void
ddt_stat_sub(ddt_stat_t *dst, const ddt_stat_t *src)
{
/* This caught more during development than you might expect... */
ASSERT3U(dst->dds_blocks, >=, src->dds_blocks);
ASSERT3U(dst->dds_lsize, >=, src->dds_lsize);
ASSERT3U(dst->dds_psize, >=, src->dds_psize);
ASSERT3U(dst->dds_dsize, >=, src->dds_dsize);
ASSERT3U(dst->dds_ref_blocks, >=, src->dds_ref_blocks);
ASSERT3U(dst->dds_ref_lsize, >=, src->dds_ref_lsize);
ASSERT3U(dst->dds_ref_psize, >=, src->dds_ref_psize);
ASSERT3U(dst->dds_ref_dsize, >=, src->dds_ref_dsize);
for (int i = 0; i < d_end - d; i++) dst->dds_blocks -= src->dds_blocks;
d[i] += (s[i] ^ neg) - neg; dst->dds_lsize -= src->dds_lsize;
dst->dds_psize -= src->dds_psize;
dst->dds_dsize -= src->dds_dsize;
dst->dds_ref_blocks -= src->dds_ref_blocks;
dst->dds_ref_lsize -= src->dds_ref_lsize;
dst->dds_ref_psize -= src->dds_ref_psize;
dst->dds_ref_dsize -= src->dds_ref_dsize;
} }
void void
ddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg) ddt_histogram_add_entry(ddt_t *ddt, ddt_histogram_t *ddh,
const ddt_lightweight_entry_t *ddlwe)
{ {
ddt_stat_t dds; ddt_stat_t dds;
ddt_histogram_t *ddh;
int bucket; int bucket;
ddt_stat_generate(ddt, dde, &dds); ddt_stat_generate(ddt, ddlwe, &dds);
bucket = highbit64(dds.dds_ref_blocks) - 1; bucket = highbit64(dds.dds_ref_blocks) - 1;
ASSERT3U(bucket, >=, 0); if (bucket < 0)
return;
ddh = &ddt->ddt_histogram[dde->dde_type][dde->dde_class]; ddt_stat_add(&ddh->ddh_stat[bucket], &dds);
}
ddt_stat_add(&ddh->ddh_stat[bucket], &dds, neg); void
ddt_histogram_sub_entry(ddt_t *ddt, ddt_histogram_t *ddh,
const ddt_lightweight_entry_t *ddlwe)
{
ddt_stat_t dds;
int bucket;
ddt_stat_generate(ddt, ddlwe, &dds);
bucket = highbit64(dds.dds_ref_blocks) - 1;
if (bucket < 0)
return;
ddt_stat_sub(&ddh->ddh_stat[bucket], &dds);
} }
void void
ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src) ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src)
{ {
for (int h = 0; h < 64; h++) for (int h = 0; h < 64; h++)
ddt_stat_add(&dst->ddh_stat[h], &src->ddh_stat[h], 0); ddt_stat_add(&dst->ddh_stat[h], &src->ddh_stat[h]);
} }
void void
ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh) ddt_histogram_total(ddt_stat_t *dds, const ddt_histogram_t *ddh)
{ {
memset(dds, 0, sizeof (*dds)); memset(dds, 0, sizeof (*dds));
for (int h = 0; h < 64; h++) for (int h = 0; h < 64; h++)
ddt_stat_add(dds, &ddh->ddh_stat[h], 0); ddt_stat_add(dds, &ddh->ddh_stat[h]);
} }
boolean_t boolean_t
ddt_histogram_empty(const ddt_histogram_t *ddh) ddt_histogram_empty(const ddt_histogram_t *ddh)
{ {
const uint64_t *s = (const uint64_t *)ddh; for (int h = 0; h < 64; h++) {
const uint64_t *s_end = (const uint64_t *)(ddh + 1); const ddt_stat_t *dds = &ddh->ddh_stat[h];
if (dds->dds_blocks == 0 &&
dds->dds_lsize == 0 &&
dds->dds_psize == 0 &&
dds->dds_dsize == 0 &&
dds->dds_ref_blocks == 0 &&
dds->dds_ref_lsize == 0 &&
dds->dds_ref_psize == 0 &&
dds->dds_ref_dsize == 0)
continue;
while (s < s_end)
if (*s++ != 0)
return (B_FALSE); return (B_FALSE);
}
return (B_TRUE); return (B_TRUE);
} }
@ -222,7 +269,7 @@ ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total)
ddh_total = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP); ddh_total = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP);
ddt_get_dedup_histogram(spa, ddh_total); ddt_get_dedup_histogram(spa, ddh_total);
ddt_histogram_stat(dds_total, ddh_total); ddt_histogram_total(dds_total, ddh_total);
kmem_free(ddh_total, sizeof (ddt_histogram_t)); kmem_free(ddh_total, sizeof (ddt_histogram_t));
} }