zdb: include cloned blocks in block statistics

This gives `zdb -b` support for clone blocks.

Previously, it didn't know what clones were, so would count their space
allocation multiple times and then report leaked space (or, in debug,
would assert trying to claim blocks a second time).

This commit fixes those bugs, and reports the number of clones and the
space "used" (saved) by them.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Kay Pedersen <mail@mkwg.de>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Sponsored-By: OpenDrives Inc.
Sponsored-By: Klara Inc.
Closes 
This commit is contained in:
Rob N 2023-08-02 01:56:30 +10:00 committed by GitHub
parent a21ca18d4d
commit 114a39964f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 108 additions and 1 deletions
cmd/zdb
include/sys
module/zfs

View File

@ -79,6 +79,7 @@
#include <sys/dsl_crypt.h> #include <sys/dsl_crypt.h>
#include <sys/dsl_scan.h> #include <sys/dsl_scan.h>
#include <sys/btree.h> #include <sys/btree.h>
#include <sys/brt.h>
#include <zfs_comutil.h> #include <zfs_comutil.h>
#include <sys/zstd/zstd.h> #include <sys/zstd/zstd.h>
@ -5342,12 +5343,20 @@ static const char *zdb_ot_extname[] = {
#define ZB_TOTAL DN_MAX_LEVELS #define ZB_TOTAL DN_MAX_LEVELS
#define SPA_MAX_FOR_16M (SPA_MAXBLOCKSHIFT+1) #define SPA_MAX_FOR_16M (SPA_MAXBLOCKSHIFT+1)
typedef struct zdb_brt_entry {
dva_t zbre_dva;
uint64_t zbre_refcount;
avl_node_t zbre_node;
} zdb_brt_entry_t;
typedef struct zdb_cb { typedef struct zdb_cb {
zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1]; zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
uint64_t zcb_removing_size; uint64_t zcb_removing_size;
uint64_t zcb_checkpoint_size; uint64_t zcb_checkpoint_size;
uint64_t zcb_dedup_asize; uint64_t zcb_dedup_asize;
uint64_t zcb_dedup_blocks; uint64_t zcb_dedup_blocks;
uint64_t zcb_clone_asize;
uint64_t zcb_clone_blocks;
uint64_t zcb_psize_count[SPA_MAX_FOR_16M]; uint64_t zcb_psize_count[SPA_MAX_FOR_16M];
uint64_t zcb_lsize_count[SPA_MAX_FOR_16M]; uint64_t zcb_lsize_count[SPA_MAX_FOR_16M];
uint64_t zcb_asize_count[SPA_MAX_FOR_16M]; uint64_t zcb_asize_count[SPA_MAX_FOR_16M];
@ -5368,6 +5377,8 @@ typedef struct zdb_cb {
int zcb_haderrors; int zcb_haderrors;
spa_t *zcb_spa; spa_t *zcb_spa;
uint32_t **zcb_vd_obsolete_counts; uint32_t **zcb_vd_obsolete_counts;
avl_tree_t zcb_brt;
boolean_t zcb_brt_is_active;
} zdb_cb_t; } zdb_cb_t;
/* test if two DVA offsets from same vdev are within the same metaslab */ /* test if two DVA offsets from same vdev are within the same metaslab */
@ -5662,6 +5673,45 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
zcb->zcb_asize_len[bin] += BP_GET_ASIZE(bp); zcb->zcb_asize_len[bin] += BP_GET_ASIZE(bp);
zcb->zcb_asize_total += BP_GET_ASIZE(bp); zcb->zcb_asize_total += BP_GET_ASIZE(bp);
if (zcb->zcb_brt_is_active && brt_maybe_exists(zcb->zcb_spa, bp)) {
/*
* Cloned blocks are special. We need to count them, so we can
* later uncount them when reporting leaked space, and we must
* only claim them them once.
*
* To do this, we keep our own in-memory BRT. For each block
* we haven't seen before, we look it up in the real BRT and
* if its there, we note it and its refcount then proceed as
* normal. If we see the block again, we count it as a clone
* and then give it no further consideration.
*/
zdb_brt_entry_t zbre_search, *zbre;
avl_index_t where;
zbre_search.zbre_dva = bp->blk_dva[0];
zbre = avl_find(&zcb->zcb_brt, &zbre_search, &where);
if (zbre != NULL) {
zcb->zcb_clone_asize += BP_GET_ASIZE(bp);
zcb->zcb_clone_blocks++;
zbre->zbre_refcount--;
if (zbre->zbre_refcount == 0) {
avl_remove(&zcb->zcb_brt, zbre);
umem_free(zbre, sizeof (zdb_brt_entry_t));
}
return;
}
uint64_t crefcnt = brt_entry_get_refcount(zcb->zcb_spa, bp);
if (crefcnt > 0) {
zbre = umem_zalloc(sizeof (zdb_brt_entry_t),
UMEM_NOFAIL);
zbre->zbre_dva = bp->blk_dva[0];
zbre->zbre_refcount = crefcnt;
avl_insert(&zcb->zcb_brt, zbre, where);
}
}
if (dump_opt['L']) if (dump_opt['L'])
return; return;
@ -6664,6 +6714,20 @@ deleted_livelists_dump_mos(spa_t *spa)
iterate_deleted_livelists(spa, dump_livelist_cb, NULL); iterate_deleted_livelists(spa, dump_livelist_cb, NULL);
} }
static int
zdb_brt_entry_compare(const void *zcn1, const void *zcn2)
{
const dva_t *dva1 = &((const zdb_brt_entry_t *)zcn1)->zbre_dva;
const dva_t *dva2 = &((const zdb_brt_entry_t *)zcn2)->zbre_dva;
int cmp;
cmp = TREE_CMP(DVA_GET_VDEV(dva1), DVA_GET_VDEV(dva2));
if (cmp == 0)
cmp = TREE_CMP(DVA_GET_OFFSET(dva1), DVA_GET_OFFSET(dva2));
return (cmp);
}
static int static int
dump_block_stats(spa_t *spa) dump_block_stats(spa_t *spa)
{ {
@ -6678,6 +6742,13 @@ dump_block_stats(spa_t *spa)
zcb = umem_zalloc(sizeof (zdb_cb_t), UMEM_NOFAIL); zcb = umem_zalloc(sizeof (zdb_cb_t), UMEM_NOFAIL);
if (spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING)) {
avl_create(&zcb->zcb_brt, zdb_brt_entry_compare,
sizeof (zdb_brt_entry_t),
offsetof(zdb_brt_entry_t, zbre_node));
zcb->zcb_brt_is_active = B_TRUE;
}
(void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n", (void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
(dump_opt['c'] || !dump_opt['L']) ? "to verify " : "", (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
(dump_opt['c'] == 1) ? "metadata " : "", (dump_opt['c'] == 1) ? "metadata " : "",
@ -6779,7 +6850,8 @@ dump_block_stats(spa_t *spa)
metaslab_class_get_alloc(spa_special_class(spa)) + metaslab_class_get_alloc(spa_special_class(spa)) +
metaslab_class_get_alloc(spa_dedup_class(spa)) + metaslab_class_get_alloc(spa_dedup_class(spa)) +
get_unflushed_alloc_space(spa); get_unflushed_alloc_space(spa);
total_found = tzb->zb_asize - zcb->zcb_dedup_asize + total_found =
tzb->zb_asize - zcb->zcb_dedup_asize - zcb->zcb_clone_asize +
zcb->zcb_removing_size + zcb->zcb_checkpoint_size; zcb->zcb_removing_size + zcb->zcb_checkpoint_size;
if (total_found == total_alloc && !dump_opt['L']) { if (total_found == total_alloc && !dump_opt['L']) {
@ -6820,6 +6892,9 @@ dump_block_stats(spa_t *spa)
"bp deduped:", (u_longlong_t)zcb->zcb_dedup_asize, "bp deduped:", (u_longlong_t)zcb->zcb_dedup_asize,
(u_longlong_t)zcb->zcb_dedup_blocks, (u_longlong_t)zcb->zcb_dedup_blocks,
(double)zcb->zcb_dedup_asize / tzb->zb_asize + 1.0); (double)zcb->zcb_dedup_asize / tzb->zb_asize + 1.0);
(void) printf("\t%-16s %14llu count: %6llu\n",
"bp cloned:", (u_longlong_t)zcb->zcb_clone_asize,
(u_longlong_t)zcb->zcb_clone_blocks);
(void) printf("\t%-16s %14llu used: %5.2f%%\n", "Normal class:", (void) printf("\t%-16s %14llu used: %5.2f%%\n", "Normal class:",
(u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space); (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);

View File

@ -36,6 +36,7 @@ extern "C" {
#endif #endif
extern boolean_t brt_entry_decref(spa_t *spa, const blkptr_t *bp); extern boolean_t brt_entry_decref(spa_t *spa, const blkptr_t *bp);
extern uint64_t brt_entry_get_refcount(spa_t *spa, const blkptr_t *bp);
extern uint64_t brt_get_dspace(spa_t *spa); extern uint64_t brt_get_dspace(spa_t *spa);
extern uint64_t brt_get_used(spa_t *spa); extern uint64_t brt_get_used(spa_t *spa);

View File

@ -1544,6 +1544,37 @@ out:
return (B_FALSE); return (B_FALSE);
} }
uint64_t
brt_entry_get_refcount(spa_t *spa, const blkptr_t *bp)
{
brt_t *brt = spa->spa_brt;
brt_vdev_t *brtvd;
brt_entry_t bre_search, *bre;
uint64_t vdevid, refcnt;
int error;
brt_entry_fill(bp, &bre_search, &vdevid);
brt_rlock(brt);
brtvd = brt_vdev(brt, vdevid);
ASSERT(brtvd != NULL);
bre = avl_find(&brtvd->bv_tree, &bre_search, NULL);
if (bre == NULL) {
error = brt_entry_lookup(brt, brtvd, &bre_search);
ASSERT(error == 0 || error == ENOENT);
if (error == ENOENT)
refcnt = 0;
else
refcnt = bre_search.bre_refcount;
} else
refcnt = bre->bre_refcount;
brt_unlock(brt);
return (refcnt);
}
static void static void
brt_prefetch(brt_t *brt, const blkptr_t *bp) brt_prefetch(brt_t *brt, const blkptr_t *bp)
{ {