From 114a39964f5c8d89558d2823f1211f2258825ec7 Mon Sep 17 00:00:00 2001 From: Rob N Date: Wed, 2 Aug 2023 01:56:30 +1000 Subject: [PATCH] zdb: include cloned blocks in block statistics This gives `zdb -b` support for clone blocks. Previously, it didn't know what clones were, so would count their space allocation multiple times and then report leaked space (or, in debug, would assert trying to claim blocks a second time). This commit fixes those bugs, and reports the number of clones and the space "used" (saved) by them. Reviewed-by: Brian Behlendorf Reviewed-by: Kay Pedersen Signed-off-by: Rob Norris Sponsored-By: OpenDrives Inc. Sponsored-By: Klara Inc. Closes #15123 --- cmd/zdb/zdb.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++- include/sys/brt.h | 1 + module/zfs/brt.c | 31 +++++++++++++++++++ 3 files changed, 108 insertions(+), 1 deletion(-) diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 9568d2bbfe..4b9921d47b 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -79,6 +79,7 @@ #include #include #include +#include #include #include @@ -5342,12 +5343,20 @@ static const char *zdb_ot_extname[] = { #define ZB_TOTAL DN_MAX_LEVELS #define SPA_MAX_FOR_16M (SPA_MAXBLOCKSHIFT+1) +typedef struct zdb_brt_entry { + dva_t zbre_dva; + uint64_t zbre_refcount; + avl_node_t zbre_node; +} zdb_brt_entry_t; + typedef struct zdb_cb { zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1]; uint64_t zcb_removing_size; uint64_t zcb_checkpoint_size; uint64_t zcb_dedup_asize; uint64_t zcb_dedup_blocks; + uint64_t zcb_clone_asize; + uint64_t zcb_clone_blocks; uint64_t zcb_psize_count[SPA_MAX_FOR_16M]; uint64_t zcb_lsize_count[SPA_MAX_FOR_16M]; uint64_t zcb_asize_count[SPA_MAX_FOR_16M]; @@ -5368,6 +5377,8 @@ typedef struct zdb_cb { int zcb_haderrors; spa_t *zcb_spa; uint32_t **zcb_vd_obsolete_counts; + avl_tree_t zcb_brt; + boolean_t zcb_brt_is_active; } zdb_cb_t; /* test if two DVA offsets from same vdev are within the same metaslab */ @@ -5662,6 +5673,45 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, zcb->zcb_asize_len[bin] += BP_GET_ASIZE(bp); zcb->zcb_asize_total += BP_GET_ASIZE(bp); + if (zcb->zcb_brt_is_active && brt_maybe_exists(zcb->zcb_spa, bp)) { + /* + * Cloned blocks are special. We need to count them, so we can + * later uncount them when reporting leaked space, and we must + * only claim them them once. + * + * To do this, we keep our own in-memory BRT. For each block + * we haven't seen before, we look it up in the real BRT and + * if its there, we note it and its refcount then proceed as + * normal. If we see the block again, we count it as a clone + * and then give it no further consideration. + */ + zdb_brt_entry_t zbre_search, *zbre; + avl_index_t where; + + zbre_search.zbre_dva = bp->blk_dva[0]; + zbre = avl_find(&zcb->zcb_brt, &zbre_search, &where); + if (zbre != NULL) { + zcb->zcb_clone_asize += BP_GET_ASIZE(bp); + zcb->zcb_clone_blocks++; + + zbre->zbre_refcount--; + if (zbre->zbre_refcount == 0) { + avl_remove(&zcb->zcb_brt, zbre); + umem_free(zbre, sizeof (zdb_brt_entry_t)); + } + return; + } + + uint64_t crefcnt = brt_entry_get_refcount(zcb->zcb_spa, bp); + if (crefcnt > 0) { + zbre = umem_zalloc(sizeof (zdb_brt_entry_t), + UMEM_NOFAIL); + zbre->zbre_dva = bp->blk_dva[0]; + zbre->zbre_refcount = crefcnt; + avl_insert(&zcb->zcb_brt, zbre, where); + } + } + if (dump_opt['L']) return; @@ -6664,6 +6714,20 @@ deleted_livelists_dump_mos(spa_t *spa) iterate_deleted_livelists(spa, dump_livelist_cb, NULL); } +static int +zdb_brt_entry_compare(const void *zcn1, const void *zcn2) +{ + const dva_t *dva1 = &((const zdb_brt_entry_t *)zcn1)->zbre_dva; + const dva_t *dva2 = &((const zdb_brt_entry_t *)zcn2)->zbre_dva; + int cmp; + + cmp = TREE_CMP(DVA_GET_VDEV(dva1), DVA_GET_VDEV(dva2)); + if (cmp == 0) + cmp = TREE_CMP(DVA_GET_OFFSET(dva1), DVA_GET_OFFSET(dva2)); + + return (cmp); +} + static int dump_block_stats(spa_t *spa) { @@ -6678,6 +6742,13 @@ dump_block_stats(spa_t *spa) zcb = umem_zalloc(sizeof (zdb_cb_t), UMEM_NOFAIL); + if (spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING)) { + avl_create(&zcb->zcb_brt, zdb_brt_entry_compare, + sizeof (zdb_brt_entry_t), + offsetof(zdb_brt_entry_t, zbre_node)); + zcb->zcb_brt_is_active = B_TRUE; + } + (void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n", (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "", (dump_opt['c'] == 1) ? "metadata " : "", @@ -6779,7 +6850,8 @@ dump_block_stats(spa_t *spa) metaslab_class_get_alloc(spa_special_class(spa)) + metaslab_class_get_alloc(spa_dedup_class(spa)) + get_unflushed_alloc_space(spa); - total_found = tzb->zb_asize - zcb->zcb_dedup_asize + + total_found = + tzb->zb_asize - zcb->zcb_dedup_asize - zcb->zcb_clone_asize + zcb->zcb_removing_size + zcb->zcb_checkpoint_size; if (total_found == total_alloc && !dump_opt['L']) { @@ -6820,6 +6892,9 @@ dump_block_stats(spa_t *spa) "bp deduped:", (u_longlong_t)zcb->zcb_dedup_asize, (u_longlong_t)zcb->zcb_dedup_blocks, (double)zcb->zcb_dedup_asize / tzb->zb_asize + 1.0); + (void) printf("\t%-16s %14llu count: %6llu\n", + "bp cloned:", (u_longlong_t)zcb->zcb_clone_asize, + (u_longlong_t)zcb->zcb_clone_blocks); (void) printf("\t%-16s %14llu used: %5.2f%%\n", "Normal class:", (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space); diff --git a/include/sys/brt.h b/include/sys/brt.h index 0761159e3f..f73df95058 100644 --- a/include/sys/brt.h +++ b/include/sys/brt.h @@ -36,6 +36,7 @@ extern "C" { #endif extern boolean_t brt_entry_decref(spa_t *spa, const blkptr_t *bp); +extern uint64_t brt_entry_get_refcount(spa_t *spa, const blkptr_t *bp); extern uint64_t brt_get_dspace(spa_t *spa); extern uint64_t brt_get_used(spa_t *spa); diff --git a/module/zfs/brt.c b/module/zfs/brt.c index 877b503a1b..e8218fb268 100644 --- a/module/zfs/brt.c +++ b/module/zfs/brt.c @@ -1544,6 +1544,37 @@ out: return (B_FALSE); } +uint64_t +brt_entry_get_refcount(spa_t *spa, const blkptr_t *bp) +{ + brt_t *brt = spa->spa_brt; + brt_vdev_t *brtvd; + brt_entry_t bre_search, *bre; + uint64_t vdevid, refcnt; + int error; + + brt_entry_fill(bp, &bre_search, &vdevid); + + brt_rlock(brt); + + brtvd = brt_vdev(brt, vdevid); + ASSERT(brtvd != NULL); + + bre = avl_find(&brtvd->bv_tree, &bre_search, NULL); + if (bre == NULL) { + error = brt_entry_lookup(brt, brtvd, &bre_search); + ASSERT(error == 0 || error == ENOENT); + if (error == ENOENT) + refcnt = 0; + else + refcnt = bre_search.bre_refcount; + } else + refcnt = bre->bre_refcount; + + brt_unlock(brt); + return (refcnt); +} + static void brt_prefetch(brt_t *brt, const blkptr_t *bp) {