zdb: include cloned blocks in block statistics
This gives `zdb -b` support for clone blocks. Previously, it didn't know what clones were, so would count their space allocation multiple times and then report leaked space (or, in debug, would assert trying to claim blocks a second time). This commit fixes those bugs, and reports the number of clones and the space "used" (saved) by them. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Kay Pedersen <mail@mkwg.de> Signed-off-by: Rob Norris <rob.norris@klarasystems.com> Sponsored-By: OpenDrives Inc. Sponsored-By: Klara Inc. Closes #15123
This commit is contained in:
parent
a21ca18d4d
commit
114a39964f
|
@ -79,6 +79,7 @@
|
||||||
#include <sys/dsl_crypt.h>
|
#include <sys/dsl_crypt.h>
|
||||||
#include <sys/dsl_scan.h>
|
#include <sys/dsl_scan.h>
|
||||||
#include <sys/btree.h>
|
#include <sys/btree.h>
|
||||||
|
#include <sys/brt.h>
|
||||||
#include <zfs_comutil.h>
|
#include <zfs_comutil.h>
|
||||||
#include <sys/zstd/zstd.h>
|
#include <sys/zstd/zstd.h>
|
||||||
|
|
||||||
|
@ -5342,12 +5343,20 @@ static const char *zdb_ot_extname[] = {
|
||||||
#define ZB_TOTAL DN_MAX_LEVELS
|
#define ZB_TOTAL DN_MAX_LEVELS
|
||||||
#define SPA_MAX_FOR_16M (SPA_MAXBLOCKSHIFT+1)
|
#define SPA_MAX_FOR_16M (SPA_MAXBLOCKSHIFT+1)
|
||||||
|
|
||||||
|
typedef struct zdb_brt_entry {
|
||||||
|
dva_t zbre_dva;
|
||||||
|
uint64_t zbre_refcount;
|
||||||
|
avl_node_t zbre_node;
|
||||||
|
} zdb_brt_entry_t;
|
||||||
|
|
||||||
typedef struct zdb_cb {
|
typedef struct zdb_cb {
|
||||||
zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
|
zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
|
||||||
uint64_t zcb_removing_size;
|
uint64_t zcb_removing_size;
|
||||||
uint64_t zcb_checkpoint_size;
|
uint64_t zcb_checkpoint_size;
|
||||||
uint64_t zcb_dedup_asize;
|
uint64_t zcb_dedup_asize;
|
||||||
uint64_t zcb_dedup_blocks;
|
uint64_t zcb_dedup_blocks;
|
||||||
|
uint64_t zcb_clone_asize;
|
||||||
|
uint64_t zcb_clone_blocks;
|
||||||
uint64_t zcb_psize_count[SPA_MAX_FOR_16M];
|
uint64_t zcb_psize_count[SPA_MAX_FOR_16M];
|
||||||
uint64_t zcb_lsize_count[SPA_MAX_FOR_16M];
|
uint64_t zcb_lsize_count[SPA_MAX_FOR_16M];
|
||||||
uint64_t zcb_asize_count[SPA_MAX_FOR_16M];
|
uint64_t zcb_asize_count[SPA_MAX_FOR_16M];
|
||||||
|
@ -5368,6 +5377,8 @@ typedef struct zdb_cb {
|
||||||
int zcb_haderrors;
|
int zcb_haderrors;
|
||||||
spa_t *zcb_spa;
|
spa_t *zcb_spa;
|
||||||
uint32_t **zcb_vd_obsolete_counts;
|
uint32_t **zcb_vd_obsolete_counts;
|
||||||
|
avl_tree_t zcb_brt;
|
||||||
|
boolean_t zcb_brt_is_active;
|
||||||
} zdb_cb_t;
|
} zdb_cb_t;
|
||||||
|
|
||||||
/* test if two DVA offsets from same vdev are within the same metaslab */
|
/* test if two DVA offsets from same vdev are within the same metaslab */
|
||||||
|
@ -5662,6 +5673,45 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
|
||||||
zcb->zcb_asize_len[bin] += BP_GET_ASIZE(bp);
|
zcb->zcb_asize_len[bin] += BP_GET_ASIZE(bp);
|
||||||
zcb->zcb_asize_total += BP_GET_ASIZE(bp);
|
zcb->zcb_asize_total += BP_GET_ASIZE(bp);
|
||||||
|
|
||||||
|
if (zcb->zcb_brt_is_active && brt_maybe_exists(zcb->zcb_spa, bp)) {
|
||||||
|
/*
|
||||||
|
* Cloned blocks are special. We need to count them, so we can
|
||||||
|
* later uncount them when reporting leaked space, and we must
|
||||||
|
* only claim them once.
|
||||||
|
*
|
||||||
|
* To do this, we keep our own in-memory BRT. For each block
|
||||||
|
* we haven't seen before, we look it up in the real BRT and
|
||||||
|
* if it's there, we note it and its refcount then proceed as
|
||||||
|
* normal. If we see the block again, we count it as a clone
|
||||||
|
* and then give it no further consideration.
|
||||||
|
*/
|
||||||
|
zdb_brt_entry_t zbre_search, *zbre;
|
||||||
|
avl_index_t where;
|
||||||
|
|
||||||
|
zbre_search.zbre_dva = bp->blk_dva[0];
|
||||||
|
zbre = avl_find(&zcb->zcb_brt, &zbre_search, &where);
|
||||||
|
if (zbre != NULL) {
|
||||||
|
zcb->zcb_clone_asize += BP_GET_ASIZE(bp);
|
||||||
|
zcb->zcb_clone_blocks++;
|
||||||
|
|
||||||
|
zbre->zbre_refcount--;
|
||||||
|
if (zbre->zbre_refcount == 0) {
|
||||||
|
avl_remove(&zcb->zcb_brt, zbre);
|
||||||
|
umem_free(zbre, sizeof (zdb_brt_entry_t));
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t crefcnt = brt_entry_get_refcount(zcb->zcb_spa, bp);
|
||||||
|
if (crefcnt > 0) {
|
||||||
|
zbre = umem_zalloc(sizeof (zdb_brt_entry_t),
|
||||||
|
UMEM_NOFAIL);
|
||||||
|
zbre->zbre_dva = bp->blk_dva[0];
|
||||||
|
zbre->zbre_refcount = crefcnt;
|
||||||
|
avl_insert(&zcb->zcb_brt, zbre, where);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (dump_opt['L'])
|
if (dump_opt['L'])
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
@ -6664,6 +6714,20 @@ deleted_livelists_dump_mos(spa_t *spa)
|
||||||
iterate_deleted_livelists(spa, dump_livelist_cb, NULL);
|
iterate_deleted_livelists(spa, dump_livelist_cb, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
zdb_brt_entry_compare(const void *zcn1, const void *zcn2)
|
||||||
|
{
|
||||||
|
const dva_t *dva1 = &((const zdb_brt_entry_t *)zcn1)->zbre_dva;
|
||||||
|
const dva_t *dva2 = &((const zdb_brt_entry_t *)zcn2)->zbre_dva;
|
||||||
|
int cmp;
|
||||||
|
|
||||||
|
cmp = TREE_CMP(DVA_GET_VDEV(dva1), DVA_GET_VDEV(dva2));
|
||||||
|
if (cmp == 0)
|
||||||
|
cmp = TREE_CMP(DVA_GET_OFFSET(dva1), DVA_GET_OFFSET(dva2));
|
||||||
|
|
||||||
|
return (cmp);
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
dump_block_stats(spa_t *spa)
|
dump_block_stats(spa_t *spa)
|
||||||
{
|
{
|
||||||
|
@ -6678,6 +6742,13 @@ dump_block_stats(spa_t *spa)
|
||||||
|
|
||||||
zcb = umem_zalloc(sizeof (zdb_cb_t), UMEM_NOFAIL);
|
zcb = umem_zalloc(sizeof (zdb_cb_t), UMEM_NOFAIL);
|
||||||
|
|
||||||
|
if (spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING)) {
|
||||||
|
avl_create(&zcb->zcb_brt, zdb_brt_entry_compare,
|
||||||
|
sizeof (zdb_brt_entry_t),
|
||||||
|
offsetof(zdb_brt_entry_t, zbre_node));
|
||||||
|
zcb->zcb_brt_is_active = B_TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
(void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
|
(void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
|
||||||
(dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
|
(dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
|
||||||
(dump_opt['c'] == 1) ? "metadata " : "",
|
(dump_opt['c'] == 1) ? "metadata " : "",
|
||||||
|
@ -6779,7 +6850,8 @@ dump_block_stats(spa_t *spa)
|
||||||
metaslab_class_get_alloc(spa_special_class(spa)) +
|
metaslab_class_get_alloc(spa_special_class(spa)) +
|
||||||
metaslab_class_get_alloc(spa_dedup_class(spa)) +
|
metaslab_class_get_alloc(spa_dedup_class(spa)) +
|
||||||
get_unflushed_alloc_space(spa);
|
get_unflushed_alloc_space(spa);
|
||||||
total_found = tzb->zb_asize - zcb->zcb_dedup_asize +
|
total_found =
|
||||||
|
tzb->zb_asize - zcb->zcb_dedup_asize - zcb->zcb_clone_asize +
|
||||||
zcb->zcb_removing_size + zcb->zcb_checkpoint_size;
|
zcb->zcb_removing_size + zcb->zcb_checkpoint_size;
|
||||||
|
|
||||||
if (total_found == total_alloc && !dump_opt['L']) {
|
if (total_found == total_alloc && !dump_opt['L']) {
|
||||||
|
@ -6820,6 +6892,9 @@ dump_block_stats(spa_t *spa)
|
||||||
"bp deduped:", (u_longlong_t)zcb->zcb_dedup_asize,
|
"bp deduped:", (u_longlong_t)zcb->zcb_dedup_asize,
|
||||||
(u_longlong_t)zcb->zcb_dedup_blocks,
|
(u_longlong_t)zcb->zcb_dedup_blocks,
|
||||||
(double)zcb->zcb_dedup_asize / tzb->zb_asize + 1.0);
|
(double)zcb->zcb_dedup_asize / tzb->zb_asize + 1.0);
|
||||||
|
(void) printf("\t%-16s %14llu count: %6llu\n",
|
||||||
|
"bp cloned:", (u_longlong_t)zcb->zcb_clone_asize,
|
||||||
|
(u_longlong_t)zcb->zcb_clone_blocks);
|
||||||
(void) printf("\t%-16s %14llu used: %5.2f%%\n", "Normal class:",
|
(void) printf("\t%-16s %14llu used: %5.2f%%\n", "Normal class:",
|
||||||
(u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
|
(u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
|
||||||
|
|
||||||
|
|
|
@ -36,6 +36,7 @@ extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern boolean_t brt_entry_decref(spa_t *spa, const blkptr_t *bp);
|
extern boolean_t brt_entry_decref(spa_t *spa, const blkptr_t *bp);
|
||||||
|
extern uint64_t brt_entry_get_refcount(spa_t *spa, const blkptr_t *bp);
|
||||||
|
|
||||||
extern uint64_t brt_get_dspace(spa_t *spa);
|
extern uint64_t brt_get_dspace(spa_t *spa);
|
||||||
extern uint64_t brt_get_used(spa_t *spa);
|
extern uint64_t brt_get_used(spa_t *spa);
|
||||||
|
|
|
@ -1544,6 +1544,37 @@ out:
|
||||||
return (B_FALSE);
|
return (B_FALSE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint64_t
|
||||||
|
brt_entry_get_refcount(spa_t *spa, const blkptr_t *bp)
|
||||||
|
{
|
||||||
|
brt_t *brt = spa->spa_brt;
|
||||||
|
brt_vdev_t *brtvd;
|
||||||
|
brt_entry_t bre_search, *bre;
|
||||||
|
uint64_t vdevid, refcnt;
|
||||||
|
int error;
|
||||||
|
|
||||||
|
brt_entry_fill(bp, &bre_search, &vdevid);
|
||||||
|
|
||||||
|
brt_rlock(brt);
|
||||||
|
|
||||||
|
brtvd = brt_vdev(brt, vdevid);
|
||||||
|
ASSERT(brtvd != NULL);
|
||||||
|
|
||||||
|
bre = avl_find(&brtvd->bv_tree, &bre_search, NULL);
|
||||||
|
if (bre == NULL) {
|
||||||
|
error = brt_entry_lookup(brt, brtvd, &bre_search);
|
||||||
|
ASSERT(error == 0 || error == ENOENT);
|
||||||
|
if (error == ENOENT)
|
||||||
|
refcnt = 0;
|
||||||
|
else
|
||||||
|
refcnt = bre_search.bre_refcount;
|
||||||
|
} else
|
||||||
|
refcnt = bre->bre_refcount;
|
||||||
|
|
||||||
|
brt_unlock(brt);
|
||||||
|
return (refcnt);
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
brt_prefetch(brt_t *brt, const blkptr_t *bp)
|
brt_prefetch(brt_t *brt, const blkptr_t *bp)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue