zdb -L should skip leak detection altogether
Currently the point of the -L option in zdb is to disable leak tracing and the loading of space maps because they are expensive, yet still do leak detection in terms of space. Unfortunately, there is a scenario where this promise is broken: if zdb -L is used on a pool where a vdev is being removed, zdb_claim_removing() will open the metaslab space maps of that device. This patch makes zdb -L skip leak detection altogether and ensures that no space maps are loaded.

Reviewed-by: Matt Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Serapheim Dimitropoulos <serapheim@delphix.com>
Closes #8335
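The fix applies one guard pattern throughout zdb.c: entry points that only matter for leak detection return early when -L is set, and helpers that should never be reached under -L assert as much. Below is a minimal self-contained sketch of that pattern, not zdb itself: dump_opt mirrors zdb's real option table, while the function names are hypothetical stand-ins for zdb_claim_removing() and the zdb_leak_init_*() helpers shown in the diff that follows.

#include <assert.h>
#include <stdio.h>

/* Stand-in for zdb's option table; dump_opt['L'] is nonzero when -L is set. */
static int dump_opt[256];

/*
 * Pattern 1: entry points that exist only for leak detection bail out
 * early under -L, before any space map could be opened.
 */
static void
claim_removing_sketch(void)
{
        if (dump_opt['L'])
                return;

        /* ... leak-detection work that opens metaslab space maps ... */
        (void) printf("loading space maps\n");
}

/*
 * Pattern 2: helpers reachable only from the leak-detection path assert
 * that -L is off (zdb itself uses ASSERT() rather than assert()).
 */
static void
leak_init_helper_sketch(void)
{
        assert(!dump_opt['L']);
        /* ... e.g. overload ms_allocatable trees, exclude checkpoint ... */
}

int
main(void)
{
        claim_removing_sketch();        /* -L unset: does the expensive work */
        leak_init_helper_sketch();      /* assertion holds */

        dump_opt['L'] = 1;              /* as if zdb was invoked with -L */
        claim_removing_sketch();        /* now returns immediately */
        return (0);
}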
parent 466f55334a
commit 21e7cf5da8

cmd/zdb/zdb.c | 255

@@ -799,12 +799,15 @@ dump_spacemap(objset_t *os, space_map_t *sm)
 	(void) printf("  smp_alloc = 0x%llx\n",
 	    (longlong_t)sm->sm_phys->smp_alloc);
 
+	if (dump_opt['d'] < 6 && dump_opt['m'] < 4)
+		return;
+
 	/*
 	 * Print out the freelist entries in both encoded and decoded form.
 	 */
 	uint8_t mapshift = sm->sm_shift;
 	int64_t alloc = 0;
-	uint64_t word;
+	uint64_t word, entry_id = 0;
 	for (uint64_t offset = 0; offset < space_map_length(sm);
 	    offset += sizeof (word)) {
@@ -812,11 +815,12 @@ dump_spacemap(objset_t *os, space_map_t *sm)
 		    sizeof (word), &word, DMU_READ_PREFETCH));
 
 		if (sm_entry_is_debug(word)) {
-			(void) printf("\t    [%6llu] %s: txg %llu, pass %llu\n",
-			    (u_longlong_t)(offset / sizeof (word)),
+			(void) printf("\t    [%6llu] %s: txg %llu pass %llu\n",
+			    (u_longlong_t)entry_id,
 			    ddata[SM_DEBUG_ACTION_DECODE(word)],
 			    (u_longlong_t)SM_DEBUG_TXG_DECODE(word),
 			    (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(word));
+			entry_id++;
 			continue;
 		}
@@ -854,7 +858,7 @@ dump_spacemap(objset_t *os, space_map_t *sm)
 		(void) printf("\t    [%6llu]    %c  range:"
 		    " %010llx-%010llx size: %06llx vdev: %06llu words: %u\n",
-		    (u_longlong_t)(offset / sizeof (word)),
+		    (u_longlong_t)entry_id,
 		    entry_type, (u_longlong_t)entry_off,
 		    (u_longlong_t)(entry_off + entry_run),
 		    (u_longlong_t)entry_run,
@@ -864,6 +868,7 @@ dump_spacemap(objset_t *os, space_map_t *sm)
 			alloc += entry_run;
 		else
 			alloc -= entry_run;
+		entry_id++;
 	}
 	if ((uint64_t)alloc != space_map_allocated(sm)) {
 		(void) printf("space_map_object alloc (%lld) INCONSISTENT "
@@ -929,11 +934,8 @@ dump_metaslab(metaslab_t *msp)
 		    SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
 	}
 
-	if (dump_opt['d'] > 5 || dump_opt['m'] > 3) {
-		ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift));
-
-		dump_spacemap(spa->spa_meta_objset, msp->ms_sm);
-	}
+	ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift));
+	dump_spacemap(spa->spa_meta_objset, msp->ms_sm);
 }
 
 static void
@@ -3599,6 +3601,9 @@ claim_segment_cb(void *arg, uint64_t offset, uint64_t size)
 static void
 zdb_claim_removing(spa_t *spa, zdb_cb_t *zcb)
 {
+	if (dump_opt['L'])
+		return;
+
 	if (spa->spa_vdev_removal == NULL)
 		return;
 
@@ -3708,6 +3713,8 @@ zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
 	int error;
 	int p;
 
+	ASSERT(!dump_opt['L']);
+
 	bzero(&ddb, sizeof (ddb));
 	while ((error = ddt_walk(spa, &ddb, &dde)) == 0) {
 		blkptr_t blk;
@@ -3731,12 +3738,10 @@ zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
 				zcb->zcb_dedup_blocks++;
 			}
 		}
-		if (!dump_opt['L']) {
-			ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
-			ddt_enter(ddt);
-			VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
-			ddt_exit(ddt);
-		}
+		ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
+		ddt_enter(ddt);
+		VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
+		ddt_exit(ddt);
 	}
 
 	ASSERT(error == ENOENT);
@@ -3840,6 +3845,8 @@ zdb_leak_init_vdev_exclude_checkpoint(vdev_t *vd, zdb_cb_t *zcb)
 static void
 zdb_leak_init_exclude_checkpoint(spa_t *spa, zdb_cb_t *zcb)
 {
+	ASSERT(!dump_opt['L']);
+
 	vdev_t *rvd = spa->spa_root_vdev;
 	for (uint64_t c = 0; c < rvd->vdev_children; c++) {
 		ASSERT3U(c, ==, rvd->vdev_child[c]->vdev_id);
@@ -3936,6 +3943,8 @@ load_indirect_ms_allocatable_tree(vdev_t *vd, metaslab_t *msp,
 static void
 zdb_leak_init_prepare_indirect_vdevs(spa_t *spa, zdb_cb_t *zcb)
 {
+	ASSERT(!dump_opt['L']);
+
 	vdev_t *rvd = spa->spa_root_vdev;
 	for (uint64_t c = 0; c < rvd->vdev_children; c++) {
 		vdev_t *vd = rvd->vdev_child[c];
@@ -3982,67 +3991,63 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
 {
 	zcb->zcb_spa = spa;
 
-	if (!dump_opt['L']) {
-		dsl_pool_t *dp = spa->spa_dsl_pool;
-		vdev_t *rvd = spa->spa_root_vdev;
-
-		/*
-		 * We are going to be changing the meaning of the metaslab's
-		 * ms_allocatable. Ensure that the allocator doesn't try to
-		 * use the tree.
-		 */
-		spa->spa_normal_class->mc_ops = &zdb_metaslab_ops;
-		spa->spa_log_class->mc_ops = &zdb_metaslab_ops;
-
-		zcb->zcb_vd_obsolete_counts =
-		    umem_zalloc(rvd->vdev_children * sizeof (uint32_t *),
-		    UMEM_NOFAIL);
-
-		/*
-		 * For leak detection, we overload the ms_allocatable trees
-		 * to contain allocated segments instead of free segments.
-		 * As a result, we can't use the normal metaslab_load/unload
-		 * interfaces.
-		 */
-		zdb_leak_init_prepare_indirect_vdevs(spa, zcb);
-		load_concrete_ms_allocatable_trees(spa, SM_ALLOC);
-
-		/*
-		 * On load_concrete_ms_allocatable_trees() we loaded all the
-		 * allocated entries from the ms_sm to the ms_allocatable for
-		 * each metaslab. If the pool has a checkpoint or is in the
-		 * middle of discarding a checkpoint, some of these blocks
-		 * may have been freed but their ms_sm may not have been
-		 * updated because they are referenced by the checkpoint. In
-		 * order to avoid false-positives during leak-detection, we
-		 * go through the vdev's checkpoint space map and exclude all
-		 * its entries from their relevant ms_allocatable.
-		 *
-		 * We also aggregate the space held by the checkpoint and add
-		 * it to zcb_checkpoint_size.
-		 *
-		 * Note that at this point we are also verifying that all the
-		 * entries on the checkpoint_sm are marked as allocated in
-		 * the ms_sm of their relevant metaslab.
-		 * [see comment in checkpoint_sm_exclude_entry_cb()]
-		 */
-		zdb_leak_init_exclude_checkpoint(spa, zcb);
-
-		/* for cleaner progress output */
-		(void) fprintf(stderr, "\n");
-
-		if (bpobj_is_open(&dp->dp_obsolete_bpobj)) {
-			ASSERT(spa_feature_is_enabled(spa,
-			    SPA_FEATURE_DEVICE_REMOVAL));
-			(void) bpobj_iterate_nofree(&dp->dp_obsolete_bpobj,
-			    increment_indirect_mapping_cb, zcb, NULL);
-		}
-	} else {
-		/*
-		 * If leak tracing is disabled, we still need to consider
-		 * any checkpointed space in our space verification.
-		 */
-		zcb->zcb_checkpoint_size += spa_get_checkpoint_space(spa);
-	}
+	if (dump_opt['L'])
+		return;
+
+	dsl_pool_t *dp = spa->spa_dsl_pool;
+	vdev_t *rvd = spa->spa_root_vdev;
+
+	/*
+	 * We are going to be changing the meaning of the metaslab's
+	 * ms_allocatable. Ensure that the allocator doesn't try to
+	 * use the tree.
+	 */
+	spa->spa_normal_class->mc_ops = &zdb_metaslab_ops;
+	spa->spa_log_class->mc_ops = &zdb_metaslab_ops;
+
+	zcb->zcb_vd_obsolete_counts =
+	    umem_zalloc(rvd->vdev_children * sizeof (uint32_t *),
+	    UMEM_NOFAIL);
+
+	/*
+	 * For leak detection, we overload the ms_allocatable trees
+	 * to contain allocated segments instead of free segments.
+	 * As a result, we can't use the normal metaslab_load/unload
+	 * interfaces.
+	 */
+	zdb_leak_init_prepare_indirect_vdevs(spa, zcb);
+	load_concrete_ms_allocatable_trees(spa, SM_ALLOC);
+
+	/*
+	 * On load_concrete_ms_allocatable_trees() we loaded all the
+	 * allocated entries from the ms_sm to the ms_allocatable for
+	 * each metaslab. If the pool has a checkpoint or is in the
+	 * middle of discarding a checkpoint, some of these blocks
+	 * may have been freed but their ms_sm may not have been
+	 * updated because they are referenced by the checkpoint. In
+	 * order to avoid false-positives during leak-detection, we
+	 * go through the vdev's checkpoint space map and exclude all
+	 * its entries from their relevant ms_allocatable.
+	 *
+	 * We also aggregate the space held by the checkpoint and add
+	 * it to zcb_checkpoint_size.
+	 *
+	 * Note that at this point we are also verifying that all the
+	 * entries on the checkpoint_sm are marked as allocated in
+	 * the ms_sm of their relevant metaslab.
+	 * [see comment in checkpoint_sm_exclude_entry_cb()]
+	 */
+	zdb_leak_init_exclude_checkpoint(spa, zcb);
+	ASSERT3U(zcb->zcb_checkpoint_size, ==, spa_get_checkpoint_space(spa));
+
+	/* for cleaner progress output */
+	(void) fprintf(stderr, "\n");
+
+	if (bpobj_is_open(&dp->dp_obsolete_bpobj)) {
+		ASSERT(spa_feature_is_enabled(spa,
+		    SPA_FEATURE_DEVICE_REMOVAL));
+		(void) bpobj_iterate_nofree(&dp->dp_obsolete_bpobj,
+		    increment_indirect_mapping_cb, zcb, NULL);
+	}
 
 	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
@@ -4125,51 +4130,54 @@ zdb_check_for_obsolete_leaks(vdev_t *vd, zdb_cb_t *zcb)
 static boolean_t
 zdb_leak_fini(spa_t *spa, zdb_cb_t *zcb)
 {
+	if (dump_opt['L'])
+		return (B_FALSE);
+
 	boolean_t leaks = B_FALSE;
-	if (!dump_opt['L']) {
-		vdev_t *rvd = spa->spa_root_vdev;
-		for (unsigned c = 0; c < rvd->vdev_children; c++) {
-			vdev_t *vd = rvd->vdev_child[c];
-			ASSERTV(metaslab_group_t *mg = vd->vdev_mg);
-
-			if (zcb->zcb_vd_obsolete_counts[c] != NULL) {
-				leaks |= zdb_check_for_obsolete_leaks(vd, zcb);
-			}
-
-			for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
-				metaslab_t *msp = vd->vdev_ms[m];
-				ASSERT3P(mg, ==, msp->ms_group);
-
-				/*
-				 * ms_allocatable has been overloaded
-				 * to contain allocated segments. Now that
-				 * we finished traversing all blocks, any
-				 * block that remains in the ms_allocatable
-				 * represents an allocated block that we
-				 * did not claim during the traversal.
-				 * Claimed blocks would have been removed
-				 * from the ms_allocatable. For indirect
-				 * vdevs, space remaining in the tree
-				 * represents parts of the mapping that are
-				 * not referenced, which is not a bug.
-				 */
-				if (vd->vdev_ops == &vdev_indirect_ops) {
-					range_tree_vacate(msp->ms_allocatable,
-					    NULL, NULL);
-				} else {
-					range_tree_vacate(msp->ms_allocatable,
-					    zdb_leak, vd);
-				}
-
-				if (msp->ms_loaded)
-					msp->ms_loaded = B_FALSE;
-			}
-		}
-
-		umem_free(zcb->zcb_vd_obsolete_counts,
-		    rvd->vdev_children * sizeof (uint32_t *));
-		zcb->zcb_vd_obsolete_counts = NULL;
-	}
+	vdev_t *rvd = spa->spa_root_vdev;
+	for (unsigned c = 0; c < rvd->vdev_children; c++) {
+		vdev_t *vd = rvd->vdev_child[c];
+		ASSERTV(metaslab_group_t *mg = vd->vdev_mg);
+
+		if (zcb->zcb_vd_obsolete_counts[c] != NULL) {
+			leaks |= zdb_check_for_obsolete_leaks(vd, zcb);
+		}
+
+		for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
+			metaslab_t *msp = vd->vdev_ms[m];
+			ASSERT3P(mg, ==, msp->ms_group);
+
+			/*
+			 * ms_allocatable has been overloaded
+			 * to contain allocated segments. Now that
+			 * we finished traversing all blocks, any
+			 * block that remains in the ms_allocatable
+			 * represents an allocated block that we
+			 * did not claim during the traversal.
+			 * Claimed blocks would have been removed
+			 * from the ms_allocatable. For indirect
+			 * vdevs, space remaining in the tree
+			 * represents parts of the mapping that are
+			 * not referenced, which is not a bug.
+			 */
+			if (vd->vdev_ops == &vdev_indirect_ops) {
+				range_tree_vacate(msp->ms_allocatable,
+				    NULL, NULL);
+			} else {
+				range_tree_vacate(msp->ms_allocatable,
+				    zdb_leak, vd);
+			}
+
+			if (msp->ms_loaded) {
+				msp->ms_loaded = B_FALSE;
+			}
+		}
+	}
+
+	umem_free(zcb->zcb_vd_obsolete_counts,
+	    rvd->vdev_children * sizeof (uint32_t *));
+	zcb->zcb_vd_obsolete_counts = NULL;
+
 	return (leaks);
 }
@@ -4210,12 +4218,16 @@ dump_block_stats(spa_t *spa)
 	    !dump_opt['L'] ? "nothing leaked " : "");
 
 	/*
-	 * Load all space maps as SM_ALLOC maps, then traverse the pool
-	 * claiming each block we discover. If the pool is perfectly
-	 * consistent, the space maps will be empty when we're done.
-	 * Anything left over is a leak; any block we can't claim (because
-	 * it's not part of any space map) is a double allocation,
-	 * reference to a freed block, or an unclaimed log block.
+	 * When leak detection is enabled we load all space maps as SM_ALLOC
+	 * maps, then traverse the pool claiming each block we discover. If
+	 * the pool is perfectly consistent, the segment trees will be empty
+	 * when we're done. Anything left over is a leak; any block we can't
+	 * claim (because it's not part of any space map) is a double
+	 * allocation, reference to a freed block, or an unclaimed log block.
+	 *
+	 * When leak detection is disabled (-L option) we still traverse the
+	 * pool claiming each block we discover, but we skip opening any space
+	 * maps.
 	 */
 	bzero(&zcb, sizeof (zdb_cb_t));
 	zdb_leak_init(spa, &zcb);
@@ -4296,11 +4308,10 @@ dump_block_stats(spa_t *spa)
 	total_found = tzb->zb_asize - zcb.zcb_dedup_asize +
 	    zcb.zcb_removing_size + zcb.zcb_checkpoint_size;
 
-	if (total_found == total_alloc) {
-		if (!dump_opt['L'])
-			(void) printf("\n\tNo leaks (block sum matches space"
-			    " maps exactly)\n");
-	} else {
+	if (total_found == total_alloc && !dump_opt['L']) {
+		(void) printf("\n\tNo leaks (block sum matches space"
+		    " maps exactly)\n");
+	} else if (!dump_opt['L']) {
 		(void) printf("block traversal size %llu != alloc %llu "
 		    "(%s %lld)\n",
 		    (u_longlong_t)total_found,
@@ -5022,6 +5033,8 @@ verify_checkpoint_ms_spacemaps(spa_t *checkpoint, spa_t *current)
 static void
 verify_checkpoint_blocks(spa_t *spa)
 {
+	ASSERT(!dump_opt['L']);
+
 	spa_t *checkpoint_spa;
 	char *checkpoint_pool;
 	nvlist_t *config = NULL;

man/man8/zdb.8

@@ -10,7 +10,7 @@
 .\"
 .\"
 .\" Copyright 2012, Richard Lowe.
-.\" Copyright (c) 2012, 2017 by Delphix. All rights reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
 .\" Copyright 2017 Nexenta Systems, Inc.
 .\" Copyright (c) 2017 Lawrence Livermore National Security, LLC.
 .\" Copyright (c) 2017 Intel Corporation.
@@ -194,7 +194,7 @@ If the
 option is also specified, also display the uberblocks on this device. Specify
 multiple times to increase verbosity.
 .It Fl L
-Disable leak tracing and the loading of space maps.
+Disable leak detection and the loading of space maps.
 By default,
 .Nm
 verifies that all non-free blocks are referenced, which can be very expensive.
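In practice, leak detection runs as part of zdb's block traversal (the -b family of options), so a plausible invocation that benefits from this change, assuming a pool named tank with a device removal in flight, would be:

    zdb -bL tank

With this patch that command still traverses and claims every block, but it never opens a metaslab space map, so it stays cheap even while spa_vdev_removal is active.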