From ce5fb2a7c6dfaa0f305350225e064e9f536bd5a4 Mon Sep 17 00:00:00 2001
From: Brian Behlendorf
Date: Fri, 18 Jan 2019 09:47:55 -0800
Subject: [PATCH] ztest: scrub verification

By design ztest will never inject non-repairable damage into the pool.
Update the ztest_scrub() test case such that it waits for the scrub to
complete and verifies the pool is always repairable.

After enabling scrub verification, two scenarios were encountered which
are the result of how ztest manages failure injection.

The first case is straightforward and pertains to detaching a mirror
vdev.  In this case, the pool must always be scrubbed prior to the
detach.  Failure to do so can potentially lock in previously repairable
data corruption by removing all good copies of a block, leaving only
damaged ones.

The second is a little more subtle.  The child/offset selection logic
in ztest_fault_inject() depends on the calculated number of leaves
always remaining constant between injection passes.  This is true
within a single execution of ztest, but when using zloop.sh random
values are selected for each restart.  Therefore, when ztest imports an
existing pool it must be scrubbed before failure injection can be
safely enabled.  Otherwise it is possible that it will inject
non-repairable damage.

Reviewed by: Matt Ahrens
Reviewed by: Tom Caputi
Signed-off-by: Brian Behlendorf
Closes #8269
---
 cmd/ztest/ztest.c | 69 ++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 63 insertions(+), 6 deletions(-)

diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c
index 111ed98353..e080f7bb44 100644
--- a/cmd/ztest/ztest.c
+++ b/cmd/ztest/ztest.c
@@ -476,6 +476,7 @@ static ztest_ds_t *ztest_ds;
 
 static kmutex_t ztest_vdev_lock;
 static boolean_t ztest_device_removal_active = B_FALSE;
+static boolean_t ztest_pool_scrubbed = B_FALSE;
 static kmutex_t ztest_checkpoint_lock;
 
 /*
@@ -515,6 +516,7 @@ enum ztest_object {
 };
 
 static void usage(boolean_t) __NORETURN;
+static int ztest_scrub_impl(spa_t *spa);
 
 /*
  * These libumem hooks provide a reasonable set of defaults for the allocator's
@@ -3427,10 +3429,17 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
 	pguid = pvd->vdev_guid;
 
 	/*
-	 * If oldvd has siblings, then half of the time, detach it.
+	 * If oldvd has siblings, then half of the time, detach it.  Prior
+	 * to the detach, the pool is scrubbed in order to prevent creating
+	 * unrepairable blocks as a result of the data corruption injection.
 	 */
 	if (oldvd_has_siblings && ztest_random(2) == 0) {
 		spa_config_exit(spa, SCL_ALL, FTAG);
+
+		error = ztest_scrub_impl(spa);
+		if (error)
+			goto out;
+
 		error = spa_vdev_detach(spa, oldguid, pguid, B_FALSE);
 		if (error != 0 && error != ENODEV && error != EBUSY &&
 		    error != ENOTSUP && error != ZFS_ERR_CHECKPOINT_EXISTS &&
@@ -5757,6 +5766,19 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
 
 	ASSERT(leaves >= 1);
 
+	/*
+	 * While ztest is running the number of leaves will not change.  This
+	 * is critical for the fault injection logic as it determines where
+	 * errors can be safely injected such that they are always repairable.
+	 *
+	 * When restarting ztest a different number of leaves may be requested
+	 * which will shift the regions to be damaged.  This is fine as long
+	 * as the pool has been scrubbed prior to using the new mapping.
+	 * Failure to do so can result in non-repairable damage being injected.
+	 */
+	if (ztest_pool_scrubbed == B_FALSE)
+		goto out;
+
 	/*
 	 * Grab the name lock as reader. There are some operations
 	 * which don't like to have their vdevs changed while
@@ -6113,6 +6135,32 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
 	umem_free(od, sizeof (ztest_od_t));
 }
 
+/*
+ * By design ztest will never inject uncorrectable damage into the pool.
+ * Issue a scrub, wait for it to complete, and verify there is never any
+ * persistent damage.
+ *
+ * Only after a full scrub has been completed is it safe to start injecting
+ * data corruption.  See the comment in ztest_fault_inject().
+ */
+static int
+ztest_scrub_impl(spa_t *spa)
+{
+	int error = spa_scan(spa, POOL_SCAN_SCRUB);
+	if (error)
+		return (error);
+
+	while (dsl_scan_scrubbing(spa_get_dsl(spa)))
+		txg_wait_synced(spa_get_dsl(spa), 0);
+
+	if (spa_get_errlog_size(spa) > 0)
+		return (ECKSUM);
+
+	ztest_pool_scrubbed = B_TRUE;
+
+	return (0);
+}
+
 /*
  * Scrub the pool.
  */
@@ -6121,6 +6169,7 @@ void
 ztest_scrub(ztest_ds_t *zd, uint64_t id)
 {
 	spa_t *spa = ztest_spa;
+	int error;
 
 	/*
 	 * Scrub in progress by device removal.
@@ -6128,9 +6177,16 @@ ztest_scrub(ztest_ds_t *zd, uint64_t id)
 	if (ztest_device_removal_active)
 		return;
 
+	/*
+	 * Start a scrub, wait a moment, then force a restart.
+	 */
 	(void) spa_scan(spa, POOL_SCAN_SCRUB);
-	(void) poll(NULL, 0, 100); /* wait a moment, then force a restart */
-	(void) spa_scan(spa, POOL_SCAN_SCRUB);
+	(void) poll(NULL, 0, 100);
+
+	error = ztest_scrub_impl(spa);
+	if (error == EBUSY)
+		error = 0;
+	ASSERT0(error);
 }
 
 /*
@@ -7014,9 +7070,10 @@ ztest_run(ztest_shared_t *zs)
 		while (spa->spa_removing_phys.sr_state == DSS_SCANNING)
 			txg_wait_synced(spa_get_dsl(spa), 0);
 
-		(void) spa_scan(spa, POOL_SCAN_SCRUB);
-		while (dsl_scan_scrubbing(spa_get_dsl(spa)))
-			txg_wait_synced(spa_get_dsl(spa), 0);
+		error = ztest_scrub_impl(spa);
+		if (error == EBUSY)
+			error = 0;
+		ASSERT0(error);
 	}
 
 	run_threads = umem_zalloc(ztest_opts.zo_threads * sizeof (kthread_t *),