From c161360dce0897794d8004cd0dd37fd27aabe261 Mon Sep 17 00:00:00 2001 From: John Poduska Date: Wed, 13 May 2020 13:54:27 -0400 Subject: [PATCH] Resilver restarts unnecessarily when it encounters errors When a resilver finishes, vdev_dtl_reassess is called to hopefully excise DTL_MISSING (amongst other things). If there are errors during the resilver, they are tracked in DTL_SCRUB, as spelled out in the block comment in vdev.c. DTL_SCRUB is in-core only, so it can only be used if the pool was online for the whole resilver. This state is tracked with the spa_scrub_started flag, which only gets set when the scan is initialized. Unfortunately, this flag gets cleared right before vdev_dtl_reassess gets called, so if there are any errors during the scan, DTL_MISSING will never get excised and the resilver will just continually restart. This fix simply moves clearing that flag until after the call to vdev_dtl_reasses. In addition, if a pool is imported and already has scn_errors > 0, this change will restart the resilver immediately instead of doing the rest of the scan and then restarting it from the beginning. On the other hand, if scn_errors == 0 at import, then no errors have been encountered so far, so the spa_scrub_started flag can be safely set. A test has been added to verify that resilver does not restart when relevant DTL's are available. Reviewed-by: Brian Behlendorf Reviewed-by: Paul Zuchowski Signed-off-by: John Poduska Closes #10291 --- module/zfs/dsl_scan.c | 23 +++- module/zfs/vdev.c | 22 +++- tests/runfiles/linux.run | 2 +- .../tests/functional/resilver/Makefile.am | 3 +- .../resilver/resilver_restart_002.ksh | 102 ++++++++++++++++++ 5 files changed, 148 insertions(+), 4 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/resilver/resilver_restart_002.ksh diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 26900523e5..cf8379100d 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -540,6 +540,22 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg) zfs_dbgmsg("new-style scrub was modified " "by old software; restarting in txg %llu", (longlong_t)scn->scn_restart_txg); + } else if (dsl_scan_resilvering(dp)) { + /* + * If a resilver is in progress and there are already + * errors, restart it instead of finishing this scan and + * then restarting it. If there haven't been any errors + * then remember that the incore DTL is valid. + */ + if (scn->scn_phys.scn_errors > 0) { + scn->scn_restart_txg = txg; + zfs_dbgmsg("resilver can't excise DTL_MISSING " + "when finished; restarting in txg %llu", + (u_longlong_t)scn->scn_restart_txg); + } else { + /* it's safe to excise DTL when finished */ + spa->spa_scrub_started = B_TRUE; + } } } @@ -881,7 +897,6 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) "errors=%llu", spa_get_errlog_size(spa)); if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { - spa->spa_scrub_started = B_FALSE; spa->spa_scrub_active = B_FALSE; /* @@ -908,6 +923,12 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) } spa_errlog_rotate(spa); + /* + * Don't clear flag until after vdev_dtl_reassess to ensure that + * DTL_MISSING will get updated when possible. + */ + spa->spa_scrub_started = B_FALSE; + /* * We may have finished replacing a device. * Let the async thread assess this and handle the detach. diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index a68c0dfa73..f16e630860 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -2566,7 +2566,6 @@ vdev_dtl_should_excise(vdev_t *vd) spa_t *spa = vd->vdev_spa; dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan; - ASSERT0(scn->scn_phys.scn_errors); ASSERT0(vd->vdev_children); if (vd->vdev_state < VDEV_STATE_DEGRADED) @@ -2617,6 +2616,7 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done) if (vd->vdev_ops->vdev_op_leaf) { dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan; + boolean_t wasempty = B_TRUE; mutex_enter(&vd->vdev_dtl_lock); @@ -2626,6 +2626,18 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done) if (zfs_scan_ignore_errors && scn) scn->scn_phys.scn_errors = 0; + if (scrub_txg != 0 && + !range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) { + wasempty = B_FALSE; + zfs_dbgmsg("guid:%llu txg:%llu scrub:%llu started:%d " + "dtl:%llu/%llu errors:%llu", + (u_longlong_t)vd->vdev_guid, (u_longlong_t)txg, + (u_longlong_t)scrub_txg, spa->spa_scrub_started, + (u_longlong_t)vdev_dtl_min(vd), + (u_longlong_t)vdev_dtl_max(vd), + (u_longlong_t)(scn ? scn->scn_phys.scn_errors : 0)); + } + /* * If we've completed a scan cleanly then determine * if this vdev should remove any DTLs. We only want to @@ -2662,6 +2674,14 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done) space_reftree_generate_map(&reftree, vd->vdev_dtl[DTL_MISSING], 1); space_reftree_destroy(&reftree); + + if (!range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) { + zfs_dbgmsg("update DTL_MISSING:%llu/%llu", + (u_longlong_t)vdev_dtl_min(vd), + (u_longlong_t)vdev_dtl_max(vd)); + } else if (!wasempty) { + zfs_dbgmsg("DTL_MISSING is now empty"); + } } range_tree_vacate(vd->vdev_dtl[DTL_PARTIAL], NULL, NULL); range_tree_walk(vd->vdev_dtl[DTL_MISSING], diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 107e18f4ec..d4c4bf4845 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -797,7 +797,7 @@ tests = ['reservation_001_pos', 'reservation_002_pos', 'reservation_003_pos', tags = ['functional', 'reservation'] [tests/functional/resilver] -tests = ['resilver_restart_001'] +tests = ['resilver_restart_001', 'resilver_restart_002'] tags = ['functional', 'resilver'] [tests/functional/rootpool] diff --git a/tests/zfs-tests/tests/functional/resilver/Makefile.am b/tests/zfs-tests/tests/functional/resilver/Makefile.am index 465d8f3a3a..38136a843a 100644 --- a/tests/zfs-tests/tests/functional/resilver/Makefile.am +++ b/tests/zfs-tests/tests/functional/resilver/Makefile.am @@ -2,7 +2,8 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/resilver dist_pkgdata_SCRIPTS = \ setup.ksh \ cleanup.ksh \ - resilver_restart_001.ksh + resilver_restart_001.ksh \ + resilver_restart_002.ksh dist_pkgdata_DATA = \ resilver.cfg diff --git a/tests/zfs-tests/tests/functional/resilver/resilver_restart_002.ksh b/tests/zfs-tests/tests/functional/resilver/resilver_restart_002.ksh new file mode 100755 index 0000000000..9ea3215a4b --- /dev/null +++ b/tests/zfs-tests/tests/functional/resilver/resilver_restart_002.ksh @@ -0,0 +1,102 @@ +#!/bin/ksh -p + +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2020, Datto Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/resilver/resilver.cfg + +# +# DESCRIPTION: +# Testing resilver completes when scan errors are encountered, but relevant +# DTL's have not been lost. +# +# STRATEGY: +# 1. Create a pool (1k recordsize) +# 2. Create a 32m file (32k records) +# 3. Inject an error halfway through the file +# 4. Start a resilver, ensure the error is triggered and that the resilver +# does not restart after finishing +# +# NB: use legacy scanning to ensure scan of specific block causes error +# + +function cleanup +{ + log_must zinject -c all + destroy_pool $TESTPOOL + rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE + log_must set_tunable32 zfs_scan_legacy $ORIG_SCAN_LEGACY +} + +log_assert "Check for resilver restarts caused by scan errors" + +ORIG_SCAN_LEGACY=$(get_tunable zfs_scan_legacy) + +log_onexit cleanup + +# use legacy scan to ensure injected error will be triggered +log_must set_tunable32 zfs_scan_legacy 1 + + # create the pool and a 32M file (32k blocks) +log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[0]} $SPARE_VDEV_FILE +log_must zpool create -f -O recordsize=1k $TESTPOOL ${VDEV_FILES[0]} +log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=1M count=32 > /dev/null 2>&1 + +# determine objset/object +objset=$(zdb -d $TESTPOOL/ | sed -ne 's/.*ID \([0-9]*\).*/\1/p') +object=$(ls -i /$TESTPOOL/file | awk '{print $1}') + +# inject event to cause error during resilver +log_must zinject -b `printf "%x:%x:0:3fff" $objset $object` $TESTPOOL + +# clear events and start resilver +log_must zpool events -c +log_must zpool attach $TESTPOOL ${VDEV_FILES[0]} $SPARE_VDEV_FILE + +log_note "waiting for read errors to start showing up" +for iter in {0..59} +do + zpool sync $TESTPOOL + err=$(zpool status $TESTPOOL | grep ${VDEV_FILES[0]} | awk '{print $3}') + (( $err > 0 )) && break + sleep 1 +done + +(( $err == 0 )) && log_fail "Unable to induce errors in resilver" + +log_note "waiting for resilver to finish" +for iter in {0..59} +do + finish=$(zpool events | grep "sysevent.fs.zfs.resilver_finish" | wc -l) + (( $finish > 0 )) && break + sleep 1 +done + +(( $finish == 0 )) && log_fail "resilver took too long to finish" + +# wait a few syncs to ensure that zfs does not restart the resilver +log_must zpool sync $TESTPOOL +log_must zpool sync $TESTPOOL + +# check if resilver was restarted +start=$(zpool events | grep "sysevent.fs.zfs.resilver_start" | wc -l) +(( $start != 1 )) && log_fail "resilver restarted unnecessarily" + +log_pass "Resilver did not restart unnecessarily from scan errors"