From f8b3efbf30cae26401227fadc2833e273628a0d7 Mon Sep 17 00:00:00 2001 From: Pavel Snajdr Date: Tue, 23 Jan 2024 19:16:10 +0100 Subject: [PATCH 1/8] Defer resilver only when progress is above a threshold Restart a resilver from scratch, if the current one in progress is below a new tunable, zfs_resilver_defer_percent (defaulting to 10%). The original rationale for deferring additional resilvers, when there is already one in progress, was to help achieving data redundancy sooner for the data that gets scanned at the end of the resilver. But in case the admin wants to attach multiple disks to a single vdev, it wasn't immediately obvious the admin is supposed to run `zpool resilver` afterwards to reset the deferred resilvers and start a new one from scratch. Signed-off-by: Pavel Snajdr --- man/man4/zfs.4 | 7 +++++++ module/zfs/dsl_scan.c | 38 ++++++++++++++++++++++++++++++++------ 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index 30c168253f..c554cfc512 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -1797,6 +1797,13 @@ Ignore the feature, causing an operation that would start a resilver to immediately restart the one in progress. . +.It Sy zfs_resilver_defer_percent Ns = Ns Sy 10 Ns | Ns % Pq uint +If the ongoing resilver progress is below this threshold, a new resilver will +restart from scratch instead of being deferred after the current one finishes, +even if the +.Sy resilver_defer +feature is enabled. +. .It Sy zfs_resilver_min_time_ms Ns = Ns Sy 3000 Ns ms Po 3 s Pc Pq uint Resilvers are processed by the sync thread. While resilvering, it will spend at least this much time diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 060a5cc36d..c7ae183558 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -212,6 +212,9 @@ static uint64_t zfs_max_async_dedup_frees = 100000; /* set to disable resilver deferring */ static int zfs_resilver_disable_defer = B_FALSE; +/* Don't defer a resilver if the one in progress only got this far: */ +static uint_t zfs_resilver_defer_percent = 10; + /* * We wait a few txgs after importing a pool to begin scanning so that * the import / mounting code isn't held up by scrub / resilver IO. @@ -4260,19 +4263,39 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) dsl_scan_t *scn = dp->dp_scan; spa_t *spa = dp->dp_spa; state_sync_type_t sync_type = SYNC_OPTIONAL; - - if (spa->spa_resilver_deferred && - !spa_feature_is_active(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER)) - spa_feature_incr(spa, SPA_FEATURE_RESILVER_DEFER, tx); + uint64_t to_issue, issued; + int restart_early; /* * Check for scn_restart_txg before checking spa_load_state, so * that we can restart an old-style scan while the pool is being * imported (see dsl_scan_init). We also restart scans if there * is a deferred resilver and the user has manually disabled - * deferred resilvers via the tunable. + * deferred resilvers via the tunable, or if the curent scan progress + * is below zfs_resilver_defer_percent. */ - if (dsl_scan_restarting(scn, tx) || + + /* + * Taken from spa_misc.c spa_scan_get_stats(): + */ + to_issue = scn->scn_phys.scn_to_examine - scn->scn_phys.scn_skipped; + issued = scn->scn_issued_before_pass + spa->spa_scan_pass_issued; + + /* + * Make sure we're not in a restart loop and check the threshold + */ + restart_early = spa->spa_resilver_deferred && + !dsl_scan_restarting(scn, tx) && + (issued < (to_issue * zfs_resilver_defer_percent / 100)); + + /* + * Only increment the feature if we're not about to restart early + */ + if (!restart_early && spa->spa_resilver_deferred && + !spa_feature_is_active(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER)) + spa_feature_incr(spa, SPA_FEATURE_RESILVER_DEFER, tx); + + if (dsl_scan_restarting(scn, tx) || restart_early || (spa->spa_resilver_deferred && zfs_resilver_disable_defer)) { pool_scan_func_t func = POOL_SCAN_SCRUB; dsl_scan_done(scn, B_FALSE, tx); @@ -5258,6 +5281,9 @@ ZFS_MODULE_PARAM(zfs, zfs_, scan_report_txgs, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, resilver_disable_defer, INT, ZMOD_RW, "Process all resilvers immediately"); +ZFS_MODULE_PARAM(zfs, zfs_, resilver_defer_percent, UINT, ZMOD_RW, + "Issued IO percent complete after which resilvers are deferred"); + ZFS_MODULE_PARAM(zfs, zfs_, scrub_error_blocks_per_txg, UINT, ZMOD_RW, "Error blocks to be scrubbed in one txg"); /* END CSTYLED */ From 8100df022b44f98ba4310b1dec6e08d2b81b2ec6 Mon Sep 17 00:00:00 2001 From: Pavel Snajdr Date: Wed, 24 Jan 2024 21:16:44 +0100 Subject: [PATCH 2/8] Defer resilver only when progress is above a threshold v2 Fix the restart_early condition to avoid restart loop on pool export. --- module/zfs/dsl_scan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index c7ae183558..4bd36c336b 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -4284,7 +4284,8 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) /* * Make sure we're not in a restart loop and check the threshold */ - restart_early = spa->spa_resilver_deferred && + restart_early = (spa_sync_pass(spa) == 1) && + spa->spa_resilver_deferred && !dsl_scan_restarting(scn, tx) && (issued < (to_issue * zfs_resilver_defer_percent / 100)); From 42cc3904aacb0762c6927e305acf30965c49e7c5 Mon Sep 17 00:00:00 2001 From: Pavel Snajdr Date: Thu, 25 Jan 2024 01:40:54 +0100 Subject: [PATCH 3/8] fixup replacement/resilver_restart_001 --- tests/zfs-tests/include/tunables.cfg | 1 + .../functional/replacement/resilver_restart_001.ksh | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg index a619b846dd..82a7cac76a 100644 --- a/tests/zfs-tests/include/tunables.cfg +++ b/tests/zfs-tests/include/tunables.cfg @@ -68,6 +68,7 @@ REBUILD_SCRUB_ENABLED rebuild_scrub_enabled zfs_rebuild_scrub_enabled REMOVAL_SUSPEND_PROGRESS removal_suspend_progress zfs_removal_suspend_progress REMOVE_MAX_SEGMENT remove_max_segment zfs_remove_max_segment RESILVER_MIN_TIME_MS resilver_min_time_ms zfs_resilver_min_time_ms +RESILVER_DEFER_PERCENT resilver_defer_percent zfs_resilver_defer_percent SCAN_LEGACY scan_legacy zfs_scan_legacy SCAN_SUSPEND_PROGRESS scan_suspend_progress zfs_scan_suspend_progress SCAN_VDEV_LIMIT scan_vdev_limit zfs_scan_vdev_limit diff --git a/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh b/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh index b498ba4af7..629870adf9 100755 --- a/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh +++ b/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh @@ -96,6 +96,8 @@ set -A RESTARTS -- '1' '2' '2' '2' set -A VDEVS -- '' '' '' '' set -A DEFER_RESTARTS -- '1' '1' '1' '2' set -A DEFER_VDEVS -- '-' '2' '2' '-' +set -A EARLY_RESTART_DEFER_RESTARTS -- '1' '2' '2' '2' +set -A EARLY_RESTART_DEFER_VDEVS -- '' '' '' '' VDEV_REPLACE="${VDEV_FILES[1]} $SPARE_VDEV_FILE" @@ -125,7 +127,7 @@ done wait # test without and with deferred resilve feature enabled -for test in "without" "with" +for test in "without" "with" "with_early_restart" do log_note "Testing $test deferred resilvers" @@ -135,6 +137,13 @@ do RESTARTS=( "${DEFER_RESTARTS[@]}" ) VDEVS=( "${DEFER_VDEVS[@]}" ) VDEV_REPLACE="$SPARE_VDEV_FILE ${VDEV_FILES[1]}" + log_must set_tunable32 RESILVER_DEFER_PERCENT 0 + elif [[ $test == "with_early_restart" ]] + then + RESTARTS=( "${EARLY_RESTART_DEFER_RESTARTS[@]}" ) + VDEVS=( "${EARLY_RESTART_DEFER_VDEVS[@]}" ) + VDEV_REPLACE="${VDEV_FILES[1]} $SPARE_VDEV_FILE" + log_must set_tunable32 RESILVER_DEFER_PERCENT 100 fi # clear the events From 94e4a29191f89d4857314f9bb4337edf043721dc Mon Sep 17 00:00:00 2001 From: Pavel Snajdr Date: Thu, 25 Jan 2024 01:43:23 +0100 Subject: [PATCH 4/8] Defer resilver only when progress is above a threshold v3 --- module/zfs/dsl_scan.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 4bd36c336b..b7e14709a0 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -4286,7 +4286,6 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) */ restart_early = (spa_sync_pass(spa) == 1) && spa->spa_resilver_deferred && - !dsl_scan_restarting(scn, tx) && (issued < (to_issue * zfs_resilver_defer_percent / 100)); /* @@ -4302,8 +4301,9 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) dsl_scan_done(scn, B_FALSE, tx); if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) func = POOL_SCAN_RESILVER; - zfs_dbgmsg("restarting scan func=%u on %s txg=%llu", - func, dp->dp_spa->spa_name, (longlong_t)tx->tx_txg); + zfs_dbgmsg("restarting scan func=%u on %s txg=%llu early=%d", + func, dp->dp_spa->spa_name, (longlong_t)tx->tx_txg, + restart_early); dsl_scan_setup_sync(&func, tx); } From dfe3b5e147395827a3140a94e24fd394f3a186f0 Mon Sep 17 00:00:00 2001 From: Pavel Snajdr Date: Thu, 25 Jan 2024 01:55:13 +0100 Subject: [PATCH 5/8] fix manpage --- man/man4/zfs.4 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index c554cfc512..abb49f4d5b 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -1797,7 +1797,7 @@ Ignore the feature, causing an operation that would start a resilver to immediately restart the one in progress. . -.It Sy zfs_resilver_defer_percent Ns = Ns Sy 10 Ns | Ns % Pq uint +.It Sy zfs_resilver_defer_percent Ns = Ns Sy 10 Ns % Pq uint If the ongoing resilver progress is below this threshold, a new resilver will restart from scratch instead of being deferred after the current one finishes, even if the From 4383355e5125a296d036c350bfa04a243951bc07 Mon Sep 17 00:00:00 2001 From: Pavel Snajdr Date: Thu, 25 Jan 2024 03:13:14 +0100 Subject: [PATCH 6/8] Refactor dsl_scan_sync a bit In hope the result is more readable. Signed-off-by: Pavel Snajdr --- module/zfs/dsl_scan.c | 75 ++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 41 deletions(-) diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index b7e14709a0..38d252e850 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -4266,47 +4266,6 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) uint64_t to_issue, issued; int restart_early; - /* - * Check for scn_restart_txg before checking spa_load_state, so - * that we can restart an old-style scan while the pool is being - * imported (see dsl_scan_init). We also restart scans if there - * is a deferred resilver and the user has manually disabled - * deferred resilvers via the tunable, or if the curent scan progress - * is below zfs_resilver_defer_percent. - */ - - /* - * Taken from spa_misc.c spa_scan_get_stats(): - */ - to_issue = scn->scn_phys.scn_to_examine - scn->scn_phys.scn_skipped; - issued = scn->scn_issued_before_pass + spa->spa_scan_pass_issued; - - /* - * Make sure we're not in a restart loop and check the threshold - */ - restart_early = (spa_sync_pass(spa) == 1) && - spa->spa_resilver_deferred && - (issued < (to_issue * zfs_resilver_defer_percent / 100)); - - /* - * Only increment the feature if we're not about to restart early - */ - if (!restart_early && spa->spa_resilver_deferred && - !spa_feature_is_active(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER)) - spa_feature_incr(spa, SPA_FEATURE_RESILVER_DEFER, tx); - - if (dsl_scan_restarting(scn, tx) || restart_early || - (spa->spa_resilver_deferred && zfs_resilver_disable_defer)) { - pool_scan_func_t func = POOL_SCAN_SCRUB; - dsl_scan_done(scn, B_FALSE, tx); - if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) - func = POOL_SCAN_RESILVER; - zfs_dbgmsg("restarting scan func=%u on %s txg=%llu early=%d", - func, dp->dp_spa->spa_name, (longlong_t)tx->tx_txg, - restart_early); - dsl_scan_setup_sync(&func, tx); - } - /* * Only process scans in sync pass 1. */ @@ -4327,6 +4286,40 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) if (!scn->scn_async_stalled && !dsl_scan_active(scn)) return; + /* + * issued/to_issue as presented to the user + * in print_scan_scrub_resilver_status() issued/total_i + * @ cmd/zpool/zpool_main.c + */ + to_issue = scn->scn_phys.scn_to_examine - scn->scn_phys.scn_skipped; + issued = scn->scn_issued_before_pass + spa->spa_scan_pass_issued; + restart_early = spa->spa_resilver_deferred && ( + zfs_resilver_disable_defer || + (issued < (to_issue * zfs_resilver_defer_percent / 100))); + + if (spa->spa_resilver_deferred && + !spa_feature_is_active(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER)) + spa_feature_incr(spa, SPA_FEATURE_RESILVER_DEFER, tx); + + /* + * Check for scn_restart_txg before checking spa_load_state, so + * that we can restart an old-style scan while the pool is being + * imported (see dsl_scan_init). We also restart scans if there + * is a deferred resilver and the user has manually disabled + * deferred resilvers via zfs_resilver_disable_defer, or if the + * curent scan progress is below zfs_resilver_defer_percent. + */ + if (dsl_scan_restarting(scn, tx) || restart_early) { + pool_scan_func_t func = POOL_SCAN_SCRUB; + dsl_scan_done(scn, B_FALSE, tx); + if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) + func = POOL_SCAN_RESILVER; + zfs_dbgmsg("restarting scan func=%u on %s txg=%llu early=%d", + func, dp->dp_spa->spa_name, (longlong_t)tx->tx_txg, + restart_early); + dsl_scan_setup_sync(&func, tx); + } + /* reset scan statistics */ scn->scn_visited_this_txg = 0; scn->scn_dedup_frees_this_txg = 0; From 82998466205f78ae02af534bac09705d947b870c Mon Sep 17 00:00:00 2001 From: Pavel Snajdr Date: Sun, 28 Jan 2024 22:48:58 +0100 Subject: [PATCH 7/8] always make sure the feature is active as I can't reproduce the zfeature.c assert I'm going to take a blind guess and see if this is what is making it unhappy Signed-off-by: Pavel Snajdr --- module/zfs/dsl_scan.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 38d252e850..80e5d8b628 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -4266,6 +4266,10 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) uint64_t to_issue, issued; int restart_early; + if (spa->spa_resilver_deferred && + !spa_feature_is_active(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER)) + spa_feature_incr(spa, SPA_FEATURE_RESILVER_DEFER, tx); + /* * Only process scans in sync pass 1. */ @@ -4297,10 +4301,6 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) zfs_resilver_disable_defer || (issued < (to_issue * zfs_resilver_defer_percent / 100))); - if (spa->spa_resilver_deferred && - !spa_feature_is_active(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER)) - spa_feature_incr(spa, SPA_FEATURE_RESILVER_DEFER, tx); - /* * Check for scn_restart_txg before checking spa_load_state, so * that we can restart an old-style scan while the pool is being From 26818ada364dc78d150127be6f42464cb0aa0122 Mon Sep 17 00:00:00 2001 From: Pavel Snajdr Date: Sun, 4 Feb 2024 17:14:39 +0100 Subject: [PATCH 8/8] maybe I can't reorder it that much --- module/zfs/dsl_scan.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 80e5d8b628..ea570da369 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -4276,20 +4276,6 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) if (spa_sync_pass(spa) > 1) return; - /* - * If the spa is shutting down, then stop scanning. This will - * ensure that the scan does not dirty any new data during the - * shutdown phase. - */ - if (spa_shutting_down(spa)) - return; - - /* - * If the scan is inactive due to a stalled async destroy, try again. - */ - if (!scn->scn_async_stalled && !dsl_scan_active(scn)) - return; - /* * issued/to_issue as presented to the user * in print_scan_scrub_resilver_status() issued/total_i @@ -4320,6 +4306,20 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) dsl_scan_setup_sync(&func, tx); } + /* + * If the spa is shutting down, then stop scanning. This will + * ensure that the scan does not dirty any new data during the + * shutdown phase. + */ + if (spa_shutting_down(spa)) + return; + + /* + * If the scan is inactive due to a stalled async destroy, try again. + */ + if (!scn->scn_async_stalled && !dsl_scan_active(scn)) + return; + /* reset scan statistics */ scn->scn_visited_this_txg = 0; scn->scn_dedup_frees_this_txg = 0;