This commit is contained in:
Pavel Snajdr 2024-08-28 22:54:48 -05:00 committed by GitHub
commit 5c6bc0e6ee
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 56 additions and 19 deletions

View File

@ -1961,6 +1961,13 @@ Ignore the
feature, causing an operation that would start a resilver to feature, causing an operation that would start a resilver to
immediately restart the one in progress. immediately restart the one in progress.
. .
.It Sy zfs_resilver_defer_percent Ns = Ns Sy 10 Ns % Pq uint
If the ongoing resilver progress is below this threshold, a new resilver will
restart from scratch instead of being deferred after the current one finishes,
even if the
.Sy resilver_defer
feature is enabled.
.
.It Sy zfs_resilver_min_time_ms Ns = Ns Sy 3000 Ns ms Po 3 s Pc Pq uint .It Sy zfs_resilver_min_time_ms Ns = Ns Sy 3000 Ns ms Po 3 s Pc Pq uint
Resilvers are processed by the sync thread. Resilvers are processed by the sync thread.
While resilvering, it will spend at least this much time While resilvering, it will spend at least this much time

View File

@ -212,6 +212,9 @@ static uint64_t zfs_max_async_dedup_frees = 100000;
/* set to disable resilver deferring */ /* set to disable resilver deferring */
static int zfs_resilver_disable_defer = B_FALSE; static int zfs_resilver_disable_defer = B_FALSE;
/* Don't defer a resilver if the one in progress only got this far: */
static uint_t zfs_resilver_defer_percent = 10;
/* /*
* We wait a few txgs after importing a pool to begin scanning so that * We wait a few txgs after importing a pool to begin scanning so that
* the import / mounting code isn't held up by scrub / resilver IO. * the import / mounting code isn't held up by scrub / resilver IO.
@ -4287,35 +4290,49 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
dsl_scan_t *scn = dp->dp_scan; dsl_scan_t *scn = dp->dp_scan;
spa_t *spa = dp->dp_spa; spa_t *spa = dp->dp_spa;
state_sync_type_t sync_type = SYNC_OPTIONAL; state_sync_type_t sync_type = SYNC_OPTIONAL;
uint64_t to_issue, issued;
int restart_early;
if (spa->spa_resilver_deferred && if (spa->spa_resilver_deferred &&
!spa_feature_is_active(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER)) !spa_feature_is_active(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER))
spa_feature_incr(spa, SPA_FEATURE_RESILVER_DEFER, tx); spa_feature_incr(spa, SPA_FEATURE_RESILVER_DEFER, tx);
/*
* Check for scn_restart_txg before checking spa_load_state, so
* that we can restart an old-style scan while the pool is being
* imported (see dsl_scan_init). We also restart scans if there
* is a deferred resilver and the user has manually disabled
* deferred resilvers via the tunable.
*/
if (dsl_scan_restarting(scn, tx) ||
(spa->spa_resilver_deferred && zfs_resilver_disable_defer)) {
pool_scan_func_t func = POOL_SCAN_SCRUB;
dsl_scan_done(scn, B_FALSE, tx);
if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL))
func = POOL_SCAN_RESILVER;
zfs_dbgmsg("restarting scan func=%u on %s txg=%llu",
func, dp->dp_spa->spa_name, (longlong_t)tx->tx_txg);
dsl_scan_setup_sync(&func, tx);
}
/* /*
* Only process scans in sync pass 1. * Only process scans in sync pass 1.
*/ */
if (spa_sync_pass(spa) > 1) if (spa_sync_pass(spa) > 1)
return; return;
/*
* Compute issued/to_issue the same way they are presented to the user
* (as issued/total_i) by print_scan_scrub_resilver_status() in
* cmd/zpool/zpool_main.c.
*/
to_issue = scn->scn_phys.scn_to_examine - scn->scn_phys.scn_skipped;
issued = scn->scn_issued_before_pass + spa->spa_scan_pass_issued;
restart_early = spa->spa_resilver_deferred && (
zfs_resilver_disable_defer ||
(issued < (to_issue * zfs_resilver_defer_percent / 100)));
/*
* Check for scn_restart_txg before checking spa_load_state, so
* that we can restart an old-style scan while the pool is being
* imported (see dsl_scan_init). We also restart scans if there
* is a deferred resilver and the user has manually disabled
* deferred resilvers via zfs_resilver_disable_defer, or if the
* current scan progress is below zfs_resilver_defer_percent.
*/
if (dsl_scan_restarting(scn, tx) || restart_early) {
pool_scan_func_t func = POOL_SCAN_SCRUB;
dsl_scan_done(scn, B_FALSE, tx);
if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL))
func = POOL_SCAN_RESILVER;
zfs_dbgmsg("restarting scan func=%u on %s txg=%llu early=%d",
func, dp->dp_spa->spa_name, (longlong_t)tx->tx_txg,
restart_early);
dsl_scan_setup_sync(&func, tx);
}
/* /*
* If the spa is shutting down, then stop scanning. This will * If the spa is shutting down, then stop scanning. This will
* ensure that the scan does not dirty any new data during the * ensure that the scan does not dirty any new data during the
@ -5285,6 +5302,9 @@ ZFS_MODULE_PARAM(zfs, zfs_, scan_report_txgs, UINT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs, zfs_, resilver_disable_defer, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, resilver_disable_defer, INT, ZMOD_RW,
"Process all resilvers immediately"); "Process all resilvers immediately");
ZFS_MODULE_PARAM(zfs, zfs_, resilver_defer_percent, UINT, ZMOD_RW,
"Issued IO percent complete after which resilvers are deferred");
ZFS_MODULE_PARAM(zfs, zfs_, scrub_error_blocks_per_txg, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, scrub_error_blocks_per_txg, UINT, ZMOD_RW,
"Error blocks to be scrubbed in one txg"); "Error blocks to be scrubbed in one txg");
/* END CSTYLED */ /* END CSTYLED */

View File

@ -73,6 +73,7 @@ REBUILD_SCRUB_ENABLED rebuild_scrub_enabled zfs_rebuild_scrub_enabled
REMOVAL_SUSPEND_PROGRESS removal_suspend_progress zfs_removal_suspend_progress REMOVAL_SUSPEND_PROGRESS removal_suspend_progress zfs_removal_suspend_progress
REMOVE_MAX_SEGMENT remove_max_segment zfs_remove_max_segment REMOVE_MAX_SEGMENT remove_max_segment zfs_remove_max_segment
RESILVER_MIN_TIME_MS resilver_min_time_ms zfs_resilver_min_time_ms RESILVER_MIN_TIME_MS resilver_min_time_ms zfs_resilver_min_time_ms
RESILVER_DEFER_PERCENT resilver_defer_percent zfs_resilver_defer_percent
SCAN_LEGACY scan_legacy zfs_scan_legacy SCAN_LEGACY scan_legacy zfs_scan_legacy
SCAN_SUSPEND_PROGRESS scan_suspend_progress zfs_scan_suspend_progress SCAN_SUSPEND_PROGRESS scan_suspend_progress zfs_scan_suspend_progress
SCAN_VDEV_LIMIT scan_vdev_limit zfs_scan_vdev_limit SCAN_VDEV_LIMIT scan_vdev_limit zfs_scan_vdev_limit

View File

@ -96,6 +96,8 @@ set -A RESTARTS -- '1' '2' '2' '2'
set -A VDEVS -- '' '' '' '' set -A VDEVS -- '' '' '' ''
set -A DEFER_RESTARTS -- '1' '1' '1' '2' set -A DEFER_RESTARTS -- '1' '1' '1' '2'
set -A DEFER_VDEVS -- '-' '2' '2' '-' set -A DEFER_VDEVS -- '-' '2' '2' '-'
set -A EARLY_RESTART_DEFER_RESTARTS -- '1' '2' '2' '2'
set -A EARLY_RESTART_DEFER_VDEVS -- '' '' '' ''
VDEV_REPLACE="${VDEV_FILES[1]} $SPARE_VDEV_FILE" VDEV_REPLACE="${VDEV_FILES[1]} $SPARE_VDEV_FILE"
@ -125,7 +127,7 @@ done
wait wait
# test without and with deferred resilver feature enabled # test without and with deferred resilver feature enabled
for test in "without" "with" for test in "without" "with" "with_early_restart"
do do
log_note "Testing $test deferred resilvers" log_note "Testing $test deferred resilvers"
@ -135,6 +137,13 @@ do
RESTARTS=( "${DEFER_RESTARTS[@]}" ) RESTARTS=( "${DEFER_RESTARTS[@]}" )
VDEVS=( "${DEFER_VDEVS[@]}" ) VDEVS=( "${DEFER_VDEVS[@]}" )
VDEV_REPLACE="$SPARE_VDEV_FILE ${VDEV_FILES[1]}" VDEV_REPLACE="$SPARE_VDEV_FILE ${VDEV_FILES[1]}"
log_must set_tunable32 RESILVER_DEFER_PERCENT 0
elif [[ $test == "with_early_restart" ]]
then
RESTARTS=( "${EARLY_RESTART_DEFER_RESTARTS[@]}" )
VDEVS=( "${EARLY_RESTART_DEFER_VDEVS[@]}" )
VDEV_REPLACE="${VDEV_FILES[1]} $SPARE_VDEV_FILE"
log_must set_tunable32 RESILVER_DEFER_PERCENT 100
fi fi
# clear the events # clear the events