Improve resilver ETAs

When resilvering, the estimated time remaining is calculated using
the average issue rate over the current pass.  The current pass
starts when the scan was started, or when it was restarted because
the pool was exported/imported.

For dRAID pools in particular this can result in wildly optimistic
estimates, since the issue rate is very high while non-degraded
regions of the pool are being scanned.  Once repair I/O starts
being issued, performance drops to a realistic number, but the
estimated time remaining is still significantly skewed.

To address this, we redefine a pass so that it starts after the
scanning phase completes, making the issue rate more reflective of
recent performance.  Additionally, the zfs_scan_report_txgs module
option can be set to reset the pass statistics more often.

Reviewed-by: Akash B <akash-b@hpe.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #14410
This commit is contained in:
Brian Behlendorf 2023-01-25 11:28:54 -08:00
parent a68dfdb88c
commit 9fe3da9364
4 changed files with 57 additions and 12 deletions

View File

@ -7549,19 +7549,20 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
zfs_nicebytes(ps->pss_processed, processed_buf, sizeof (processed_buf)); zfs_nicebytes(ps->pss_processed, processed_buf, sizeof (processed_buf));
assert(ps->pss_func == POOL_SCAN_SCRUB || int is_resilver = ps->pss_func == POOL_SCAN_RESILVER;
ps->pss_func == POOL_SCAN_RESILVER); int is_scrub = ps->pss_func == POOL_SCAN_SCRUB;
assert(is_resilver || is_scrub);
/* Scan is finished or canceled. */ /* Scan is finished or canceled. */
if (ps->pss_state == DSS_FINISHED) { if (ps->pss_state == DSS_FINISHED) {
secs_to_dhms(end - start, time_buf); secs_to_dhms(end - start, time_buf);
if (ps->pss_func == POOL_SCAN_SCRUB) { if (is_scrub) {
(void) printf(gettext("scrub repaired %s " (void) printf(gettext("scrub repaired %s "
"in %s with %llu errors on %s"), processed_buf, "in %s with %llu errors on %s"), processed_buf,
time_buf, (u_longlong_t)ps->pss_errors, time_buf, (u_longlong_t)ps->pss_errors,
ctime(&end)); ctime(&end));
} else if (ps->pss_func == POOL_SCAN_RESILVER) { } else if (is_resilver) {
(void) printf(gettext("resilvered %s " (void) printf(gettext("resilvered %s "
"in %s with %llu errors on %s"), processed_buf, "in %s with %llu errors on %s"), processed_buf,
time_buf, (u_longlong_t)ps->pss_errors, time_buf, (u_longlong_t)ps->pss_errors,
@ -7569,10 +7570,10 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
} }
return; return;
} else if (ps->pss_state == DSS_CANCELED) { } else if (ps->pss_state == DSS_CANCELED) {
if (ps->pss_func == POOL_SCAN_SCRUB) { if (is_scrub) {
(void) printf(gettext("scrub canceled on %s"), (void) printf(gettext("scrub canceled on %s"),
ctime(&end)); ctime(&end));
} else if (ps->pss_func == POOL_SCAN_RESILVER) { } else if (is_resilver) {
(void) printf(gettext("resilver canceled on %s"), (void) printf(gettext("resilver canceled on %s"),
ctime(&end)); ctime(&end));
} }
@ -7582,7 +7583,7 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
assert(ps->pss_state == DSS_SCANNING); assert(ps->pss_state == DSS_SCANNING);
/* Scan is in progress. Resilvers can't be paused. */ /* Scan is in progress. Resilvers can't be paused. */
if (ps->pss_func == POOL_SCAN_SCRUB) { if (is_scrub) {
if (pause == 0) { if (pause == 0) {
(void) printf(gettext("scrub in progress since %s"), (void) printf(gettext("scrub in progress since %s"),
ctime(&start)); ctime(&start));
@ -7592,7 +7593,7 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
(void) printf(gettext("\tscrub started on %s"), (void) printf(gettext("\tscrub started on %s"),
ctime(&start)); ctime(&start));
} }
} else if (ps->pss_func == POOL_SCAN_RESILVER) { } else if (is_resilver) {
(void) printf(gettext("resilver in progress since %s"), (void) printf(gettext("resilver in progress since %s"),
ctime(&start)); ctime(&start));
} }
@ -7634,17 +7635,27 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
scanned_buf, issued_buf, total_buf); scanned_buf, issued_buf, total_buf);
} }
if (ps->pss_func == POOL_SCAN_RESILVER) { if (is_resilver) {
(void) printf(gettext("\t%s resilvered, %.2f%% done"), (void) printf(gettext("\t%s resilvered, %.2f%% done"),
processed_buf, 100 * fraction_done); processed_buf, 100 * fraction_done);
} else if (ps->pss_func == POOL_SCAN_SCRUB) { } else if (is_scrub) {
(void) printf(gettext("\t%s repaired, %.2f%% done"), (void) printf(gettext("\t%s repaired, %.2f%% done"),
processed_buf, 100 * fraction_done); processed_buf, 100 * fraction_done);
} }
if (pause == 0) { if (pause == 0) {
/*
* Only provide an estimate iff:
* 1) the time remaining is valid, and
* 2) the issue rate exceeds 10 MB/s, and
* 3) it's either:
* a) a resilver which has started repairs, or
* b) a scrub which has entered the issue phase.
*/
if (total_secs_left != UINT64_MAX && if (total_secs_left != UINT64_MAX &&
issue_rate >= 10 * 1024 * 1024) { issue_rate >= 10 * 1024 * 1024 &&
((is_resilver && ps->pss_processed > 0) ||
(is_scrub && issued > 0))) {
(void) printf(gettext(", %s to go\n"), time_buf); (void) printf(gettext(", %s to go\n"), time_buf);
} else { } else {
(void) printf(gettext(", no estimated " (void) printf(gettext(", no estimated "

View File

@ -1831,6 +1831,13 @@ When we cross this limit from above it is because we are issuing verification I/
In this case (unless the metadata scan is done) we stop issuing verification I/O In this case (unless the metadata scan is done) we stop issuing verification I/O
and start scanning metadata again until we get to the hard limit. and start scanning metadata again until we get to the hard limit.
. .
.It Sy zfs_scan_report_txgs Ns = Ns Sy 0 Pq uint
When reporting resilver throughput and estimated completion time, use the
performance observed over roughly the last
.Sy zfs_scan_report_txgs
TXGs.
When set to zero, performance is calculated over the time between checkpoints.
.
.It Sy zfs_scan_strict_mem_lim Ns = Ns Sy 0 Ns | Ns 1 Pq int .It Sy zfs_scan_strict_mem_lim Ns = Ns Sy 0 Ns | Ns 1 Pq int
Enforce tight memory limits on pool scans when a sequential scan is in progress. Enforce tight memory limits on pool scans when a sequential scan is in progress.
When disabled, the memory limit may be exceeded by fast disks. When disabled, the memory limit may be exceeded by fast disks.

View File

@ -131,6 +131,15 @@ static uint64_t dsl_scan_count_data_disks(vdev_t *vd);
extern int zfs_vdev_async_write_active_min_dirty_percent; extern int zfs_vdev_async_write_active_min_dirty_percent;
static int zfs_scan_blkstats = 0; static int zfs_scan_blkstats = 0;
/*
* 'zpool status' uses bytes processed per pass to report throughput and
* estimate time remaining. We define a pass to start when the scanning
* phase completes for a sequential resilver. Optionally, this value
* may be used to reset the pass statistics every N txgs to provide an
* estimated completion time based on currently observed performance.
*/
static uint_t zfs_scan_report_txgs = 0;
/* /*
* By default zfs will check to ensure it is not over the hard memory * By default zfs will check to ensure it is not over the hard memory
* limit before each txg. If finer-grained control of this is needed * limit before each txg. If finer-grained control of this is needed
@ -584,6 +593,8 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
} }
spa_scan_stat_init(spa); spa_scan_stat_init(spa);
vdev_scan_stat_init(spa->spa_root_vdev);
return (0); return (0);
} }
@ -742,6 +753,7 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
scn->scn_last_checkpoint = 0; scn->scn_last_checkpoint = 0;
scn->scn_checkpointing = B_FALSE; scn->scn_checkpointing = B_FALSE;
spa_scan_stat_init(spa); spa_scan_stat_init(spa);
vdev_scan_stat_init(spa->spa_root_vdev);
if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
scn->scn_phys.scn_ddt_class_max = zfs_scrub_ddt_class_max; scn->scn_phys.scn_ddt_class_max = zfs_scrub_ddt_class_max;
@ -3637,6 +3649,16 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
return; return;
} }
/*
* Disabled by default, set zfs_scan_report_txgs to report
* average performance over the last zfs_scan_report_txgs TXGs.
*/
if (!dsl_scan_is_paused_scrub(scn) && zfs_scan_report_txgs != 0 &&
tx->tx_txg % zfs_scan_report_txgs == 0) {
scn->scn_issued_before_pass += spa->spa_scan_pass_issued;
spa_scan_stat_init(spa);
}
/* /*
* It is possible to switch from unsorted to sorted at any time, * It is possible to switch from unsorted to sorted at any time,
* but afterwards the scan will remain sorted unless reloaded from * but afterwards the scan will remain sorted unless reloaded from
@ -3759,6 +3781,9 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
if (scn->scn_is_sorted) { if (scn->scn_is_sorted) {
scn->scn_checkpointing = B_TRUE; scn->scn_checkpointing = B_TRUE;
scn->scn_clearing = B_TRUE; scn->scn_clearing = B_TRUE;
scn->scn_issued_before_pass +=
spa->spa_scan_pass_issued;
spa_scan_stat_init(spa);
} }
zfs_dbgmsg("scan complete txg %llu", zfs_dbgmsg("scan complete txg %llu",
(longlong_t)tx->tx_txg); (longlong_t)tx->tx_txg);
@ -4485,6 +4510,9 @@ ZFS_MODULE_PARAM(zfs, zfs_, scan_strict_mem_lim, INT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs, zfs_, scan_fill_weight, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, scan_fill_weight, INT, ZMOD_RW,
"Tunable to adjust bias towards more filled segments during scans"); "Tunable to adjust bias towards more filled segments during scans");
ZFS_MODULE_PARAM(zfs, zfs_, scan_report_txgs, UINT, ZMOD_RW,
"Tunable to report resilver performance over the last N txgs");
ZFS_MODULE_PARAM(zfs, zfs_, resilver_disable_defer, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, resilver_disable_defer, INT, ZMOD_RW,
"Process all resilvers immediately"); "Process all resilvers immediately");
/* END CSTYLED */ /* END CSTYLED */

View File

@ -2564,7 +2564,6 @@ spa_scan_stat_init(spa_t *spa)
spa->spa_scan_pass_scrub_spent_paused = 0; spa->spa_scan_pass_scrub_spent_paused = 0;
spa->spa_scan_pass_exam = 0; spa->spa_scan_pass_exam = 0;
spa->spa_scan_pass_issued = 0; spa->spa_scan_pass_issued = 0;
vdev_scan_stat_init(spa->spa_root_vdev);
} }
/* /*