Increase default zfs_scan_vdev_limit to 16MB

For HDD-based pools the default zfs_scan_vdev_limit of 4M
per-vdev can significantly limit the maximum scrub performance.
Increasing the default to 16M can double the scrub speed from
80 MB/s per disk to 160 MB/s per disk.

This does increase the memory footprint during scrub/resilver,
but given the performance win this is a reasonable trade-off.
Memory usage is capped at 1/4 of arc_c_max.  Note that the number
of outstanding I/Os has not changed and is still limited by
zfs_vdev_scrub_max_active.

Reviewed-by: Akash B <akash-b@hpe.com>
Reviewed-by: Tony Nguyen <tony.nguyen@delphix.com>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #14428
This commit is contained in:
Brian Behlendorf 2023-01-24 14:05:45 -08:00
parent 9fe3da9364
commit fa28e26e42
2 changed files with 17 additions and 13 deletions

View File

@ -1846,7 +1846,7 @@ When disabled, the memory limit may be exceeded by fast disks.
Freezes a scrub/resilver in progress without actually pausing it. Freezes a scrub/resilver in progress without actually pausing it.
Intended for testing/debugging. Intended for testing/debugging.
. .
.It Sy zfs_scan_vdev_limit Ns = Ns Sy 4194304 Ns B Po 4MB Pc Pq int .It Sy zfs_scan_vdev_limit Ns = Ns Sy 16777216 Ns B Po 16 MiB Pc Pq int
Maximum amount of data that can be concurrently issued at once for scrubs and Maximum amount of data that can be concurrently issued at once for scrubs and
resilvers per leaf device, given in bytes. resilvers per leaf device, given in bytes.
. .

View File

@ -37,6 +37,7 @@
#include <sys/dmu_tx.h> #include <sys/dmu_tx.h>
#include <sys/dmu_objset.h> #include <sys/dmu_objset.h>
#include <sys/arc.h> #include <sys/arc.h>
#include <sys/arc_impl.h>
#include <sys/zap.h> #include <sys/zap.h>
#include <sys/zio.h> #include <sys/zio.h>
#include <sys/zfs_context.h> #include <sys/zfs_context.h>
@ -126,7 +127,7 @@ static boolean_t scan_ds_queue_contains(dsl_scan_t *scn, uint64_t dsobj,
static void scan_ds_queue_insert(dsl_scan_t *scn, uint64_t dsobj, uint64_t txg); static void scan_ds_queue_insert(dsl_scan_t *scn, uint64_t dsobj, uint64_t txg);
static void scan_ds_queue_remove(dsl_scan_t *scn, uint64_t dsobj); static void scan_ds_queue_remove(dsl_scan_t *scn, uint64_t dsobj);
static void scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx); static void scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx);
static uint64_t dsl_scan_count_data_disks(vdev_t *vd); static uint64_t dsl_scan_count_data_disks(spa_t *spa);
extern int zfs_vdev_async_write_active_min_dirty_percent; extern int zfs_vdev_async_write_active_min_dirty_percent;
static int zfs_scan_blkstats = 0; static int zfs_scan_blkstats = 0;
@ -156,7 +157,7 @@ int zfs_scan_strict_mem_lim = B_FALSE;
* overload the drives with I/O, since that is protected by * overload the drives with I/O, since that is protected by
* zfs_vdev_scrub_max_active. * zfs_vdev_scrub_max_active.
*/ */
unsigned long zfs_scan_vdev_limit = 4 << 20; unsigned long zfs_scan_vdev_limit = 16 << 20;
int zfs_scan_issue_strategy = 0; int zfs_scan_issue_strategy = 0;
int zfs_scan_legacy = B_FALSE; /* don't queue & sort zios, go direct */ int zfs_scan_legacy = B_FALSE; /* don't queue & sort zios, go direct */
@ -459,11 +460,12 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
/* /*
* Calculate the max number of in-flight bytes for pool-wide * Calculate the max number of in-flight bytes for pool-wide
* scanning operations (minimum 1MB). Limits for the issuing * scanning operations (minimum 1MB, maximum 1/4 of arc_c_max).
* phase are done per top-level vdev and are handled separately. * Limits for the issuing phase are done per top-level vdev and
* are handled separately.
*/ */
scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit * scn->scn_maxinflight_bytes = MIN(arc_c_max / 4, MAX(1ULL << 20,
dsl_scan_count_data_disks(spa->spa_root_vdev), 1ULL << 20); zfs_scan_vdev_limit * dsl_scan_count_data_disks(spa)));
avl_create(&scn->scn_queue, scan_ds_queue_compare, sizeof (scan_ds_t), avl_create(&scn->scn_queue, scan_ds_queue_compare, sizeof (scan_ds_t),
offsetof(scan_ds_t, sds_node)); offsetof(scan_ds_t, sds_node));
@ -2809,8 +2811,9 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
} }
static uint64_t static uint64_t
dsl_scan_count_data_disks(vdev_t *rvd) dsl_scan_count_data_disks(spa_t *spa)
{ {
vdev_t *rvd = spa->spa_root_vdev;
uint64_t i, leaves = 0; uint64_t i, leaves = 0;
for (i = 0; i < rvd->vdev_children; i++) { for (i = 0; i < rvd->vdev_children; i++) {
@ -3715,12 +3718,13 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
taskqid_t prefetch_tqid; taskqid_t prefetch_tqid;
/* /*
* Recalculate the max number of in-flight bytes for pool-wide * Calculate the max number of in-flight bytes for pool-wide
* scanning operations (minimum 1MB). Limits for the issuing * scanning operations (minimum 1MB, maximum 1/4 of arc_c_max).
* phase are done per top-level vdev and are handled separately. * Limits for the issuing phase are done per top-level vdev and
* are handled separately.
*/ */
scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit * scn->scn_maxinflight_bytes = MIN(arc_c_max / 4, MAX(1ULL << 20,
dsl_scan_count_data_disks(spa->spa_root_vdev), 1ULL << 20); zfs_scan_vdev_limit * dsl_scan_count_data_disks(spa)));
if (scnp->scn_ddt_bookmark.ddb_class <= if (scnp->scn_ddt_bookmark.ddb_class <=
scnp->scn_ddt_class_max) { scnp->scn_ddt_class_max) {