Increase default zfs_rebuild_vdev_limit to 64MB
When testing distributed rebuild performance with more capable hardware it was observed that increasing the zfs_rebuild_vdev_limit to 64M reduced the rebuild time by 17%. Beyond 64MB there was some improvement (~2%) but it was not significant when weighed against the increased memory usage. Memory usage is capped at 1/4 of arc_c_max. Additionally, vr_bytes_inflight_max has been moved so it's updated per-metaslab to allow the size to be adjusted while a rebuild is running. Reviewed-by: Akash B <akash-b@hpe.com> Reviewed-by: Tony Nguyen <tony.nguyen@delphix.com> Reviewed-by: Alexander Motin <mav@FreeBSD.org> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #14428
This commit is contained in:
parent
c0aea7cf4e
commit
973934b965
|
@ -1769,7 +1769,7 @@ completes in order to verify the checksums of all blocks which have been
|
||||||
resilvered.
|
resilvered.
|
||||||
This is enabled by default and strongly recommended.
|
This is enabled by default and strongly recommended.
|
||||||
.
|
.
|
||||||
.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 33554432 Ns B Po 32 MiB Pc Pq u64
|
.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 67108864 Ns B Po 64 MiB Pc Pq u64
|
||||||
Maximum amount of I/O that can be concurrently issued for a sequential
|
Maximum amount of I/O that can be concurrently issued for a sequential
|
||||||
resilver per leaf device, given in bytes.
|
resilver per leaf device, given in bytes.
|
||||||
.
|
.
|
||||||
|
|
|
@ -34,6 +34,7 @@
|
||||||
#include <sys/zio.h>
|
#include <sys/zio.h>
|
||||||
#include <sys/dmu_tx.h>
|
#include <sys/dmu_tx.h>
|
||||||
#include <sys/arc.h>
|
#include <sys/arc.h>
|
||||||
|
#include <sys/arc_impl.h>
|
||||||
#include <sys/zap.h>
|
#include <sys/zap.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -116,13 +117,12 @@ static uint64_t zfs_rebuild_max_segment = 1024 * 1024;
|
||||||
* segment size is also large (zfs_rebuild_max_segment=1M). This helps keep
|
* segment size is also large (zfs_rebuild_max_segment=1M). This helps keep
|
||||||
* the queue depth short.
|
* the queue depth short.
|
||||||
*
|
*
|
||||||
* 32MB was selected as the default value to achieve good performance with
|
* 64MB was observed to deliver the best performance and set as the default.
|
||||||
* a large 90-drive dRAID HDD configuration (draid2:8d:90c:2s). A sequential
|
* Testing was performed with a 106-drive dRAID HDD pool (draid2:11d:106c)
|
||||||
* rebuild was unable to saturate all of the drives using smaller values.
|
* and a rebuild rate of 1.2GB/s was measured to the distributed spare.
|
||||||
* With a value of 32MB the sequential resilver write rate was measured at
|
* Smaller values were unable to fully saturate the available pool I/O.
|
||||||
* 800MB/s sustained while rebuilding to a distributed spare.
|
|
||||||
*/
|
*/
|
||||||
static uint64_t zfs_rebuild_vdev_limit = 32 << 20;
|
static uint64_t zfs_rebuild_vdev_limit = 64 << 20;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Automatically start a pool scrub when the last active sequential resilver
|
* Automatically start a pool scrub when the last active sequential resilver
|
||||||
|
@ -754,6 +754,7 @@ vdev_rebuild_thread(void *arg)
|
||||||
{
|
{
|
||||||
vdev_t *vd = arg;
|
vdev_t *vd = arg;
|
||||||
spa_t *spa = vd->vdev_spa;
|
spa_t *spa = vd->vdev_spa;
|
||||||
|
vdev_t *rvd = spa->spa_root_vdev;
|
||||||
int error = 0;
|
int error = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -786,9 +787,6 @@ vdev_rebuild_thread(void *arg)
|
||||||
vr->vr_pass_bytes_scanned = 0;
|
vr->vr_pass_bytes_scanned = 0;
|
||||||
vr->vr_pass_bytes_issued = 0;
|
vr->vr_pass_bytes_issued = 0;
|
||||||
|
|
||||||
vr->vr_bytes_inflight_max = MAX(1ULL << 20,
|
|
||||||
zfs_rebuild_vdev_limit * vd->vdev_children);
|
|
||||||
|
|
||||||
uint64_t update_est_time = gethrtime();
|
uint64_t update_est_time = gethrtime();
|
||||||
vdev_rebuild_update_bytes_est(vd, 0);
|
vdev_rebuild_update_bytes_est(vd, 0);
|
||||||
|
|
||||||
|
@ -804,6 +802,17 @@ vdev_rebuild_thread(void *arg)
|
||||||
metaslab_t *msp = vd->vdev_ms[i];
|
metaslab_t *msp = vd->vdev_ms[i];
|
||||||
vr->vr_scan_msp = msp;
|
vr->vr_scan_msp = msp;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Calculate the max number of in-flight bytes for top-level
|
||||||
|
* vdev scanning operations (minimum 1MB, maximum 1/4 of
|
||||||
|
* arc_c_max shared by all top-level vdevs). Limits for the
|
||||||
|
* issuing phase are done per top-level vdev and are handled
|
||||||
|
* separately.
|
||||||
|
*/
|
||||||
|
uint64_t limit = (arc_c_max / 4) / MAX(rvd->vdev_children, 1);
|
||||||
|
vr->vr_bytes_inflight_max = MIN(limit, MAX(1ULL << 20,
|
||||||
|
zfs_rebuild_vdev_limit * vd->vdev_children));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Removal of vdevs from the vdev tree may eliminate the need
|
* Removal of vdevs from the vdev tree may eliminate the need
|
||||||
* for the rebuild, in which case it should be canceled. The
|
* for the rebuild, in which case it should be canceled. The
|
||||||
|
|
Loading…
Reference in New Issue