Reduce ZIO io_lock contention on sorted scrub
During sorted scrub, multiple threads (one per vdev) issue many ZIOs at the same time, all using the same scn->scn_zio_root ZIO as parent. This causes huge lock contention on the single global lock of that ZIO. Improve it by introducing per-queue null ZIOs, children of that one, and using them as proxy parents instead. For a 12-SSD pool storing 1.5TB of 4KB blocks on an 80-core system, this dramatically reduces lock contention and reduces scrub time from 21 minutes down to 12.5, while the actual read stages (not scan) are about 3x faster, reaching 100K blocks per second per vdev. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Alexander Motin <mav@FreeBSD.org> Sponsored-By: iXsystems, Inc. Closes #13553
This commit is contained in:
parent
813e15f28c
commit
916d9de158
|
@ -279,6 +279,7 @@ typedef struct scan_io {
|
||||||
struct dsl_scan_io_queue {
|
struct dsl_scan_io_queue {
|
||||||
dsl_scan_t *q_scn; /* associated dsl_scan_t */
|
dsl_scan_t *q_scn; /* associated dsl_scan_t */
|
||||||
vdev_t *q_vd; /* top-level vdev that this queue represents */
|
vdev_t *q_vd; /* top-level vdev that this queue represents */
|
||||||
|
zio_t *q_zio; /* scn_zio_root child for waiting on IO */
|
||||||
|
|
||||||
/* trees used for sorting I/Os and extents of I/Os */
|
/* trees used for sorting I/Os and extents of I/Os */
|
||||||
range_tree_t *q_exts_by_addr;
|
range_tree_t *q_exts_by_addr;
|
||||||
|
@ -3021,15 +3022,19 @@ scan_io_queues_run_one(void *arg)
|
||||||
dsl_scan_io_queue_t *queue = arg;
|
dsl_scan_io_queue_t *queue = arg;
|
||||||
kmutex_t *q_lock = &queue->q_vd->vdev_scan_io_queue_lock;
|
kmutex_t *q_lock = &queue->q_vd->vdev_scan_io_queue_lock;
|
||||||
boolean_t suspended = B_FALSE;
|
boolean_t suspended = B_FALSE;
|
||||||
range_seg_t *rs = NULL;
|
range_seg_t *rs;
|
||||||
scan_io_t *sio = NULL;
|
scan_io_t *sio;
|
||||||
|
zio_t *zio;
|
||||||
list_t sio_list;
|
list_t sio_list;
|
||||||
|
|
||||||
ASSERT(queue->q_scn->scn_is_sorted);
|
ASSERT(queue->q_scn->scn_is_sorted);
|
||||||
|
|
||||||
list_create(&sio_list, sizeof (scan_io_t),
|
list_create(&sio_list, sizeof (scan_io_t),
|
||||||
offsetof(scan_io_t, sio_nodes.sio_list_node));
|
offsetof(scan_io_t, sio_nodes.sio_list_node));
|
||||||
|
zio = zio_null(queue->q_scn->scn_zio_root, queue->q_scn->scn_dp->dp_spa,
|
||||||
|
NULL, NULL, NULL, ZIO_FLAG_CANFAIL);
|
||||||
mutex_enter(q_lock);
|
mutex_enter(q_lock);
|
||||||
|
queue->q_zio = zio;
|
||||||
|
|
||||||
/* Calculate maximum in-flight bytes for this vdev. */
|
/* Calculate maximum in-flight bytes for this vdev. */
|
||||||
queue->q_maxinflight_bytes = MAX(1, zfs_scan_vdev_limit *
|
queue->q_maxinflight_bytes = MAX(1, zfs_scan_vdev_limit *
|
||||||
|
@ -3096,7 +3101,9 @@ scan_io_queues_run_one(void *arg)
|
||||||
scan_io_queue_insert_impl(queue, sio);
|
scan_io_queue_insert_impl(queue, sio);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
queue->q_zio = NULL;
|
||||||
mutex_exit(q_lock);
|
mutex_exit(q_lock);
|
||||||
|
zio_nowait(zio);
|
||||||
list_destroy(&sio_list);
|
list_destroy(&sio_list);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4052,6 +4059,7 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
|
||||||
dsl_scan_t *scn = dp->dp_scan;
|
dsl_scan_t *scn = dp->dp_scan;
|
||||||
size_t size = BP_GET_PSIZE(bp);
|
size_t size = BP_GET_PSIZE(bp);
|
||||||
abd_t *data = abd_alloc_for_io(size, B_FALSE);
|
abd_t *data = abd_alloc_for_io(size, B_FALSE);
|
||||||
|
zio_t *pio;
|
||||||
|
|
||||||
if (queue == NULL) {
|
if (queue == NULL) {
|
||||||
ASSERT3U(scn->scn_maxinflight_bytes, >, 0);
|
ASSERT3U(scn->scn_maxinflight_bytes, >, 0);
|
||||||
|
@ -4060,6 +4068,7 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
|
||||||
cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
|
cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
|
||||||
spa->spa_scrub_inflight += BP_GET_PSIZE(bp);
|
spa->spa_scrub_inflight += BP_GET_PSIZE(bp);
|
||||||
mutex_exit(&spa->spa_scrub_lock);
|
mutex_exit(&spa->spa_scrub_lock);
|
||||||
|
pio = scn->scn_zio_root;
|
||||||
} else {
|
} else {
|
||||||
kmutex_t *q_lock = &queue->q_vd->vdev_scan_io_queue_lock;
|
kmutex_t *q_lock = &queue->q_vd->vdev_scan_io_queue_lock;
|
||||||
|
|
||||||
|
@ -4068,12 +4077,14 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
|
||||||
while (queue->q_inflight_bytes >= queue->q_maxinflight_bytes)
|
while (queue->q_inflight_bytes >= queue->q_maxinflight_bytes)
|
||||||
cv_wait(&queue->q_zio_cv, q_lock);
|
cv_wait(&queue->q_zio_cv, q_lock);
|
||||||
queue->q_inflight_bytes += BP_GET_PSIZE(bp);
|
queue->q_inflight_bytes += BP_GET_PSIZE(bp);
|
||||||
|
pio = queue->q_zio;
|
||||||
mutex_exit(q_lock);
|
mutex_exit(q_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ASSERT(pio != NULL);
|
||||||
count_block(scn, dp->dp_blkstats, bp);
|
count_block(scn, dp->dp_blkstats, bp);
|
||||||
zio_nowait(zio_read(scn->scn_zio_root, spa, bp, data, size,
|
zio_nowait(zio_read(pio, spa, bp, data, size, dsl_scan_scrub_done,
|
||||||
dsl_scan_scrub_done, queue, ZIO_PRIORITY_SCRUB, zio_flags, zb));
|
queue, ZIO_PRIORITY_SCRUB, zio_flags, zb));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
Loading…
Reference in New Issue