Avoid vq_lock drop in vdev_queue_aggregate()

vq_lock is already too congested for two more operations per I/O.
Instead of dropping and reacquiring it inside vdev_queue_aggregate(),
delegate the zio_vdev_io_bypass() and zio_execute() calls for parent
I/Os to the callers, which drop the lock anyway to execute the new I/O.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mark.maybee@delphix.com>
Reviewed-by: Brian Atkinson <batkinson@lanl.gov>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Sponsored-By: iXsystems, Inc.
Closes #12297
This commit is contained in:
Alexander Motin 2021-08-17 11:47:00 -04:00 committed by GitHub
parent e829a865bf
commit 7f9d9e6f39
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 34 additions and 29 deletions

View File

@ -599,7 +599,6 @@ static zio_t *
vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
{ {
zio_t *first, *last, *aio, *dio, *mandatory, *nio; zio_t *first, *last, *aio, *dio, *mandatory, *nio;
zio_link_t *zl = NULL;
uint64_t maxgap = 0; uint64_t maxgap = 0;
uint64_t size; uint64_t size;
uint64_t limit; uint64_t limit;
@ -797,19 +796,12 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
ASSERT3U(abd_get_size(aio->io_abd), ==, aio->io_size); ASSERT3U(abd_get_size(aio->io_abd), ==, aio->io_size);
/* /*
* We need to drop the vdev queue's lock during zio_execute() to * Callers must call zio_vdev_io_bypass() and zio_execute() for
* avoid a deadlock that we could encounter due to lock order * aggregated (parent) I/Os so that we could avoid dropping the
* reversal between vq_lock and io_lock in zio_change_priority(). * queue's lock here to avoid a deadlock that we could encounter
* due to lock order reversal between vq_lock and io_lock in
* zio_change_priority().
*/ */
mutex_exit(&vq->vq_lock);
while ((dio = zio_walk_parents(aio, &zl)) != NULL) {
ASSERT3U(dio->io_type, ==, aio->io_type);
zio_vdev_io_bypass(dio);
zio_execute(dio);
}
mutex_enter(&vq->vq_lock);
return (aio); return (aio);
} }
@ -847,23 +839,24 @@ again:
ASSERT3U(zio->io_priority, ==, p); ASSERT3U(zio->io_priority, ==, p);
aio = vdev_queue_aggregate(vq, zio); aio = vdev_queue_aggregate(vq, zio);
if (aio != NULL) if (aio != NULL) {
zio = aio; zio = aio;
else } else {
vdev_queue_io_remove(vq, zio); vdev_queue_io_remove(vq, zio);
/* /*
* If the I/O is or was optional and therefore has no data, we need to * If the I/O is or was optional and therefore has no data, we
* simply discard it. We need to drop the vdev queue's lock to avoid a * need to simply discard it. We need to drop the vdev queue's
* deadlock that we could encounter since this I/O will complete * lock to avoid a deadlock that we could encounter since this
* immediately. * I/O will complete immediately.
*/ */
if (zio->io_flags & ZIO_FLAG_NODATA) { if (zio->io_flags & ZIO_FLAG_NODATA) {
mutex_exit(&vq->vq_lock); mutex_exit(&vq->vq_lock);
zio_vdev_io_bypass(zio); zio_vdev_io_bypass(zio);
zio_execute(zio); zio_execute(zio);
mutex_enter(&vq->vq_lock); mutex_enter(&vq->vq_lock);
goto again; goto again;
}
} }
vdev_queue_pending_add(vq, zio); vdev_queue_pending_add(vq, zio);
@ -876,7 +869,8 @@ zio_t *
vdev_queue_io(zio_t *zio) vdev_queue_io(zio_t *zio)
{ {
vdev_queue_t *vq = &zio->io_vd->vdev_queue; vdev_queue_t *vq = &zio->io_vd->vdev_queue;
zio_t *nio; zio_t *dio, *nio;
zio_link_t *zl = NULL;
if (zio->io_flags & ZIO_FLAG_DONT_QUEUE) if (zio->io_flags & ZIO_FLAG_DONT_QUEUE)
return (zio); return (zio);
@ -923,6 +917,11 @@ vdev_queue_io(zio_t *zio)
return (NULL); return (NULL);
if (nio->io_done == vdev_queue_agg_io_done) { if (nio->io_done == vdev_queue_agg_io_done) {
while ((dio = zio_walk_parents(nio, &zl)) != NULL) {
ASSERT3U(dio->io_type, ==, nio->io_type);
zio_vdev_io_bypass(dio);
zio_execute(dio);
}
zio_nowait(nio); zio_nowait(nio);
return (NULL); return (NULL);
} }
@ -934,7 +933,8 @@ void
vdev_queue_io_done(zio_t *zio) vdev_queue_io_done(zio_t *zio)
{ {
vdev_queue_t *vq = &zio->io_vd->vdev_queue; vdev_queue_t *vq = &zio->io_vd->vdev_queue;
zio_t *nio; zio_t *dio, *nio;
zio_link_t *zl = NULL;
hrtime_t now = gethrtime(); hrtime_t now = gethrtime();
vq->vq_io_complete_ts = now; vq->vq_io_complete_ts = now;
@ -946,6 +946,11 @@ vdev_queue_io_done(zio_t *zio)
while ((nio = vdev_queue_io_to_issue(vq)) != NULL) { while ((nio = vdev_queue_io_to_issue(vq)) != NULL) {
mutex_exit(&vq->vq_lock); mutex_exit(&vq->vq_lock);
if (nio->io_done == vdev_queue_agg_io_done) { if (nio->io_done == vdev_queue_agg_io_done) {
while ((dio = zio_walk_parents(nio, &zl)) != NULL) {
ASSERT3U(dio->io_type, ==, nio->io_type);
zio_vdev_io_bypass(dio);
zio_execute(dio);
}
zio_nowait(nio); zio_nowait(nio);
} else { } else {
zio_vdev_io_reissue(nio); zio_vdev_io_reissue(nio);