Fix synchronous behavior in __vdev_disk_physio()
Commit b39c22b
set the READ_SYNC and WRITE_SYNC flags for a bio
based on the ZIO_PRIORITY_* flag passed in. This had the unnoticed
side-effect of making vdev_disk_io_start() synchronous for
certain I/Os.
This in turn resulted in vdev_disk_io_start() being able to
re-dispatch zio's which would result in RCU stalls when a disk
was removed from the system. Additionally, this could negatively
impact performance and explains the performance regressions reported
in both #3829 and #3780.
This patch resolves the issue by making the blocking behavior
dependent on a 'wait' flag being passed rather than overloading
the passed bio flags.
Finally, the WRITE_SYNC and READ_SYNC behavior is restricted to
non-rotational devices, where there is no benefit to queuing to
aggregate the I/O.
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #3652
Issue #3780
Issue #3785
Issue #3817
Issue #3821
Issue #3829
Issue #3832
Issue #3870
This commit is contained in:
parent
ef5b2e1048
commit
5592404784
|
@ -1,50 +0,0 @@
|
||||||
dnl #
|
|
||||||
dnl # Preferred interface for flagging a synchronous bio:
|
|
||||||
dnl # 2.6.12-2.6.29: BIO_RW_SYNC
|
|
||||||
dnl # 2.6.30-2.6.35: BIO_RW_SYNCIO
|
|
||||||
dnl # 2.6.36-2.6.xx: REQ_SYNC
|
|
||||||
dnl #
|
|
||||||
AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_SYNC], [
|
|
||||||
AC_MSG_CHECKING([whether BIO_RW_SYNC is defined])
|
|
||||||
ZFS_LINUX_TRY_COMPILE([
|
|
||||||
#include <linux/bio.h>
|
|
||||||
],[
|
|
||||||
int flags __attribute__ ((unused));
|
|
||||||
flags = BIO_RW_SYNC;
|
|
||||||
],[
|
|
||||||
AC_MSG_RESULT(yes)
|
|
||||||
AC_DEFINE(HAVE_BIO_RW_SYNC, 1, [BIO_RW_SYNC is defined])
|
|
||||||
],[
|
|
||||||
AC_MSG_RESULT(no)
|
|
||||||
])
|
|
||||||
])
|
|
||||||
|
|
||||||
AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_SYNCIO], [
|
|
||||||
AC_MSG_CHECKING([whether BIO_RW_SYNCIO is defined])
|
|
||||||
ZFS_LINUX_TRY_COMPILE([
|
|
||||||
#include <linux/bio.h>
|
|
||||||
],[
|
|
||||||
int flags __attribute__ ((unused));
|
|
||||||
flags = BIO_RW_SYNCIO;
|
|
||||||
],[
|
|
||||||
AC_MSG_RESULT(yes)
|
|
||||||
AC_DEFINE(HAVE_BIO_RW_SYNCIO, 1, [BIO_RW_SYNCIO is defined])
|
|
||||||
],[
|
|
||||||
AC_MSG_RESULT(no)
|
|
||||||
])
|
|
||||||
])
|
|
||||||
|
|
||||||
AC_DEFUN([ZFS_AC_KERNEL_REQ_SYNC], [
|
|
||||||
AC_MSG_CHECKING([whether REQ_SYNC is defined])
|
|
||||||
ZFS_LINUX_TRY_COMPILE([
|
|
||||||
#include <linux/bio.h>
|
|
||||||
],[
|
|
||||||
int flags __attribute__ ((unused));
|
|
||||||
flags = REQ_SYNC;
|
|
||||||
],[
|
|
||||||
AC_MSG_RESULT(yes)
|
|
||||||
AC_DEFINE(HAVE_REQ_SYNC, 1, [REQ_SYNC is defined])
|
|
||||||
],[
|
|
||||||
AC_MSG_RESULT(no)
|
|
||||||
])
|
|
||||||
])
|
|
|
@ -25,9 +25,6 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
|
||||||
ZFS_AC_KERNEL_BIO_END_IO_T_ARGS
|
ZFS_AC_KERNEL_BIO_END_IO_T_ARGS
|
||||||
ZFS_AC_KERNEL_BIO_RW_BARRIER
|
ZFS_AC_KERNEL_BIO_RW_BARRIER
|
||||||
ZFS_AC_KERNEL_BIO_RW_DISCARD
|
ZFS_AC_KERNEL_BIO_RW_DISCARD
|
||||||
ZFS_AC_KERNEL_BIO_RW_SYNC
|
|
||||||
ZFS_AC_KERNEL_BIO_RW_SYNCIO
|
|
||||||
ZFS_AC_KERNEL_REQ_SYNC
|
|
||||||
ZFS_AC_KERNEL_BLK_QUEUE_FLUSH
|
ZFS_AC_KERNEL_BLK_QUEUE_FLUSH
|
||||||
ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS
|
ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS
|
||||||
ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS
|
ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS
|
||||||
|
|
|
@ -369,27 +369,6 @@ vdev_disk_dio_free(dio_request_t *dr)
|
||||||
sizeof (struct bio *) * dr->dr_bio_count);
|
sizeof (struct bio *) * dr->dr_bio_count);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
|
||||||
vdev_disk_dio_is_sync(dio_request_t *dr)
|
|
||||||
{
|
|
||||||
#ifdef HAVE_BIO_RW_SYNC
|
|
||||||
/* BIO_RW_SYNC preferred interface from 2.6.12-2.6.29 */
|
|
||||||
return (dr->dr_rw & (1 << BIO_RW_SYNC));
|
|
||||||
#else
|
|
||||||
#ifdef HAVE_BIO_RW_SYNCIO
|
|
||||||
/* BIO_RW_SYNCIO preferred interface from 2.6.30-2.6.35 */
|
|
||||||
return (dr->dr_rw & (1 << BIO_RW_SYNCIO));
|
|
||||||
#else
|
|
||||||
#ifdef HAVE_REQ_SYNC
|
|
||||||
/* REQ_SYNC preferred interface from 2.6.36-2.6.xx */
|
|
||||||
return (dr->dr_rw & REQ_SYNC);
|
|
||||||
#else
|
|
||||||
#error "Unable to determine bio sync flag"
|
|
||||||
#endif /* HAVE_REQ_SYNC */
|
|
||||||
#endif /* HAVE_BIO_RW_SYNC */
|
|
||||||
#endif /* HAVE_BIO_RW_SYNCIO */
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
vdev_disk_dio_get(dio_request_t *dr)
|
vdev_disk_dio_get(dio_request_t *dr)
|
||||||
{
|
{
|
||||||
|
@ -444,7 +423,7 @@ BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, error)
|
||||||
rc = vdev_disk_dio_put(dr);
|
rc = vdev_disk_dio_put(dr);
|
||||||
|
|
||||||
/* Wake up synchronous waiter this is the last outstanding bio */
|
/* Wake up synchronous waiter this is the last outstanding bio */
|
||||||
if ((rc == 1) && vdev_disk_dio_is_sync(dr))
|
if (rc == 1)
|
||||||
complete(&dr->dr_comp);
|
complete(&dr->dr_comp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -512,7 +491,7 @@ vdev_submit_bio(int rw, struct bio *bio)
|
||||||
|
|
||||||
static int
|
static int
|
||||||
__vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr,
|
__vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr,
|
||||||
size_t kbuf_size, uint64_t kbuf_offset, int flags)
|
size_t kbuf_size, uint64_t kbuf_offset, int flags, int wait)
|
||||||
{
|
{
|
||||||
dio_request_t *dr;
|
dio_request_t *dr;
|
||||||
caddr_t bio_ptr;
|
caddr_t bio_ptr;
|
||||||
|
@ -603,7 +582,7 @@ retry:
|
||||||
* only synchronous consumer is vdev_disk_read_rootlabel() all other
|
* only synchronous consumer is vdev_disk_read_rootlabel() all other
|
||||||
* IO originating from vdev_disk_io_start() is asynchronous.
|
* IO originating from vdev_disk_io_start() is asynchronous.
|
||||||
*/
|
*/
|
||||||
if (vdev_disk_dio_is_sync(dr)) {
|
if (wait) {
|
||||||
wait_for_completion(&dr->dr_comp);
|
wait_for_completion(&dr->dr_comp);
|
||||||
error = dr->dr_error;
|
error = dr->dr_error;
|
||||||
ASSERT3S(atomic_read(&dr->dr_ref), ==, 1);
|
ASSERT3S(atomic_read(&dr->dr_ref), ==, 1);
|
||||||
|
@ -619,7 +598,7 @@ vdev_disk_physio(struct block_device *bdev, caddr_t kbuf,
|
||||||
size_t size, uint64_t offset, int flags)
|
size_t size, uint64_t offset, int flags)
|
||||||
{
|
{
|
||||||
bio_set_flags_failfast(bdev, &flags);
|
bio_set_flags_failfast(bdev, &flags);
|
||||||
return (__vdev_disk_physio(bdev, NULL, kbuf, size, offset, flags));
|
return (__vdev_disk_physio(bdev, NULL, kbuf, size, offset, flags, 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, rc)
|
BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, rc)
|
||||||
|
@ -671,6 +650,7 @@ vdev_disk_io_start(zio_t *zio)
|
||||||
{
|
{
|
||||||
vdev_t *v = zio->io_vd;
|
vdev_t *v = zio->io_vd;
|
||||||
vdev_disk_t *vd = v->vdev_tsd;
|
vdev_disk_t *vd = v->vdev_tsd;
|
||||||
|
zio_priority_t pri = zio->io_priority;
|
||||||
int flags, error;
|
int flags, error;
|
||||||
|
|
||||||
switch (zio->io_type) {
|
switch (zio->io_type) {
|
||||||
|
@ -710,14 +690,14 @@ vdev_disk_io_start(zio_t *zio)
|
||||||
zio_execute(zio);
|
zio_execute(zio);
|
||||||
return;
|
return;
|
||||||
case ZIO_TYPE_WRITE:
|
case ZIO_TYPE_WRITE:
|
||||||
if (zio->io_priority == ZIO_PRIORITY_SYNC_WRITE)
|
if ((pri == ZIO_PRIORITY_SYNC_WRITE) && (v->vdev_nonrot))
|
||||||
flags = WRITE_SYNC;
|
flags = WRITE_SYNC;
|
||||||
else
|
else
|
||||||
flags = WRITE;
|
flags = WRITE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ZIO_TYPE_READ:
|
case ZIO_TYPE_READ:
|
||||||
if (zio->io_priority == ZIO_PRIORITY_SYNC_READ)
|
if ((pri == ZIO_PRIORITY_SYNC_READ) && (v->vdev_nonrot))
|
||||||
flags = READ_SYNC;
|
flags = READ_SYNC;
|
||||||
else
|
else
|
||||||
flags = READ;
|
flags = READ;
|
||||||
|
@ -730,7 +710,7 @@ vdev_disk_io_start(zio_t *zio)
|
||||||
}
|
}
|
||||||
|
|
||||||
error = __vdev_disk_physio(vd->vd_bdev, zio, zio->io_data,
|
error = __vdev_disk_physio(vd->vd_bdev, zio, zio->io_data,
|
||||||
zio->io_size, zio->io_offset, flags);
|
zio->io_size, zio->io_offset, flags, 0);
|
||||||
if (error) {
|
if (error) {
|
||||||
zio->io_error = error;
|
zio->io_error = error;
|
||||||
zio_interrupt(zio);
|
zio_interrupt(zio);
|
||||||
|
|
Loading…
Reference in New Issue