From 08fd5ccc38c3b4575da91fc8b6ac350f444b5735 Mon Sep 17 00:00:00 2001 From: Umer Saleem Date: Sat, 3 Feb 2024 00:51:51 +0500 Subject: [PATCH] Improve performance for zpool trim on linux On Linux, ZFS uses blkdev_issue_discard in vdev_disk_io_trim to issue trim command which is synchronous. This commit updates vdev_disk_io_trim to use __blkdev_issue_discard, which is asynchronous. Unfortunately there isn't any asynchronous version for blkdev_issue_secure_erase, so performance of secure trim will still suffer. Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Umer Saleem Closes #15843 --- config/kernel-blkdev.m4 | 34 ++++++++++++--- module/os/linux/zfs/vdev_disk.c | 74 ++++++++++++++++++++++++++------- 2 files changed, 88 insertions(+), 20 deletions(-) diff --git a/config/kernel-blkdev.m4 b/config/kernel-blkdev.m4 index 8e9e638b12..c5a353ca92 100644 --- a/config/kernel-blkdev.m4 +++ b/config/kernel-blkdev.m4 @@ -524,6 +524,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BDEVNAME], [ dnl # dnl # 5.19 API: blkdev_issue_secure_erase() +dnl # 4.7 API: __blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE) dnl # 3.10 API: blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE) dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE], [ @@ -539,6 +540,20 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE], [ sector, nr_sects, GFP_KERNEL); ]) + ZFS_LINUX_TEST_SRC([blkdev_issue_discard_async_flags], [ + #include + ],[ + struct block_device *bdev = NULL; + sector_t sector = 0; + sector_t nr_sects = 0; + unsigned long flags = 0; + struct bio *biop = NULL; + int error __attribute__ ((unused)); + + error = __blkdev_issue_discard(bdev, + sector, nr_sects, GFP_KERNEL, flags, &biop); + ]) + ZFS_LINUX_TEST_SRC([blkdev_issue_discard_flags], [ #include ],[ @@ -562,13 +577,22 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE], [ ],[ AC_MSG_RESULT(no) - AC_MSG_CHECKING([whether blkdev_issue_discard() is available]) - ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_flags], [ + AC_MSG_CHECKING([whether __blkdev_issue_discard() is available]) + ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_async_flags], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD, 1, - [blkdev_issue_discard() is available]) + AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC, 1, + [__blkdev_issue_discard() is available]) ],[ - ZFS_LINUX_TEST_ERROR([blkdev_issue_discard()]) + AC_MSG_RESULT(no) + + AC_MSG_CHECKING([whether blkdev_issue_discard() is available]) + ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_flags], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD, 1, + [blkdev_issue_discard() is available]) + ],[ + ZFS_LINUX_TEST_ERROR([blkdev_issue_discard()]) + ]) ]) ]) ]) diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c index e7f0aa5738..b0bda5fa20 100644 --- a/module/os/linux/zfs/vdev_disk.c +++ b/module/os/linux/zfs/vdev_disk.c @@ -862,27 +862,66 @@ vdev_disk_io_flush(struct block_device *bdev, zio_t *zio) return (0); } +#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE) || \ + defined(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC) +BIO_END_IO_PROTO(vdev_disk_discard_end_io, bio, error) +{ + zio_t *zio = bio->bi_private; +#ifdef HAVE_1ARG_BIO_END_IO_T + zio->io_error = BIO_END_IO_ERROR(bio); +#else + zio->io_error = -error; +#endif + bio_put(bio); + if (zio->io_error) + vdev_disk_error(zio); + zio_interrupt(zio); +} + +static int +vdev_issue_discard_trim(zio_t *zio, unsigned long flags) +{ + int ret; + struct bio *bio = NULL; + +#if defined(BLKDEV_DISCARD_SECURE) + ret = - __blkdev_issue_discard( + BDH_BDEV(((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh), + zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, flags, &bio); +#else + (void) flags; + ret = - __blkdev_issue_discard( + BDH_BDEV(((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh), + zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, &bio); +#endif + if (!ret && bio) { + bio->bi_private = zio; + bio->bi_end_io = vdev_disk_discard_end_io; + vdev_submit_bio(bio); + } + return (ret); +} +#endif + static int vdev_disk_io_trim(zio_t *zio) { - vdev_t *v = zio->io_vd; - vdev_disk_t *vd = v->vdev_tsd; - -#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE) - if (zio->io_trim_flags & ZIO_TRIM_SECURE) { - return (-blkdev_issue_secure_erase(BDH_BDEV(vd->vd_bdh), - zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS)); - } else { - return (-blkdev_issue_discard(BDH_BDEV(vd->vd_bdh), - zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS)); - } -#elif defined(HAVE_BLKDEV_ISSUE_DISCARD) unsigned long trim_flags = 0; -#if defined(BLKDEV_DISCARD_SECURE) - if (zio->io_trim_flags & ZIO_TRIM_SECURE) + if (zio->io_trim_flags & ZIO_TRIM_SECURE) { +#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE) + return (-blkdev_issue_secure_erase( + BDH_BDEV(((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh), + zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS)); +#elif defined(BLKDEV_DISCARD_SECURE) trim_flags |= BLKDEV_DISCARD_SECURE; #endif - return (-blkdev_issue_discard(BDH_BDEV(vd->vd_bdh), + } +#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE) || \ + defined(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC) + return (vdev_issue_discard_trim(zio, trim_flags)); +#elif defined(HAVE_BLKDEV_ISSUE_DISCARD) + return (-blkdev_issue_discard( + BDH_BDEV(((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh), zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, trim_flags)); #else #error "Unsupported kernel" @@ -968,7 +1007,12 @@ vdev_disk_io_start(zio_t *zio) case ZIO_TYPE_TRIM: zio->io_error = vdev_disk_io_trim(zio); rw_exit(&vd->vd_lock); +#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE) + if (zio->io_trim_flags & ZIO_TRIM_SECURE) + zio_interrupt(zio); +#elif defined(HAVE_BLKDEV_ISSUE_DISCARD) zio_interrupt(zio); +#endif return; default: