From bf8b4a9fd5494a9e7bd54856687fd05f81c397ba Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Tue, 9 Aug 2016 11:22:30 -0700 Subject: [PATCH] Linux 4.8 compat: Fix removal of bio->bi_rw member All users of bio->bi_rw have been replaced with compatibility wrappers. This allows the kernel specific logic to be abstracted away, and for each of the supported cases to be documented with the wrapper. The updated interfaces are as follows: * void blk_queue_set_write_cache(struct request_queue *, bool, bool) * boolean_t bio_is_flush(struct bio *) * boolean_t bio_is_fua(struct bio *) * boolean_t bio_is_discard(struct bio *) * boolean_t bio_is_secure_erase(struct bio *) * VDEV_WRITE_FLUSH_FUA Signed-off-by: Brian Behlendorf Signed-off-by: Chunwei Chen Closes #4951 --- config/kernel-bio-op.m4 | 67 +++++++++++++ config/kernel.m4 | 4 + include/linux/blkdev_compat.h | 175 ++++++++++++++++++++++------------ module/zfs/zvol.c | 31 ++---- 4 files changed, 195 insertions(+), 82 deletions(-) create mode 100644 config/kernel-bio-op.m4 diff --git a/config/kernel-bio-op.m4 b/config/kernel-bio-op.m4 new file mode 100644 index 0000000000..b4b699517a --- /dev/null +++ b/config/kernel-bio-op.m4 @@ -0,0 +1,67 @@ +dnl # +dnl # Linux 4.8 API, +dnl # +dnl # The bio_op() helper was introduced as a replacement for explicitly +dnl # checking the bio->bi_rw flags. The following checks are used to +dnl # detect if a specific operation is supported. 
+dnl # +AC_DEFUN([ZFS_AC_KERNEL_REQ_OP_DISCARD], [ + AC_MSG_CHECKING([whether REQ_OP_DISCARD is defined]) + ZFS_LINUX_TRY_COMPILE([ + #include <linux/blk_types.h> + ],[ + enum req_op op __attribute__ ((unused)) = REQ_OP_DISCARD; + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_REQ_OP_DISCARD, 1, + [REQ_OP_DISCARD is defined]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_REQ_OP_SECURE_ERASE], [ + AC_MSG_CHECKING([whether REQ_OP_SECURE_ERASE is defined]) + ZFS_LINUX_TRY_COMPILE([ + #include <linux/blk_types.h> + ],[ + enum req_op op __attribute__ ((unused)) = REQ_OP_SECURE_ERASE; + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_REQ_OP_SECURE_ERASE, 1, + [REQ_OP_SECURE_ERASE is defined]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + + +AC_DEFUN([ZFS_AC_KERNEL_REQ_OP_FLUSH], [ + AC_MSG_CHECKING([whether REQ_OP_FLUSH is defined]) + ZFS_LINUX_TRY_COMPILE([ + #include <linux/blk_types.h> + ],[ + enum req_op op __attribute__ ((unused)) = REQ_OP_FLUSH; + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_REQ_OP_FLUSH, 1, + [REQ_OP_FLUSH is defined]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BIO_BI_OPF], [ + AC_MSG_CHECKING([whether bio->bi_opf is defined]) + ZFS_LINUX_TRY_COMPILE([ + #include <linux/bio.h> + ],[ + struct bio bio __attribute__ ((unused)); + bio.bi_opf = 0; + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BIO_BI_OPF, 1, [bio->bi_opf is defined]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 71d6df0c20..53720eeef5 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -23,6 +23,10 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL_BIO_BVEC_ITER ZFS_AC_KERNEL_BIO_FAILFAST_DTD ZFS_AC_KERNEL_REQ_FAILFAST_MASK + ZFS_AC_KERNEL_REQ_OP_DISCARD + ZFS_AC_KERNEL_REQ_OP_SECURE_ERASE + ZFS_AC_KERNEL_REQ_OP_FLUSH + ZFS_AC_KERNEL_BIO_BI_OPF ZFS_AC_KERNEL_BIO_END_IO_T_ARGS ZFS_AC_KERNEL_BIO_RW_BARRIER ZFS_AC_KERNEL_BIO_RW_DISCARD diff --git a/include/linux/blkdev_compat.h b/include/linux/blkdev_compat.h index ec301ee9b5..d26d242954 100644 --- a/include/linux/blkdev_compat.h +++ 
b/include/linux/blkdev_compat.h @@ -37,35 +37,24 @@ typedef unsigned __bitwise__ fmode_t; #endif /* HAVE_FMODE_T */ /* - * 2.6.36 API change, + * 4.7 - 4.x API, + * The blk_queue_write_cache() interface has replaced blk_queue_flush() + * interface. However, the new interface is GPL-only thus we implement + * our own trivial wrapper when the GPL-only version is detected. + * + * 2.6.36 - 4.6 API, * The blk_queue_flush() interface has replaced blk_queue_ordered() * interface. However, while the old interface was available to all the * new one is GPL-only. Thus if the GPL-only version is detected we - * implement our own trivial helper compatibility funcion. The hope is - * that long term this function will be opened up. + * implement our own trivial helper. + * + * 2.6.x - 2.6.35 + * Legacy blk_queue_ordered() interface. */ -#if defined(HAVE_BLK_QUEUE_FLUSH) && defined(HAVE_BLK_QUEUE_FLUSH_GPL_ONLY) -#define blk_queue_flush __blk_queue_flush static inline void -__blk_queue_flush(struct request_queue *q, unsigned int flags) -{ - q->flush_flags = flags & (REQ_FLUSH | REQ_FUA); -} -#endif /* HAVE_BLK_QUEUE_FLUSH && HAVE_BLK_QUEUE_FLUSH_GPL_ONLY */ - -/* - * 4.7 API change, - * The blk_queue_write_cache() interface has replaced blk_queue_flush() - * interface. However, while the new interface is GPL-only. Thus if the - * GPL-only version is detected we implement our own trivial helper - * compatibility funcion. 
- */ -#if defined(HAVE_BLK_QUEUE_WRITE_CACHE) && \ - defined(HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY) -#define blk_queue_write_cache __blk_queue_write_cache -static inline void -__blk_queue_write_cache(struct request_queue *q, bool wc, bool fua) +blk_queue_set_write_cache(struct request_queue *q, bool wc, bool fua) { +#if defined(HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY) spin_lock_irq(q->queue_lock); if (wc) queue_flag_set(QUEUE_FLAG_WC, q); @@ -76,8 +65,19 @@ __blk_queue_write_cache(struct request_queue *q, bool wc, bool fua) else queue_flag_clear(QUEUE_FLAG_FUA, q); spin_unlock_irq(q->queue_lock); -} +#elif defined(HAVE_BLK_QUEUE_WRITE_CACHE) + blk_queue_write_cache(q, wc, fua); +#elif defined(HAVE_BLK_QUEUE_FLUSH_GPL_ONLY) + if (wc) + q->flush_flags |= REQ_FLUSH; + if (fua) + q->flush_flags |= REQ_FUA; +#elif defined(HAVE_BLK_QUEUE_FLUSH) + blk_queue_flush(q, (wc ? REQ_FLUSH : 0) | (fua ? REQ_FUA : 0)); +#else + blk_queue_ordered(q, QUEUE_ORDERED_DRAIN, NULL); #endif +} /* * Most of the blk_* macros were removed in 2.6.36. Ostensibly this was @@ -299,68 +299,121 @@ bio_set_flags_failfast(struct block_device *bdev, int *flags) * allow richer semantics to be expressed to the block layer. It is * the block layers responsibility to choose the correct way to * implement these semantics. - * - * The existence of these flags implies that REQ_FLUSH an REQ_FUA are - * defined. Thus we can safely define VDEV_REQ_FLUSH and VDEV_REQ_FUA - * compatibility macros. - * - * Linux 4.8 renamed the REQ_FLUSH to REQ_PREFLUSH but there was no - * functional change in behavior. 
*/ #ifdef WRITE_FLUSH_FUA - #define VDEV_WRITE_FLUSH_FUA WRITE_FLUSH_FUA -#ifdef REQ_PREFLUSH -#define VDEV_REQ_FLUSH REQ_PREFLUSH -#define VDEV_REQ_FUA REQ_FUA #else -#define VDEV_REQ_FLUSH REQ_FLUSH -#define VDEV_REQ_FUA REQ_FUA -#endif - -#else - #define VDEV_WRITE_FLUSH_FUA WRITE_BARRIER -#ifdef HAVE_BIO_RW_BARRIER -#define VDEV_REQ_FLUSH (1 << BIO_RW_BARRIER) -#define VDEV_REQ_FUA (1 << BIO_RW_BARRIER) -#else -#define VDEV_REQ_FLUSH REQ_HARDBARRIER -#define VDEV_REQ_FUA REQ_FUA -#endif - #endif /* - * 2.6.28 - 2.6.35 API, - * BIO_RW_DISCARD + * 4.8 - 4.x API, + * REQ_OP_FLUSH + * + * 4.8-rc0 - 4.8-rc1, + * REQ_PREFLUSH + * + * 2.6.36 - 4.7 API, + * REQ_FLUSH + * + * 2.6.x - 2.6.35 API, + * HAVE_BIO_RW_BARRIER + * + * Used to determine if a cache flush has been requested. This check has + * been left intentionally broad in order to cover both a legacy flush + * and the new preflush behavior introduced in Linux 4.8. This is correct + * in all cases but may have a performance impact for some kernels. It + * has the advantage of minimizing kernel specific changes in the zvol code. + */ +static inline boolean_t +bio_is_flush(struct bio *bio) +{ +#if defined(HAVE_REQ_OP_FLUSH) && defined(HAVE_BIO_BI_OPF) + return ((bio_op(bio) == REQ_OP_FLUSH) || (bio->bi_opf & REQ_PREFLUSH)); +#elif defined(REQ_PREFLUSH) && defined(HAVE_BIO_BI_OPF) + return (bio->bi_opf & REQ_PREFLUSH); +#elif defined(REQ_PREFLUSH) && !defined(HAVE_BIO_BI_OPF) + return (bio->bi_rw & REQ_PREFLUSH); +#elif defined(REQ_FLUSH) + return (bio->bi_rw & REQ_FLUSH); +#elif defined(HAVE_BIO_RW_BARRIER) + return (bio->bi_rw & (1 << BIO_RW_BARRIER)); +#else +#error "Allowing the build will cause flush requests to be ignored. 
Please " + "file an issue report at: https://github.com/zfsonlinux/zfs/issues/new" +#endif +} + +/* + * 4.8 - 4.x API, + * REQ_FUA flag moved to bio->bi_opf + * + * 2.6.x - 4.7 API, + * REQ_FUA + */ +static inline boolean_t +bio_is_fua(struct bio *bio) +{ +#if defined(HAVE_BIO_BI_OPF) + return (bio->bi_opf & REQ_FUA); +#elif defined(REQ_FUA) + return (bio->bi_rw & REQ_FUA); +#else +#error "Allowing the build will cause fua requests to be ignored. Please " + "file an issue report at: https://github.com/zfsonlinux/zfs/issues/new" +#endif +} + +/* + * 4.8 - 4.x API, + * REQ_OP_DISCARD * * 2.6.36 - 4.7 API, * REQ_DISCARD * - * 4.8 - 4.x API, - * REQ_OP_DISCARD + * 2.6.28 - 2.6.35 API, + * BIO_RW_DISCARD * * In all cases the normal I/O path is used for discards. The only * difference is how the kernel tags individual I/Os as discards. */ -#ifdef QUEUE_FLAG_DISCARD static inline boolean_t bio_is_discard(struct bio *bio) { -#if defined(HAVE_BIO_RW_DISCARD) - return (bio->bi_rw & (1 << BIO_RW_DISCARD)); +#if defined(HAVE_REQ_OP_DISCARD) + return (bio_op(bio) == REQ_OP_DISCARD); #elif defined(REQ_DISCARD) return (bio->bi_rw & REQ_DISCARD); -#else - return (bio_op(bio) == REQ_OP_DISCARD); -#endif -} +#elif defined(HAVE_BIO_RW_DISCARD) + return (bio->bi_rw & (1 << BIO_RW_DISCARD)); #else #error "Allowing the build will cause discard requests to become writes " - "potentially triggering the DMU_MAX_ACCESS assertion. Please file a " + "potentially triggering the DMU_MAX_ACCESS assertion. 
Please file " "an issue report at: https://github.com/zfsonlinux/zfs/issues/new" #endif +} + +/* + * 4.8 - 4.x API, + * REQ_OP_SECURE_ERASE + * + * 2.6.36 - 4.7 API, + * REQ_SECURE + * + * 2.6.x - 2.6.35 API, + * Unsupported by kernel + */ +static inline boolean_t +bio_is_secure_erase(struct bio *bio) +{ +#if defined(HAVE_REQ_OP_SECURE_ERASE) + return (bio_op(bio) == REQ_OP_SECURE_ERASE); +#elif defined(REQ_SECURE) + return (bio->bi_rw & REQ_SECURE); +#else + return (0); +#endif +} /* * 2.6.33 API change diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 3c2a196d62..04f68b5f8b 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -623,7 +623,7 @@ zvol_write(struct bio *bio) ASSERT(zv && zv->zv_open_count > 0); - if (bio->bi_rw & VDEV_REQ_FLUSH) + if (bio_is_flush(bio)) zil_commit(zv->zv_zilog, ZVOL_OBJ); /* @@ -648,12 +648,12 @@ zvol_write(struct bio *bio) error = dmu_write_bio(zv->zv_objset, ZVOL_OBJ, bio, tx); if (error == 0) zvol_log_write(zv, tx, offset, size, - !!(bio->bi_rw & VDEV_REQ_FUA)); + !!(bio_is_fua(bio))); dmu_tx_commit(tx); zfs_range_unlock(rl); - if ((bio->bi_rw & VDEV_REQ_FUA) || + if ((bio_is_fua(bio)) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zv->zv_zilog, ZVOL_OBJ); @@ -677,20 +677,16 @@ zvol_discard(struct bio *bio) return (SET_ERROR(EIO)); /* - * Align the request to volume block boundaries when REQ_SECURE is - * available, but not requested. If we don't, then this will force - * dnode_free_range() to zero out the unaligned parts, which is slow - * (read-modify-write) and useless since we are not freeing any space - * by doing so. Kernels that do not support REQ_SECURE (2.6.32 through - * 2.6.35) will not receive this optimization. + * Align the request to volume block boundaries when a secure erase is + * not required. This will prevent dnode_free_range() from zeroing out + * the unaligned parts which is slow (read-modify-write) and useless + * since we are not freeing any space by doing so. 
*/ -#ifdef REQ_SECURE - if (!(bio->bi_rw & REQ_SECURE)) { + if (!bio_is_secure_erase(bio)) { start = P2ROUNDUP(start, zv->zv_volblocksize); end = P2ALIGN(end, zv->zv_volblocksize); size = end - start; } -#endif if (start >= end) return (0); @@ -766,7 +762,7 @@ zvol_request(struct request_queue *q, struct bio *bio) goto out2; } - if (bio_is_discard(bio)) { + if (bio_is_discard(bio) || bio_is_secure_erase(bio)) { error = zvol_discard(bio); goto out2; } @@ -1234,14 +1230,7 @@ zvol_alloc(dev_t dev, const char *name) goto out_kmem; blk_queue_make_request(zv->zv_queue, zvol_request); - -#ifdef HAVE_BLK_QUEUE_WRITE_CACHE - blk_queue_write_cache(zv->zv_queue, B_TRUE, B_TRUE); -#elif defined(HAVE_BLK_QUEUE_FLUSH) - blk_queue_flush(zv->zv_queue, VDEV_REQ_FLUSH | VDEV_REQ_FUA); -#else - blk_queue_ordered(zv->zv_queue, QUEUE_ORDERED_DRAIN, NULL); -#endif /* HAVE_BLK_QUEUE_FLUSH */ + blk_queue_set_write_cache(zv->zv_queue, B_TRUE, B_TRUE); zv->zv_disk = alloc_disk(ZVOL_MINORS); if (zv->zv_disk == NULL)