diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index 54bdff611f..abd66b8abc 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -655,9 +655,9 @@ int param_set_min_auto_ashift(ZFS_MODULE_PARAM_ARGS); int param_set_max_auto_ashift(ZFS_MODULE_PARAM_ARGS); /* - * VDEV checksum verification precentage for Direct I/O writes + * VDEV checksum verification for Direct I/O writes */ -extern uint_t zfs_vdev_direct_write_verify_pct; +extern uint_t zfs_vdev_direct_write_verify; #ifdef __cplusplus } diff --git a/include/sys/zfs_vnops.h b/include/sys/zfs_vnops.h index 4fd9525138..e60b99bed1 100644 --- a/include/sys/zfs_vnops.h +++ b/include/sys/zfs_vnops.h @@ -46,9 +46,6 @@ extern int mappedread(znode_t *, int, zfs_uio_t *); extern int mappedread_sf(znode_t *, int, zfs_uio_t *); extern void update_pages(znode_t *, int64_t, int, objset_t *); -extern int zfs_check_direct_enabled(znode_t *, int, boolean_t *); -extern int zfs_setup_direct(znode_t *, zfs_uio_t *, zfs_uio_rw_t, int *); - /* * Platform code that asynchronously drops zp's inode / vnode_t. * diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index ab0ab5e716..5f3ad01a94 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -416,12 +416,10 @@ May be increased up to .Sy ASHIFT_MAX Po 16 Pc , but this may negatively impact pool space efficiency. . -.It Sy zfs_vdev_direct_write_verify_pct Ns = Ns Sy Linux 2 | FreeBSD 0 Pq uint +.It Sy zfs_vdev_direct_write_verify Ns = Ns Sy Linux 1 | FreeBSD 0 Pq uint If non-zero, then a Direct I/O write's checksum will be verified every -percentage (pct) of Direct I/O writes that are issued to a top-level VDEV -before it is committed and the block pointer is updated. -In the event the checksum is not valid then the I/O operation will be -redirected through the ARC. +time the write is issued and before it is committed to the block pointer. +In the event the checksum is not valid then the I/O operation will return EIO. 
This module parameter can be used to detect if the contents of the users buffer have changed in the process of doing a Direct I/O write. @@ -432,7 +430,7 @@ Each verify error causes a zevent. Direct Write I/O checkum verify errors can be seen with .Nm zpool Cm status Fl d . -The default value for this is 2 percent on Linux, but is 0 for +The default value for this is 1 on Linux, but is 0 for .Fx because user pages can be placed under write protection in .Fx diff --git a/man/man8/zpool-events.8 b/man/man8/zpool-events.8 index 77d44bd8ad..234612baea 100644 --- a/man/man8/zpool-events.8 +++ b/man/man8/zpool-events.8 @@ -100,14 +100,14 @@ The number of delay events is ratelimited by the module parameter. .It Sy dio_verify Issued when there was a checksum verify error after a Direct I/O write has been -issued and is redirected through the ARC. +issued. This event can only take place if the module parameter -.Sy zfs_vdev_direct_write_verify_pct +.Sy zfs_vdev_direct_write_verify is not set to zero. See .Xr zfs 4 for more details on the -.Sy zfs_vdev_direct_write_verify_pct +.Sy zfs_vdev_direct_write_verify module paramter. .It Sy config Issued every time a vdev change have been done to the pool. diff --git a/man/man8/zpool-status.8 b/man/man8/zpool-status.8 index 923b99de30..868fc4414d 100644 --- a/man/man8/zpool-status.8 +++ b/man/man8/zpool-status.8 @@ -85,7 +85,7 @@ to set pool GUID as key for pool objects instead of pool names. Display the number of Direct I/O write checksum verify errors that have occured on a top-level VDEV. 
See -.Sx zfs_vdev_direct_write_verify_pct +.Sx zfs_vdev_direct_write_verify in .Xr zfs 4 for details about the conditions that can cause Direct I/O write checksum diff --git a/module/os/freebsd/zfs/zfs_vnops_os.c b/module/os/freebsd/zfs/zfs_vnops_os.c index 91986cd433..5dbca10a3e 100644 --- a/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/module/os/freebsd/zfs/zfs_vnops_os.c @@ -4274,29 +4274,6 @@ ioflags(int ioflags) return (flags); } -static int -zfs_freebsd_read_direct(znode_t *zp, zfs_uio_t *uio, zfs_uio_rw_t rw, - int ioflag, cred_t *cr) -{ - int ret; - int flags = ioflag; - - ASSERT3U(rw, ==, UIO_READ); - - /* On error, return to fallback to the buffred path */ - ret = zfs_setup_direct(zp, uio, rw, &flags); - if (ret) - return (ret); - - ASSERT(uio->uio_extflg & UIO_DIRECT); - - ret = zfs_read(zp, uio, flags, cr); - - zfs_uio_free_dio_pages(uio, rw); - - return (ret); -} - #ifndef _SYS_SYSPROTO_H_ struct vop_read_args { struct vnode *a_vp; @@ -4311,85 +4288,37 @@ zfs_freebsd_read(struct vop_read_args *ap) { zfs_uio_t uio; int error = 0; - znode_t *zp = VTOZ(ap->a_vp); - int ioflag = ioflags(ap->a_ioflag); - boolean_t is_direct; - zfs_uio_init(&uio, ap->a_uio); - - error = zfs_check_direct_enabled(zp, ioflag, &is_direct); - - if (error) { - return (error); - } else if (is_direct) { - error = - zfs_freebsd_read_direct(zp, &uio, UIO_READ, ioflag, - ap->a_cred); - /* - * XXX We occasionally get an EFAULT for Direct I/O reads on - * FreeBSD 13. This still needs to be resolved. The EFAULT comes - * from: - * zfs_uio_get__dio_pages_alloc() -> - * zfs_uio_get_dio_pages_impl() -> - * zfs_uio_iov_step() -> - * zfs_uio_get_user_pages(). - * We return EFAULT from zfs_uio_iov_step(). When a Direct I/O - * read fails to map in the user pages (returning EFAULT) the - * Direct I/O request is broken up into two separate IO requests - * and issued separately using Direct I/O. 
- */ + error = zfs_read(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag), + ap->a_cred); + /* + * XXX We occasionally get an EFAULT for Direct I/O reads on + * FreeBSD 13. This still needs to be resolved. The EFAULT comes + * from: + * zfs_uio_get__dio_pages_alloc() -> + * zfs_uio_get_dio_pages_impl() -> + * zfs_uio_iov_step() -> + * zfs_uio_get_user_pages(). + * We return EFAULT from zfs_uio_iov_step(). When a Direct I/O + * read fails to map in the user pages (returning EFAULT) the + * Direct I/O request is broken up into two separate IO requests + * and issued separately using Direct I/O. + */ #ifdef ZFS_DEBUG - if (error == EFAULT) { + if (error == EFAULT && uio.uio_extflg & UIO_DIRECT) { #if 0 - printf("%s(%d): Direct I/O read returning EFAULT " - "uio = %p, zfs_uio_offset(uio) = %lu " - "zfs_uio_resid(uio) = %lu\n", - __FUNCTION__, __LINE__, &uio, zfs_uio_offset(&uio), - zfs_uio_resid(&uio)); + printf("%s(%d): Direct I/O read returning EFAULT " + "uio = %p, zfs_uio_offset(uio) = %lu " + "zfs_uio_resid(uio) = %lu\n", + __FUNCTION__, __LINE__, &uio, zfs_uio_offset(&uio), + zfs_uio_resid(&uio)); #endif - } - -#endif - - /* - * On error we will return unless the error is EAGAIN, which - * just tells us to fallback to the buffered path. 
- */ - if (error != EAGAIN) - return (error); - else - ioflag &= ~O_DIRECT; } - - error = zfs_read(zp, &uio, ioflag, ap->a_cred); - +#endif return (error); } -static int -zfs_freebsd_write_direct(znode_t *zp, zfs_uio_t *uio, zfs_uio_rw_t rw, - int ioflag, cred_t *cr) -{ - int ret; - int flags = ioflag; - - ASSERT3U(rw, ==, UIO_WRITE); - - /* On error, return to fallback to the buffred path */ - ret = zfs_setup_direct(zp, uio, rw, &flags); - if (ret) - return (ret); - - ASSERT(uio->uio_extflg & UIO_DIRECT); - - ret = zfs_write(zp, uio, flags, cr); - - zfs_uio_free_dio_pages(uio, rw); - - return (ret); -} - #ifndef _SYS_SYSPROTO_H_ struct vop_write_args { struct vnode *a_vp; @@ -4403,36 +4332,9 @@ static int zfs_freebsd_write(struct vop_write_args *ap) { zfs_uio_t uio; - int error = 0; - znode_t *zp = VTOZ(ap->a_vp); - int ioflag = ioflags(ap->a_ioflag); - boolean_t is_direct; - zfs_uio_init(&uio, ap->a_uio); - - error = zfs_check_direct_enabled(zp, ioflag, &is_direct); - - if (error) { - return (error); - } else if (is_direct) { - error = - zfs_freebsd_write_direct(zp, &uio, UIO_WRITE, ioflag, - ap->a_cred); - - /* - * On error we will return unless the error is EAGAIN, which - * just tells us to fallback to the buffered path. 
- */ - if (error != EAGAIN) - return (error); - else - ioflag &= ~O_DIRECT; - - } - - error = zfs_write(zp, &uio, ioflag, ap->a_cred); - - return (error); + return (zfs_write(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag), + ap->a_cred)); } /* diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c index 8781e71847..62f772afef 100644 --- a/module/os/linux/zfs/zpl_file.c +++ b/module/os/linux/zfs/zpl_file.c @@ -309,7 +309,7 @@ zpl_uio_init(zfs_uio_t *uio, struct kiocb *kiocb, struct iov_iter *to, } static ssize_t -zpl_iter_read_buffered(struct kiocb *kiocb, struct iov_iter *to) +zpl_iter_read(struct kiocb *kiocb, struct iov_iter *to) { cred_t *cr = CRED(); fstrans_cookie_t cookie; @@ -322,15 +322,14 @@ zpl_iter_read_buffered(struct kiocb *kiocb, struct iov_iter *to) crhold(cr); cookie = spl_fstrans_mark(); - int flags = (filp->f_flags | zfs_io_flags(kiocb)) & ~O_DIRECT; - int error = -zfs_read(ITOZ(filp->f_mapping->host), &uio, - flags, cr); + ssize_t ret = -zfs_read(ITOZ(filp->f_mapping->host), &uio, + filp->f_flags | zfs_io_flags(kiocb), cr); spl_fstrans_unmark(cookie); crfree(cr); - if (error < 0) - return (error); + if (ret < 0) + return (ret); ssize_t read = count - uio.uio_resid; kiocb->ki_pos += read; @@ -340,71 +339,6 @@ zpl_iter_read_buffered(struct kiocb *kiocb, struct iov_iter *to) return (read); } -static ssize_t -zpl_iter_read_direct(struct kiocb *kiocb, struct iov_iter *to) -{ - cred_t *cr = CRED(); - struct file *filp = kiocb->ki_filp; - struct inode *ip = filp->f_mapping->host; - ssize_t count = iov_iter_count(to); - int flags = filp->f_flags | zfs_io_flags(kiocb); - zfs_uio_t uio; - ssize_t ret; - - zpl_uio_init(&uio, kiocb, to, kiocb->ki_pos, count, 0); - - /* On error, return to fallback to the buffered path. 
*/ - ret = zfs_setup_direct(ITOZ(ip), &uio, UIO_READ, &flags); - if (ret) - return (-ret); - - ASSERT(uio.uio_extflg & UIO_DIRECT); - - crhold(cr); - fstrans_cookie_t cookie = spl_fstrans_mark(); - - int error = -zfs_read(ITOZ(ip), &uio, flags, cr); - - spl_fstrans_unmark(cookie); - crfree(cr); - - zfs_uio_free_dio_pages(&uio, UIO_READ); - - if (error < 0) - return (error); - - ssize_t read = count - uio.uio_resid; - kiocb->ki_pos += read; - - zpl_file_accessed(filp); - - return (read); -} - -static ssize_t -zpl_iter_read(struct kiocb *kiocb, struct iov_iter *to) -{ - struct inode *ip = kiocb->ki_filp->f_mapping->host; - struct file *filp = kiocb->ki_filp; - int flags = filp->f_flags | zfs_io_flags(kiocb); - boolean_t is_direct; - - int error = zfs_check_direct_enabled(ITOZ(ip), flags, &is_direct); - - if (error) { - return (-error); - } else if (is_direct) { - ssize_t read = zpl_iter_read_direct(kiocb, to); - - if (read >= 0 || read != -EAGAIN) - return (read); - - /* Otherwise fallback to buffered read */ - } - - return (zpl_iter_read_buffered(kiocb, to)); -} - static inline ssize_t zpl_generic_write_checks(struct kiocb *kiocb, struct iov_iter *from, size_t *countp) @@ -430,249 +364,57 @@ zpl_generic_write_checks(struct kiocb *kiocb, struct iov_iter *from, return (0); } -static ssize_t -zpl_iter_write_buffered(struct kiocb *kiocb, struct iov_iter *from) -{ - cred_t *cr = CRED(); - struct file *filp = kiocb->ki_filp; - struct inode *ip = filp->f_mapping->host; - size_t wrote; - size_t count = iov_iter_count(from); - - zfs_uio_t uio; - zpl_uio_init(&uio, kiocb, from, kiocb->ki_pos, count, from->iov_offset); - - crhold(cr); - fstrans_cookie_t cookie = spl_fstrans_mark(); - - int flags = (filp->f_flags | zfs_io_flags(kiocb)) & ~O_DIRECT; - int error = -zfs_write(ITOZ(ip), &uio, flags, cr); - - spl_fstrans_unmark(cookie); - crfree(cr); - - if (error < 0) - return (error); - - wrote = count - uio.uio_resid; - kiocb->ki_pos += wrote; - - if (wrote > 0) - 
iov_iter_advance(from, wrote); - - return (wrote); -} - -static ssize_t -zpl_iter_write_direct(struct kiocb *kiocb, struct iov_iter *from) -{ - cred_t *cr = CRED(); - struct file *filp = kiocb->ki_filp; - struct inode *ip = filp->f_mapping->host; - size_t wrote; - int flags = filp->f_flags | zfs_io_flags(kiocb); - size_t count = iov_iter_count(from); - - zfs_uio_t uio; - zpl_uio_init(&uio, kiocb, from, kiocb->ki_pos, count, from->iov_offset); - - /* On error, return to fallback to the buffered path. */ - ssize_t ret = zfs_setup_direct(ITOZ(ip), &uio, UIO_WRITE, &flags); - if (ret) - return (-ret); - - ASSERT(uio.uio_extflg & UIO_DIRECT); - - crhold(cr); - fstrans_cookie_t cookie = spl_fstrans_mark(); - - int error = -zfs_write(ITOZ(ip), &uio, flags, cr); - - spl_fstrans_unmark(cookie); - crfree(cr); - - zfs_uio_free_dio_pages(&uio, UIO_WRITE); - - if (error < 0) - return (error); - - wrote = count - uio.uio_resid; - kiocb->ki_pos += wrote; - - return (wrote); -} - static ssize_t zpl_iter_write(struct kiocb *kiocb, struct iov_iter *from) { - struct inode *ip = kiocb->ki_filp->f_mapping->host; + cred_t *cr = CRED(); + fstrans_cookie_t cookie; struct file *filp = kiocb->ki_filp; - int flags = filp->f_flags | zfs_io_flags(kiocb); + struct inode *ip = filp->f_mapping->host; + zfs_uio_t uio; size_t count = 0; - boolean_t is_direct; + ssize_t ret; - ssize_t ret = zpl_generic_write_checks(kiocb, from, &count); + ret = zpl_generic_write_checks(kiocb, from, &count); if (ret) return (ret); - loff_t offset = kiocb->ki_pos; + zpl_uio_init(&uio, kiocb, from, kiocb->ki_pos, count, from->iov_offset); - ret = zfs_check_direct_enabled(ITOZ(ip), flags, &is_direct); + crhold(cr); + cookie = spl_fstrans_mark(); - if (ret) { - return (-ret); - } else if (is_direct) { - ssize_t wrote = zpl_iter_write_direct(kiocb, from); + ret = -zfs_write(ITOZ(ip), &uio, + filp->f_flags | zfs_io_flags(kiocb), cr); - if (wrote >= 0 || wrote != -EAGAIN) { - return (wrote); - } + 
spl_fstrans_unmark(cookie); + crfree(cr); - /* - * If we are falling back to a buffered write, then the - * file position should not be updated at this point. - */ - ASSERT3U(offset, ==, kiocb->ki_pos); - } + if (ret < 0) + return (ret); - return (zpl_iter_write_buffered(kiocb, from)); + ssize_t wrote = count - uio.uio_resid; + kiocb->ki_pos += wrote; + + return (wrote); } #else /* !HAVE_VFS_RW_ITERATE */ -static ssize_t -zpl_aio_read_buffered(struct kiocb *kiocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - cred_t *cr = CRED(); - fstrans_cookie_t cookie; - struct file *filp = kiocb->ki_filp; - size_t count; - ssize_t ret; - - ret = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); - if (ret) - return (ret); - - zfs_uio_t uio; - zfs_uio_iovec_init(&uio, iov, nr_segs, kiocb->ki_pos, UIO_USERSPACE, - count, 0); - - crhold(cr); - cookie = spl_fstrans_mark(); - - int flags = (filp->f_flags | zfs_io_flags(kiocb)) & ~O_DIRECT; - int error = -zfs_read(ITOZ(filp->f_mapping->host), &uio, - flags, cr); - - spl_fstrans_unmark(cookie); - crfree(cr); - - if (error < 0) - return (error); - - ssize_t read = count - uio.uio_resid; - kiocb->ki_pos += read; - - zpl_file_accessed(filp); - - return (read); -} - -static ssize_t -zpl_aio_read_direct(struct kiocb *kiocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - cred_t *cr = CRED(); - fstrans_cookie_t cookie; - struct file *filp = kiocb->ki_filp; - struct inode *ip = filp->f_mapping->host; - int flags = filp->f_flags | zfs_io_flags(kiocb); - size_t count; - ssize_t ret; - - ret = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); - if (ret) - return (ret); - - zfs_uio_t uio; - zfs_uio_iovec_init(&uio, iov, nr_segs, kiocb->ki_pos, UIO_USERSPACE, - count, 0); - - /* On error, return to fallback to the buffered path */ - ret = zfs_setup_direct(ITOZ(ip), &uio, UIO_READ, &flags); - if (ret) - return (-ret); - - ASSERT(uio.uio_extflg & UIO_DIRECT); - - crhold(cr); - cookie = 
spl_fstrans_mark(); - - int error = -zfs_read(ITOZ(ip), &uio, flags, cr); - - spl_fstrans_unmark(cookie); - crfree(cr); - - zfs_uio_free_dio_pages(&uio, UIO_READ); - - if (error < 0) - return (error); - - ssize_t read = count - uio.uio_resid; - kiocb->ki_pos += read; - - zpl_file_accessed(filp); - - return (read); -} - static ssize_t zpl_aio_read(struct kiocb *kiocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - struct inode *ip = kiocb->ki_filp->f_mapping->host; + cred_t *cr = CRED(); + fstrans_cookie_t cookie; struct file *filp = kiocb->ki_filp; - int flags = filp->f_flags | zfs_io_flags(kiocb); size_t count; ssize_t ret; - boolean_t is_direct; ret = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); if (ret) return (ret); - ret = zfs_check_direct_enabled(ITOZ(ip), flags, &is_direct); - - if (ret) { - return (-ret); - } else if (is_direct) { - ssize_t read = zpl_aio_read_direct(kiocb, iov, nr_segs, pos); - - if (read >= 0 || read != -EAGAIN) - return (read); - - /* Otherwise fallback to buffered read */ - } - - return (zpl_aio_read_buffered(kiocb, iov, nr_segs, pos)); -} - -static ssize_t -zpl_aio_write_buffered(struct kiocb *kiocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - cred_t *cr = CRED(); - fstrans_cookie_t cookie; - struct file *filp = kiocb->ki_filp; - struct inode *ip = filp->f_mapping->host; - size_t count; - ssize_t ret; - - ret = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ); - if (ret) - return (ret); - zfs_uio_t uio; zfs_uio_iovec_init(&uio, iov, nr_segs, kiocb->ki_pos, UIO_USERSPACE, count, 0); @@ -680,110 +422,64 @@ zpl_aio_write_buffered(struct kiocb *kiocb, const struct iovec *iov, crhold(cr); cookie = spl_fstrans_mark(); - int flags = (filp->f_flags | zfs_io_flags(kiocb)) & ~O_DIRECT; - int error = -zfs_write(ITOZ(ip), &uio, flags, cr); + ret = -zfs_read(ITOZ(filp->f_mapping->host), &uio, + filp->f_flags | zfs_io_flags(kiocb), cr); spl_fstrans_unmark(cookie); crfree(cr); - if 
(error < 0) - return (error); - - ssize_t wrote = count - uio.uio_resid; - kiocb->ki_pos += wrote; - - return (wrote); -} - -static ssize_t -zpl_aio_write_direct(struct kiocb *kiocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - cred_t *cr = CRED(); - fstrans_cookie_t cookie; - struct file *filp = kiocb->ki_filp; - struct inode *ip = filp->f_mapping->host; - int flags = filp->f_flags | zfs_io_flags(kiocb); - size_t count; - ssize_t ret; - - ret = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ); - if (ret) + if (ret < 0) return (ret); - zfs_uio_t uio; - zfs_uio_iovec_init(&uio, iov, nr_segs, kiocb->ki_pos, UIO_USERSPACE, - count, 0); + ssize_t read = count - uio.uio_resid; + kiocb->ki_pos += read; - /* On error, return to fallback to the buffered path. */ - ret = zfs_setup_direct(ITOZ(ip), &uio, UIO_WRITE, &flags); - if (ret) - return (-ret); + zpl_file_accessed(filp); - ASSERT(uio.uio_extflg & UIO_DIRECT); - - crhold(cr); - cookie = spl_fstrans_mark(); - - int error = -zfs_write(ITOZ(ip), &uio, flags, cr); - - spl_fstrans_unmark(cookie); - crfree(cr); - - zfs_uio_free_dio_pages(&uio, UIO_WRITE); - - if (error < 0) - return (error); - - ssize_t wrote = count - uio.uio_resid; - kiocb->ki_pos += wrote; - - return (wrote); + return (read); } static ssize_t zpl_aio_write(struct kiocb *kiocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { + cred_t *cr = CRED(); + fstrans_cookie_t cookie; struct file *filp = kiocb->ki_filp; struct inode *ip = filp->f_mapping->host; - int flags = filp->f_flags | zfs_io_flags(kiocb); - size_t ocount; size_t count; ssize_t ret; - boolean_t is_direct; - ret = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); + ret = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ); if (ret) return (ret); - count = ocount; - - ret = generic_write_checks(filp, &pos, &count, S_ISBLK(ip->i_mode)); + ret = generic_write_checks(filp, &pos, &count, S_ISBLK(ip->i_mode)); if (ret) return (ret); 
kiocb->ki_pos = pos; - ret = zfs_check_direct_enabled(ITOZ(ip), flags, &is_direct); + zfs_uio_t uio; + zfs_uio_iovec_init(&uio, iov, nr_segs, kiocb->ki_pos, UIO_USERSPACE, + count, 0); - if (ret) { - return (-ret); - } else if (is_direct) { - ssize_t wrote = zpl_aio_write_direct(kiocb, iov, nr_segs, pos); + crhold(cr); + cookie = spl_fstrans_mark(); - if (wrote >= 0 || wrote != -EAGAIN) { - return (wrote); - } + ret = -zfs_write(ITOZ(ip), &uio, + filp->f_flags | zfs_io_flags(kiocb), cr); - /* - * If we are falling back to a buffered write, then the - * file position should not be updated at this point. - */ - ASSERT3U(pos, ==, kiocb->ki_pos); - } + spl_fstrans_unmark(cookie); + crfree(cr); - return (zpl_aio_write_buffered(kiocb, iov, nr_segs, pos)); + if (ret < 0) + return (ret); + + ssize_t wrote = count - uio.uio_resid; + kiocb->ki_pos += wrote; + + return (wrote); } #endif /* HAVE_VFS_RW_ITERATE */ diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index db08b18431..ea7731b8d8 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -1191,7 +1191,7 @@ dmu_read_impl(dnode_t *dn, uint64_t offset, uint64_t size, /* Allow Direct I/O when requested and properly aligned */ if ((flags & DMU_DIRECTIO) && zfs_dio_page_aligned(buf) && - zfs_dio_aligned(offset, size, SPA_MINBLOCKSIZE)) { + zfs_dio_aligned(offset, size, PAGESIZE)) { abd_t *data = abd_get_from_buf(buf, size); err = dmu_read_abd(dn, offset, size, data, flags); abd_free(data); diff --git a/module/zfs/dmu_direct.c b/module/zfs/dmu_direct.c index 3d5fb23907..91a7fd8df4 100644 --- a/module/zfs/dmu_direct.c +++ b/module/zfs/dmu_direct.c @@ -104,7 +104,7 @@ dmu_write_direct_done(zio_t *zio) if (zio->io_error != 0) { if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) - ASSERT3U(zio->io_error, ==, EAGAIN); + ASSERT3U(zio->io_error, ==, EIO); /* * In the event of an I/O error this block has been freed in diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 56af7c1298..9305bd894d 100644 --- a/module/zfs/vdev.c +++ 
b/module/zfs/vdev.c @@ -159,14 +159,14 @@ uint_t zfs_vdev_max_auto_ashift = 14; uint_t zfs_vdev_min_auto_ashift = ASHIFT_MIN; /* - * VDEV checksum verification percentage for Direct I/O writes. This is - * neccessary for Linux, because user pages can not be placed under write - * protection during Direct I/O writes. + * VDEV checksum verification for Direct I/O writes. This is necessary for + * Linux, because anonymous pages can not be placed under write protection + * during Direct I/O writes. */ #if !defined(__FreeBSD__) -uint_t zfs_vdev_direct_write_verify_pct = 2; +uint_t zfs_vdev_direct_write_verify = 1; #else -uint_t zfs_vdev_direct_write_verify_pct = 0; +uint_t zfs_vdev_direct_write_verify = 0; #endif void @@ -6527,9 +6527,9 @@ ZFS_MODULE_PARAM(zfs, zfs_, dio_write_verify_events_per_second, UINT, ZMOD_RW, "Rate Direct I/O write verify events to this many per second"); /* BEGIN CSTYLED */ -ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, direct_write_verify_pct, UINT, ZMOD_RW, - "Percentage of Direct I/O writes per top-level VDEV for checksum " - "verification to be performed"); +ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, direct_write_verify, UINT, ZMOD_RW, + "Direct I/O writes will perform checksum verification before " + "committing write"); ZFS_MODULE_PARAM(zfs, zfs_, checksum_events_per_second, UINT, ZMOD_RW, "Rate limit checksum events to this many checksum errors per second " diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 4cf03abc5a..bf81073a16 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -202,28 +202,6 @@ zfs_access(znode_t *zp, int mode, int flag, cred_t *cr) return (error); } -int -zfs_check_direct_enabled(znode_t *zp, int ioflags, boolean_t *is_direct) -{; - zfsvfs_t *zfsvfs = ZTOZSB(zp); - *is_direct = B_FALSE; - int error; - - if ((error = zfs_enter(zfsvfs, FTAG)) != 0) - return (error); - - if (ioflags & O_DIRECT && - zfsvfs->z_os->os_direct != ZFS_DIRECT_DISABLED) { - *is_direct = B_TRUE; - } else if 
(zfsvfs->z_os->os_direct == ZFS_DIRECT_ALWAYS) { - *is_direct = B_TRUE; - } - - zfs_exit(zfsvfs, FTAG); - - return (0); -} - /* * Determine if Direct I/O has been requested (either via the O_DIRECT flag or * the "direct" dataset property). When inherited by the property only apply @@ -236,12 +214,11 @@ zfs_check_direct_enabled(znode_t *zp, int ioflags, boolean_t *is_direct) * synhronized with the ARC. * * It is possible that a file's pages could be mmap'ed after it is checked - * here. If so, that is handled according in zfs_read() and zfs_write(). See - * comments in the following two areas for how this handled: - * zfs_read() -> mappedread() + * here. If so, that is handled accordingly in zfs_write(). See comments in the + * following area for how this is handled: * zfs_write() -> update_pages() */ -int +static int zfs_setup_direct(struct znode *zp, zfs_uio_t *uio, zfs_uio_rw_t rw, int *ioflagp) { @@ -250,49 +227,49 @@ zfs_setup_direct(struct znode *zp, zfs_uio_t *uio, zfs_uio_rw_t rw, int ioflag = *ioflagp; int error = 0; - if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) - return (error); - - if (os->os_direct == ZFS_DIRECT_DISABLED) { - error = EAGAIN; + if (os->os_direct == ZFS_DIRECT_DISABLED || + zn_has_cached_data(zp, zfs_uio_offset(uio), + zfs_uio_offset(uio) + zfs_uio_resid(uio) - 1)) { + /* + * Direct I/O is disabled or the region is mmap'ed. In either + * case the I/O request will just be directed through the ARC. + */ + ioflag &= ~O_DIRECT; goto out; - } else if (os->os_direct == ZFS_DIRECT_ALWAYS && zfs_uio_page_aligned(uio) && - zfs_uio_aligned(uio, SPA_MINBLOCKSIZE)) { + zfs_uio_aligned(uio, PAGE_SIZE)) { if ((rw == UIO_WRITE && zfs_uio_resid(uio) >= zp->z_blksz) || (rw == UIO_READ)) { ioflag |= O_DIRECT; } + } else if (os->os_direct == ZFS_DIRECT_ALWAYS && (ioflag & O_DIRECT)) { + /* + * Direct I/O was requested through the direct=always, but it + * is not properly PAGE_SIZE aligned. The request will be + * directed through the ARC. 
+ */ + ioflag &= ~O_DIRECT; } if (ioflag & O_DIRECT) { if (!zfs_uio_page_aligned(uio) || - !zfs_uio_aligned(uio, SPA_MINBLOCKSIZE)) { + !zfs_uio_aligned(uio, PAGE_SIZE)) { error = SET_ERROR(EINVAL); goto out; } - if (zn_has_cached_data(zp, zfs_uio_offset(uio), - zfs_uio_offset(uio) + zfs_uio_resid(uio) - 1)) { - error = SET_ERROR(EAGAIN); + error = zfs_uio_get_dio_pages_alloc(uio, rw); + if (error) { goto out; } - - error = zfs_uio_get_dio_pages_alloc(uio, rw); - if (error) - goto out; - } else { - error = EAGAIN; - goto out; } IMPLY(ioflag & O_DIRECT, uio->uio_extflg & UIO_DIRECT); ASSERT0(error); - *ioflagp = ioflag; out: - zfs_exit(zfsvfs, FTAG); + *ioflagp = ioflag; return (error); } @@ -380,8 +357,16 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) error = 0; goto out; } - ASSERT(zfs_uio_offset(uio) < zp->z_size); + + /* + * Setting up Direct I/O if requested. + */ + error = zfs_setup_direct(zp, uio, UIO_READ, &ioflag); + if (error) { + goto out; + } + #if defined(__linux__) ssize_t start_offset = zfs_uio_offset(uio); #endif @@ -424,22 +409,7 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) #endif if (zn_has_cached_data(zp, zfs_uio_offset(uio), zfs_uio_offset(uio) + nbytes - 1)) { - /* - * It is possible that a files pages have been mmap'ed - * since our check for Direct I/O reads and the read - * being issued. In this case, we will use the ARC to - * keep it synchronized with the page cache. In order - * to do this we temporarily remove the UIO_DIRECT - * flag. 
- */ - boolean_t uio_direct_mmap = B_FALSE; - if (uio->uio_extflg & UIO_DIRECT) { - uio->uio_extflg &= ~UIO_DIRECT; - uio_direct_mmap = B_TRUE; - } error = mappedread(zp, nbytes, uio); - if (uio_direct_mmap) - uio->uio_extflg |= UIO_DIRECT; } else { error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), uio, nbytes); @@ -494,6 +464,12 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) out: zfs_rangelock_exit(lr); + /* + * Cleanup for Direct I/O if requested. + */ + if (uio->uio_extflg & UIO_DIRECT) + zfs_uio_free_dio_pages(uio, UIO_READ); + ZFS_ACCESSTIME_STAMP(zfsvfs, zp); zfs_exit(zfsvfs, FTAG); return (error); @@ -631,6 +607,15 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) return (SET_ERROR(EINVAL)); } + /* + * Setting up Direct I/O if requested. + */ + error = zfs_setup_direct(zp, uio, UIO_WRITE, &ioflag); + if (error) { + zfs_exit(zfsvfs, FTAG); + return (SET_ERROR(error)); + } + /* * Pre-fault the pages to ensure slow (eg NFS) pages * don't hold up txg. @@ -641,6 +626,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) return (SET_ERROR(EFAULT)); } + /* * If in append mode, set the io offset pointer to eof. */ @@ -676,6 +662,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) lr = zfs_rangelock_enter(&zp->z_rangelock, woff, n, RL_WRITER); } + if (zn_rlimit_fsize_uio(zp, uio)) { zfs_rangelock_exit(lr); zfs_exit(zfsvfs, FTAG); @@ -896,15 +883,27 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) zfs_uioskip(uio, nbytes); tx_bytes = nbytes; } - /* - * There is a a window where a file's pages can be mmap'ed after - * the Direct I/O write has started. In this case we will still - * call update_pages() to make sure there is consistency - * between the ARC and the page cache. This is unfortunate + * There is a window where a file's pages can be mmap'ed after + * zfs_setup_direct() is called. 
This is due to the fact that + * the rangelock in this function is acquired after calling + * zfs_setup_direct(). This is done so that + * zfs_uio_prefaultpages() does not attempt to fault in pages + * on Linux for Direct I/O requests. This is not necessary as + * the pages are pinned in memory and can not be faulted out. + * Ideally, the rangelock would be held before calling + * zfs_setup_direct() and zfs_uio_prefaultpages(); however, + * this can lead to a deadlock as zfs_getpage() also acquires + * the rangelock as a RL_WRITER and prefaulting the pages can + * lead to zfs_getpage() being called. + * + * In the case of the pages being mapped after + * zfs_setup_direct() is called, the call to update_pages() + * will still be made to make sure there is consistency between + * the ARC and the Linux page cache. This is an unfortunate * situation as the data will be read back into the ARC after - the Direct I/O write has completed, but this is the pentalty - for writing to a mmap'ed region of the file using O_DIRECT. + the Direct I/O write has completed, but this is the penalty + for writing to a mmap'ed region of a file using Direct I/O. */ if (tx_bytes && zn_has_cached_data(zp, woff, woff + tx_bytes - 1)) { @@ -987,6 +986,12 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) zfs_znode_update_vfs(zp); zfs_rangelock_exit(lr); + /* + * Cleanup for Direct I/O if requested. + */ + if (uio->uio_extflg & UIO_DIRECT) + zfs_uio_free_dio_pages(uio, UIO_WRITE); + /* * If we're in replay mode, or we made no progress, or the * uio data is inaccessible return an error. 
Otherwise, it's diff --git a/module/zfs/zio.c b/module/zfs/zio.c index f4ada08a91..ae58f7704e 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -804,7 +804,7 @@ zio_notify_parent(zio_t *pio, zio_t *zio, enum zio_wait_type wait, ASSERT3U(*countp, >, 0); if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) { - ASSERT3U(*errorp, ==, EAGAIN); + ASSERT3U(*errorp, ==, EIO); ASSERT3U(pio->io_child_type, ==, ZIO_CHILD_LOGICAL); pio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR; } @@ -4521,13 +4521,12 @@ zio_vdev_io_assess(zio_t *zio) /* * If a Direct I/O write checksum verify error has occurred then this - * I/O should not attempt to be issued again. Instead the EAGAIN will - * be returned and this write will attempt to be issued through the - * ARC instead. + * I/O should not attempt to be issued again. Instead the EIO will + * be returned. */ if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) { ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_LOGICAL); - ASSERT3U(zio->io_error, ==, EAGAIN); + ASSERT3U(zio->io_error, ==, EIO); zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; return (zio); } @@ -4850,6 +4849,7 @@ static zio_t * zio_dio_checksum_verify(zio_t *zio) { zio_t *pio = zio_unique_parent(zio); + int error; ASSERT3P(zio->io_vd, !=, NULL); ASSERT3P(zio->io_bp, !=, NULL); @@ -4858,38 +4858,28 @@ zio_dio_checksum_verify(zio_t *zio) ASSERT3B(pio->io_prop.zp_direct_write, ==, B_TRUE); ASSERT3U(pio->io_child_type, ==, ZIO_CHILD_LOGICAL); - if (zfs_vdev_direct_write_verify_pct == 0 || zio->io_error != 0) + if (zfs_vdev_direct_write_verify == 0 || zio->io_error != 0) goto out; - /* - * A Direct I/O write checksum verification will only be - * performed based on the top-level VDEV percentage for checks. 
- */ - uint32_t rand = random_in_range(100); - int error; + if ((error = zio_checksum_error(zio, NULL)) != 0) { + zio->io_error = error; + if (error == ECKSUM) { + mutex_enter(&zio->io_vd->vdev_stat_lock); + zio->io_vd->vdev_stat.vs_dio_verify_errors++; + mutex_exit(&zio->io_vd->vdev_stat_lock); + zio->io_error = SET_ERROR(EIO); + zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR; - if (rand < zfs_vdev_direct_write_verify_pct) { - if ((error = zio_checksum_error(zio, NULL)) != 0) { - zio->io_error = error; - if (error == ECKSUM) { - mutex_enter(&zio->io_vd->vdev_stat_lock); - zio->io_vd->vdev_stat.vs_dio_verify_errors++; - mutex_exit(&zio->io_vd->vdev_stat_lock); - zio->io_error = SET_ERROR(EAGAIN); - zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR; + /* + * The EIO error must be propagated up to the logical + * parent ZIO in zio_notify_parent() so it can be + * returned to dmu_write_abd(). + */ + zio->io_flags &= ~ZIO_FLAG_DONT_PROPAGATE; - /* - * The EAGAIN error must be propagated up to the - * logical parent ZIO in zio_notify_parent() so - * it can be returned to dmu_write_abd(). 
- */ - zio->io_flags &= ~ZIO_FLAG_DONT_PROPAGATE; - - (void) zfs_ereport_post( - FM_EREPORT_ZFS_DIO_VERIFY, - zio->io_spa, zio->io_vd, &zio->io_bookmark, - zio, 0); - } + (void) zfs_ereport_post(FM_EREPORT_ZFS_DIO_VERIFY, + zio->io_spa, zio->io_vd, &zio->io_bookmark, + zio, 0); } } @@ -5243,8 +5233,8 @@ zio_done(zio_t *zio) } if ((zio->io_error == EIO || !(zio->io_flags & - (ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_PROPAGATE | - ZIO_FLAG_DIO_CHKSUM_ERR))) && + (ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_PROPAGATE))) && + !(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) && zio == zio->io_logical) { /* * For logical I/O requests, tell the SPA to log the diff --git a/tests/zfs-tests/cmd/manipulate_user_buffer.c b/tests/zfs-tests/cmd/manipulate_user_buffer.c index c195a197ad..7daa7d3084 100644 --- a/tests/zfs-tests/cmd/manipulate_user_buffer.c +++ b/tests/zfs-tests/cmd/manipulate_user_buffer.c @@ -41,6 +41,7 @@ static char *outputfile = NULL; static int blocksize = 131072; /* 128K */ +static int wr_err_expected = 0; static int numblocks = 100; static char *execname = NULL; static int print_usage = 0; @@ -56,28 +57,33 @@ static void usage(void) { (void) fprintf(stderr, - "usage %s -o outputfile [-b blocksize] [-n numblocks]\n" - " [-p randpattern] [-h help]\n" + "usage %s -o outputfile [-b blocksize] [-e wr_error_expected]\n" + " [-n numblocks] [-p randpattern] [-h help]\n" "\n" "Testing whether checksum verify works correctly for O_DIRECT.\n" "when manipulating the contents of a userspace buffer.\n" "\n" - " outputfile: File to write to.\n" - " blocksize: Size of each block to write (must be at \n" - " least >= 512).\n" - " numblocks: Total number of blocksized blocks to write.\n" - " randpattern: Fill data buffer with random data. 
Default \n" - " behavior is to fill the buffer with the \n" - " known data pattern (0xdeadbeef).\n" - " help: Print usage information and exit.\n" + " outputfile: File to write to.\n" + " blocksize: Size of each block to write (must be at \n" + " least >= 512).\n" + " wr_err_expected: Whether pwrite() is expected to return EIO\n" + " while manipulating the contents of the\n" + " buffer.\n" + " numblocks: Total number of blocksized blocks to\n" + " write.\n" + " randpattern: Fill data buffer with random data. Default\n" + " behavior is to fill the buffer with the \n" + " known data pattern (0xdeadbeef).\n" + " help: Print usage information and exit.\n" "\n" " Required parameters:\n" " outputfile\n" "\n" " Default Values:\n" - " blocksize -> 131072\n" - " numblocks -> 100\n" - " randpattern -> false\n", + " blocksize -> 131072\n" + " wr_err_expected -> false\n" + " numblocks -> 100\n" + " randpattern -> false\n", execname); (void) exit(1); } @@ -91,12 +97,16 @@ parse_options(int argc, char *argv[]) extern int optind, optopt; execname = argv[0]; - while ((c = getopt(argc, argv, "b:hn:o:p")) != -1) { + while ((c = getopt(argc, argv, "b:ehn:o:p")) != -1) { switch (c) { case 'b': blocksize = atoi(optarg); break; + case 'e': + wr_err_expected = 1; + break; + case 'h': print_usage = 1; break; @@ -153,8 +163,10 @@ write_thread(void *arg) while (!args->entire_file_written) { wrote = pwrite(ofd, buf, blocksize, offset); if (wrote != blocksize) { - perror("write"); - exit(2); + if (wr_err_expected) + assert(errno == EIO); + else + exit(2); } offset = ((offset + blocksize) % total_data); diff --git a/tests/zfs-tests/cmd/stride_dd.c b/tests/zfs-tests/cmd/stride_dd.c index 19aab1c97f..e1e45794cf 100644 --- a/tests/zfs-tests/cmd/stride_dd.c +++ b/tests/zfs-tests/cmd/stride_dd.c @@ -212,7 +212,6 @@ read_entire_file(int ifd, int ofd, void *buf) } } - if (stride > 1) { if (lseek(ifd, (stride - 1) * bsize, SEEK_CUR) == -1) { perror("input lseek"); diff --git 
a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg index d3c4a7d940..b41f54ba35 100644 --- a/tests/zfs-tests/include/tunables.cfg +++ b/tests/zfs-tests/include/tunables.cfg @@ -93,7 +93,7 @@ VDEV_FILE_LOGICAL_ASHIFT vdev.file.logical_ashift vdev_file_logical_ashift VDEV_FILE_PHYSICAL_ASHIFT vdev.file.physical_ashift vdev_file_physical_ashift VDEV_MAX_AUTO_ASHIFT vdev.max_auto_ashift zfs_vdev_max_auto_ashift VDEV_MIN_MS_COUNT vdev.min_ms_count zfs_vdev_min_ms_count -VDEV_DIRECT_WR_VERIFY_PCT vdev.direct_write_verify_pct zfs_vdev_direct_write_verify_pct +VDEV_DIRECT_WR_VERIFY vdev.direct_write_verify zfs_vdev_direct_write_verify VDEV_VALIDATE_SKIP vdev.validate_skip vdev_validate_skip VOL_INHIBIT_DEV UNSUPPORTED zvol_inhibit_dev VOL_MODE vol.mode zvol_volmode diff --git a/tests/zfs-tests/tests/functional/direct/dio_aligned_block.ksh b/tests/zfs-tests/tests/functional/direct/dio_aligned_block.ksh index 4aac5edd8e..e26fbdfc25 100755 --- a/tests/zfs-tests/tests/functional/direct/dio_aligned_block.ksh +++ b/tests/zfs-tests/tests/functional/direct/dio_aligned_block.ksh @@ -43,7 +43,6 @@ function cleanup { zfs set recordsize=$rs $TESTPOOL/$TESTFS log_must rm -f $tmp_file - check_dio_write_chksum_verify_failures $TESTPOOL "raidz" 0 } log_onexit cleanup diff --git a/tests/zfs-tests/tests/functional/direct/dio_async_always.ksh b/tests/zfs-tests/tests/functional/direct/dio_async_always.ksh index 3f26715fc3..27fd66ccd2 100755 --- a/tests/zfs-tests/tests/functional/direct/dio_async_always.ksh +++ b/tests/zfs-tests/tests/functional/direct/dio_async_always.ksh @@ -44,7 +44,6 @@ function cleanup { zfs set direct=standard $TESTPOOL/$TESTFS rm $tmp_file - check_dio_write_chksum_verify_failures $TESTPOOL "raidz" 0 } log_assert "Verify direct=always mixed small async requests" diff --git a/tests/zfs-tests/tests/functional/direct/dio_async_fio_ioengines.ksh b/tests/zfs-tests/tests/functional/direct/dio_async_fio_ioengines.ksh index 82d7d8250f..5492a5a905 
100755 --- a/tests/zfs-tests/tests/functional/direct/dio_async_fio_ioengines.ksh +++ b/tests/zfs-tests/tests/functional/direct/dio_async_fio_ioengines.ksh @@ -44,7 +44,6 @@ verify_runnable "global" function cleanup { log_must rm -f "$mntpnt/direct-*" - check_dio_write_chksum_verify_failures $TESTPOOL "raidz" 0 } function check_fio_ioengine diff --git a/tests/zfs-tests/tests/functional/direct/dio_compression.ksh b/tests/zfs-tests/tests/functional/direct/dio_compression.ksh index 5be93d104d..5463715d7b 100755 --- a/tests/zfs-tests/tests/functional/direct/dio_compression.ksh +++ b/tests/zfs-tests/tests/functional/direct/dio_compression.ksh @@ -46,7 +46,6 @@ function cleanup { log_must rm -f "$mntpnt/direct-*" log_must zfs set compression=off $TESTPOOL/$TESTFS - check_dio_write_chksum_verify_failures $TESTPOOL "raidz" 0 } log_assert "Verify compression works using Direct I/O." diff --git a/tests/zfs-tests/tests/functional/direct/dio_dedup.ksh b/tests/zfs-tests/tests/functional/direct/dio_dedup.ksh index c703fcc05f..9de94dee6c 100755 --- a/tests/zfs-tests/tests/functional/direct/dio_dedup.ksh +++ b/tests/zfs-tests/tests/functional/direct/dio_dedup.ksh @@ -45,7 +45,6 @@ function cleanup { log_must rm -f "$mntpnt/direct-*" log_must zfs set dedup=off $TESTPOOL/$TESTFS - check_dio_write_chksum_verify_failures $TESTPOOL "raidz" 0 } log_assert "Verify deduplication works using Direct I/O." 
diff --git a/tests/zfs-tests/tests/functional/direct/dio_encryption.ksh b/tests/zfs-tests/tests/functional/direct/dio_encryption.ksh index 843b570d2d..b6faa11970 100755 --- a/tests/zfs-tests/tests/functional/direct/dio_encryption.ksh +++ b/tests/zfs-tests/tests/functional/direct/dio_encryption.ksh @@ -59,6 +59,4 @@ for bs in "4k" "128k" "1m"; do done done -check_dio_write_chksum_verify_failures $TESTPOOL1 "stripe" 0 - log_pass "Verified encryption works using Direct I/O" diff --git a/tests/zfs-tests/tests/functional/direct/dio_grow_block.ksh b/tests/zfs-tests/tests/functional/direct/dio_grow_block.ksh index c54d079366..12b2f21275 100755 --- a/tests/zfs-tests/tests/functional/direct/dio_grow_block.ksh +++ b/tests/zfs-tests/tests/functional/direct/dio_grow_block.ksh @@ -41,7 +41,6 @@ function cleanup { zfs set recordsize=$rs $TESTPOOL/$TESTFS log_must rm -f $tmp_file - check_dio_write_chksum_verify_failures $TESTPOOL "raidz" 0 } log_assert "Verify the number direct/buffered requests when growing a file" diff --git a/tests/zfs-tests/tests/functional/direct/dio_max_recordsize.ksh b/tests/zfs-tests/tests/functional/direct/dio_max_recordsize.ksh index 87900443ed..2c0ce832b1 100755 --- a/tests/zfs-tests/tests/functional/direct/dio_max_recordsize.ksh +++ b/tests/zfs-tests/tests/functional/direct/dio_max_recordsize.ksh @@ -57,14 +57,6 @@ for type in "" "mirror" "raidz" "draid"; do; verify_dio_write_count $TESTPOOL1 $recsize $((4 * recsize)) \ $mntpnt - if [[ "$type" == "" ]]; then - check_dio_write_chksum_verify_failures $TESTPOOL1 \ - "stripe" 0 - else - check_dio_write_chksum_verify_failures $TESTPOOL1 \ - "$type" 0 - fi - destroy_pool $TESTPOOL1 done done diff --git a/tests/zfs-tests/tests/functional/direct/dio_mixed.ksh b/tests/zfs-tests/tests/functional/direct/dio_mixed.ksh index 38c6159537..6f217d91d5 100755 --- a/tests/zfs-tests/tests/functional/direct/dio_mixed.ksh +++ b/tests/zfs-tests/tests/functional/direct/dio_mixed.ksh @@ -42,7 +42,6 @@ verify_runnable "global" 
function cleanup { log_must rm -f $src_file $new_file $tmp_file - check_dio_write_chksum_verify_failures $TESTPOOL "raidz" 0 } log_assert "Verify mixed buffered and Direct I/O are coherent." diff --git a/tests/zfs-tests/tests/functional/direct/dio_mmap.ksh b/tests/zfs-tests/tests/functional/direct/dio_mmap.ksh index 27d03e0412..fbd6afd7b3 100755 --- a/tests/zfs-tests/tests/functional/direct/dio_mmap.ksh +++ b/tests/zfs-tests/tests/functional/direct/dio_mmap.ksh @@ -45,7 +45,6 @@ function cleanup { zfs set recordsize=$rs $TESTPOOL/$TESTFS log_must rm -f "$tmp_file" - check_dio_write_chksum_verify_failures $TESTPOOL "raidz" 0 } log_assert "Verify mixed Direct I/O and mmap I/O" diff --git a/tests/zfs-tests/tests/functional/direct/dio_overwrites.ksh b/tests/zfs-tests/tests/functional/direct/dio_overwrites.ksh index 3854766ed8..04973fc886 100755 --- a/tests/zfs-tests/tests/functional/direct/dio_overwrites.ksh +++ b/tests/zfs-tests/tests/functional/direct/dio_overwrites.ksh @@ -43,7 +43,6 @@ function cleanup { zfs set recordsize=$rs $TESTPOOL/$TESTFS log_must rm -f "$tmp_file" - check_dio_write_chksum_verify_failures $TESTPOOL "raidz" 0 } log_assert "Verify Direct I/O overwrites" diff --git a/tests/zfs-tests/tests/functional/direct/dio_property.ksh b/tests/zfs-tests/tests/functional/direct/dio_property.ksh index 4fbcfec068..9e18f0bf78 100755 --- a/tests/zfs-tests/tests/functional/direct/dio_property.ksh +++ b/tests/zfs-tests/tests/functional/direct/dio_property.ksh @@ -44,7 +44,6 @@ function cleanup { zfs set direct=standard $TESTPOOL/$TESTFS log_must rm -f $tmp_file - check_dio_write_chksum_verify_failures $TESTPOOL "raidz" 0 } log_assert "Verify the direct=always|disabled|standard property" @@ -61,7 +60,8 @@ count=8 # # Check when "direct=always" any aligned IO is done as direct. -# Note that "flag=direct" is not set in the following calls to dd(1). +# Note that the "-D" and "-d" flags are not set in the following calls to +# stride_dd. 
# log_must zfs set direct=always $TESTPOOL/$TESTFS @@ -92,7 +92,8 @@ log_must rm -f $tmp_file # # Check when "direct=disabled" there are never any direct requests. -# Note that "flag=direct" is always set in the following calls to dd(1). +# Note that the "-D" and "-d" flags are always set in the following calls to +# stride_dd. # log_must zfs set direct=disabled $TESTPOOL/$TESTFS diff --git a/tests/zfs-tests/tests/functional/direct/dio_random.ksh b/tests/zfs-tests/tests/functional/direct/dio_random.ksh index 42c18d4261..abe8d5c0dc 100755 --- a/tests/zfs-tests/tests/functional/direct/dio_random.ksh +++ b/tests/zfs-tests/tests/functional/direct/dio_random.ksh @@ -45,7 +45,6 @@ verify_runnable "global" function cleanup { log_must rm -f "$tmp_file" - check_dio_write_chksum_verify_failures $TESTPOOL "raidz" 0 } log_assert "Verify randomly sized mixed Direct I/O and buffered I/O" diff --git a/tests/zfs-tests/tests/functional/direct/dio_recordsize.ksh b/tests/zfs-tests/tests/functional/direct/dio_recordsize.ksh index e1087e5ac3..def4682213 100755 --- a/tests/zfs-tests/tests/functional/direct/dio_recordsize.ksh +++ b/tests/zfs-tests/tests/functional/direct/dio_recordsize.ksh @@ -61,14 +61,6 @@ for type in "" "mirror" "raidz" "draid"; do done done - if [[ "$type" == "" ]]; then - check_dio_write_chksum_verify_failures $TESTPOOL1 \ - "stripe" 0 - else - check_dio_write_chksum_verify_failures $TESTPOOL1 \ - "$type" 0 - fi - destroy_pool $TESTPOOL1 done done diff --git a/tests/zfs-tests/tests/functional/direct/dio_unaligned_block.ksh b/tests/zfs-tests/tests/functional/direct/dio_unaligned_block.ksh index 9f50187149..309d35ea0e 100755 --- a/tests/zfs-tests/tests/functional/direct/dio_unaligned_block.ksh +++ b/tests/zfs-tests/tests/functional/direct/dio_unaligned_block.ksh @@ -44,7 +44,6 @@ function cleanup zfs set recordsize=$rs $TESTPOOL/$TESTFS zfs set direct=standard $TESTPOOL/$TESTFS log_must rm -f $tmp_file - check_dio_write_chksum_verify_failures $TESTPOOL "raidz" 0 } 
log_onexit cleanup diff --git a/tests/zfs-tests/tests/functional/direct/dio_unaligned_filesize.ksh b/tests/zfs-tests/tests/functional/direct/dio_unaligned_filesize.ksh index 571767d3b1..8bb363f1a9 100755 --- a/tests/zfs-tests/tests/functional/direct/dio_unaligned_filesize.ksh +++ b/tests/zfs-tests/tests/functional/direct/dio_unaligned_filesize.ksh @@ -49,7 +49,6 @@ function cleanup { log_must rm -f "$filename" log_must set recordsize=$rs $TESTPOOL/$TESTFS - check_dio_write_chksum_verify_failures $TESTPOOL "raidz" 0 } log_assert "Verify Direct I/O reads can read an entire file that is not \ diff --git a/tests/zfs-tests/tests/functional/direct/dio_write_stable_pages.ksh b/tests/zfs-tests/tests/functional/direct/dio_write_stable_pages.ksh index 5a5a5cf7ad..efc9ee6391 100755 --- a/tests/zfs-tests/tests/functional/direct/dio_write_stable_pages.ksh +++ b/tests/zfs-tests/tests/functional/direct/dio_write_stable_pages.ksh @@ -77,7 +77,7 @@ do # Manipulate the user's buffer while running O_DIRECT write # workload with the buffer. log_must manipulate_user_buffer -o "$mntpnt/direct-write.iso" \ - -n $NUMBLOCKS -b $BS + -n $NUMBLOCKS -b $BS # Reading back the contents of the file log_must stride_dd -i $mntpnt/direct-write.iso -o /dev/null \ diff --git a/tests/zfs-tests/tests/functional/direct/dio_write_verify.ksh b/tests/zfs-tests/tests/functional/direct/dio_write_verify.ksh index a7e9dc0cde..536459a35e 100755 --- a/tests/zfs-tests/tests/functional/direct/dio_write_verify.ksh +++ b/tests/zfs-tests/tests/functional/direct/dio_write_verify.ksh @@ -33,7 +33,7 @@ # Verify checksum verify works for Direct I/O writes. # # STRATEGY: -# 1. Set the module parameter zfs_vdev_direct_write_verify_pct to 30. +# 1. Set the module parameter zfs_vdev_direct_write_verify to 0. # 2. Check that manipulating the user buffer while Direct I/O writes are # taking place does not cause any panics with compression turned on. # 3. 
Start a Direct I/O write workload while manipulating the user buffer @@ -42,7 +42,7 @@ # zpool status -d and checking for zevents. We also make sure there # are reported data errors when reading the file back. # 5. Repeat steps 3 and 4 for 3 iterations. -# 6. Set zfs_vdev_direct_write_verify_pct set to 1 and repeat 3. +# 6. Set zfs_vdev_direct_write_verify set to 1 and repeat 3. # 7. Verify there are Direct I/O write verify failures using # zpool status -d and checking for zevents. We also make sure there # there are no reported data errors when reading the file back because @@ -58,22 +58,22 @@ function cleanup log_must zpool clear $TESTPOOL # Clearing out dio_verify from event logs log_must zpool events -c - log_must set_tunable32 VDEV_DIRECT_WR_VERIFY_PCT 2 + log_must set_tunable32 VDEV_DIRECT_WR_VERIFY $DIO_WR_VERIFY_TUNABLE } log_assert "Verify checksum verify works for Direct I/O writes." if is_freebsd; then - log_unsupported "FeeBSD is capable of stable pages for O_DIRECT writes" + log_unsupported "FreeBSD is capable of stable pages for O_DIRECT writes" fi log_onexit cleanup ITERATIONS=3 NUMBLOCKS=300 -VERIFY_PCT=30 BS=$((128 * 1024)) # 128k mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) +typeset DIO_WR_VERIFY_TUNABLE=$(get_tunable VDEV_DIRECT_WR_VERIFY) # Get a list of vdevs in our pool set -A array $(get_disklist_fullpath $TESTPOOL) @@ -82,7 +82,7 @@ set -A array $(get_disklist_fullpath $TESTPOOL) firstvdev=${array[0]} log_must zfs set recordsize=128k $TESTPOOL/$TESTFS -log_must set_tunable32 VDEV_DIRECT_WR_VERIFY_PCT $VERIFY_PCT +log_must set_tunable32 VDEV_DIRECT_WR_VERIFY 0 # First we will verify there are no panics while manipulating the contents of # the user buffer during Direct I/O writes with compression. 
The contents @@ -101,25 +101,21 @@ if [[ $total_dio_wr -lt 1 ]]; then log_fail "No Direct I/O writes $total_dio_wr" fi -log_must rm -f "$mntpnt/direct-write.iso" # Clearing out DIO counts for Zpool log_must zpool clear $TESTPOOL # Clearing out dio_verify from event logs log_must zpool events -c - - +log_must rm -f "$mntpnt/direct-write.iso" # Next we will verify there are checksum errors for Direct I/O writes while # manipulating the contents of the user pages. log_must zfs set compression=off $TESTPOOL/$TESTFS for i in $(seq 1 $ITERATIONS); do - log_note "Verifying 30% of Direct I/O write checksums iteration \ - $i of $ITERATIONS with \ - zfs_vdev_direct_write_verify_pct=$VERIFY_PCT" + log_note "Verifying Direct I/O write checksums iteration \ + $i of $ITERATIONS with zfs_vdev_direct_write_verify=0" prev_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count) - prev_arc_wr=$(get_iostats_stat $TESTPOOL arc_write_count) log_must manipulate_user_buffer -o "$mntpnt/direct-write.iso" \ -n $NUMBLOCKS -b $BS @@ -131,9 +127,7 @@ for i in $(seq 1 $ITERATIONS); do # Getting new Direct I/O and ARC write counts. 
curr_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count) - curr_arc_wr=$(get_iostats_stat $TESTPOOL arc_write_count) total_dio_wr=$((curr_dio_wr - prev_dio_wr)) - total_arc_wr=$((curr_arc_wr - prev_arc_wr)) # Verifying there are checksum errors log_note "Making sure there are checksum errors for the ZPool" @@ -144,23 +138,13 @@ for i in $(seq 1 $ITERATIONS); do log_fail "No checksum failures for ZPool $TESTPOOL" fi - # Getting checksum verify failures - verify_failures=$(get_zpool_status_chksum_verify_failures $TESTPOOL "raidz") - log_note "Making sure we have Direct I/O writes logged" if [[ $total_dio_wr -lt 1 ]]; then log_fail "No Direct I/O writes $total_dio_wr" fi - log_note "Making sure we have Direct I/O write checksum verifies with ZPool" - check_dio_write_chksum_verify_failures $TESTPOOL "raidz" 1 - - # In the event of checksum verify error, the write will be redirected - # through the ARC. We check here that we have ARC writes. - log_note "Making sure we have ARC writes have taken place in the event \ - a Direct I/O checksum verify failures occurred" - if [[ $total_arc_wr -lt $verify_failures ]]; then - log_fail "ARC writes $total_arc_wr < $verify_failures" - fi + log_note "Making sure we have no Direct I/O write checksum verifies \ + with ZPool" + check_dio_write_chksum_verify_failures $TESTPOOL "raidz" 0 log_must rm -f "$mntpnt/direct-write.iso" done @@ -168,19 +152,22 @@ done log_must zpool status -v $TESTPOOL log_must zpool sync $TESTPOOL + + # Finally we will verfiy that with checking every Direct I/O write we have no # errors at all. 
-VERIFY_PCT=100 -log_must set_tunable32 VDEV_DIRECT_WR_VERIFY_PCT $VERIFY_PCT +# Create the file before trying to manipulate the contents +log_must file_write -o create -f "$mntpnt/direct-write.iso" -b $BS \ + -c $NUMBLOCKS -w +log_must set_tunable32 VDEV_DIRECT_WR_VERIFY 1 for i in $(seq 1 $ITERATIONS); do log_note "Verifying every Direct I/O write checksums iteration $i of \ - $ITERATIONS with zfs_vdev_direct_write_verify_pct=$VERIFY_PCT" + $ITERATIONS with zfs_vdev_direct_write_verify=1" prev_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count) - prev_arc_wr=$(get_iostats_stat $TESTPOOL arc_write_count) log_must manipulate_user_buffer -o "$mntpnt/direct-write.iso" \ - -n $NUMBLOCKS -b $BS + -n $NUMBLOCKS -b $BS -e # Reading file back to verify there no are checksum errors filesize=$(get_file_size "$mntpnt/direct-write.iso") @@ -190,16 +177,11 @@ for i in $(seq 1 $ITERATIONS); do # Getting new Direct I/O and ARC Write counts. curr_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count) - curr_arc_wr=$(get_iostats_stat $TESTPOOL arc_write_count) total_dio_wr=$((curr_dio_wr - prev_dio_wr)) - total_arc_wr=$((curr_arc_wr - prev_arc_wr)) log_note "Making sure there are no checksum errors with the ZPool" log_must check_pool_status $TESTPOOL "errors" "No known data errors" - # Geting checksum verify failures - verify_failures=$(get_zpool_status_chksum_verify_failures $TESTPOOL "raidz") - log_note "Making sure we have Direct I/O writes logged" if [[ $total_dio_wr -lt 1 ]]; then log_fail "No Direct I/O writes $total_dio_wr" @@ -207,16 +189,8 @@ for i in $(seq 1 $ITERATIONS); do log_note "Making sure we have Direct I/O write checksum verifies with ZPool" check_dio_write_chksum_verify_failures "$TESTPOOL" "raidz" 1 - - # In the event of checksum verify error, the write will be redirected - # through the ARC. We check here that we have ARC writes. 
- log_note "Making sure we have ARC writes have taken place in the event \ - a Direct I/O checksum verify failures occurred" - if [[ $total_arc_wr -lt $verify_failures ]]; then - log_fail "ARC writes $total_arc_wr < $verify_failures" - fi - - log_must rm -f "$mntpnt/direct-write.iso" done +log_must rm -f "$mntpnt/direct-write.iso" + log_pass "Verified checksum verify works for Direct I/O writes."