From a7982d5d30dabd2e206011f54226b25d6c70c4d6 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Wed, 5 Apr 2023 21:12:17 +0000 Subject: [PATCH] FreeBSD: fix up EXDEV handling for clone_range API contract requires VOPs to handle EXDEV internally, worst case by falling back to the generic copy routine. This broke with the recent changes. While here whack custom loop to lock 2 vnodes with vn_lock_pair, which provides the same functionality internally. write start/finish around it plays no role so got eliminated. One difference is that vn_lock_pair always takes an exclusive lock on both vnodes. I did not patch around it because current code takes an exclusive lock on the target vnode. zfs supports shared-locking for writes, so this serializes different calls to the routine as is, despite range locking inside. At the same time you may notice the source vnode can get some traffic if only shared-locked, thus once more this goes the safer route of exclusive-locking. Note this should be patched to use shared-locking for both once the feature is considered stable. Technically the switch to vn_lock_pair should be a separate change, but it would only introduce churn immediately whacked by the rest of the patch. Reviewed-by: Alexander Motin Reviewed-by: Brian Behlendorf Signed-off-by: Mateusz Guzik Sponsored by: Rubicon Communications, LLC ("Netgate") Closes #14723 --- module/os/freebsd/zfs/zfs_vnops_os.c | 63 +++++++++++++++------------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/module/os/freebsd/zfs/zfs_vnops_os.c b/module/os/freebsd/zfs/zfs_vnops_os.c index 8abd7239ad..0ec4d40ce7 100644 --- a/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/module/os/freebsd/zfs/zfs_vnops_os.c @@ -6249,56 +6249,59 @@ zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap) * need something else than vn_generic_copy_file_range(). */ - /* Lock both vnodes, avoiding risk of deadlock. */ - do { - mp = NULL; - error = vn_start_write(outvp, &mp, V_WAIT); - if (error == 0) { - error = vn_lock(outvp, LK_EXCLUSIVE); - if (error == 0) { - if (invp == outvp) - break; - error = vn_lock(invp, LK_SHARED | LK_NOWAIT); - if (error == 0) - break; - VOP_UNLOCK(outvp); - if (mp != NULL) - vn_finished_write(mp); - mp = NULL; - error = vn_lock(invp, LK_SHARED); - if (error == 0) - VOP_UNLOCK(invp); - } + vn_start_write(outvp, &mp, V_WAIT); + if (invp == outvp) { + if (vn_lock(outvp, LK_EXCLUSIVE) != 0) { + goto bad_write_fallback; } - if (mp != NULL) - vn_finished_write(mp); - } while (error == 0); - if (error != 0) - return (error); + } else { +#if __FreeBSD_version >= 1400086 + vn_lock_pair(invp, false, LK_EXCLUSIVE, outvp, false, + LK_EXCLUSIVE); +#else + vn_lock_pair(invp, false, outvp, false); +#endif + if (VN_IS_DOOMED(invp) || VN_IS_DOOMED(outvp)) { + goto bad_locked_fallback; + } + } + #ifdef MAC error = mac_vnode_check_write(curthread->td_ucred, ap->a_outcred, outvp); if (error != 0) - goto unlock; + goto out_locked; #endif io.uio_offset = *ap->a_outoffp; io.uio_resid = *ap->a_lenp; error = vn_rlimit_fsize(outvp, &io, ap->a_fsizetd); if (error != 0) - goto unlock; + goto out_locked; error = zfs_clone_range(VTOZ(invp), ap->a_inoffp, VTOZ(outvp), - ap->a_outoffp, &len, ap->a_fsizetd->td_ucred); + ap->a_outoffp, &len, ap->a_outcred); + if (error == EXDEV) + goto bad_locked_fallback; *ap->a_lenp = (size_t)len; - -unlock: +out_locked: if (invp != outvp) VOP_UNLOCK(invp); VOP_UNLOCK(outvp); if (mp != NULL) vn_finished_write(mp); + return (error); +bad_locked_fallback: + if (invp != outvp) + VOP_UNLOCK(invp); + VOP_UNLOCK(outvp); +bad_write_fallback: + if (mp != NULL) + vn_finished_write(mp); + error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp, + ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, + ap->a_incred, ap->a_outcred, ap->a_fsizetd); return (error); }