Fix several bugs in the FreeBSD rename VOP implementation

- To avoid a use-after-free, zfsvfs->z_log needs to be loaded after the
  teardown lock is acquired with ZFS_ENTER().
- Avoid leaking vnode locks in zfs_rename_relock() and zfs_rename_()
  when the ZFS_ENTER() macros forces an early return.

Refactor the rename implementation so that ZFS_ENTER() can be used
safely.  As a bonus, this lets us use the ZFS_VERIFY_ZP() macro instead
of open-coding its implementation.

Reported-by: Peter Holm <pho@FreeBSD.org>
Tested-by: Peter Holm <pho@FreeBSD.org>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Reviewed-by: Tony Nguyen <tony.nguyen@delphix.com>
Signed-off-by: Mark Johnston <markj@FreeBSD.org>
Sponsored-by: The FreeBSD Foundation
Closes #12717
This commit is contained in:
Mark Johnston 2021-11-19 17:26:39 -05:00 committed by Tony Hutter
parent b96737b83e
commit 19337332cc
1 changed files with 135 additions and 137 deletions

View File

@ -2935,48 +2935,18 @@ out2:
} }
/* /*
* We acquire all but fdvp locks using non-blocking acquisitions. If we * Look up the directory entries corresponding to the source and target
* fail to acquire any lock in the path we will drop all held locks, * directory/name pairs.
* acquire the new lock in a blocking fashion, and then release it and
* restart the rename. This acquire/release step ensures that we do not
* spin on a lock waiting for release. On error release all vnode locks
* and decrement references the way tmpfs_rename() would do.
*/ */
static int static int
zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp, zfs_rename_relock_lookup(znode_t *sdzp, const struct componentname *scnp,
struct vnode *tdvp, struct vnode **tvpp, znode_t **szpp, znode_t *tdzp, const struct componentname *tcnp,
const struct componentname *scnp, const struct componentname *tcnp) znode_t **tzpp)
{ {
zfsvfs_t *zfsvfs; zfsvfs_t *zfsvfs;
struct vnode *nvp, *svp, *tvp; znode_t *szp, *tzp;
znode_t *sdzp, *tdzp, *szp, *tzp;
const char *snm = scnp->cn_nameptr;
const char *tnm = tcnp->cn_nameptr;
int error; int error;
VOP_UNLOCK1(tdvp);
if (*tvpp != NULL && *tvpp != tdvp)
VOP_UNLOCK1(*tvpp);
relock:
error = vn_lock(sdvp, LK_EXCLUSIVE);
if (error)
goto out;
sdzp = VTOZ(sdvp);
error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT);
if (error != 0) {
VOP_UNLOCK1(sdvp);
if (error != EBUSY)
goto out;
error = vn_lock(tdvp, LK_EXCLUSIVE);
if (error)
goto out;
VOP_UNLOCK1(tdvp);
goto relock;
}
tdzp = VTOZ(tdvp);
/* /*
* Before using sdzp and tdzp we must ensure that they are live. * Before using sdzp and tdzp we must ensure that they are live.
* As a porting legacy from illumos we have two things to worry * As a porting legacy from illumos we have two things to worry
@ -2991,59 +2961,86 @@ relock:
zfsvfs = sdzp->z_zfsvfs; zfsvfs = sdzp->z_zfsvfs;
ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs); ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs);
ZFS_ENTER(zfsvfs); ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(sdzp);
/* ZFS_VERIFY_ZP(tdzp);
* We can not use ZFS_VERIFY_ZP() here because it could directly return
* bypassing the cleanup code in the case of an error.
*/
if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) {
ZFS_EXIT(zfsvfs);
VOP_UNLOCK1(sdvp);
VOP_UNLOCK1(tdvp);
error = SET_ERROR(EIO);
goto out;
}
/* /*
* Re-resolve svp to be certain it still exists and fetch the * Re-resolve svp to be certain it still exists and fetch the
* correct vnode. * correct vnode.
*/ */
error = zfs_dirent_lookup(sdzp, snm, &szp, ZEXISTS); error = zfs_dirent_lookup(sdzp, scnp->cn_nameptr, &szp, ZEXISTS);
if (error != 0) { if (error != 0) {
/* Source entry invalid or not there. */ /* Source entry invalid or not there. */
ZFS_EXIT(zfsvfs);
VOP_UNLOCK1(sdvp);
VOP_UNLOCK1(tdvp);
if ((scnp->cn_flags & ISDOTDOT) != 0 || if ((scnp->cn_flags & ISDOTDOT) != 0 ||
(scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.')) (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.'))
error = SET_ERROR(EINVAL); error = SET_ERROR(EINVAL);
goto out; goto out;
} }
svp = ZTOV(szp); *szpp = szp;
/* /*
* Re-resolve tvp, if it disappeared we just carry on. * Re-resolve tvp, if it disappeared we just carry on.
*/ */
error = zfs_dirent_lookup(tdzp, tnm, &tzp, 0); error = zfs_dirent_lookup(tdzp, tcnp->cn_nameptr, &tzp, 0);
if (error != 0) { if (error != 0) {
ZFS_EXIT(zfsvfs); vrele(ZTOV(szp));
VOP_UNLOCK1(sdvp);
VOP_UNLOCK1(tdvp);
vrele(svp);
if ((tcnp->cn_flags & ISDOTDOT) != 0) if ((tcnp->cn_flags & ISDOTDOT) != 0)
error = SET_ERROR(EINVAL); error = SET_ERROR(EINVAL);
goto out; goto out;
} }
if (tzp != NULL) *tzpp = tzp;
tvp = ZTOV(tzp); out:
else ZFS_EXIT(zfsvfs);
tvp = NULL; return (error);
}
/* /*
* At present the vnode locks must be acquired before z_teardown_lock, * We acquire all but fdvp locks using non-blocking acquisitions. If we
* although it would be more logical to use the opposite order. * fail to acquire any lock in the path we will drop all held locks,
* acquire the new lock in a blocking fashion, and then release it and
* restart the rename. This acquire/release step ensures that we do not
* spin on a lock waiting for release. On error release all vnode locks
* and decrement references the way tmpfs_rename() would do.
*/ */
ZFS_EXIT(zfsvfs); static int
zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp,
struct vnode *tdvp, struct vnode **tvpp,
const struct componentname *scnp, const struct componentname *tcnp)
{
struct vnode *nvp, *svp, *tvp;
znode_t *sdzp, *tdzp, *szp, *tzp;
int error;
VOP_UNLOCK1(tdvp);
if (*tvpp != NULL && *tvpp != tdvp)
VOP_UNLOCK1(*tvpp);
relock:
error = vn_lock(sdvp, LK_EXCLUSIVE);
if (error)
goto out;
error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT);
if (error != 0) {
VOP_UNLOCK1(sdvp);
if (error != EBUSY)
goto out;
error = vn_lock(tdvp, LK_EXCLUSIVE);
if (error)
goto out;
VOP_UNLOCK1(tdvp);
goto relock;
}
tdzp = VTOZ(tdvp);
sdzp = VTOZ(sdvp);
error = zfs_rename_relock_lookup(sdzp, scnp, &szp, tdzp, tcnp, &tzp);
if (error != 0) {
VOP_UNLOCK1(sdvp);
VOP_UNLOCK1(tdvp);
goto out;
}
svp = ZTOV(szp);
tvp = tzp != NULL ? ZTOV(tzp) : NULL;
/* /*
* Now try acquire locks on svp and tvp. * Now try acquire locks on svp and tvp.
@ -3180,17 +3177,22 @@ cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp,
} }
#endif #endif
static int
zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
cred_t *cr);
/* /*
* Move an entry from the provided source directory to the target * Move an entry from the provided source directory to the target
* directory. Change the entry name as indicated. * directory. Change the entry name as indicated.
* *
* IN: sdvp - Source directory containing the "old entry". * IN: sdvp - Source directory containing the "old entry".
* snm - Old entry name. * scnp - Old entry name.
* tdvp - Target directory to contain the "new entry". * tdvp - Target directory to contain the "new entry".
* tnm - New entry name. * tcnp - New entry name.
* cr - credentials of caller. * cr - credentials of caller.
* ct - caller context * INOUT: svpp - Source file
* flags - case flags * tvpp - Target file, may point to NULL initially
* *
* RETURN: 0 on success, error code on failure. * RETURN: 0 on success, error code on failure.
* *
@ -3199,18 +3201,15 @@ cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp,
*/ */
/*ARGSUSED*/ /*ARGSUSED*/
static int static int
zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp, zfs_do_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp, vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
cred_t *cr, int log) cred_t *cr)
{ {
zfsvfs_t *zfsvfs; int error;
znode_t *sdzp, *tdzp, *szp, *tzp;
zilog_t *zilog = NULL; ASSERT_VOP_ELOCKED(tdvp, __func__);
dmu_tx_t *tx; if (*tvpp != NULL)
const char *snm = scnp->cn_nameptr; ASSERT_VOP_ELOCKED(*tvpp, __func__);
const char *tnm = tcnp->cn_nameptr;
int error = 0;
bool want_seqc_end __maybe_unused = false;
/* Reject renames across filesystems. */ /* Reject renames across filesystems. */
if ((*svpp)->v_mount != tdvp->v_mount || if ((*svpp)->v_mount != tdvp->v_mount ||
@ -3233,51 +3232,64 @@ zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
return (error); return (error);
} }
error = zfs_do_rename_impl(sdvp, svpp, scnp, tdvp, tvpp, tcnp, cr);
VOP_UNLOCK1(sdvp);
VOP_UNLOCK1(*svpp);
out:
if (*tvpp != NULL)
VOP_UNLOCK1(*tvpp);
if (tdvp != *tvpp)
VOP_UNLOCK1(tdvp);
return (error);
}
static int
zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
cred_t *cr)
{
dmu_tx_t *tx;
zfsvfs_t *zfsvfs;
zilog_t *zilog;
znode_t *tdzp, *sdzp, *tzp, *szp;
const char *snm = scnp->cn_nameptr;
const char *tnm = tcnp->cn_nameptr;
int error;
tdzp = VTOZ(tdvp); tdzp = VTOZ(tdvp);
sdzp = VTOZ(sdvp); sdzp = VTOZ(sdvp);
zfsvfs = tdzp->z_zfsvfs; zfsvfs = tdzp->z_zfsvfs;
zilog = zfsvfs->z_log;
/*
* After we re-enter ZFS_ENTER() we will have to revalidate all
* znodes involved.
*/
ZFS_ENTER(zfsvfs); ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(tdzp);
ZFS_VERIFY_ZP(sdzp);
zilog = zfsvfs->z_log;
if (zfsvfs->z_utf8 && u8_validate(tnm, if (zfsvfs->z_utf8 && u8_validate(tnm,
strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
error = SET_ERROR(EILSEQ); error = SET_ERROR(EILSEQ);
goto unlockout; goto out;
} }
/* If source and target are the same file, there is nothing to do. */ /* If source and target are the same file, there is nothing to do. */
if ((*svpp) == (*tvpp)) { if ((*svpp) == (*tvpp)) {
error = 0; error = 0;
goto unlockout; goto out;
} }
if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) || if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) ||
((*tvpp) != NULL && (*tvpp)->v_type == VDIR && ((*tvpp) != NULL && (*tvpp)->v_type == VDIR &&
(*tvpp)->v_mountedhere != NULL)) { (*tvpp)->v_mountedhere != NULL)) {
error = SET_ERROR(EXDEV); error = SET_ERROR(EXDEV);
goto unlockout; goto out;
}
/*
* We can not use ZFS_VERIFY_ZP() here because it could directly return
* bypassing the cleanup code in the case of an error.
*/
if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) {
error = SET_ERROR(EIO);
goto unlockout;
} }
szp = VTOZ(*svpp); szp = VTOZ(*svpp);
ZFS_VERIFY_ZP(szp);
tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp); tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp);
if (szp->z_sa_hdl == NULL || (tzp != NULL && tzp->z_sa_hdl == NULL)) { if (tzp != NULL)
error = SET_ERROR(EIO); ZFS_VERIFY_ZP(tzp);
goto unlockout;
}
/* /*
* This is to prevent the creation of links into attribute space * This is to prevent the creation of links into attribute space
@ -3286,7 +3298,7 @@ zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
*/ */
if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
error = SET_ERROR(EINVAL); error = SET_ERROR(EINVAL);
goto unlockout; goto out;
} }
/* /*
@ -3299,7 +3311,7 @@ zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
if (tdzp->z_pflags & ZFS_PROJINHERIT && if (tdzp->z_pflags & ZFS_PROJINHERIT &&
tdzp->z_projid != szp->z_projid) { tdzp->z_projid != szp->z_projid) {
error = SET_ERROR(EXDEV); error = SET_ERROR(EXDEV);
goto unlockout; goto out;
} }
/* /*
@ -3309,7 +3321,7 @@ zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
* done in a single check. * done in a single check.
*/ */
if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr))) if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)))
goto unlockout; goto out;
if ((*svpp)->v_type == VDIR) { if ((*svpp)->v_type == VDIR) {
/* /*
@ -3319,7 +3331,7 @@ zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
sdzp == szp || sdzp == szp ||
(scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) { (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
error = EINVAL; error = EINVAL;
goto unlockout; goto out;
} }
/* /*
@ -3327,7 +3339,7 @@ zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
* Can't do a move like this: /usr/a/b to /usr/a/b/c/d * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
*/ */
if ((error = zfs_rename_check(szp, sdzp, tdzp))) if ((error = zfs_rename_check(szp, sdzp, tdzp)))
goto unlockout; goto out;
} }
/* /*
@ -3340,7 +3352,7 @@ zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
if ((*svpp)->v_type == VDIR) { if ((*svpp)->v_type == VDIR) {
if ((*tvpp)->v_type != VDIR) { if ((*tvpp)->v_type != VDIR) {
error = SET_ERROR(ENOTDIR); error = SET_ERROR(ENOTDIR);
goto unlockout; goto out;
} else { } else {
cache_purge(tdvp); cache_purge(tdvp);
if (sdvp != tdvp) if (sdvp != tdvp)
@ -3349,7 +3361,7 @@ zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
} else { } else {
if ((*tvpp)->v_type == VDIR) { if ((*tvpp)->v_type == VDIR) {
error = SET_ERROR(EISDIR); error = SET_ERROR(EISDIR);
goto unlockout; goto out;
} }
} }
} }
@ -3360,9 +3372,7 @@ zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
vn_seqc_write_begin(*tvpp); vn_seqc_write_begin(*tvpp);
if (tdvp != *tvpp) if (tdvp != *tvpp)
vn_seqc_write_begin(tdvp); vn_seqc_write_begin(tdvp);
#if __FreeBSD_version >= 1300102
want_seqc_end = true;
#endif
vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct); vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct);
if (tzp) if (tzp)
vnevent_rename_dest(*tvpp, tdvp, tnm, ct); vnevent_rename_dest(*tvpp, tdvp, tnm, ct);
@ -3394,10 +3404,9 @@ zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
error = dmu_tx_assign(tx, TXG_WAIT); error = dmu_tx_assign(tx, TXG_WAIT);
if (error) { if (error) {
dmu_tx_abort(tx); dmu_tx_abort(tx);
goto unlockout; goto out_seq;
} }
if (tzp) /* Attempt to remove the existing target */ if (tzp) /* Attempt to remove the existing target */
error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL); error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL);
@ -3444,30 +3453,19 @@ zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
dmu_tx_commit(tx); dmu_tx_commit(tx);
unlockout: /* all 4 vnodes are locked, ZFS_ENTER called */ out_seq:
if (want_seqc_end) {
vn_seqc_write_end(*svpp); vn_seqc_write_end(*svpp);
vn_seqc_write_end(sdvp); vn_seqc_write_end(sdvp);
if (*tvpp != NULL) if (*tvpp != NULL)
vn_seqc_write_end(*tvpp); vn_seqc_write_end(*tvpp);
if (tdvp != *tvpp) if (tdvp != *tvpp)
vn_seqc_write_end(tdvp); vn_seqc_write_end(tdvp);
want_seqc_end = false;
}
VOP_UNLOCK1(*svpp);
VOP_UNLOCK1(sdvp);
out:
if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0); zil_commit(zilog, 0);
ZFS_EXIT(zfsvfs); ZFS_EXIT(zfsvfs);
out: /* original two vnodes are locked */
MPASS(!want_seqc_end);
if (*tvpp != NULL)
VOP_UNLOCK1(*tvpp);
if (tdvp != *tvpp)
VOP_UNLOCK1(tdvp);
return (error); return (error);
} }
@ -3499,7 +3497,7 @@ zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname,
goto fail; goto fail;
} }
error = zfs_rename_(sdvp, &svp, &scn, tdvp, &tvp, &tcn, cr, 0); error = zfs_do_rename(sdvp, &svp, &scn, tdvp, &tvp, &tcn, cr);
fail: fail:
if (svp != NULL) if (svp != NULL)
vrele(svp); vrele(svp);
@ -4979,8 +4977,8 @@ zfs_freebsd_rename(struct vop_rename_args *ap)
ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART)); ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART));
ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART)); ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART));
error = zfs_rename_(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp, error = zfs_do_rename(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp,
ap->a_tcnp, ap->a_fcnp->cn_cred, 1); ap->a_tcnp, ap->a_fcnp->cn_cred);
vrele(fdvp); vrele(fdvp);
vrele(fvp); vrele(fvp);