Illumos #4347 ZPL can use dmu_tx_assign(TXG_WAIT)
Fix a lock contention issue by allowing threads not holding ZPL locks to block when waiting to assign a transaction. Porting Notes: zfs_putpage() still uses TXG_NOWAIT, unlike the upstream version. This case may be a contention point just like zfs_write(), however it is not safe to block here since it may be called during memory reclaim. Reviewed by: George Wilson <george.wilson@delphix.com> Reviewed by: Adam Leventhal <ahl@delphix.com> Reviewed by: Dan McDonald <danmcd@nexenta.com> Reviewed by: Boris Protopopov <boris.protopopov@nexenta.com> Approved by: Dan McDonald <danmcd@nexenta.com> References: https://www.illumos.org/issues/4347 illumos/illumos-gate@e722410c49 Ported-by: Ned Bass <bass6@llnl.gov> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
parent
729210564a
commit
384f8a09f8
|
@ -973,7 +973,6 @@ zfs_make_xattrdir(znode_t *zp, vattr_t *vap, struct inode **xipp, cred_t *cr)
|
||||||
return (SET_ERROR(EDQUOT));
|
return (SET_ERROR(EDQUOT));
|
||||||
}
|
}
|
||||||
|
|
||||||
top:
|
|
||||||
tx = dmu_tx_create(zsb->z_os);
|
tx = dmu_tx_create(zsb->z_os);
|
||||||
dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
|
dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
|
||||||
ZFS_SA_BASE_ATTR_SIZE);
|
ZFS_SA_BASE_ATTR_SIZE);
|
||||||
|
@ -982,13 +981,8 @@ top:
|
||||||
fuid_dirtied = zsb->z_fuid_dirty;
|
fuid_dirtied = zsb->z_fuid_dirty;
|
||||||
if (fuid_dirtied)
|
if (fuid_dirtied)
|
||||||
zfs_fuid_txhold(zsb, tx);
|
zfs_fuid_txhold(zsb, tx);
|
||||||
error = dmu_tx_assign(tx, TXG_NOWAIT);
|
error = dmu_tx_assign(tx, TXG_WAIT);
|
||||||
if (error) {
|
if (error) {
|
||||||
if (error == ERESTART) {
|
|
||||||
dmu_tx_wait(tx);
|
|
||||||
dmu_tx_abort(tx);
|
|
||||||
goto top;
|
|
||||||
}
|
|
||||||
zfs_acl_ids_free(&acl_ids);
|
zfs_acl_ids_free(&acl_ids);
|
||||||
dmu_tx_abort(tx);
|
dmu_tx_abort(tx);
|
||||||
return (error);
|
return (error);
|
||||||
|
|
|
@ -106,11 +106,18 @@
|
||||||
* (3) All range locks must be grabbed before calling dmu_tx_assign(),
|
* (3) All range locks must be grabbed before calling dmu_tx_assign(),
|
||||||
* as they can span dmu_tx_assign() calls.
|
* as they can span dmu_tx_assign() calls.
|
||||||
*
|
*
|
||||||
* (4) Always pass TXG_NOWAIT as the second argument to dmu_tx_assign().
|
* (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to
|
||||||
* This is critical because we don't want to block while holding locks.
|
* dmu_tx_assign(). This is critical because we don't want to block
|
||||||
* Note, in particular, that if a lock is sometimes acquired before
|
* while holding locks.
|
||||||
* the tx assigns, and sometimes after (e.g. z_lock), then failing to
|
*
|
||||||
* use a non-blocking assign can deadlock the system. The scenario:
|
* If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT. This
|
||||||
|
* reduces lock contention and CPU usage when we must wait (note that if
|
||||||
|
* throughput is constrained by the storage, nearly every transaction
|
||||||
|
* must wait).
|
||||||
|
*
|
||||||
|
* Note, in particular, that if a lock is sometimes acquired before
|
||||||
|
* the tx assigns, and sometimes after (e.g. z_lock), then failing
|
||||||
|
* to use a non-blocking assign can deadlock the system. The scenario:
|
||||||
*
|
*
|
||||||
* Thread A has grabbed a lock before calling dmu_tx_assign().
|
* Thread A has grabbed a lock before calling dmu_tx_assign().
|
||||||
* Thread B is in an already-assigned tx, and blocks for this lock.
|
* Thread B is in an already-assigned tx, and blocks for this lock.
|
||||||
|
@ -712,7 +719,6 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
|
||||||
while (n > 0) {
|
while (n > 0) {
|
||||||
abuf = NULL;
|
abuf = NULL;
|
||||||
woff = uio->uio_loffset;
|
woff = uio->uio_loffset;
|
||||||
again:
|
|
||||||
if (zfs_owner_overquota(zsb, zp, B_FALSE) ||
|
if (zfs_owner_overquota(zsb, zp, B_FALSE) ||
|
||||||
zfs_owner_overquota(zsb, zp, B_TRUE)) {
|
zfs_owner_overquota(zsb, zp, B_TRUE)) {
|
||||||
if (abuf != NULL)
|
if (abuf != NULL)
|
||||||
|
@ -762,13 +768,8 @@ again:
|
||||||
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
|
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
|
||||||
dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz));
|
dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz));
|
||||||
zfs_sa_upgrade_txholds(tx, zp);
|
zfs_sa_upgrade_txholds(tx, zp);
|
||||||
error = dmu_tx_assign(tx, TXG_NOWAIT);
|
error = dmu_tx_assign(tx, TXG_WAIT);
|
||||||
if (error) {
|
if (error) {
|
||||||
if (error == ERESTART) {
|
|
||||||
dmu_tx_wait(tx);
|
|
||||||
dmu_tx_abort(tx);
|
|
||||||
goto again;
|
|
||||||
}
|
|
||||||
dmu_tx_abort(tx);
|
dmu_tx_abort(tx);
|
||||||
if (abuf != NULL)
|
if (abuf != NULL)
|
||||||
dmu_return_arcbuf(abuf);
|
dmu_return_arcbuf(abuf);
|
||||||
|
@ -2833,12 +2834,9 @@ top:
|
||||||
|
|
||||||
zfs_sa_upgrade_txholds(tx, zp);
|
zfs_sa_upgrade_txholds(tx, zp);
|
||||||
|
|
||||||
err = dmu_tx_assign(tx, TXG_NOWAIT);
|
err = dmu_tx_assign(tx, TXG_WAIT);
|
||||||
if (err) {
|
if (err)
|
||||||
if (err == ERESTART)
|
|
||||||
dmu_tx_wait(tx);
|
|
||||||
goto out;
|
goto out;
|
||||||
}
|
|
||||||
|
|
||||||
count = 0;
|
count = 0;
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -1205,7 +1205,6 @@ zfs_extend(znode_t *zp, uint64_t end)
|
||||||
zfs_range_unlock(rl);
|
zfs_range_unlock(rl);
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
top:
|
|
||||||
tx = dmu_tx_create(zsb->z_os);
|
tx = dmu_tx_create(zsb->z_os);
|
||||||
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
|
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
|
||||||
zfs_sa_upgrade_txholds(tx, zp);
|
zfs_sa_upgrade_txholds(tx, zp);
|
||||||
|
@ -1225,13 +1224,8 @@ top:
|
||||||
newblksz = 0;
|
newblksz = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
error = dmu_tx_assign(tx, TXG_NOWAIT);
|
error = dmu_tx_assign(tx, TXG_WAIT);
|
||||||
if (error) {
|
if (error) {
|
||||||
if (error == ERESTART) {
|
|
||||||
dmu_tx_wait(tx);
|
|
||||||
dmu_tx_abort(tx);
|
|
||||||
goto top;
|
|
||||||
}
|
|
||||||
dmu_tx_abort(tx);
|
dmu_tx_abort(tx);
|
||||||
zfs_range_unlock(rl);
|
zfs_range_unlock(rl);
|
||||||
return (error);
|
return (error);
|
||||||
|
@ -1419,13 +1413,8 @@ log:
|
||||||
tx = dmu_tx_create(zsb->z_os);
|
tx = dmu_tx_create(zsb->z_os);
|
||||||
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
|
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
|
||||||
zfs_sa_upgrade_txholds(tx, zp);
|
zfs_sa_upgrade_txholds(tx, zp);
|
||||||
error = dmu_tx_assign(tx, TXG_NOWAIT);
|
error = dmu_tx_assign(tx, TXG_WAIT);
|
||||||
if (error) {
|
if (error) {
|
||||||
if (error == ERESTART) {
|
|
||||||
dmu_tx_wait(tx);
|
|
||||||
dmu_tx_abort(tx);
|
|
||||||
goto log;
|
|
||||||
}
|
|
||||||
dmu_tx_abort(tx);
|
dmu_tx_abort(tx);
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue