Reintroduce zfs_remove() synchronous deletes
Reintroduce a slightly adapted version of the Illumos logic for synchronous unlinks. The basic idea here is that only files smaller than zfs_delete_blocks (20480) blocks should be deleted synchronously. Unlinking larger files should be handled asynchronously to minimize impact to the caller. To accomplish this, iput(), which is responsible for calling zfs_znode_delete() on Linux, is only called in the delete_now path. Otherwise zfs_iput_async() is used, which allows the last reference to be dropped by a taskq thread, effectively making the removal asynchronous. Porting notes: - Add zfs_delete_blocks module option for performance analysis. The default value is DMU_MAX_DELETEBLKCNT, which is the same as upstream. Reducing this value means that smaller files will be unlinked asynchronously like large files. - All occurrences of zfsvfs changed to zsb. Ported-by: KernelOfTruth <kerneloftruth@gmail.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
parent
460a021391
commit
a966c5640e
|
@ -715,6 +715,21 @@ Note: \fBzfs_delay_scale\fR * \fBzfs_dirty_data_max\fR must be < 2^64.
|
||||||
Default value: \fB500,000\fR.
|
Default value: \fB500,000\fR.
|
||||||
.RE
|
.RE
|
||||||
|
|
||||||
|
.sp
|
||||||
|
.ne 2
|
||||||
|
.na
|
||||||
|
\fBzfs_delete_blocks\fR (ulong)
|
||||||
|
.ad
|
||||||
|
.RS 12n
|
||||||
|
This is used to define a large file for the purposes of deletion. Files
|
||||||
|
containing more than \fBzfs_delete_blocks\fR blocks will be deleted asynchronously
|
||||||
|
while smaller files are deleted synchronously. Decreasing this value will
|
||||||
|
reduce the time spent in an unlink(2) system call at the expense of a longer
|
||||||
|
delay before the freed space is available.
|
||||||
|
.sp
|
||||||
|
Default value: \fB20,480\fR.
|
||||||
|
.RE
|
||||||
|
|
||||||
.sp
|
.sp
|
||||||
.ne 2
|
.ne 2
|
||||||
.na
|
.na
|
||||||
|
|
|
@ -418,6 +418,7 @@ mappedread(struct inode *ip, int nbytes, uio_t *uio)
|
||||||
#endif /* _KERNEL */
|
#endif /* _KERNEL */
|
||||||
|
|
||||||
unsigned long zfs_read_chunk_size = 1024 * 1024; /* Tunable */
|
unsigned long zfs_read_chunk_size = 1024 * 1024; /* Tunable */
|
||||||
|
/*
 * Tunable: size threshold, in blocks, used by zfs_remove().  A file whose
 * size exceeds z_blksz * zfs_delete_blocks is treated as "too big" and is
 * not deleted in the caller's context.
 */
unsigned long zfs_delete_blocks = DMU_MAX_DELETEBLKCNT;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Read bytes from specified file into supplied buffer.
|
* Read bytes from specified file into supplied buffer.
|
||||||
|
@ -1520,13 +1521,13 @@ zfs_remove(struct inode *dip, char *name, cred_t *cr)
|
||||||
struct inode *ip;
|
struct inode *ip;
|
||||||
zfs_sb_t *zsb = ITOZSB(dip);
|
zfs_sb_t *zsb = ITOZSB(dip);
|
||||||
zilog_t *zilog;
|
zilog_t *zilog;
|
||||||
uint64_t xattr_obj;
|
uint64_t acl_obj, xattr_obj;
|
||||||
uint64_t xattr_obj_unlinked = 0;
|
uint64_t xattr_obj_unlinked = 0;
|
||||||
uint64_t obj = 0;
|
uint64_t obj = 0;
|
||||||
zfs_dirlock_t *dl;
|
zfs_dirlock_t *dl;
|
||||||
dmu_tx_t *tx;
|
dmu_tx_t *tx;
|
||||||
boolean_t may_delete_now;
|
boolean_t may_delete_now, delete_now = FALSE;
|
||||||
boolean_t unlinked;
|
boolean_t unlinked, toobig = FALSE;
|
||||||
uint64_t txtype;
|
uint64_t txtype;
|
||||||
pathname_t *realnmp = NULL;
|
pathname_t *realnmp = NULL;
|
||||||
#ifdef HAVE_PN_UTILS
|
#ifdef HAVE_PN_UTILS
|
||||||
|
@ -1590,9 +1591,10 @@ top:
|
||||||
mutex_exit(&zp->z_lock);
|
mutex_exit(&zp->z_lock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We never delete the znode and always place it in the unlinked
|
* We may delete the znode now, or we may put it in the unlinked set;
|
||||||
* set. The dentry cache will always hold the last reference and
|
* it depends on whether we're the last link, and on whether there are
|
||||||
* is responsible for safely freeing the znode.
|
* other holds on the inode. So we dmu_tx_hold() the right things to
|
||||||
|
* allow for either case.
|
||||||
*/
|
*/
|
||||||
obj = zp->z_id;
|
obj = zp->z_id;
|
||||||
tx = dmu_tx_create(zsb->z_os);
|
tx = dmu_tx_create(zsb->z_os);
|
||||||
|
@ -1600,6 +1602,12 @@ top:
|
||||||
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
|
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
|
||||||
zfs_sa_upgrade_txholds(tx, zp);
|
zfs_sa_upgrade_txholds(tx, zp);
|
||||||
zfs_sa_upgrade_txholds(tx, dzp);
|
zfs_sa_upgrade_txholds(tx, dzp);
|
||||||
|
if (may_delete_now) {
|
||||||
|
toobig = zp->z_size > zp->z_blksz * zfs_delete_blocks;
|
||||||
|
/* if the file is too big, only hold_free a token amount */
|
||||||
|
dmu_tx_hold_free(tx, zp->z_id, 0,
|
||||||
|
(toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END));
|
||||||
|
}
|
||||||
|
|
||||||
/* are there any extended attributes? */
|
/* are there any extended attributes? */
|
||||||
error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb),
|
error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb),
|
||||||
|
@ -1611,6 +1619,11 @@ top:
|
||||||
dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
|
dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mutex_enter(&zp->z_lock);
|
||||||
|
if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now)
|
||||||
|
dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
|
||||||
|
mutex_exit(&zp->z_lock);
|
||||||
|
|
||||||
/* charge as an update -- would be nice not to charge at all */
|
/* charge as an update -- would be nice not to charge at all */
|
||||||
dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL);
|
dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL);
|
||||||
|
|
||||||
|
@ -1662,6 +1675,42 @@ top:
|
||||||
mutex_enter(&zp->z_lock);
|
mutex_enter(&zp->z_lock);
|
||||||
(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb),
|
(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb),
|
||||||
&xattr_obj_unlinked, sizeof (xattr_obj_unlinked));
|
&xattr_obj_unlinked, sizeof (xattr_obj_unlinked));
|
||||||
|
delete_now = may_delete_now && !toobig &&
|
||||||
|
atomic_read(&ip->i_count) == 1 && !(zp->z_is_mapped) &&
|
||||||
|
xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) ==
|
||||||
|
acl_obj;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (delete_now) {
|
||||||
|
if (xattr_obj_unlinked) {
|
||||||
|
ASSERT3U(xzp->z_links, ==, 2);
|
||||||
|
mutex_enter(&xzp->z_lock);
|
||||||
|
xzp->z_unlinked = 1;
|
||||||
|
xzp->z_links = 0;
|
||||||
|
error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zsb),
|
||||||
|
&xzp->z_links, sizeof (xzp->z_links), tx);
|
||||||
|
ASSERT3U(error, ==, 0);
|
||||||
|
mutex_exit(&xzp->z_lock);
|
||||||
|
zfs_unlinked_add(xzp, tx);
|
||||||
|
|
||||||
|
if (zp->z_is_sa)
|
||||||
|
error = sa_remove(zp->z_sa_hdl,
|
||||||
|
SA_ZPL_XATTR(zsb), tx);
|
||||||
|
else
|
||||||
|
error = sa_update(zp->z_sa_hdl,
|
||||||
|
SA_ZPL_XATTR(zsb), &null_xattr,
|
||||||
|
sizeof (uint64_t), tx);
|
||||||
|
ASSERT0(error);
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* Add to the unlinked set because a new reference could be
|
||||||
|
* taken concurrently resulting in a deferred destruction.
|
||||||
|
*/
|
||||||
|
zfs_unlinked_add(zp, tx);
|
||||||
|
mutex_exit(&zp->z_lock);
|
||||||
|
zfs_inode_update(zp);
|
||||||
|
iput(ip);
|
||||||
|
} else if (unlinked) {
|
||||||
mutex_exit(&zp->z_lock);
|
mutex_exit(&zp->z_lock);
|
||||||
zfs_unlinked_add(zp, tx);
|
zfs_unlinked_add(zp, tx);
|
||||||
}
|
}
|
||||||
|
@ -1682,13 +1731,16 @@ out:
|
||||||
|
|
||||||
zfs_dirent_unlock(dl);
|
zfs_dirent_unlock(dl);
|
||||||
zfs_inode_update(dzp);
|
zfs_inode_update(dzp);
|
||||||
zfs_inode_update(zp);
|
|
||||||
if (xzp)
|
|
||||||
zfs_inode_update(xzp);
|
|
||||||
|
|
||||||
iput(ip);
|
if (!delete_now) {
|
||||||
if (xzp)
|
zfs_inode_update(zp);
|
||||||
iput(ZTOI(xzp));
|
zfs_iput_async(ip);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (xzp) {
|
||||||
|
zfs_inode_update(xzp);
|
||||||
|
zfs_iput_async(ZTOI(xzp));
|
||||||
|
}
|
||||||
|
|
||||||
if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS)
|
if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS)
|
||||||
zil_commit(zilog, 0);
|
zil_commit(zilog, 0);
|
||||||
|
@ -4710,6 +4762,8 @@ zfs_retzcbuf(struct inode *ip, xuio_t *xuio, cred_t *cr)
|
||||||
#endif /* HAVE_UIO_ZEROCOPY */
|
#endif /* HAVE_UIO_ZEROCOPY */
|
||||||
|
|
||||||
#if defined(_KERNEL) && defined(HAVE_SPL)
|
#if defined(_KERNEL) && defined(HAVE_SPL)
|
||||||
|
module_param(zfs_delete_blocks, ulong, 0644);
|
||||||
|
MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");
|
||||||
module_param(zfs_read_chunk_size, long, 0644);
|
module_param(zfs_read_chunk_size, long, 0644);
|
||||||
MODULE_PARM_DESC(zfs_read_chunk_size, "Bytes to read per chunk");
|
MODULE_PARM_DESC(zfs_read_chunk_size, "Bytes to read per chunk");
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue