SEEK_HOLE should not block on txg_wait_synced()
Forcing a flush of txgs can be painfully slow when competing for disk I/O, since syncing is a process meant to execute asynchronously. Optimize this path by allowing data/hole seeking when the file is clean, and fall back to the old logic when it is dirty. This is a compromise short of disabling the feature entirely. Reviewed-by: Giuseppe Di Natale <dinatale2@llnl.gov> Reviewed-by: George Melikov <mail@gmelikov.ru> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Debabrata Banerjee <dbanerje@akamai.com> Closes #4306 Closes #5962
This commit is contained in:
parent
a44e7faa6c
commit
66aca24730
|
@ -1427,6 +1427,20 @@ Enable NOP writes
|
||||||
Use \fB1\fR for yes (default) and \fB0\fR to disable.
|
Use \fB1\fR for yes (default) and \fB0\fR to disable.
|
||||||
.RE
|
.RE
|
||||||
|
|
||||||
|
.sp
|
||||||
|
.ne 2
|
||||||
|
.na
|
||||||
|
\fBzfs_dmu_offset_next_sync\fR (int)
|
||||||
|
.ad
|
||||||
|
.RS 12n
|
||||||
|
Enable forcing txg sync to find holes. When enabled, ZFS acts like prior
|
||||||
|
versions when SEEK_HOLE or SEEK_DATA flags are used: when a dnode is
|
||||||
|
dirty, txgs are synced so that the dirty data can be
|
||||||
|
found.
|
||||||
|
.sp
|
||||||
|
Use \fB1\fR for yes and \fB0\fR to disable (default).
|
||||||
|
.RE
|
||||||
|
|
||||||
.sp
|
.sp
|
||||||
.ne 2
|
.ne 2
|
||||||
.na
|
.na
|
||||||
|
|
|
@ -67,6 +67,11 @@ int zfs_nopwrite_enabled = 1;
|
||||||
*/
|
*/
|
||||||
unsigned long zfs_per_txg_dirty_frees_percent = 30;
|
unsigned long zfs_per_txg_dirty_frees_percent = 30;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Enable/disable forcing txg sync when dirty in dmu_offset_next.
|
||||||
|
*/
|
||||||
|
int zfs_dmu_offset_next_sync = 0;
|
||||||
|
|
||||||
const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
|
const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
|
||||||
{ DMU_BSWAP_UINT8, TRUE, "unallocated" },
|
{ DMU_BSWAP_UINT8, TRUE, "unallocated" },
|
||||||
{ DMU_BSWAP_ZAP, TRUE, "object directory" },
|
{ DMU_BSWAP_ZAP, TRUE, "object directory" },
|
||||||
|
@ -1989,24 +1994,43 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
|
||||||
zp->zp_nopwrite = nopwrite;
|
zp->zp_nopwrite = nopwrite;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This function is only called from zfs_holey_common() for zpl_llseek()
|
||||||
|
* in order to determine the location of holes. In order to accurately
|
||||||
|
* report holes all dirty data must be synced to disk. This causes extremely
|
||||||
|
* poor performance when seeking for holes in a dirty file. As a compromise,
|
||||||
|
* only provide hole data when the dnode is clean. When a dnode is dirty
|
||||||
|
* report the dnode as having no holes which is always a safe thing to do.
|
||||||
|
*/
|
||||||
int
|
int
|
||||||
dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off)
|
dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off)
|
||||||
{
|
{
|
||||||
dnode_t *dn;
|
dnode_t *dn;
|
||||||
int i, err;
|
int i, err;
|
||||||
|
boolean_t clean = B_TRUE;
|
||||||
|
|
||||||
err = dnode_hold(os, object, FTAG, &dn);
|
err = dnode_hold(os, object, FTAG, &dn);
|
||||||
if (err)
|
if (err)
|
||||||
return (err);
|
return (err);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Sync any current changes before
|
* Check if dnode is dirty
|
||||||
* we go trundling through the block pointers.
|
|
||||||
*/
|
*/
|
||||||
|
if (dn->dn_dirtyctx != DN_UNDIRTIED) {
|
||||||
for (i = 0; i < TXG_SIZE; i++) {
|
for (i = 0; i < TXG_SIZE; i++) {
|
||||||
if (list_link_active(&dn->dn_dirty_link[i]))
|
if (!list_is_empty(&dn->dn_dirty_records[i])) {
|
||||||
|
clean = B_FALSE;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (i != TXG_SIZE) {
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If compatibility option is on, sync any current changes before
|
||||||
|
* we go trundling through the block pointers.
|
||||||
|
*/
|
||||||
|
if (!clean && zfs_dmu_offset_next_sync) {
|
||||||
|
clean = B_TRUE;
|
||||||
dnode_rele(dn, FTAG);
|
dnode_rele(dn, FTAG);
|
||||||
txg_wait_synced(dmu_objset_pool(os), 0);
|
txg_wait_synced(dmu_objset_pool(os), 0);
|
||||||
err = dnode_hold(os, object, FTAG, &dn);
|
err = dnode_hold(os, object, FTAG, &dn);
|
||||||
|
@ -2014,7 +2038,12 @@ dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off)
|
||||||
return (err);
|
return (err);
|
||||||
}
|
}
|
||||||
|
|
||||||
err = dnode_next_offset(dn, (hole ? DNODE_FIND_HOLE : 0), off, 1, 1, 0);
|
if (clean)
|
||||||
|
err = dnode_next_offset(dn,
|
||||||
|
(hole ? DNODE_FIND_HOLE : 0), off, 1, 1, 0);
|
||||||
|
else
|
||||||
|
err = SET_ERROR(EBUSY);
|
||||||
|
|
||||||
dnode_rele(dn, FTAG);
|
dnode_rele(dn, FTAG);
|
||||||
|
|
||||||
return (err);
|
return (err);
|
||||||
|
@ -2238,5 +2267,11 @@ MODULE_PARM_DESC(zfs_nopwrite_enabled, "Enable NOP writes");
|
||||||
module_param(zfs_per_txg_dirty_frees_percent, ulong, 0644);
|
module_param(zfs_per_txg_dirty_frees_percent, ulong, 0644);
|
||||||
MODULE_PARM_DESC(zfs_per_txg_dirty_frees_percent,
|
MODULE_PARM_DESC(zfs_per_txg_dirty_frees_percent,
|
||||||
"percentage of dirtied blocks from frees in one TXG");
|
"percentage of dirtied blocks from frees in one TXG");
|
||||||
|
|
||||||
|
module_param(zfs_dmu_offset_next_sync, int, 0644);
|
||||||
|
MODULE_PARM_DESC(zfs_dmu_offset_next_sync,
|
||||||
|
"Enable forcing txg sync to find holes");
|
||||||
|
|
||||||
/* END CSTYLED */
|
/* END CSTYLED */
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -278,6 +278,10 @@ zfs_holey_common(struct inode *ip, int cmd, loff_t *off)
|
||||||
if (error == ESRCH)
|
if (error == ESRCH)
|
||||||
return (SET_ERROR(ENXIO));
|
return (SET_ERROR(ENXIO));
|
||||||
|
|
||||||
|
/* file was dirty, so fall back to using file_sz logic */
|
||||||
|
if (error == EBUSY)
|
||||||
|
error = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We could find a hole that begins after the logical end-of-file,
|
* We could find a hole that begins after the logical end-of-file,
|
||||||
* because dmu_offset_next() only works on whole blocks. If the
|
* because dmu_offset_next() only works on whole blocks. If the
|
||||||
|
|
Loading…
Reference in New Issue