Additional limits on hole reporting

Holding the zp->z_rangelock as a RL_READER over the range
0-UINT64_MAX is sufficient to prevent the dnode from being
re-dirtied by concurrent writers.  To avoid potentially
looping multiple times for external callers which do not
take the rangelock, holes are not reported after the first
sync.  While not optimal, this is always functionally correct.

This change adds the missing rangelock calls on FreeBSD to
zvol_cdev_ioctl().

Reviewed-by: Brian Atkinson <batkinson@lanl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #14512
Closes #14641
This commit is contained in:
Brian Behlendorf 2023-03-28 08:19:03 -07:00 committed by Tony Hutter
parent 3da577280a
commit 164d184ed9
3 changed files with 22 additions and 15 deletions

View File

@ -1161,7 +1161,10 @@ zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data,
hole = (cmd == FIOSEEKHOLE); hole = (cmd == FIOSEEKHOLE);
noff = *off; noff = *off;
lr = zfs_rangelock_enter(&zv->zv_rangelock, 0, UINT64_MAX,
RL_READER);
error = dmu_offset_next(zv->zv_objset, ZVOL_OBJ, hole, &noff); error = dmu_offset_next(zv->zv_objset, ZVOL_OBJ, hole, &noff);
zfs_rangelock_exit(lr);
*off = noff; *off = noff;
break; break;
} }

View File

@ -2100,18 +2100,18 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
} }
/* /*
* This function is only called from zfs_holey_common() for zpl_llseek() * Reports the location of data and holes in an object. In order to
* in order to determine the location of holes. In order to accurately * accurately report holes all dirty data must be synced to disk. This
* report holes all dirty data must be synced to disk. This causes extremely * causes extremely poor performance when seeking for holes in a dirty file.
* poor performance when seeking for holes in a dirty file. As a compromise, * As a compromise, only provide hole data when the dnode is clean. When
* only provide hole data when the dnode is clean. When a dnode is dirty * a dnode is dirty report the dnode as having no holes by returning EBUSY
* report the dnode as having no holes which is always a safe thing to do. * which is always safe to do.
*/ */
int int
dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off)
{ {
dnode_t *dn; dnode_t *dn;
int err; int restarted = 0, err;
restart: restart:
err = dnode_hold(os, object, FTAG, &dn); err = dnode_hold(os, object, FTAG, &dn);
@ -2123,19 +2123,23 @@ restart:
if (dnode_is_dirty(dn)) { if (dnode_is_dirty(dn)) {
/* /*
* If the zfs_dmu_offset_next_sync module option is enabled * If the zfs_dmu_offset_next_sync module option is enabled
* then strict hole reporting has been requested. Dirty * then hole reporting has been requested. Dirty dnodes
* dnodes must be synced to disk to accurately report all * must be synced to disk to accurately report holes.
* holes. When disabled dirty dnodes are reported to not
* have any holes which is always safe.
* *
* When called by zfs_holey_common() the zp->z_rangelock * Provided a RL_READER rangelock spanning 0-UINT64_MAX is
* is held to prevent zfs_write() and mmap writeback from * held by the caller only a single restart will be required.
* re-dirtying the dnode after txg_wait_synced(). * We tolerate callers which do not hold the rangelock by
* returning EBUSY and not reporting holes after one restart.
*/ */
if (zfs_dmu_offset_next_sync) { if (zfs_dmu_offset_next_sync) {
rw_exit(&dn->dn_struct_rwlock); rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG); dnode_rele(dn, FTAG);
if (restarted)
return (SET_ERROR(EBUSY));
txg_wait_synced(dmu_objset_pool(os), 0); txg_wait_synced(dmu_objset_pool(os), 0);
restarted = 1;
goto restart; goto restart;
} }

View File

@ -105,7 +105,7 @@ zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off)
if (zn_has_cached_data(zp)) if (zn_has_cached_data(zp))
zn_flush_cached_data(zp, B_FALSE); zn_flush_cached_data(zp, B_FALSE);
lr = zfs_rangelock_enter(&zp->z_rangelock, 0, file_sz, RL_READER); lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_READER);
error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff); error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff);
zfs_rangelock_exit(lr); zfs_rangelock_exit(lr);