ZIL: Cleanup sync and commit handling
ZVOL:
- Mark all ZVOL ZIL transactions as sync. Since ZVOLs have only one object, it makes no sense to maintain an async queue and merge it into the sync queue on each commit; a single sync queue is simply cheaper, and it changes nothing until an actual commit request arrives.
- Remove zsd_sync_cnt and the zil_async_to_sync() calls, since we are no longer switching between sync and async queues.

ZFS:
- Mark write transactions as sync based only on the number of sync opens (z_sync_cnt). We cannot randomly jump between sync and async unless we want data corruption due to write reordering.
- When a file is first opened with O_SYNC (z_sync_cnt incremented to 1), call zil_async_to_sync() for it to preserve correct ordering between past and future writes.
- Drop the zfs_fsyncer_key logic. It appears to have been an optimization for workloads heavily intermixing async writes with large numbers of fsyncs. But first, it was broken 8 years ago because the Linux tsd implementation does not allow data storage between syscalls, and second, I doubt it is safe to switch from async to sync so often and without calling zil_async_to_sync().
- Rename the sync argument of *_log_write() to commit; it now only signals the caller's intent to call zil_commit() soon after. This allows the WR_COPIED optimization without carrying any other meaning.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: George Wilson <george.wilson@delphix.com>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Sponsored by: iXsystems, Inc.
Closes #15366
parent 043c6ee3b6 · commit c3773de168
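In short, the new contract is: the caller computes a single commit flag up front, passes it to *_log_write() (where it only selects the WR_COPIED path), and issues zil_commit() itself when the flag is set, while itx_sync is now derived solely from z_sync_cnt. A minimal sketch of that caller-side pattern, simplified from the zfs_write() hunk below (transaction setup, locking, and error handling omitted):

	/* Decide once whether a zil_commit() will follow this write. */
	boolean_t commit = (ioflag & (O_SYNC | O_DSYNC)) ||
	    (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS);

	/* ... assign the dmu_tx and copy in the user data ... */

	/* "commit" only hints that zil_commit() follows, enabling WR_COPIED. */
	zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, commit,
	    NULL, NULL);
	dmu_tx_commit(tx);

	if (commit)
		zil_commit(zilog, zp->z_id);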
@@ -284,7 +284,6 @@ typedef struct zfid_long {
 #define SHORT_FID_LEN (sizeof (zfid_short_t) - sizeof (uint16_t))
 #define LONG_FID_LEN (sizeof (zfid_long_t) - sizeof (uint16_t))
 
-extern uint_t zfs_fsyncer_key;
 extern int zfs_super_owner;
 
 extern void zfs_init(void);
@@ -575,7 +575,6 @@ typedef struct zfsdev_state {
 extern void *zfsdev_get_state(minor_t minor, enum zfsdev_state_type which);
 extern int zfsdev_getminor(zfs_file_t *fp, minor_t *minorp);
 
-extern uint_t zfs_fsyncer_key;
 extern uint_t zfs_allow_log_key;
 
 #endif /* _KERNEL */
@@ -307,7 +307,7 @@ extern void zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx,
     uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp,
     const char *dname, znode_t *szp, znode_t *wzp);
 extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
-    znode_t *zp, offset_t off, ssize_t len, int ioflag,
+    znode_t *zp, offset_t off, ssize_t len, boolean_t commit,
     zil_callback_t callback, void *callback_data);
 extern void zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
     znode_t *zp, uint64_t off, uint64_t len);
@@ -81,9 +81,9 @@ void zvol_remove_minors_impl(const char *name);
 void zvol_last_close(zvol_state_t *zv);
 void zvol_insert(zvol_state_t *zv);
 void zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off,
-    uint64_t len, boolean_t sync);
+    uint64_t len);
 void zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
-    uint64_t size, int sync);
+    uint64_t size, boolean_t commit);
 int zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
     struct lwb *lwb, zio_t *zio);
 int zvol_init_impl(void);
@@ -244,9 +244,15 @@ zfs_open(vnode_t **vpp, int flag, cred_t *cr)
 		return (SET_ERROR(EPERM));
 	}
 
-	/* Keep a count of the synchronous opens in the znode */
-	if (flag & O_SYNC)
-		atomic_inc_32(&zp->z_sync_cnt);
+	/*
+	 * Keep a count of the synchronous opens in the znode. On first
+	 * synchronous open we must convert all previous async transactions
+	 * into sync to keep correct ordering.
+	 */
+	if (flag & O_SYNC) {
+		if (atomic_inc_32_nv(&zp->z_sync_cnt) == 1)
+			zil_async_to_sync(zfsvfs->z_log, zp->z_id);
+	}
 
 	zfs_exit(zfsvfs, FTAG);
 	return (0);
@@ -4201,6 +4207,10 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
 	}
 	zfs_vmobject_wunlock(object);
 
+	boolean_t commit = (flags & (zfs_vm_pagerput_sync |
+	    zfs_vm_pagerput_inval)) != 0 ||
+	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS;
+
 	if (ncount == 0)
 		goto out;
 
@@ -4253,7 +4263,7 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
 			 * but that would make the locking messier
 			 */
 			zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off,
-			    len, 0, NULL, NULL);
+			    len, commit, NULL, NULL);
 
 			zfs_vmobject_wlock(object);
 			for (i = 0; i < ncount; i++) {
@@ -4268,8 +4278,7 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
 
 out:
 	zfs_rangelock_exit(lr);
-	if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 ||
-	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+	if (commit)
 		zil_commit(zfsvfs->z_log, zp->z_id);
 
 	dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, len);
@@ -123,7 +123,6 @@ struct zvol_state_os {
 	/* volmode=dev */
 	struct zvol_state_dev {
 		struct cdev *zsd_cdev;
-		uint64_t zsd_sync_cnt;
 		struct selinfo zsd_selinfo;
 	} _zso_dev;
 
@@ -669,7 +668,7 @@ zvol_geom_bio_strategy(struct bio *bp)
 	int error = 0;
 	boolean_t doread = B_FALSE;
 	boolean_t is_dumpified;
-	boolean_t sync;
+	boolean_t commit;
 
 	if (bp->bio_to)
 		zv = bp->bio_to->private;
@@ -696,7 +695,7 @@ zvol_geom_bio_strategy(struct bio *bp)
 		}
 		zvol_ensure_zilog(zv);
 		if (bp->bio_cmd == BIO_FLUSH)
-			goto sync;
+			goto commit;
 		break;
 	default:
 		error = SET_ERROR(EOPNOTSUPP);
@@ -718,7 +717,7 @@ zvol_geom_bio_strategy(struct bio *bp)
 	}
 
 	is_dumpified = B_FALSE;
-	sync = !doread && !is_dumpified &&
+	commit = !doread && !is_dumpified &&
 	    zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
 
 	/*
@@ -734,7 +733,7 @@ zvol_geom_bio_strategy(struct bio *bp)
 		if (error != 0) {
 			dmu_tx_abort(tx);
 		} else {
-			zvol_log_truncate(zv, tx, off, resid, sync);
+			zvol_log_truncate(zv, tx, off, resid);
 			dmu_tx_commit(tx);
 			error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
 			    off, resid);
@@ -755,7 +754,7 @@ zvol_geom_bio_strategy(struct bio *bp)
 			dmu_tx_abort(tx);
 		} else {
 			dmu_write(os, ZVOL_OBJ, off, size, addr, tx);
-			zvol_log_write(zv, tx, off, size, sync);
+			zvol_log_write(zv, tx, off, size, commit);
 			dmu_tx_commit(tx);
 		}
 	}
@@ -793,8 +792,8 @@ unlock:
 		break;
 	}
 
-	if (sync) {
-sync:
+	if (commit) {
+commit:
 		zil_commit(zv->zv_zilog, ZVOL_OBJ);
 	}
 resume:
@@ -866,7 +865,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag)
 	uint64_t volsize;
 	zfs_locked_range_t *lr;
 	int error = 0;
-	boolean_t sync;
+	boolean_t commit;
 	zfs_uio_t uio;
 
 	zv = dev->si_drv2;
@@ -880,7 +879,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag)
 		return (SET_ERROR(EIO));
 
 	ssize_t start_resid = zfs_uio_resid(&uio);
-	sync = (ioflag & IO_SYNC) ||
+	commit = (ioflag & IO_SYNC) ||
 	    (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS);
 
 	rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
@@ -904,7 +903,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag)
 		}
 		error = dmu_write_uio_dnode(zv->zv_dn, &uio, bytes, tx);
 		if (error == 0)
-			zvol_log_write(zv, tx, off, bytes, sync);
+			zvol_log_write(zv, tx, off, bytes, commit);
 		dmu_tx_commit(tx);
 
 		if (error)
@@ -913,7 +912,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag)
 	zfs_rangelock_exit(lr);
 	int64_t nwritten = start_resid - zfs_uio_resid(&uio);
 	dataset_kstats_update_write_kstats(&zv->zv_kstat, nwritten);
-	if (sync)
+	if (commit)
 		zil_commit(zv->zv_zilog, ZVOL_OBJ);
 	rw_exit(&zv->zv_suspend_lock);
 	return (error);
@@ -923,7 +922,6 @@ static int
 zvol_cdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
 {
 	zvol_state_t *zv;
-	struct zvol_state_dev *zsd;
 	int err = 0;
 	boolean_t drop_suspend = B_FALSE;
 
@@ -1017,13 +1015,6 @@ retry:
 	}
 
 	zv->zv_open_count++;
-	if (flags & O_SYNC) {
-		zsd = &zv->zv_zso->zso_dev;
-		zsd->zsd_sync_cnt++;
-		if (zsd->zsd_sync_cnt == 1 &&
-		    (zv->zv_flags & ZVOL_WRITTEN_TO) != 0)
-			zil_async_to_sync(zv->zv_zilog, ZVOL_OBJ);
-	}
 out_opened:
 	if (zv->zv_open_count == 0) {
 		zvol_last_close(zv);
@@ -1041,7 +1032,6 @@ static int
 zvol_cdev_close(struct cdev *dev, int flags, int fmt, struct thread *td)
 {
 	zvol_state_t *zv;
-	struct zvol_state_dev *zsd;
 	boolean_t drop_suspend = B_TRUE;
 
 	rw_enter(&zvol_state_lock, ZVOL_RW_READER);
@@ -1091,10 +1081,6 @@ zvol_cdev_close(struct cdev *dev, int flags, int fmt, struct thread *td)
 	 * You may get multiple opens, but only one close.
 	 */
 	zv->zv_open_count--;
-	if (flags & O_SYNC) {
-		zsd = &zv->zv_zso->zso_dev;
-		zsd->zsd_sync_cnt--;
-	}
 
 	if (zv->zv_open_count == 0) {
 		ASSERT(ZVOL_RW_READ_HELD(&zv->zv_suspend_lock));
@@ -1163,7 +1149,7 @@ zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data,
 			dmu_tx_abort(tx);
 		} else {
 			sync = (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS);
-			zvol_log_truncate(zv, tx, offset, length, sync);
+			zvol_log_truncate(zv, tx, offset, length);
 			dmu_tx_commit(tx);
 			error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
 			    offset, length);
@@ -192,9 +192,15 @@ zfs_open(struct inode *ip, int mode, int flag, cred_t *cr)
 		return (SET_ERROR(EPERM));
 	}
 
-	/* Keep a count of the synchronous opens in the znode */
-	if (flag & O_SYNC)
-		atomic_inc_32(&zp->z_sync_cnt);
+	/*
+	 * Keep a count of the synchronous opens in the znode. On first
+	 * synchronous open we must convert all previous async transactions
+	 * into sync to keep correct ordering.
+	 */
+	if (flag & O_SYNC) {
+		if (atomic_inc_32_nv(&zp->z_sync_cnt) == 1)
+			zil_async_to_sync(zfsvfs->z_log, zp->z_id);
+	}
 
 	zfs_exit(zfsvfs, FTAG);
 	return (0);
@@ -3826,21 +3832,14 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
 
 	err = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx);
 
-	zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, 0,
-	    for_sync ? zfs_putpage_sync_commit_cb :
-	    zfs_putpage_async_commit_cb, pp);
-
-	dmu_tx_commit(tx);
-
-	zfs_rangelock_exit(lr);
-
+	boolean_t commit = B_FALSE;
 	if (wbc->sync_mode != WB_SYNC_NONE) {
 		/*
 		 * Note that this is rarely called under writepages(), because
 		 * writepages() normally handles the entire commit for
 		 * performance reasons.
 		 */
-		zil_commit(zfsvfs->z_log, zp->z_id);
+		commit = B_TRUE;
 	} else if (!for_sync && atomic_load_32(&zp->z_sync_writes_cnt) > 0) {
 		/*
 		 * If the caller does not intend to wait synchronously
@@ -3850,9 +3849,20 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
 		 * our writeback to complete. Refer to the comment in
 		 * zpl_fsync() (when HAVE_FSYNC_RANGE is defined) for details.
 		 */
-		zil_commit(zfsvfs->z_log, zp->z_id);
+		commit = B_TRUE;
 	}
 
+	zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, commit,
+	    for_sync ? zfs_putpage_sync_commit_cb :
+	    zfs_putpage_async_commit_cb, pp);
+
+	dmu_tx_commit(tx);
+
+	zfs_rangelock_exit(lr);
+
+	if (commit)
+		zil_commit(zfsvfs->z_log, zp->z_id);
+
 	dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, pglen);
 
 	zfs_exit(zfsvfs, FTAG);
@@ -387,7 +387,7 @@ zvol_discard(zv_request_t *zvr)
 	if (error != 0) {
 		dmu_tx_abort(tx);
 	} else {
-		zvol_log_truncate(zv, tx, start, size, B_TRUE);
+		zvol_log_truncate(zv, tx, start, size);
 		dmu_tx_commit(tx);
 		error = dmu_free_long_range(zv->zv_objset,
 		    ZVOL_OBJ, start, size);
@@ -238,7 +238,6 @@ uint64_t zfs_max_nvlist_src_size = 0;
  */
 static uint64_t zfs_history_output_max = 1024 * 1024;
 
-uint_t zfs_fsyncer_key;
 uint_t zfs_allow_log_key;
 
 /* DATA_TYPE_ANY is used when zkey_type can vary. */
@@ -7882,7 +7881,6 @@ zfs_kmod_init(void)
 	if ((error = zfsdev_attach()) != 0)
 		goto out;
 
-	tsd_create(&zfs_fsyncer_key, NULL);
 	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
 	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
 
@@ -7919,7 +7917,6 @@ zfs_kmod_fini(void)
 	spa_fini();
 	zvol_fini();
 
-	tsd_destroy(&zfs_fsyncer_key);
 	tsd_destroy(&rrw_tsd_key);
 	tsd_destroy(&zfs_allow_log_key);
 }
@@ -606,13 +606,12 @@ static int64_t zfs_immediate_write_sz = 32768;
 
 void
 zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
-    znode_t *zp, offset_t off, ssize_t resid, int ioflag,
+    znode_t *zp, offset_t off, ssize_t resid, boolean_t commit,
     zil_callback_t callback, void *callback_data)
 {
 	dmu_buf_impl_t *db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl);
 	uint32_t blocksize = zp->z_blksz;
 	itx_wr_state_t write_state;
-	uintptr_t fsync_cnt;
 	uint64_t gen = 0;
 	ssize_t size = resid;
 
@@ -628,15 +627,11 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
 	else if (!spa_has_slogs(zilog->zl_spa) &&
 	    resid >= zfs_immediate_write_sz)
 		write_state = WR_INDIRECT;
-	else if (ioflag & (O_SYNC | O_DSYNC))
+	else if (commit)
 		write_state = WR_COPIED;
 	else
 		write_state = WR_NEED_COPY;
 
-	if ((fsync_cnt = (uintptr_t)tsd_get(zfs_fsyncer_key)) != 0) {
-		(void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1));
-	}
-
 	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &gen,
 	    sizeof (gen));
 
@@ -687,12 +682,9 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
 		BP_ZERO(&lr->lr_blkptr);
 
 		itx->itx_private = ZTOZSB(zp);
+		itx->itx_sync = (zp->z_sync_cnt != 0);
 		itx->itx_gen = gen;
 
-		if (!(ioflag & (O_SYNC | O_DSYNC)) && (zp->z_sync_cnt == 0) &&
-		    (fsync_cnt == 0))
-			itx->itx_sync = B_FALSE;
-
 		itx->itx_callback = callback;
 		itx->itx_callback_data = callback_data;
 		zil_itx_assign(zilog, itx, tx);
@@ -58,27 +58,20 @@
 #include <sys/zfs_znode.h>
 
-
-static ulong_t zfs_fsync_sync_cnt = 4;
-
 int
 zfs_fsync(znode_t *zp, int syncflag, cred_t *cr)
 {
 	int error = 0;
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 
-	(void) tsd_set(zfs_fsyncer_key, (void *)(uintptr_t)zfs_fsync_sync_cnt);
-
 	if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
 		if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
-			goto out;
+			return (error);
 		atomic_inc_32(&zp->z_sync_writes_cnt);
 		zil_commit(zfsvfs->z_log, zp->z_id);
 		atomic_dec_32(&zp->z_sync_writes_cnt);
 		zfs_exit(zfsvfs, FTAG);
 	}
-out:
-	tsd_set(zfs_fsyncer_key, NULL);
 
 	return (error);
 }
 
@@ -520,6 +513,8 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
 
 	uint64_t end_size = MAX(zp->z_size, woff + n);
 	zilog_t *zilog = zfsvfs->z_log;
+	boolean_t commit = (ioflag & (O_SYNC | O_DSYNC)) ||
+	    (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS);
 
 	const uint64_t uid = KUID_TO_SUID(ZTOUID(zp));
 	const uint64_t gid = KGID_TO_SGID(ZTOGID(zp));
@@ -741,7 +736,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
 		 * zfs_clear_setid_bits_if_necessary must precede any of
 		 * the TX_WRITE records logged here.
 		 */
-		zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag,
+		zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, commit,
 		    NULL, NULL);
 
 		dmu_tx_commit(tx);
@@ -767,8 +762,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
 		return (error);
 	}
 
-	if (ioflag & (O_SYNC | O_DSYNC) ||
-	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+	if (commit)
 		zil_commit(zilog, zp->z_id);
 
 	const int64_t nwritten = start_resid - zfs_uio_resid(uio);
@@ -583,7 +583,7 @@ static const ssize_t zvol_immediate_write_sz = 32768;
 
 void
 zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
-    uint64_t size, int sync)
+    uint64_t size, boolean_t commit)
 {
 	uint32_t blocksize = zv->zv_volblocksize;
 	zilog_t *zilog = zv->zv_zilog;
@@ -598,7 +598,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
 	else if (!spa_has_slogs(zilog->zl_spa) &&
 	    size >= blocksize && blocksize > zvol_immediate_write_sz)
 		write_state = WR_INDIRECT;
-	else if (sync)
+	else if (commit)
 		write_state = WR_COPIED;
 	else
 		write_state = WR_NEED_COPY;
@@ -633,7 +633,6 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
 		BP_ZERO(&lr->lr_blkptr);
 
 		itx->itx_private = zv;
-		itx->itx_sync = sync;
 
 		(void) zil_itx_assign(zilog, itx, tx);
 
@@ -650,8 +649,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
  * Log a DKIOCFREE/free-long-range to the ZIL with TX_TRUNCATE.
  */
 void
-zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len,
-    boolean_t sync)
+zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len)
 {
 	itx_t *itx;
 	lr_truncate_t *lr;
@@ -666,7 +664,6 @@ zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len,
 	lr->lr_offset = off;
 	lr->lr_length = len;
 
-	itx->itx_sync = sync;
 	zil_itx_assign(zilog, itx, tx);
 }