Enable lazytime semantic for atime
Linux 4.0 introduced lazytime. The idea is that when atime is updated, writing it to disk is delayed for as long as reasonably possible. When lazytime is enabled, dirty_inode will be called with only the I_DIRTY_TIME flag whenever i_atime is updated. In that case we set z_atime_dirty, and we only write atime to disk when the file is closed, the inode is evicted, or setattr is called. Ideally, we should also write it whenever the SA is going to be updated, but that is left as a future improvement. There is one thing we must take care of now that i_atime is allowed to be dirty. In the original implementation, whenever the SA is modified, zfs_inode_update is called to overwrite everything in the inode, which would cause a dirty i_atime to be discarded. We fix this by not overwriting i_atime in zfs_inode_update. i_atime is only overwritten when allocating a new inode or doing zfs_rezget, via zfs_inode_update_new. Signed-off-by: Chunwei Chen <david.chen@osnexus.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Issue #4482
This commit is contained in:
parent
5137c95dec
commit
1ad7f89628
|
@ -2934,8 +2934,9 @@ top:
|
|||
}
|
||||
|
||||
|
||||
if (mask & ATTR_ATIME) {
|
||||
ZFS_TIME_ENCODE(&vap->va_atime, atime);
|
||||
if ((mask & ATTR_ATIME) || zp->z_atime_dirty) {
|
||||
zp->z_atime_dirty = 0;
|
||||
ZFS_TIME_ENCODE(&ip->i_atime, atime);
|
||||
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zsb), NULL,
|
||||
&atime, sizeof (atime));
|
||||
}
|
||||
|
@ -4048,7 +4049,7 @@ zfs_dirty_inode(struct inode *ip, int flags)
|
|||
dmu_tx_t *tx;
|
||||
uint64_t mode, atime[2], mtime[2], ctime[2];
|
||||
sa_bulk_attr_t bulk[4];
|
||||
int error;
|
||||
int error = 0;
|
||||
int cnt = 0;
|
||||
|
||||
if (zfs_is_readonly(zsb) || dmu_objset_is_snapshot(zsb->z_os))
|
||||
|
@ -4057,6 +4058,20 @@ zfs_dirty_inode(struct inode *ip, int flags)
|
|||
ZFS_ENTER(zsb);
|
||||
ZFS_VERIFY_ZP(zp);
|
||||
|
||||
#ifdef I_DIRTY_TIME
|
||||
/*
|
||||
* This is the lazytime semantic introduced in Linux 4.0
|
||||
* This flag will only be passed from update_time when lazytime is set.
|
||||
* (Note, I_DIRTY_SYNC will also be set if not lazytime)
|
||||
* Fortunately mtime and ctime are managed within ZFS itself, so we
|
||||
* only need to dirty atime.
|
||||
*/
|
||||
if (flags == I_DIRTY_TIME) {
|
||||
zp->z_atime_dirty = 1;
|
||||
goto out;
|
||||
}
|
||||
#endif
|
||||
|
||||
tx = dmu_tx_create(zsb->z_os);
|
||||
|
||||
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
|
||||
|
@ -4069,6 +4084,8 @@ zfs_dirty_inode(struct inode *ip, int flags)
|
|||
}
|
||||
|
||||
mutex_enter(&zp->z_lock);
|
||||
zp->z_atime_dirty = 0;
|
||||
|
||||
SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(zsb), NULL, &mode, 8);
|
||||
SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(zsb), NULL, &atime, 16);
|
||||
SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(zsb), NULL, &mtime, 16);
|
||||
|
|
|
@ -482,6 +482,90 @@ zfs_inode_set_ops(zfs_sb_t *zsb, struct inode *ip)
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
zfs_set_inode_flags(znode_t *zp, struct inode *ip)
|
||||
{
|
||||
/*
|
||||
* Linux and Solaris have different sets of file attributes, so we
|
||||
* restrict this conversion to the intersection of the two.
|
||||
*/
|
||||
|
||||
if (zp->z_pflags & ZFS_IMMUTABLE)
|
||||
ip->i_flags |= S_IMMUTABLE;
|
||||
else
|
||||
ip->i_flags &= ~S_IMMUTABLE;
|
||||
|
||||
if (zp->z_pflags & ZFS_APPENDONLY)
|
||||
ip->i_flags |= S_APPEND;
|
||||
else
|
||||
ip->i_flags &= ~S_APPEND;
|
||||
}
|
||||
|
||||
/*
|
||||
* Update the embedded inode given the znode. We should work toward
|
||||
* eliminating this function as soon as possible by removing values
|
||||
* which are duplicated between the znode and inode. If the generic
|
||||
* inode has the correct field it should be used, and the ZFS code
|
||||
* updated to access the inode. This can be done incrementally.
|
||||
*/
|
||||
static void
|
||||
zfs_inode_update_impl(znode_t *zp, boolean_t new)
|
||||
{
|
||||
zfs_sb_t *zsb;
|
||||
struct inode *ip;
|
||||
uint32_t blksize;
|
||||
u_longlong_t i_blocks;
|
||||
uint64_t atime[2], mtime[2], ctime[2];
|
||||
|
||||
ASSERT(zp != NULL);
|
||||
zsb = ZTOZSB(zp);
|
||||
ip = ZTOI(zp);
|
||||
|
||||
/* Skip .zfs control nodes which do not exist on disk. */
|
||||
if (zfsctl_is_node(ip))
|
||||
return;
|
||||
|
||||
sa_lookup(zp->z_sa_hdl, SA_ZPL_ATIME(zsb), &atime, 16);
|
||||
sa_lookup(zp->z_sa_hdl, SA_ZPL_MTIME(zsb), &mtime, 16);
|
||||
sa_lookup(zp->z_sa_hdl, SA_ZPL_CTIME(zsb), &ctime, 16);
|
||||
|
||||
dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &blksize, &i_blocks);
|
||||
|
||||
spin_lock(&ip->i_lock);
|
||||
ip->i_generation = zp->z_gen;
|
||||
ip->i_uid = SUID_TO_KUID(zp->z_uid);
|
||||
ip->i_gid = SGID_TO_KGID(zp->z_gid);
|
||||
set_nlink(ip, zp->z_links);
|
||||
ip->i_mode = zp->z_mode;
|
||||
zfs_set_inode_flags(zp, ip);
|
||||
ip->i_blkbits = SPA_MINBLOCKSHIFT;
|
||||
ip->i_blocks = i_blocks;
|
||||
|
||||
/*
|
||||
* Only read atime from SA if this is a newly created inode (or rezget),
|
||||
* otherwise i_atime might be dirty.
|
||||
*/
|
||||
if (new)
|
||||
ZFS_TIME_DECODE(&ip->i_atime, atime);
|
||||
ZFS_TIME_DECODE(&ip->i_mtime, mtime);
|
||||
ZFS_TIME_DECODE(&ip->i_ctime, ctime);
|
||||
|
||||
i_size_write(ip, zp->z_size);
|
||||
spin_unlock(&ip->i_lock);
|
||||
}
|
||||
|
||||
static void
|
||||
zfs_inode_update_new(znode_t *zp)
|
||||
{
|
||||
zfs_inode_update_impl(zp, B_TRUE);
|
||||
}
|
||||
|
||||
void
|
||||
zfs_inode_update(znode_t *zp)
|
||||
{
|
||||
zfs_inode_update_impl(zp, B_FALSE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Construct a znode+inode and initialize.
|
||||
*
|
||||
|
@ -549,7 +633,7 @@ zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz,
|
|||
zp->z_mode = mode;
|
||||
|
||||
ip->i_ino = obj;
|
||||
zfs_inode_update(zp);
|
||||
zfs_inode_update_new(zp);
|
||||
zfs_inode_set_ops(zsb, ip);
|
||||
|
||||
/*
|
||||
|
@ -576,73 +660,6 @@ error:
|
|||
return (NULL);
|
||||
}
|
||||
|
||||
void
|
||||
zfs_set_inode_flags(znode_t *zp, struct inode *ip)
|
||||
{
|
||||
/*
|
||||
* Linux and Solaris have different sets of file attributes, so we
|
||||
* restrict this conversion to the intersection of the two.
|
||||
*/
|
||||
|
||||
if (zp->z_pflags & ZFS_IMMUTABLE)
|
||||
ip->i_flags |= S_IMMUTABLE;
|
||||
else
|
||||
ip->i_flags &= ~S_IMMUTABLE;
|
||||
|
||||
if (zp->z_pflags & ZFS_APPENDONLY)
|
||||
ip->i_flags |= S_APPEND;
|
||||
else
|
||||
ip->i_flags &= ~S_APPEND;
|
||||
}
|
||||
|
||||
/*
|
||||
* Update the embedded inode given the znode. We should work toward
|
||||
* eliminating this function as soon as possible by removing values
|
||||
* which are duplicated between the znode and inode. If the generic
|
||||
* inode has the correct field it should be used, and the ZFS code
|
||||
* updated to access the inode. This can be done incrementally.
|
||||
*/
|
||||
void
|
||||
zfs_inode_update(znode_t *zp)
|
||||
{
|
||||
zfs_sb_t *zsb;
|
||||
struct inode *ip;
|
||||
uint32_t blksize;
|
||||
u_longlong_t i_blocks;
|
||||
uint64_t atime[2], mtime[2], ctime[2];
|
||||
|
||||
ASSERT(zp != NULL);
|
||||
zsb = ZTOZSB(zp);
|
||||
ip = ZTOI(zp);
|
||||
|
||||
/* Skip .zfs control nodes which do not exist on disk. */
|
||||
if (zfsctl_is_node(ip))
|
||||
return;
|
||||
|
||||
sa_lookup(zp->z_sa_hdl, SA_ZPL_ATIME(zsb), &atime, 16);
|
||||
sa_lookup(zp->z_sa_hdl, SA_ZPL_MTIME(zsb), &mtime, 16);
|
||||
sa_lookup(zp->z_sa_hdl, SA_ZPL_CTIME(zsb), &ctime, 16);
|
||||
|
||||
dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &blksize, &i_blocks);
|
||||
|
||||
spin_lock(&ip->i_lock);
|
||||
ip->i_generation = zp->z_gen;
|
||||
ip->i_uid = SUID_TO_KUID(zp->z_uid);
|
||||
ip->i_gid = SGID_TO_KGID(zp->z_gid);
|
||||
set_nlink(ip, zp->z_links);
|
||||
ip->i_mode = zp->z_mode;
|
||||
zfs_set_inode_flags(zp, ip);
|
||||
ip->i_blkbits = SPA_MINBLOCKSHIFT;
|
||||
ip->i_blocks = i_blocks;
|
||||
|
||||
ZFS_TIME_DECODE(&ip->i_atime, atime);
|
||||
ZFS_TIME_DECODE(&ip->i_mtime, mtime);
|
||||
ZFS_TIME_DECODE(&ip->i_ctime, ctime);
|
||||
|
||||
i_size_write(ip, zp->z_size);
|
||||
spin_unlock(&ip->i_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Safely mark an inode dirty. Inodes which are part of a read-only
|
||||
* file system or snapshot may not be dirtied.
|
||||
|
@ -1206,7 +1223,8 @@ zfs_rezget(znode_t *zp)
|
|||
|
||||
zp->z_unlinked = (zp->z_links == 0);
|
||||
zp->z_blksz = doi.doi_data_block_size;
|
||||
zfs_inode_update(zp);
|
||||
zp->z_atime_dirty = 0;
|
||||
zfs_inode_update_new(zp);
|
||||
|
||||
zfs_znode_hold_exit(zsb, zh);
|
||||
|
||||
|
|
|
@ -339,6 +339,9 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia)
|
|||
vap->va_mtime = ia->ia_mtime;
|
||||
vap->va_ctime = ia->ia_ctime;
|
||||
|
||||
if (vap->va_mask & ATTR_ATIME)
|
||||
ip->i_atime = ia->ia_atime;
|
||||
|
||||
cookie = spl_fstrans_mark();
|
||||
error = -zfs_setattr(ip, vap, 0, cr);
|
||||
if (!error && (ia->ia_valid & ATTR_MODE))
|
||||
|
|
Loading…
Reference in New Issue