Remove zpl_revalidate: fix snapshot rollback
Open files that are not present in the snapshot being rolled back to need to disappear from the visible VFS image of the dataset. The kernel provides the d_drop function to drop an invalid entry from the dcache, but an inode can be referenced by multiple dentries. The newly introduced zpl_d_drop_aliases function walks and invalidates all aliases of an inode. Reviewed-by: Ryan Moeller <ryan@iXsystems.com> Reviewed-by: Alexander Motin <mav@FreeBSD.org> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Pavel Snajdr <snajpa@snajpa.net> Closes #9600 Closes #14070
This commit is contained in:
parent
4c59fde1f5
commit
52e658edd7
|
@ -0,0 +1,30 @@
|
||||||
|
dnl #
|
||||||
|
dnl # 3.18 API change
|
||||||
|
dnl # Dentry aliases are in d_u struct dentry member
|
||||||
|
dnl #
|
||||||
|
AC_DEFUN([ZFS_AC_KERNEL_SRC_DENTRY_ALIAS_D_U], [
|
||||||
|
ZFS_LINUX_TEST_SRC([dentry_alias_d_u], [
|
||||||
|
#include <linux/fs.h>
|
||||||
|
#include <linux/dcache.h>
|
||||||
|
#include <linux/list.h>
|
||||||
|
], [
|
||||||
|
struct inode *inode __attribute__ ((unused)) = NULL;
|
||||||
|
struct dentry *dentry __attribute__ ((unused)) = NULL;
|
||||||
|
hlist_for_each_entry(dentry, &inode->i_dentry,
|
||||||
|
d_u.d_alias) {
|
||||||
|
d_drop(dentry);
|
||||||
|
}
|
||||||
|
])
|
||||||
|
])
|
||||||
|
|
||||||
|
AC_DEFUN([ZFS_AC_KERNEL_DENTRY_ALIAS_D_U], [
|
||||||
|
AC_MSG_CHECKING([whether dentry aliases are in d_u member])
|
||||||
|
ZFS_LINUX_TEST_RESULT([dentry_alias_d_u], [
|
||||||
|
AC_MSG_RESULT(yes)
|
||||||
|
AC_DEFINE(HAVE_DENTRY_D_U_ALIASES, 1,
|
||||||
|
[dentry aliases are in d_u member])
|
||||||
|
],[
|
||||||
|
AC_MSG_RESULT(no)
|
||||||
|
])
|
||||||
|
])
|
||||||
|
|
|
@ -93,6 +93,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
|
||||||
ZFS_AC_KERNEL_SRC_SETATTR_PREPARE
|
ZFS_AC_KERNEL_SRC_SETATTR_PREPARE
|
||||||
ZFS_AC_KERNEL_SRC_INSERT_INODE_LOCKED
|
ZFS_AC_KERNEL_SRC_INSERT_INODE_LOCKED
|
||||||
ZFS_AC_KERNEL_SRC_DENTRY
|
ZFS_AC_KERNEL_SRC_DENTRY
|
||||||
|
ZFS_AC_KERNEL_SRC_DENTRY_ALIAS_D_U
|
||||||
ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE
|
ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE
|
||||||
ZFS_AC_KERNEL_SRC_SECURITY_INODE
|
ZFS_AC_KERNEL_SRC_SECURITY_INODE
|
||||||
ZFS_AC_KERNEL_SRC_FST_MOUNT
|
ZFS_AC_KERNEL_SRC_FST_MOUNT
|
||||||
|
@ -209,6 +210,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
|
||||||
ZFS_AC_KERNEL_SETATTR_PREPARE
|
ZFS_AC_KERNEL_SETATTR_PREPARE
|
||||||
ZFS_AC_KERNEL_INSERT_INODE_LOCKED
|
ZFS_AC_KERNEL_INSERT_INODE_LOCKED
|
||||||
ZFS_AC_KERNEL_DENTRY
|
ZFS_AC_KERNEL_DENTRY
|
||||||
|
ZFS_AC_KERNEL_DENTRY_ALIAS_D_U
|
||||||
ZFS_AC_KERNEL_TRUNCATE_SETSIZE
|
ZFS_AC_KERNEL_TRUNCATE_SETSIZE
|
||||||
ZFS_AC_KERNEL_SECURITY_INODE
|
ZFS_AC_KERNEL_SECURITY_INODE
|
||||||
ZFS_AC_KERNEL_FST_MOUNT
|
ZFS_AC_KERNEL_FST_MOUNT
|
||||||
|
|
|
@ -61,4 +61,25 @@ d_clear_d_op(struct dentry *dentry)
|
||||||
DCACHE_OP_REVALIDATE | DCACHE_OP_DELETE);
|
DCACHE_OP_REVALIDATE | DCACHE_OP_DELETE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Walk the dentry aliases of an inode while holding inode->i_lock and
|
||||||
|
* d_drop() each alias that still points at this inode, unless the alias
* is a root dentry (IS_ROOT) or a mountpoint (d_mountpoint).
*
* The alias list member is d_u.d_alias when HAVE_DENTRY_D_U_ALIASES is
* defined (3.18 API change) and d_alias otherwise.
|
||||||
|
*/
|
||||||
|
static inline void
|
||||||
|
zpl_d_drop_aliases(struct inode *inode)
|
||||||
|
{
|
||||||
|
struct dentry *dentry;
|
||||||
|
spin_lock(&inode->i_lock);
|
||||||
|
#ifdef HAVE_DENTRY_D_U_ALIASES
|
||||||
|
hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
|
||||||
|
#else
|
||||||
|
hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
|
||||||
|
#endif
|
||||||
|
if (!IS_ROOT(dentry) && !d_mountpoint(dentry) &&
|
||||||
|
(dentry->d_inode == inode)) {
|
||||||
|
d_drop(dentry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
spin_unlock(&inode->i_lock);
|
||||||
|
}
|
||||||
#endif /* _ZFS_DCACHE_H */
|
#endif /* _ZFS_DCACHE_H */
|
||||||
|
|
|
@ -62,7 +62,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
|
||||||
__field(boolean_t, z_is_sa)
|
__field(boolean_t, z_is_sa)
|
||||||
__field(boolean_t, z_is_mapped)
|
__field(boolean_t, z_is_mapped)
|
||||||
__field(boolean_t, z_is_ctldir)
|
__field(boolean_t, z_is_ctldir)
|
||||||
__field(boolean_t, z_is_stale)
|
|
||||||
|
|
||||||
__field(uint32_t, i_uid)
|
__field(uint32_t, i_uid)
|
||||||
__field(uint32_t, i_gid)
|
__field(uint32_t, i_gid)
|
||||||
|
@ -95,7 +94,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
|
||||||
__entry->z_is_sa = zn->z_is_sa;
|
__entry->z_is_sa = zn->z_is_sa;
|
||||||
__entry->z_is_mapped = zn->z_is_mapped;
|
__entry->z_is_mapped = zn->z_is_mapped;
|
||||||
__entry->z_is_ctldir = zn->z_is_ctldir;
|
__entry->z_is_ctldir = zn->z_is_ctldir;
|
||||||
__entry->z_is_stale = zn->z_is_stale;
|
|
||||||
|
|
||||||
__entry->i_uid = KUID_TO_SUID(ZTOI(zn)->i_uid);
|
__entry->i_uid = KUID_TO_SUID(ZTOI(zn)->i_uid);
|
||||||
__entry->i_gid = KGID_TO_SGID(ZTOI(zn)->i_gid);
|
__entry->i_gid = KGID_TO_SGID(ZTOI(zn)->i_gid);
|
||||||
|
@ -117,7 +115,7 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
|
||||||
"zn_prefetch %u blksz %u seq %u "
|
"zn_prefetch %u blksz %u seq %u "
|
||||||
"mapcnt %llu size %llu pflags %llu "
|
"mapcnt %llu size %llu pflags %llu "
|
||||||
"sync_cnt %u mode 0x%x is_sa %d "
|
"sync_cnt %u mode 0x%x is_sa %d "
|
||||||
"is_mapped %d is_ctldir %d is_stale %d inode { "
|
"is_mapped %d is_ctldir %d inode { "
|
||||||
"uid %u gid %u ino %lu nlink %u size %lli "
|
"uid %u gid %u ino %lu nlink %u size %lli "
|
||||||
"blkbits %u bytes %u mode 0x%x generation %x } } "
|
"blkbits %u bytes %u mode 0x%x generation %x } } "
|
||||||
"ace { type %u flags %u access_mask %u } mask_matched %u",
|
"ace { type %u flags %u access_mask %u } mask_matched %u",
|
||||||
|
@ -126,7 +124,7 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
|
||||||
__entry->z_seq, __entry->z_mapcnt, __entry->z_size,
|
__entry->z_seq, __entry->z_mapcnt, __entry->z_size,
|
||||||
__entry->z_pflags, __entry->z_sync_cnt, __entry->z_mode,
|
__entry->z_pflags, __entry->z_sync_cnt, __entry->z_mode,
|
||||||
__entry->z_is_sa, __entry->z_is_mapped,
|
__entry->z_is_sa, __entry->z_is_mapped,
|
||||||
__entry->z_is_ctldir, __entry->z_is_stale, __entry->i_uid,
|
__entry->z_is_ctldir, __entry->i_uid,
|
||||||
__entry->i_gid, __entry->i_ino, __entry->i_nlink,
|
__entry->i_gid, __entry->i_ino, __entry->i_nlink,
|
||||||
__entry->i_size, __entry->i_blkbits,
|
__entry->i_size, __entry->i_blkbits,
|
||||||
__entry->i_bytes, __entry->i_mode, __entry->i_generation,
|
__entry->i_bytes, __entry->i_mode, __entry->i_generation,
|
||||||
|
|
|
@ -45,7 +45,8 @@ extern const struct inode_operations zpl_inode_operations;
|
||||||
extern const struct inode_operations zpl_dir_inode_operations;
|
extern const struct inode_operations zpl_dir_inode_operations;
|
||||||
extern const struct inode_operations zpl_symlink_inode_operations;
|
extern const struct inode_operations zpl_symlink_inode_operations;
|
||||||
extern const struct inode_operations zpl_special_inode_operations;
|
extern const struct inode_operations zpl_special_inode_operations;
|
||||||
extern dentry_operations_t zpl_dentry_operations;
|
|
||||||
|
/* zpl_file.c */
|
||||||
extern const struct address_space_operations zpl_address_space_operations;
|
extern const struct address_space_operations zpl_address_space_operations;
|
||||||
extern const struct file_operations zpl_file_operations;
|
extern const struct file_operations zpl_file_operations;
|
||||||
extern const struct file_operations zpl_dir_file_operations;
|
extern const struct file_operations zpl_dir_file_operations;
|
||||||
|
|
|
@ -190,7 +190,6 @@ typedef struct znode {
|
||||||
boolean_t z_is_sa; /* are we native sa? */
|
boolean_t z_is_sa; /* are we native sa? */
|
||||||
boolean_t z_is_mapped; /* are we mmap'ed */
|
boolean_t z_is_mapped; /* are we mmap'ed */
|
||||||
boolean_t z_is_ctldir; /* are we .zfs entry */
|
boolean_t z_is_ctldir; /* are we .zfs entry */
|
||||||
boolean_t z_is_stale; /* are we stale due to rollback? */
|
|
||||||
boolean_t z_suspended; /* extra ref from a suspend? */
|
boolean_t z_suspended; /* extra ref from a suspend? */
|
||||||
uint_t z_blksz; /* block size in bytes */
|
uint_t z_blksz; /* block size in bytes */
|
||||||
uint_t z_seq; /* modification sequence number */
|
uint_t z_seq; /* modification sequence number */
|
||||||
|
|
|
@ -470,7 +470,6 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
|
||||||
zp->z_is_sa = B_FALSE;
|
zp->z_is_sa = B_FALSE;
|
||||||
zp->z_is_mapped = B_FALSE;
|
zp->z_is_mapped = B_FALSE;
|
||||||
zp->z_is_ctldir = B_TRUE;
|
zp->z_is_ctldir = B_TRUE;
|
||||||
zp->z_is_stale = B_FALSE;
|
|
||||||
zp->z_sa_hdl = NULL;
|
zp->z_sa_hdl = NULL;
|
||||||
zp->z_blksz = 0;
|
zp->z_blksz = 0;
|
||||||
zp->z_seq = 0;
|
zp->z_seq = 0;
|
||||||
|
|
|
@ -1500,7 +1500,6 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
|
||||||
sb->s_op = &zpl_super_operations;
|
sb->s_op = &zpl_super_operations;
|
||||||
sb->s_xattr = zpl_xattr_handlers;
|
sb->s_xattr = zpl_xattr_handlers;
|
||||||
sb->s_export_op = &zpl_export_operations;
|
sb->s_export_op = &zpl_export_operations;
|
||||||
sb->s_d_op = &zpl_dentry_operations;
|
|
||||||
|
|
||||||
/* Set features for file system. */
|
/* Set features for file system. */
|
||||||
zfs_set_fuid_feature(zfsvfs);
|
zfs_set_fuid_feature(zfsvfs);
|
||||||
|
@ -1859,8 +1858,8 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
|
||||||
zp = list_next(&zfsvfs->z_all_znodes, zp)) {
|
zp = list_next(&zfsvfs->z_all_znodes, zp)) {
|
||||||
err2 = zfs_rezget(zp);
|
err2 = zfs_rezget(zp);
|
||||||
if (err2) {
|
if (err2) {
|
||||||
|
zpl_d_drop_aliases(ZTOI(zp));
|
||||||
remove_inode_hash(ZTOI(zp));
|
remove_inode_hash(ZTOI(zp));
|
||||||
zp->z_is_stale = B_TRUE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* see comment in zfs_suspend_fs() */
|
/* see comment in zfs_suspend_fs() */
|
||||||
|
|
|
@ -544,7 +544,6 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
|
||||||
zp->z_atime_dirty = B_FALSE;
|
zp->z_atime_dirty = B_FALSE;
|
||||||
zp->z_is_mapped = B_FALSE;
|
zp->z_is_mapped = B_FALSE;
|
||||||
zp->z_is_ctldir = B_FALSE;
|
zp->z_is_ctldir = B_FALSE;
|
||||||
zp->z_is_stale = B_FALSE;
|
|
||||||
zp->z_suspended = B_FALSE;
|
zp->z_suspended = B_FALSE;
|
||||||
zp->z_sa_hdl = NULL;
|
zp->z_sa_hdl = NULL;
|
||||||
zp->z_mapcnt = 0;
|
zp->z_mapcnt = 0;
|
||||||
|
|
|
@ -698,46 +698,6 @@ out:
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
|
||||||
#ifdef HAVE_D_REVALIDATE_NAMEIDATA
|
|
||||||
zpl_revalidate(struct dentry *dentry, struct nameidata *nd)
|
|
||||||
{
|
|
||||||
unsigned int flags = (nd ? nd->flags : 0);
|
|
||||||
#else
|
|
||||||
zpl_revalidate(struct dentry *dentry, unsigned int flags)
|
|
||||||
{
|
|
||||||
#endif /* HAVE_D_REVALIDATE_NAMEIDATA */
|
|
||||||
/* CSTYLED */
|
|
||||||
zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
|
|
||||||
int error;
|
|
||||||
|
|
||||||
if (flags & LOOKUP_RCU)
|
|
||||||
return (-ECHILD);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* After a rollback negative dentries created before the rollback
|
|
||||||
* time must be invalidated. Otherwise they can obscure files which
|
|
||||||
* are only present in the rolled back dataset.
|
|
||||||
*/
|
|
||||||
if (dentry->d_inode == NULL) {
|
|
||||||
spin_lock(&dentry->d_lock);
|
|
||||||
error = time_before(dentry->d_time, zfsvfs->z_rollback_time);
|
|
||||||
spin_unlock(&dentry->d_lock);
|
|
||||||
|
|
||||||
if (error)
|
|
||||||
return (0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The dentry may reference a stale inode if a mounted file system
|
|
||||||
* was rolled back to a point in time where the object didn't exist.
|
|
||||||
*/
|
|
||||||
if (dentry->d_inode && ITOZ(dentry->d_inode)->z_is_stale)
|
|
||||||
return (0);
|
|
||||||
|
|
||||||
return (1);
|
|
||||||
}
|
|
||||||
|
|
||||||
const struct inode_operations zpl_inode_operations = {
|
const struct inode_operations zpl_inode_operations = {
|
||||||
.setattr = zpl_setattr,
|
.setattr = zpl_setattr,
|
||||||
.getattr = zpl_getattr,
|
.getattr = zpl_getattr,
|
||||||
|
@ -826,7 +786,3 @@ const struct inode_operations zpl_special_inode_operations = {
|
||||||
.get_acl = zpl_get_acl,
|
.get_acl = zpl_get_acl,
|
||||||
#endif /* CONFIG_FS_POSIX_ACL */
|
#endif /* CONFIG_FS_POSIX_ACL */
|
||||||
};
|
};
|
||||||
|
|
||||||
dentry_operations_t zpl_dentry_operations = {
|
|
||||||
.d_revalidate = zpl_revalidate,
|
|
||||||
};
|
|
||||||
|
|
Loading…
Reference in New Issue