Linux 3.18 compat: Snapshot auto-mounting

Re-factor the .zfs/snapshot auto-mounting code to take into account
changes made to the upstream kernels, and to lay the groundwork for
enabling access to .zfs snapshots via NFS clients.  This patch makes
the following core improvements.

* All actively auto-mounted snapshots are now tracked in two global
trees which are indexed by snapshot name and objset id respectively.
This allows for fast lookups of any auto-mounted snapshot without
needing access to the parent dataset.

* Snapshot entries are added to the tree in zfsctl_snapshot_mount().
However, they are now removed from the tree in the context of the
unmount process.  This eliminates the need for complicated error logic
in zfsctl_snapshot_unmount() to handle unmount failures.

* References are now taken on the snapshot entries in the tree to
ensure they always remain valid while a task is outstanding.

* The MNT_SHRINKABLE flag is set on the snapshot vfsmount_t right
after the auto-mount succeeds.  This allows the kernel to unmount
idle auto-mounted snapshots if needed, removing the need for the
zfsctl_unmount_snapshots() function.

* Snapshots in active use will not be automatically unmounted.  As
long as at least one dentry is revalidated every zfs_expire_snapshot/2
seconds the auto-unmount expiration timer will be extended.

* Commit torvalds/linux@bafc9b7 caused snapshots auto-mounted by ZFS
to be immediately unmounted when the dentry was revalidated.  This
was a consequence of ZFS invalidating all snapdir dentries to ensure that
negative dentries didn't mask new snapshots.  This patch modifies the
behavior such that only negative dentries are invalidated.  This solves
the issue and may result in a performance improvement.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #3589
Closes #3344
Closes #3295
Closes #3257
Closes #3243
Closes #3030
Closes #2841
This commit is contained in:
Brian Behlendorf 2015-04-24 16:21:13 -07:00
parent b23975cbe0
commit 278bee9319
13 changed files with 499 additions and 422 deletions

View File

@ -0,0 +1,20 @@
dnl #
dnl # 2.6.38 API change
dnl # follow_down() was renamed to follow_down_one().  The original
dnl # follow_down() symbol still exists but will traverse down all the layers.
dnl #
dnl # Runs a ZFS_LINUX_TRY_COMPILE check on a snippet that includes
dnl # <linux/namei.h> and calls follow_down_one() on a struct path pointer.
dnl # When the check passes, HAVE_FOLLOW_DOWN_ONE is defined to 1; otherwise
dnl # only "no" is reported and nothing is defined.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_FOLLOW_DOWN_ONE], [
AC_MSG_CHECKING([whether follow_down_one() is available])
ZFS_LINUX_TRY_COMPILE([
#include <linux/namei.h>
],[
struct path *p = NULL;
follow_down_one(p);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_FOLLOW_DOWN_ONE, 1,
[follow_down_one() is available])
],[
AC_MSG_RESULT(no)
])
])

View File

@ -101,6 +101,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_KERNEL_VFS_ITERATE ZFS_AC_KERNEL_VFS_ITERATE
ZFS_AC_KERNEL_VFS_RW_ITERATE ZFS_AC_KERNEL_VFS_RW_ITERATE
ZFS_AC_KERNEL_KMAP_ATOMIC_ARGS ZFS_AC_KERNEL_KMAP_ATOMIC_ARGS
ZFS_AC_KERNEL_FOLLOW_DOWN_ONE
AS_IF([test "$LINUX_OBJ" != "$LINUX"], [ AS_IF([test "$LINUX_OBJ" != "$LINUX"], [
KERNELMAKE_PARAMS="$KERNELMAKE_PARAMS O=$LINUX_OBJ" KERNELMAKE_PARAMS="$KERNELMAKE_PARAMS O=$LINUX_OBJ"

View File

@ -352,4 +352,15 @@ static inline struct inode *file_inode(const struct file *f)
} }
#endif /* HAVE_FILE_INODE */ #endif /* HAVE_FILE_INODE */
/*
 * 2.6.38 API change
 *
 * zpl_follow_down_one() expands to follow_down_one() when the configure
 * check defined HAVE_FOLLOW_DOWN_ONE, and to follow_down() otherwise.
 * zpl_follow_up() expands to follow_up() in both cases, so it is defined
 * once outside the conditional.
 */
#define zpl_follow_up(path) follow_up(path)
#ifndef HAVE_FOLLOW_DOWN_ONE
#define zpl_follow_down_one(path) follow_down(path)
#else
#define zpl_follow_down_one(path) follow_down_one(path)
#endif
#endif /* _ZFS_VFS_H */ #endif /* _ZFS_VFS_H */

View File

@ -32,6 +32,7 @@
#define _ZFS_CTLDIR_H #define _ZFS_CTLDIR_H
#include <sys/vnode.h> #include <sys/vnode.h>
#include <sys/pathname.h>
#include <sys/zfs_vfsops.h> #include <sys/zfs_vfsops.h>
#include <sys/zfs_znode.h> #include <sys/zfs_znode.h>
@ -46,23 +47,16 @@
(zfs_has_ctldir(zdp) && \ (zfs_has_ctldir(zdp) && \
(ZTOZSB(zdp)->z_show_ctldir)) (ZTOZSB(zdp)->z_show_ctldir))
typedef struct { extern int zfs_expire_snapshot;
char *se_name;
char *se_path;
struct inode *se_inode;
taskqid_t se_taskqid;
avl_node_t se_node;
} zfs_snapentry_t;
/* zfsctl generic functions */ /* zfsctl generic functions */
extern int snapentry_compare(const void *a, const void *b);
extern boolean_t zfsctl_is_node(struct inode *ip);
extern boolean_t zfsctl_is_snapdir(struct inode *ip);
extern void zfsctl_inode_inactive(struct inode *ip);
extern void zfsctl_inode_destroy(struct inode *ip);
extern int zfsctl_create(zfs_sb_t *zsb); extern int zfsctl_create(zfs_sb_t *zsb);
extern void zfsctl_destroy(zfs_sb_t *zsb); extern void zfsctl_destroy(zfs_sb_t *zsb);
extern struct inode *zfsctl_root(znode_t *zp); extern struct inode *zfsctl_root(znode_t *zp);
extern void zfsctl_init(void);
extern void zfsctl_fini(void);
extern boolean_t zfsctl_is_node(struct inode *ip);
extern boolean_t zfsctl_is_snapdir(struct inode *ip);
extern int zfsctl_fid(struct inode *ip, fid_t *fidp); extern int zfsctl_fid(struct inode *ip, fid_t *fidp);
/* zfsctl '.zfs' functions */ /* zfsctl '.zfs' functions */
@ -81,9 +75,9 @@ extern int zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr,
extern int zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap, extern int zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap,
struct inode **ipp, cred_t *cr, int flags); struct inode **ipp, cred_t *cr, int flags);
extern void zfsctl_snapdir_inactive(struct inode *ip); extern void zfsctl_snapdir_inactive(struct inode *ip);
extern int zfsctl_unmount_snapshot(zfs_sb_t *zsb, char *name, int flags); extern int zfsctl_snapshot_mount(struct path *path, int flags);
extern int zfsctl_unmount_snapshots(zfs_sb_t *zsb, int flags, int *count); extern int zfsctl_snapshot_unmount(char *snapname, int flags);
extern int zfsctl_mount_snapshot(struct path *path, int flags); extern int zfsctl_snapshot_unmount_delay(uint64_t objsetid, int delay);
extern int zfsctl_lookup_objset(struct super_block *sb, uint64_t objsetid, extern int zfsctl_lookup_objset(struct super_block *sb, uint64_t objsetid,
zfs_sb_t **zsb); zfs_sb_t **zsb);
@ -92,10 +86,6 @@ extern int zfsctl_shares_lookup(struct inode *dip, char *name,
struct inode **ipp, int flags, cred_t *cr, int *direntflags, struct inode **ipp, int flags, cred_t *cr, int *direntflags,
pathname_t *realpnp); pathname_t *realpnp);
/* zfsctl_init/fini functions */
extern void zfsctl_init(void);
extern void zfsctl_fini(void);
/* /*
* These inodes numbers are reserved for the .zfs control directory. * These inodes numbers are reserved for the .zfs control directory.
* It is important that they be no larger than 48-bits because only * It is important that they be no larger than 48-bits because only

View File

@ -72,11 +72,10 @@ typedef struct zfs_sb {
list_t z_all_znodes; /* all znodes in the fs */ list_t z_all_znodes; /* all znodes in the fs */
uint64_t z_nr_znodes; /* number of znodes in the fs */ uint64_t z_nr_znodes; /* number of znodes in the fs */
unsigned long z_rollback_time; /* last online rollback time */ unsigned long z_rollback_time; /* last online rollback time */
unsigned long z_snap_defer_time; /* last snapshot unmount deferal */
kmutex_t z_znodes_lock; /* lock for z_all_znodes */ kmutex_t z_znodes_lock; /* lock for z_all_znodes */
arc_prune_t *z_arc_prune; /* called by ARC to prune caches */ arc_prune_t *z_arc_prune; /* called by ARC to prune caches */
struct inode *z_ctldir; /* .zfs directory inode */ struct inode *z_ctldir; /* .zfs directory inode */
avl_tree_t z_ctldir_snaps; /* .zfs/snapshot entries */
kmutex_t z_ctldir_lock; /* .zfs ctldir lock */
boolean_t z_show_ctldir; /* expose .zfs in the root dir */ boolean_t z_show_ctldir; /* expose .zfs in the root dir */
boolean_t z_issnap; /* true if this is a snapshot */ boolean_t z_issnap; /* true if this is a snapshot */
boolean_t z_vscan; /* virus scan on/off */ boolean_t z_vscan; /* virus scan on/off */

File diff suppressed because it is too large Load Diff

View File

@ -3410,37 +3410,20 @@ zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
* This function is best-effort. Callers must deal gracefully if it * This function is best-effort. Callers must deal gracefully if it
* remains mounted (or is remounted after this call). * remains mounted (or is remounted after this call).
* *
* XXX: This function should detect a failure to unmount a snapdir of a dataset * Returns 0 if the argument is not a snapshot, or it is not currently a
* and return the appropriate error code when it is mounted. Its Illumos and * filesystem, or we were able to unmount it. Returns error code otherwise.
* FreeBSD counterparts do this. We do not do this on Linux because there is no
* clear way to access the mount information that FreeBSD and Illumos use to
* distinguish between things with mounted snapshot directories, and things
* without mounted snapshot directories, which include zvols. Returning a
* failure for the latter causes `zfs destroy` to fail on zvol snapshots.
*/ */
int int
zfs_unmount_snap(const char *snapname) zfs_unmount_snap(const char *snapname)
{ {
zfs_sb_t *zsb = NULL; int err;
char *dsname;
char *fullname;
char *ptr;
if ((ptr = strchr(snapname, '@')) == NULL) if (strchr(snapname, '@') == NULL)
return (0); return (0);
dsname = kmem_alloc(ptr - snapname + 1, KM_SLEEP); err = zfsctl_snapshot_unmount((char *)snapname, MNT_FORCE);
strlcpy(dsname, snapname, ptr - snapname + 1); if (err != 0 && err != ENOENT)
fullname = strdup(snapname); return (SET_ERROR(err));
if (zfs_sb_hold(dsname, FTAG, &zsb, B_FALSE) == 0) {
ASSERT(!dsl_pool_config_held(dmu_objset_pool(zsb->z_os)));
(void) zfsctl_unmount_snapshot(zsb, fullname, MNT_FORCE);
zfs_sb_rele(zsb, FTAG);
}
kmem_free(dsname, ptr - snapname + 1);
strfree(fullname);
return (0); return (0);
} }

View File

@ -780,10 +780,6 @@ zfs_sb_create(const char *osname, zfs_sb_t **zsbp)
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
avl_create(&zsb->z_ctldir_snaps, snapentry_compare,
sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, se_node));
mutex_init(&zsb->z_ctldir_lock, NULL, MUTEX_DEFAULT, NULL);
*zsbp = zsb; *zsbp = zsb;
return (0); return (0);
@ -896,8 +892,6 @@ zfs_sb_free(zfs_sb_t *zsb)
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
mutex_destroy(&zsb->z_hold_mtx[i]); mutex_destroy(&zsb->z_hold_mtx[i]);
vmem_free(zsb->z_hold_mtx, sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ); vmem_free(zsb->z_hold_mtx, sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ);
mutex_destroy(&zsb->z_ctldir_lock);
avl_destroy(&zsb->z_ctldir_snaps);
kmem_free(zsb, sizeof (zfs_sb_t)); kmem_free(zsb, sizeof (zfs_sb_t));
} }
EXPORT_SYMBOL(zfs_sb_free); EXPORT_SYMBOL(zfs_sb_free);
@ -1373,6 +1367,7 @@ zfs_domount(struct super_block *sb, void *data, int silent)
acltype_changed_cb(zsb, pval); acltype_changed_cb(zsb, pval);
zsb->z_issnap = B_TRUE; zsb->z_issnap = B_TRUE;
zsb->z_os->os_sync = ZFS_SYNC_DISABLED; zsb->z_os->os_sync = ZFS_SYNC_DISABLED;
zsb->z_snap_defer_time = jiffies;
mutex_enter(&zsb->z_os->os_user_ptr_lock); mutex_enter(&zsb->z_os->os_user_ptr_lock);
dmu_objset_set_user(zsb->z_os, zsb); dmu_objset_set_user(zsb->z_os, zsb);
@ -1422,8 +1417,8 @@ zfs_preumount(struct super_block *sb)
{ {
zfs_sb_t *zsb = sb->s_fs_info; zfs_sb_t *zsb = sb->s_fs_info;
if (zsb != NULL && zsb->z_ctldir != NULL) if (zsb)
zfsctl_destroy(zsb); zfsctl_destroy(sb->s_fs_info);
} }
EXPORT_SYMBOL(zfs_preumount); EXPORT_SYMBOL(zfs_preumount);

View File

@ -4097,11 +4097,6 @@ zfs_inactive(struct inode *ip)
zfs_sb_t *zsb = ITOZSB(ip); zfs_sb_t *zsb = ITOZSB(ip);
int error; int error;
if (zfsctl_is_node(ip)) {
zfsctl_inode_inactive(ip);
return;
}
rw_enter(&zsb->z_teardown_inactive_lock, RW_READER); rw_enter(&zsb->z_teardown_inactive_lock, RW_READER);
if (zp->z_sa_hdl == NULL) { if (zp->z_sa_hdl == NULL) {
rw_exit(&zsb->z_teardown_inactive_lock); rw_exit(&zsb->z_teardown_inactive_lock);

View File

@ -274,9 +274,6 @@ zfs_inode_destroy(struct inode *ip)
znode_t *zp = ITOZ(ip); znode_t *zp = ITOZ(ip);
zfs_sb_t *zsb = ZTOZSB(zp); zfs_sb_t *zsb = ZTOZSB(zp);
if (zfsctl_is_node(ip))
zfsctl_inode_destroy(ip);
mutex_enter(&zsb->z_znodes_lock); mutex_enter(&zsb->z_znodes_lock);
if (list_link_active(&zp->z_link_node)) { if (list_link_active(&zp->z_link_node)) {
list_remove(&zsb->z_all_znodes, zp); list_remove(&zsb->z_all_znodes, zp);

View File

@ -160,19 +160,9 @@ const struct inode_operations zpl_ops_root = {
static struct vfsmount * static struct vfsmount *
zpl_snapdir_automount(struct path *path) zpl_snapdir_automount(struct path *path)
{ {
struct dentry *dentry = path->dentry;
int error; int error;
/* error = -zfsctl_snapshot_mount(path, 0);
* We must briefly disable automounts for this dentry because the
* user space mount utility will trigger another lookup on this
* directory. That will result in zpl_snapdir_automount() being
* called repeatedly. The DCACHE_NEED_AUTOMOUNT flag can be
* safely reset once the mount completes.
*/
dentry->d_flags &= ~DCACHE_NEED_AUTOMOUNT;
error = -zfsctl_mount_snapshot(path, 0);
dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
if (error) if (error)
return (ERR_PTR(error)); return (ERR_PTR(error));
@ -188,8 +178,10 @@ zpl_snapdir_automount(struct path *path)
#endif /* HAVE_AUTOMOUNT */ #endif /* HAVE_AUTOMOUNT */
/* /*
* Revalidate any dentry in the snapshot directory on lookup, since a snapshot * Negative dentries must always be revalidated so newly created snapshots
* having the same name have been created or destroyed since it was cached. * can be detected and automounted. Normal dentries should be kept because
* as of the 3.18 kernel revalidating the mountpoint dentry will result in
* the snapshot being immediately unmounted.
*/ */
static int static int
#ifdef HAVE_D_REVALIDATE_NAMEIDATA #ifdef HAVE_D_REVALIDATE_NAMEIDATA
@ -198,7 +190,7 @@ zpl_snapdir_revalidate(struct dentry *dentry, struct nameidata *i)
zpl_snapdir_revalidate(struct dentry *dentry, unsigned int flags) zpl_snapdir_revalidate(struct dentry *dentry, unsigned int flags)
#endif #endif
{ {
return (0); return (!!dentry->d_inode);
} }
dentry_operations_t zpl_dops_snapdirs = { dentry_operations_t zpl_dops_snapdirs = {
@ -245,6 +237,9 @@ zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry,
ASSERT(error == 0 || ip == NULL); ASSERT(error == 0 || ip == NULL);
d_clear_d_op(dentry); d_clear_d_op(dentry);
d_set_d_op(dentry, &zpl_dops_snapdirs); d_set_d_op(dentry, &zpl_dops_snapdirs);
#ifdef HAVE_AUTOMOUNT
dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
#endif
return (d_splice_alias(ip, dentry)); return (d_splice_alias(ip, dentry));
} }
@ -373,7 +368,7 @@ zpl_snapdir_getattr(struct vfsmount *mnt, struct dentry *dentry,
ZFS_ENTER(zsb); ZFS_ENTER(zsb);
error = simple_getattr(mnt, dentry, stat); error = simple_getattr(mnt, dentry, stat);
stat->nlink = stat->size = avl_numnodes(&zsb->z_ctldir_snaps) + 2; stat->nlink = stat->size = 2;
stat->ctime = stat->mtime = dmu_objset_snap_cmtime(zsb->z_os); stat->ctime = stat->mtime = dmu_objset_snap_cmtime(zsb->z_os);
stat->atime = CURRENT_TIME; stat->atime = CURRENT_TIME;
ZFS_EXIT(zsb); ZFS_EXIT(zsb);

View File

@ -24,6 +24,7 @@
*/ */
#include <sys/zfs_ctldir.h>
#include <sys/zfs_vfsops.h> #include <sys/zfs_vfsops.h>
#include <sys/zfs_vnops.h> #include <sys/zfs_vnops.h>
#include <sys/zfs_znode.h> #include <sys/zfs_znode.h>
@ -240,21 +241,9 @@ zpl_rmdir(struct inode * dir, struct dentry *dentry)
static int static int
zpl_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) zpl_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
{ {
boolean_t issnap = ITOZSB(dentry->d_inode)->z_issnap;
int error; int error;
fstrans_cookie_t cookie; fstrans_cookie_t cookie;
/*
* Ensure MNT_SHRINKABLE is set on snapshots to ensure they are
* unmounted automatically with the parent file system. This
* is done on the first getattr because it's not easy to get the
* vfsmount structure at mount time. This call path is explicitly
* marked unlikely to avoid any performance impact. FWIW, ext4
* resorts to a similar trick for sysadmin convenience.
*/
if (unlikely(issnap && !(mnt->mnt_flags & MNT_SHRINKABLE)))
mnt->mnt_flags |= MNT_SHRINKABLE;
cookie = spl_fstrans_mark(); cookie = spl_fstrans_mark();
error = -zfs_getattr_fast(dentry->d_inode, stat); error = -zfs_getattr_fast(dentry->d_inode, stat);
spl_fstrans_unmark(cookie); spl_fstrans_unmark(cookie);
@ -503,6 +492,19 @@ zpl_revalidate(struct dentry *dentry, unsigned int flags)
if (flags & LOOKUP_RCU) if (flags & LOOKUP_RCU)
return (-ECHILD); return (-ECHILD);
/*
* Automounted snapshots rely on periodic dentry revalidation
* to defer snapshots from being automatically unmounted.
*/
if (zsb->z_issnap) {
if (time_after(jiffies, zsb->z_snap_defer_time +
MAX(zfs_expire_snapshot * HZ / 2, HZ))) {
zsb->z_snap_defer_time = jiffies;
zfsctl_snapshot_unmount_delay(
dmu_objset_id(zsb->z_os), zfs_expire_snapshot);
}
}
/* /*
* After a rollback negative dentries created before the rollback * After a rollback negative dentries created before the rollback
* time must be invalidated. Otherwise they can obscure files which * time must be invalidated. Otherwise they can obscure files which

View File

@ -198,20 +198,6 @@ zpl_remount_fs(struct super_block *sb, int *flags, char *data)
return (error); return (error);
} }
static void
zpl_umount_begin(struct super_block *sb)
{
zfs_sb_t *zsb = sb->s_fs_info;
int count;
/*
* Best effort to unmount snapshots in .zfs/snapshot/. Normally this
* isn't required because snapshots have the MNT_SHRINKABLE flag set.
*/
if (zsb->z_ctldir)
(void) zfsctl_unmount_snapshots(zsb, MNT_FORCE, &count);
}
/* /*
* ZFS specific features must be explicitly handled here, the VFS will * ZFS specific features must be explicitly handled here, the VFS will
* automatically handle the following generic functionality. * automatically handle the following generic functionality.
@ -359,7 +345,6 @@ const struct super_operations zpl_super_operations = {
.sync_fs = zpl_sync_fs, .sync_fs = zpl_sync_fs,
.statfs = zpl_statfs, .statfs = zpl_statfs,
.remount_fs = zpl_remount_fs, .remount_fs = zpl_remount_fs,
.umount_begin = zpl_umount_begin,
.show_options = zpl_show_options, .show_options = zpl_show_options,
.show_stats = NULL, .show_stats = NULL,
#ifdef HAVE_NR_CACHED_OBJECTS #ifdef HAVE_NR_CACHED_OBJECTS