Add zfs_sb_prune_aliases() function
For kernels which do not implement a per-suberblock shrinker, those older than Linux 3.1, the shrink_dcache_parent() function was used to attempt to reclaim dentries. This was found not be entirely reliable and could lead to performance issues on older kernels running meta-data heavy workloads. To address this issue a zfs_sb_prune_aliases() function has been added to implement this functionality. It relies on traversing the list of znodes for a filesystem and adding them to a private list with a reference held. The private list can then be safely walked outside the z_znodes_lock to prune dentires and drop the last reference so the inode can be freed. This provides the same synchronous behavior as the per-filesystem shrinker and has the advantage of depending on only long standing interfaces. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Tim Chase <tim@chase2k.com> Closes #3501
This commit is contained in:
parent
4c6a700910
commit
218b4e0a76
|
@ -0,0 +1,19 @@
|
||||||
|
dnl #
|
||||||
|
dnl # 2.6.12 API change
|
||||||
|
dnl # d_prune_aliases() helper function available.
|
||||||
|
dnl #
|
||||||
|
AC_DEFUN([ZFS_AC_KERNEL_D_PRUNE_ALIASES],
|
||||||
|
[AC_MSG_CHECKING([whether d_prune_aliases() is available])
|
||||||
|
ZFS_LINUX_TRY_COMPILE_SYMBOL([
|
||||||
|
#include <linux/dcache.h>
|
||||||
|
], [
|
||||||
|
struct inode *ip = NULL;
|
||||||
|
d_prune_aliases(ip);
|
||||||
|
], [d_prune_aliases], [fs/dcache.c], [
|
||||||
|
AC_MSG_RESULT(yes)
|
||||||
|
AC_DEFINE(HAVE_D_PRUNE_ALIASES, 1,
|
||||||
|
[d_prune_aliases() is available])
|
||||||
|
], [
|
||||||
|
AC_MSG_RESULT(no)
|
||||||
|
])
|
||||||
|
])
|
|
@ -78,6 +78,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
|
||||||
ZFS_AC_KERNEL_INSERT_INODE_LOCKED
|
ZFS_AC_KERNEL_INSERT_INODE_LOCKED
|
||||||
ZFS_AC_KERNEL_D_MAKE_ROOT
|
ZFS_AC_KERNEL_D_MAKE_ROOT
|
||||||
ZFS_AC_KERNEL_D_OBTAIN_ALIAS
|
ZFS_AC_KERNEL_D_OBTAIN_ALIAS
|
||||||
|
ZFS_AC_KERNEL_D_PRUNE_ALIASES
|
||||||
ZFS_AC_KERNEL_D_SET_D_OP
|
ZFS_AC_KERNEL_D_SET_D_OP
|
||||||
ZFS_AC_KERNEL_D_REVALIDATE_NAMEIDATA
|
ZFS_AC_KERNEL_D_REVALIDATE_NAMEIDATA
|
||||||
ZFS_AC_KERNEL_CONST_DENTRY_OPERATIONS
|
ZFS_AC_KERNEL_CONST_DENTRY_OPERATIONS
|
||||||
|
|
|
@ -1072,6 +1072,67 @@ zfs_root(zfs_sb_t *zsb, struct inode **ipp)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(zfs_root);
|
EXPORT_SYMBOL(zfs_root);
|
||||||
|
|
||||||
|
#if !defined(HAVE_SPLIT_SHRINKER_CALLBACK) && !defined(HAVE_SHRINK) && \
|
||||||
|
defined(HAVE_D_PRUNE_ALIASES)
|
||||||
|
/*
|
||||||
|
* Linux kernels older than 3.1 do not support a per-filesystem shrinker.
|
||||||
|
* To accommodate this we must improvise and manually walk the list of znodes
|
||||||
|
* attempting to prune dentries in order to be able to drop the inodes.
|
||||||
|
*
|
||||||
|
* To avoid scanning the same znodes multiple times they are always rotated
|
||||||
|
* to the end of the z_all_znodes list. New znodes are inserted at the
|
||||||
|
* end of the list so we're always scanning the oldest znodes first.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
zfs_sb_prune_aliases(zfs_sb_t *zsb, unsigned long nr_to_scan)
|
||||||
|
{
|
||||||
|
znode_t **zp_array, *zp;
|
||||||
|
int max_array = MIN(nr_to_scan, PAGE_SIZE * 8 / sizeof (znode_t *));
|
||||||
|
int objects = 0;
|
||||||
|
int i = 0, j = 0;
|
||||||
|
|
||||||
|
zp_array = kmem_zalloc(max_array * sizeof (znode_t *), KM_SLEEP);
|
||||||
|
|
||||||
|
mutex_enter(&zsb->z_znodes_lock);
|
||||||
|
while ((zp = list_head(&zsb->z_all_znodes)) != NULL) {
|
||||||
|
|
||||||
|
if ((i++ > nr_to_scan) || (j >= max_array))
|
||||||
|
break;
|
||||||
|
|
||||||
|
ASSERT(list_link_active(&zp->z_link_node));
|
||||||
|
list_remove(&zsb->z_all_znodes, zp);
|
||||||
|
list_insert_tail(&zsb->z_all_znodes, zp);
|
||||||
|
|
||||||
|
/* Skip active znodes and .zfs entries */
|
||||||
|
if (MUTEX_HELD(&zp->z_lock) || zp->z_is_ctldir)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (igrab(ZTOI(zp)) == NULL)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
zp_array[j] = zp;
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
mutex_exit(&zsb->z_znodes_lock);
|
||||||
|
|
||||||
|
for (i = 0; i < j; i++) {
|
||||||
|
zp = zp_array[i];
|
||||||
|
|
||||||
|
ASSERT3P(zp, !=, NULL);
|
||||||
|
d_prune_aliases(ZTOI(zp));
|
||||||
|
|
||||||
|
if (atomic_read(&ZTOI(zp)->i_count) == 1)
|
||||||
|
objects++;
|
||||||
|
|
||||||
|
iput(ZTOI(zp));
|
||||||
|
}
|
||||||
|
|
||||||
|
kmem_free(zp_array, max_array * sizeof (znode_t *));
|
||||||
|
|
||||||
|
return (objects);
|
||||||
|
}
|
||||||
|
#endif /* HAVE_D_PRUNE_ALIASES */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The ARC has requested that the filesystem drop entries from the dentry
|
* The ARC has requested that the filesystem drop entries from the dentry
|
||||||
* and inode caches. This can occur when the ARC needs to free meta data
|
* and inode caches. This can occur when the ARC needs to free meta data
|
||||||
|
@ -1106,18 +1167,10 @@ zfs_sb_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
|
||||||
*objects = (*shrinker->scan_objects)(shrinker, &sc);
|
*objects = (*shrinker->scan_objects)(shrinker, &sc);
|
||||||
#elif defined(HAVE_SHRINK)
|
#elif defined(HAVE_SHRINK)
|
||||||
*objects = (*shrinker->shrink)(shrinker, &sc);
|
*objects = (*shrinker->shrink)(shrinker, &sc);
|
||||||
|
#elif defined(HAVE_D_PRUNE_ALIASES)
|
||||||
|
*objects = zfs_sb_prune_aliases(zsb, nr_to_scan);
|
||||||
#else
|
#else
|
||||||
/*
|
#error "No available dentry and inode cache pruning mechanism."
|
||||||
* Linux kernels older than 3.1 do not support a per-filesystem
|
|
||||||
* shrinker. Therefore, we must fall back to the only available
|
|
||||||
* interface which is to discard all unused dentries and inodes.
|
|
||||||
* This behavior clearly isn't ideal but it's required so the ARC
|
|
||||||
* may free memory. The performance impact is mitigated by the
|
|
||||||
* fact that the frequently accessed dentry and inode buffers will
|
|
||||||
* still be in the ARC making them relatively cheap to recreate.
|
|
||||||
*/
|
|
||||||
*objects = 0;
|
|
||||||
shrink_dcache_parent(sb->s_root);
|
|
||||||
#endif
|
#endif
|
||||||
ZFS_EXIT(zsb);
|
ZFS_EXIT(zsb);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue