Add zfs_sb_prune_aliases() function
For kernels which do not implement a per-superblock shrinker, those older than Linux 3.1, the shrink_dcache_parent() function was used to attempt to reclaim dentries. This was found not to be entirely reliable and could lead to performance issues on older kernels running meta-data heavy workloads. To address this issue a zfs_sb_prune_aliases() function has been added to implement this functionality. It relies on traversing the list of znodes for a filesystem and adding them to a private list with a reference held. The private list can then be safely walked outside the z_znodes_lock to prune dentries and drop the last reference so the inode can be freed. This provides the same synchronous behavior as the per-filesystem shrinker and has the advantage of depending on only long standing interfaces. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Tim Chase <tim@chase2k.com> Closes #3501
This commit is contained in:
parent
4c6a700910
commit
218b4e0a76
|
@ -0,0 +1,19 @@
|
|||
dnl #
dnl # 2.6.12 API change
dnl # Detect whether the d_prune_aliases() helper function is available
dnl # and, if so, define HAVE_D_PRUNE_ALIASES.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_D_PRUNE_ALIASES], [
	AC_MSG_CHECKING([whether d_prune_aliases() is available])
	ZFS_LINUX_TRY_COMPILE_SYMBOL([
		#include <linux/dcache.h>
	], [
		struct inode *inode = NULL;
		d_prune_aliases(inode);
	], [d_prune_aliases], [fs/dcache.c], [
		AC_MSG_RESULT([yes])
		AC_DEFINE([HAVE_D_PRUNE_ALIASES], [1],
		    [d_prune_aliases() is available])
	], [
		AC_MSG_RESULT([no])
	])
])
|
|
@ -78,6 +78,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
|
|||
ZFS_AC_KERNEL_INSERT_INODE_LOCKED
|
||||
ZFS_AC_KERNEL_D_MAKE_ROOT
|
||||
ZFS_AC_KERNEL_D_OBTAIN_ALIAS
|
||||
ZFS_AC_KERNEL_D_PRUNE_ALIASES
|
||||
ZFS_AC_KERNEL_D_SET_D_OP
|
||||
ZFS_AC_KERNEL_D_REVALIDATE_NAMEIDATA
|
||||
ZFS_AC_KERNEL_CONST_DENTRY_OPERATIONS
|
||||
|
|
|
@ -1072,6 +1072,67 @@ zfs_root(zfs_sb_t *zsb, struct inode **ipp)
|
|||
}
|
||||
EXPORT_SYMBOL(zfs_root);
|
||||
|
||||
#if !defined(HAVE_SPLIT_SHRINKER_CALLBACK) && !defined(HAVE_SHRINK) && \
|
||||
defined(HAVE_D_PRUNE_ALIASES)
|
||||
/*
|
||||
* Linux kernels older than 3.1 do not support a per-filesystem shrinker.
|
||||
* To accommodate this we must improvise and manually walk the list of znodes
|
||||
* attempting to prune dentries in order to be able to drop the inodes.
|
||||
*
|
||||
* To avoid scanning the same znodes multiple times they are always rotated
|
||||
* to the end of the z_all_znodes list. New znodes are inserted at the
|
||||
* end of the list so we're always scanning the oldest znodes first.
|
||||
*/
|
||||
static int
|
||||
zfs_sb_prune_aliases(zfs_sb_t *zsb, unsigned long nr_to_scan)
|
||||
{
|
||||
znode_t **zp_array, *zp;
|
||||
int max_array = MIN(nr_to_scan, PAGE_SIZE * 8 / sizeof (znode_t *));
|
||||
int objects = 0;
|
||||
int i = 0, j = 0;
|
||||
|
||||
zp_array = kmem_zalloc(max_array * sizeof (znode_t *), KM_SLEEP);
|
||||
|
||||
mutex_enter(&zsb->z_znodes_lock);
|
||||
while ((zp = list_head(&zsb->z_all_znodes)) != NULL) {
|
||||
|
||||
if ((i++ > nr_to_scan) || (j >= max_array))
|
||||
break;
|
||||
|
||||
ASSERT(list_link_active(&zp->z_link_node));
|
||||
list_remove(&zsb->z_all_znodes, zp);
|
||||
list_insert_tail(&zsb->z_all_znodes, zp);
|
||||
|
||||
/* Skip active znodes and .zfs entries */
|
||||
if (MUTEX_HELD(&zp->z_lock) || zp->z_is_ctldir)
|
||||
continue;
|
||||
|
||||
if (igrab(ZTOI(zp)) == NULL)
|
||||
continue;
|
||||
|
||||
zp_array[j] = zp;
|
||||
j++;
|
||||
}
|
||||
mutex_exit(&zsb->z_znodes_lock);
|
||||
|
||||
for (i = 0; i < j; i++) {
|
||||
zp = zp_array[i];
|
||||
|
||||
ASSERT3P(zp, !=, NULL);
|
||||
d_prune_aliases(ZTOI(zp));
|
||||
|
||||
if (atomic_read(&ZTOI(zp)->i_count) == 1)
|
||||
objects++;
|
||||
|
||||
iput(ZTOI(zp));
|
||||
}
|
||||
|
||||
kmem_free(zp_array, max_array * sizeof (znode_t *));
|
||||
|
||||
return (objects);
|
||||
}
|
||||
#endif /* HAVE_D_PRUNE_ALIASES */
|
||||
|
||||
/*
|
||||
* The ARC has requested that the filesystem drop entries from the dentry
|
||||
* and inode caches. This can occur when the ARC needs to free meta data
|
||||
|
@ -1106,18 +1167,10 @@ zfs_sb_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
|
|||
*objects = (*shrinker->scan_objects)(shrinker, &sc);
|
||||
#elif defined(HAVE_SHRINK)
|
||||
*objects = (*shrinker->shrink)(shrinker, &sc);
|
||||
#elif defined(HAVE_D_PRUNE_ALIASES)
|
||||
*objects = zfs_sb_prune_aliases(zsb, nr_to_scan);
|
||||
#else
|
||||
/*
|
||||
* Linux kernels older than 3.1 do not support a per-filesystem
|
||||
* shrinker. Therefore, we must fall back to the only available
|
||||
* interface which is to discard all unused dentries and inodes.
|
||||
* This behavior clearly isn't ideal but it's required so the ARC
|
||||
* may free memory. The performance impact is mitigated by the
|
||||
* fact that the frequently accessed dentry and inode buffers will
|
||||
* still be in the ARC making them relatively cheap to recreate.
|
||||
*/
|
||||
*objects = 0;
|
||||
shrink_dcache_parent(sb->s_root);
|
||||
#error "No available dentry and inode cache pruning mechanism."
|
||||
#endif
|
||||
ZFS_EXIT(zsb);
|
||||
|
||||
|
|
Loading…
Reference in New Issue