From 3ae05e34e5dde41c48705b39eb6dbec8676f9bc9 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Thu, 8 Aug 2024 18:33:36 -0400 Subject: [PATCH] Linux: Make zfs_prune() fair on NUMA systems Previous code evicted nr_to_scan items from each NUMA node. This not only multiplied the eviction by the number of nodes, but could exhaust the smaller ones, evicting inodes used by the active workload and requiring their immediate recreation. This patch spreads the requested eviction between all NUMA nodes proportionally to their evictable counts, which should be closer to expected LRU logic. See kernel's super_cache_scan() as a similar logic example. Reviewed-by: Brian Behlendorf Reviewed-by: Ameer Hamza Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Closes #16397 --- module/os/linux/zfs/zfs_vfsops.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c index 2015c20d73..a52f08868d 100644 --- a/module/os/linux/zfs/zfs_vfsops.c +++ b/module/os/linux/zfs/zfs_vfsops.c @@ -1264,14 +1264,22 @@ zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects) defined(SHRINK_CONTROL_HAS_NID) && \ defined(SHRINKER_NUMA_AWARE) if (shrinker->flags & SHRINKER_NUMA_AWARE) { + long tc = 1; + for_each_online_node(sc.nid) { + long c = shrinker->count_objects(shrinker, &sc); + if (c == 0 || c == SHRINK_EMPTY) + continue; + tc += c; + } *objects = 0; for_each_online_node(sc.nid) { + long c = shrinker->count_objects(shrinker, &sc); + if (c == 0 || c == SHRINK_EMPTY) + continue; + if (c > tc) + tc = c; + sc.nr_to_scan = mult_frac(nr_to_scan, c, tc) + 1; *objects += (*shrinker->scan_objects)(shrinker, &sc); - /* - * reset sc.nr_to_scan, modified by - * scan_objects == super_cache_scan - */ - sc.nr_to_scan = nr_to_scan; } } else { *objects = (*shrinker->scan_objects)(shrinker, &sc);