From 3c878354712c93e2c9bde768bac981040c1f7746 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Mon, 29 Jul 2024 17:08:32 -0400 Subject: [PATCH] Linux: Make zfs_prune() fair on NUMA systems Previous code evicted nr_to_scan items from each NUMA node. This not only multiplied the eviction by the number of nodes, but could exhaust the smaller ones, evicting inodes used by the active workload and requiring their immediate recreation. This patch spreads the requested eviction between all NUMA nodes proportionally to their evictable counts, which should be closer to expected LRU logic. See the kernel's super_cache_scan() for an example of similar logic. Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. --- module/os/linux/zfs/zfs_vfsops.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c index 2015c20d73..a52f08868d 100644 --- a/module/os/linux/zfs/zfs_vfsops.c +++ b/module/os/linux/zfs/zfs_vfsops.c @@ -1264,14 +1264,22 @@ zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects) defined(SHRINK_CONTROL_HAS_NID) && \ defined(SHRINKER_NUMA_AWARE) if (shrinker->flags & SHRINKER_NUMA_AWARE) { + long tc = 1; + for_each_online_node(sc.nid) { + long c = shrinker->count_objects(shrinker, &sc); + if (c == 0 || c == SHRINK_EMPTY) + continue; + tc += c; + } *objects = 0; for_each_online_node(sc.nid) { + long c = shrinker->count_objects(shrinker, &sc); + if (c == 0 || c == SHRINK_EMPTY) + continue; + if (c > tc) + tc = c; + sc.nr_to_scan = mult_frac(nr_to_scan, c, tc) + 1; *objects += (*shrinker->scan_objects)(shrinker, &sc); - /* - * reset sc.nr_to_scan, modified by - * scan_objects == super_cache_scan - */ - sc.nr_to_scan = nr_to_scan; } } else { *objects = (*shrinker->scan_objects)(shrinker, &sc);