From 061daa06f41b04f913fe28937bb3de2c79a98f33 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Wed, 31 Jul 2024 16:24:42 -0400 Subject: [PATCH] Soften pruning threshold on not evictable metadata Previous code pruned 10% of dnodes once 3/4 of metadata appeared unevictable. On workloads with many millions of dnodes and little other metadata this creates significant load spikes for many seconds straight. This change instead gradually increases pruning as unevictable metadata grows above the 3/4 mark, which may allow it to stabilize at some level. Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. --- module/zfs/arc.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/module/zfs/arc.c b/module/zfs/arc.c index d01bf0947d..c936e9210e 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -4235,6 +4235,18 @@ arc_evict_adj(uint64_t frac, uint64_t total, uint64_t up, uint64_t down, return (frac + up - down); } +/* + * Calculate (x * multiplier / divisor) without unnecessary overflows. + */ +static uint64_t +arc_mf(uint64_t x, uint64_t multiplier, uint64_t divisor) +{ + uint64_t q = (x / divisor); + uint64_t r = (x % divisor); + + return ((q * multiplier) + ((r * multiplier) / divisor)); +} + /* * Evict buffers from the cache, such that arcstat_size is capped by arc_c. 
*/ @@ -4287,17 +4299,20 @@ arc_evict(void) */ int64_t prune = 0; int64_t dn = wmsum_value(&arc_sums.arcstat_dnode_size); + int64_t nem = zfs_refcount_count(&arc_mru->arcs_size[ARC_BUFC_METADATA]) + + zfs_refcount_count(&arc_mfu->arcs_size[ARC_BUFC_METADATA]) + - zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_METADATA]) + - zfs_refcount_count(&arc_mfu->arcs_esize[ARC_BUFC_METADATA]); w = wt * (int64_t)(arc_meta >> 16) >> 16; - if (zfs_refcount_count(&arc_mru->arcs_size[ARC_BUFC_METADATA]) + - zfs_refcount_count(&arc_mfu->arcs_size[ARC_BUFC_METADATA]) - - zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_METADATA]) - - zfs_refcount_count(&arc_mfu->arcs_esize[ARC_BUFC_METADATA]) > - w * 3 / 4) { + if (nem > w * 3 / 4) { prune = dn / sizeof (dnode_t) * zfs_arc_dnode_reduce_percent / 100; - } else if (dn > arc_dnode_limit) { - prune = (dn - arc_dnode_limit) / sizeof (dnode_t) * - zfs_arc_dnode_reduce_percent / 100; + if (nem < w && w > 4) + prune = arc_mf(prune, nem - w * 3 / 4, w / 4); + } + if (dn > arc_dnode_limit) { + prune = MAX(prune, (dn - arc_dnode_limit) / sizeof (dnode_t) * + zfs_arc_dnode_reduce_percent / 100); } if (prune > 0) arc_prune_async(prune);