Soften pruning threshold on non-evictable metadata

Previously the code pruned 10% of dnodes once non-evictable metadata
exceeded 3/4 of the metadata target.  On workloads with many millions
of dnodes and little other metadata this created significant load
spikes lasting many seconds.  This change instead increases pruning
gradually as non-evictable metadata grows above the 3/4 threshold,
which may allow the system to stabilize at some level.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by:	Alexander Motin <mav@FreeBSD.org>
Sponsored by:	iXsystems, Inc.
Closes #16401
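
To make the new ramp concrete, here is a minimal standalone sketch.  The
metadata target, dnode count, and reduce percentage are assumed example
values (10 matches the default zfs_arc_dnode_reduce_percent), and the
helper mirrors the arc_mf() added in the diff below; this is not the ARC
code itself:

#include <inttypes.h>
#include <stdio.h>

/* Overflow-avoiding (x * multiplier / divisor), as in arc_mf() below. */
static uint64_t
mf(uint64_t x, uint64_t multiplier, uint64_t divisor)
{
	uint64_t q = (x / divisor);
	uint64_t r = (x % divisor);

	return ((q * multiplier) + ((r * multiplier) / divisor));
}

int
main(void)
{
	uint64_t w = 1ULL << 30;	/* assumed metadata target, bytes */
	uint64_t dnodes = 10000000;	/* assumed number of cached dnodes */
	uint64_t reduce = 10;		/* zfs_arc_dnode_reduce_percent */

	for (uint64_t pct = 70; pct <= 100; pct += 5) {
		uint64_t nem = w * pct / 100;	/* non-evictable metadata */
		uint64_t prune = 0;

		if (nem > w * 3 / 4) {
			prune = dnodes * reduce / 100;
			/* Scale pruning down while still below the target. */
			if (nem < w)
				prune = mf(prune, nem - w * 3 / 4, w / 4);
		}
		printf("nem = %3" PRIu64 "%% of target -> prune %7" PRIu64
		    " dnodes\n", pct, prune);
	}
	return (0);
}

With these assumed numbers the output climbs from 0 pruned dnodes at or
below 75% of the target, through roughly 200,000 at 80%, to the full
1,000,000 at 100%, instead of jumping straight to 1,000,000 the moment
the 3/4 line is crossed.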
1 changed file with 23 additions and 8 deletions:

@@ -4235,6 +4235,18 @@ arc_evict_adj(uint64_t frac, uint64_t total, uint64_t up, uint64_t down,
 	return (frac + up - down);
 }
 
+/*
+ * Calculate (x * multiplier / divisor) without unnecessary overflows.
+ */
+static uint64_t
+arc_mf(uint64_t x, uint64_t multiplier, uint64_t divisor)
+{
+	uint64_t q = (x / divisor);
+	uint64_t r = (x % divisor);
+
+	return ((q * multiplier) + ((r * multiplier) / divisor));
+}
+
 /*
  * Evict buffers from the cache, such that arcstat_size is capped by arc_c.
  */
@@ -4287,17 +4299,20 @@ arc_evict(void)
 	 */
 	int64_t prune = 0;
 	int64_t dn = wmsum_value(&arc_sums.arcstat_dnode_size);
+	int64_t nem = zfs_refcount_count(&arc_mru->arcs_size[ARC_BUFC_METADATA])
+	    + zfs_refcount_count(&arc_mfu->arcs_size[ARC_BUFC_METADATA])
+	    - zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_METADATA])
+	    - zfs_refcount_count(&arc_mfu->arcs_esize[ARC_BUFC_METADATA]);
 	w = wt * (int64_t)(arc_meta >> 16) >> 16;
-	if (zfs_refcount_count(&arc_mru->arcs_size[ARC_BUFC_METADATA]) +
-	    zfs_refcount_count(&arc_mfu->arcs_size[ARC_BUFC_METADATA]) -
-	    zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_METADATA]) -
-	    zfs_refcount_count(&arc_mfu->arcs_esize[ARC_BUFC_METADATA]) >
-	    w * 3 / 4) {
+	if (nem > w * 3 / 4) {
 		prune = dn / sizeof (dnode_t) *
 		    zfs_arc_dnode_reduce_percent / 100;
-	} else if (dn > arc_dnode_limit) {
-		prune = (dn - arc_dnode_limit) / sizeof (dnode_t) *
-		    zfs_arc_dnode_reduce_percent / 100;
+		if (nem < w && w > 4)
+			prune = arc_mf(prune, nem - w * 3 / 4, w / 4);
+	}
+	if (dn > arc_dnode_limit) {
+		prune = MAX(prune, (dn - arc_dnode_limit) / sizeof (dnode_t) *
+		    zfs_arc_dnode_reduce_percent / 100);
 	}
 	if (prune > 0)
 		arc_prune_async(prune);
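
Why arc_mf() instead of a plain (x * multiplier / divisor): in the call
above, prune (a dnode count) is multiplied by (nem - w * 3 / 4), a byte
quantity, and the naive 64-bit product can wrap before the division.
Because the multiplier never exceeds the divisor here (nem - w * 3 / 4
is at most w / 4 when nem < w), dividing first keeps q * multiplier
bounded by roughly x, so the intermediates stay within 64 bits.  A small
standalone check with assumed example values (not taken from a real ARC):

#include <inttypes.h>
#include <stdio.h>

/* Same body as the arc_mf() added above. */
static uint64_t
arc_mf(uint64_t x, uint64_t multiplier, uint64_t divisor)
{
	uint64_t q = (x / divisor);
	uint64_t r = (x % divisor);

	return ((q * multiplier) + ((r * multiplier) / divisor));
}

int
main(void)
{
	uint64_t x = 1ULL << 40;	/* hypothetical, deliberately huge prune count */
	uint64_t mult = 3ULL << 28;	/* stands in for nem - w * 3 / 4 */
	uint64_t div = 1ULL << 30;	/* stands in for w / 4 */

	/* x * mult exceeds 2^64, so the naive form silently wraps. */
	printf("naive:    %" PRIu64 "\n", x * mult / div);
	/* Dividing first keeps the intermediates within 64 bits. */
	printf("arc_mf(): %" PRIu64 "\n", arc_mf(x, mult, div));
	return (0);
}

The naive form prints 0 here (the 128-bit product 3 << 68 wraps to 0 in
64 bits), while arc_mf() prints the expected 824633720832 (3 << 38).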