Make slab reclaim more aggressive

Many people have noticed that the kmem cache implementation is slow
to release its memory.  This patch makes the reclaim behavior more
aggressive by immediately freeing a slab once it is empty.  Unused
objects which are cached in the magazines will still prevent a slab
from being freed.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
Brian Behlendorf 2014-12-05 17:11:18 -05:00
parent a988a35a93
commit 1a20496834
2 changed files with 69 additions and 29 deletions

View File

@ -120,6 +120,28 @@ value will use kmalloc(), but shift to vmalloc() when exceeding this value.
Default value: \fBKMALLOC_MAX_SIZE/4\fR. Default value: \fBKMALLOC_MAX_SIZE/4\fR.
.RE .RE
.sp
.ne 2
.na
\fBspl_kmem_cache_magazine_size\fR (uint)
.ad
.RS 12n
Cache magazines are an optimization designed to minimize the cost of
allocating memory. They do this by keeping a per-cpu cache of recently
freed objects, which can then be reallocated without taking a lock. This
can improve performance on highly contended caches. However, because
objects in magazines will prevent otherwise empty slabs from being
immediately released, this may not be ideal for low-memory machines.
.sp
For this reason \fBspl_kmem_cache_magazine_size\fR can be used to set a
maximum magazine size. When this value is set to 0 the magazine size will
be automatically determined based on the object size. Otherwise magazines
will be limited to 2-256 objects per magazine (i.e. per cpu). Magazines
may never be entirely disabled in this implementation.
.sp
Default value: \fB0\fR.
.RE
.sp .sp
.ne 2 .ne 2
.na .na

View File

@ -70,6 +70,25 @@ EXPORT_SYMBOL(spl_kmem_cache_expire);
module_param(spl_kmem_cache_expire, uint, 0644); module_param(spl_kmem_cache_expire, uint, 0644);
MODULE_PARM_DESC(spl_kmem_cache_expire, "By age (0x1) or low memory (0x2)"); MODULE_PARM_DESC(spl_kmem_cache_expire, "By age (0x1) or low memory (0x2)");
/*
* Cache magazines are an optimization designed to minimize the cost of
* allocating memory. They do this by keeping a per-cpu cache of recently
* freed objects, which can then be reallocated without taking a lock. This
* can improve performance on highly contended caches. However, because
* objects in magazines will prevent otherwise empty slabs from being
* immediately released, this may not be ideal for low-memory machines.
*
* For this reason spl_kmem_cache_magazine_size can be used to set a maximum
* magazine size. When this value is set to 0 the magazine size will be
* automatically determined based on the object size. Otherwise magazines
* will be limited to 2-256 objects per magazine (i.e. per cpu). Magazines
* may never be entirely disabled in this implementation.
*
* The parameter is registered with mode 0444, i.e. readable but not
* writable once registered; presumably it is meant to be chosen at module
* load time -- confirm against the intended usage.
*/
unsigned int spl_kmem_cache_magazine_size = 0;
module_param(spl_kmem_cache_magazine_size, uint, 0444);
MODULE_PARM_DESC(spl_kmem_cache_magazine_size,
"Default magazine size (2-256), set automatically (0)\n");
/* /*
* The default behavior is to report the number of objects remaining in the * The default behavior is to report the number of objects remaining in the
* cache. This allows the Linux VM to repeatedly reclaim objects from the * cache. This allows the Linux VM to repeatedly reclaim objects from the
@ -362,45 +381,31 @@ spl_slab_free(spl_kmem_slab_t *sks,
} }
/* /*
* Traverse all the partial slabs attached to a cache and free those which * Reclaim empty slabs at the end of the partial list.
* are currently empty, and have not been touched for skc_delay seconds to
* avoid thrashing. The count argument is passed to optionally cap the
* number of slabs reclaimed, a count of zero means try and reclaim
* everything. When flag the is set available slabs freed regardless of age.
*/ */
static void static void
spl_slab_reclaim(spl_kmem_cache_t *skc, int count, int flag) spl_slab_reclaim(spl_kmem_cache_t *skc)
{ {
spl_kmem_slab_t *sks, *m; spl_kmem_slab_t *sks, *m;
spl_kmem_obj_t *sko, *n; spl_kmem_obj_t *sko, *n;
LIST_HEAD(sks_list); LIST_HEAD(sks_list);
LIST_HEAD(sko_list); LIST_HEAD(sko_list);
uint32_t size = 0; uint32_t size = 0;
int i = 0;
/* /*
* Move empty slabs and objects which have not been touched in * Empty slabs and objects must be moved to a private list so they
* skc_delay seconds on to private lists to be freed outside * can be safely freed outside the spin lock. All empty slabs are
* the spin lock. This delay time is important to avoid thrashing * at the end of skc->skc_partial_list, therefore once a non-empty
* however when flag is set the delay will not be used. * slab is found we can stop scanning.
*/ */
spin_lock(&skc->skc_lock); spin_lock(&skc->skc_lock);
list_for_each_entry_safe_reverse(sks, m, list_for_each_entry_safe_reverse(sks, m,
&skc->skc_partial_list, sks_list) { &skc->skc_partial_list, sks_list) {
/*
* All empty slabs are at the end of skc->skc_partial_list, if (sks->sks_ref > 0)
* therefore once a non-empty slab is found we can stop
* scanning. Additionally, stop when reaching the target
* reclaim 'count' if a non-zero threshold is given.
*/
if ((sks->sks_ref > 0) || (count && i >= count))
break; break;
if (time_after(jiffies, sks->sks_age + skc->skc_delay * HZ) || spl_slab_free(sks, &sks_list, &sko_list);
flag) {
spl_slab_free(sks, &sks_list, &sko_list);
i++;
}
} }
spin_unlock(&skc->skc_lock); spin_unlock(&skc->skc_lock);
@ -633,7 +638,7 @@ spl_cache_age(void *data)
if (!(skc->skc_flags & KMC_NOMAGAZINE)) if (!(skc->skc_flags & KMC_NOMAGAZINE))
on_each_cpu(spl_magazine_age, skc, 1); on_each_cpu(spl_magazine_age, skc, 1);
spl_slab_reclaim(skc, skc->skc_reap, 0); spl_slab_reclaim(skc);
while (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags) && !id) { while (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags) && !id) {
id = taskq_dispatch_delay( id = taskq_dispatch_delay(
@ -710,6 +715,9 @@ spl_magazine_size(spl_kmem_cache_t *skc)
uint32_t obj_size = spl_obj_size(skc); uint32_t obj_size = spl_obj_size(skc);
int size; int size;
if (spl_kmem_cache_magazine_size > 0)
return (MAX(MIN(spl_kmem_cache_magazine_size, 256), 2));
/* Per-magazine sizes below assume a 4Kib page size */ /* Per-magazine sizes below assume a 4Kib page size */
if (obj_size > (PAGE_SIZE * 256)) if (obj_size > (PAGE_SIZE * 256))
size = 4; /* Minimum 4Mib per-magazine */ size = 4; /* Minimum 4Mib per-magazine */
@ -1030,7 +1038,7 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
if (skc->skc_flags & (KMC_KMEM | KMC_VMEM)) { if (skc->skc_flags & (KMC_KMEM | KMC_VMEM)) {
spl_magazine_destroy(skc); spl_magazine_destroy(skc);
spl_slab_reclaim(skc, 0, 1); spl_slab_reclaim(skc);
} else { } else {
ASSERT(skc->skc_flags & KMC_SLAB); ASSERT(skc->skc_flags & KMC_SLAB);
kmem_cache_destroy(skc->skc_linux_cache); kmem_cache_destroy(skc->skc_linux_cache);
@ -1433,6 +1441,7 @@ spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
{ {
spl_kmem_magazine_t *skm; spl_kmem_magazine_t *skm;
unsigned long flags; unsigned long flags;
int do_reclaim = 0;
ASSERT(skc->skc_magic == SKC_MAGIC); ASSERT(skc->skc_magic == SKC_MAGIC);
ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags)); ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
@ -1473,14 +1482,23 @@ spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
skm = skc->skc_mag[smp_processor_id()]; skm = skc->skc_mag[smp_processor_id()];
ASSERT(skm->skm_magic == SKM_MAGIC); ASSERT(skm->skm_magic == SKM_MAGIC);
/* Per-CPU cache full, flush it to make space */ /*
if (unlikely(skm->skm_avail >= skm->skm_size)) * Per-CPU cache full, flush it to make space for this object,
* this may result in an empty slab which can be reclaimed once
* interrupts are re-enabled.
*/
if (unlikely(skm->skm_avail >= skm->skm_size)) {
spl_cache_flush(skc, skm, skm->skm_refill); spl_cache_flush(skc, skm, skm->skm_refill);
do_reclaim = 1;
}
/* Available space in cache, use it */ /* Available space in cache, use it */
skm->skm_objs[skm->skm_avail++] = obj; skm->skm_objs[skm->skm_avail++] = obj;
local_irq_restore(flags); local_irq_restore(flags);
if (do_reclaim)
spl_slab_reclaim(skc);
out: out:
atomic_dec(&skc->skc_ref); atomic_dec(&skc->skc_ref);
} }
@ -1621,7 +1639,7 @@ spl_kmem_cache_reap_now(spl_kmem_cache_t *skc, int count)
} while (do_reclaim); } while (do_reclaim);
} }
/* Reclaim from the magazine then the slabs ignoring age and delay. */ /* Reclaim from the magazine and free all now empty slabs. */
if (spl_kmem_cache_expire & KMC_EXPIRE_MEM) { if (spl_kmem_cache_expire & KMC_EXPIRE_MEM) {
spl_kmem_magazine_t *skm; spl_kmem_magazine_t *skm;
unsigned long irq_flags; unsigned long irq_flags;
@ -1632,7 +1650,7 @@ spl_kmem_cache_reap_now(spl_kmem_cache_t *skc, int count)
local_irq_restore(irq_flags); local_irq_restore(irq_flags);
} }
spl_slab_reclaim(skc, count, 1); spl_slab_reclaim(skc);
clear_bit_unlock(KMC_BIT_REAPING, &skc->skc_flags); clear_bit_unlock(KMC_BIT_REAPING, &skc->skc_flags);
smp_mb__after_atomic(); smp_mb__after_atomic();
wake_up_bit(&skc->skc_flags, KMC_BIT_REAPING); wake_up_bit(&skc->skc_flags, KMC_BIT_REAPING);