Add spl_kmem_cache_reclaim module option

The correct behavior for all registered shrinkers is to return the
number of objects in their cache.  In theory this allows the Linux
VM to balance memory reclaim across all registered caches.

In commit b9b3715 this behavior was disabled in favor of returning
-1 which notifies the VM that no additional objects are available
for reclaim.  This was done as a workaround to resolve thrashing
in shrink_slabs() which could occur when memory was low and numerous
cores were in reclaim.  Unfortunately, this has been observed to
increase the likelihood of OOM events when SPL slab consumers are
responsible for consuming the majority of memory.

Therefore, this patch makes this behavior tunable.  Setting the
spl_kmem_cache_reclaim module option to 0x1 will result in the
shrinker only being called once.  This is the default behavior.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Prakash Surya <surya1@llnl.gov>
Closes #358
This commit is contained in:
Brian Behlendorf 2014-05-16 09:29:21 -07:00
parent a073aeb060
commit 376dc35e22
2 changed files with 22 additions and 9 deletions

View File

@ -385,6 +385,8 @@ typedef enum kmem_cbrc {
#define KMC_EXPIRE_AGE 0x1 /* Due to age */ #define KMC_EXPIRE_AGE 0x1 /* Due to age */
#define KMC_EXPIRE_MEM 0x2 /* Due to low memory */ #define KMC_EXPIRE_MEM 0x2 /* Due to low memory */
#define KMC_RECLAIM_ONCE 0x1 /* Force a single shrinker pass */
extern unsigned int spl_kmem_cache_expire; extern unsigned int spl_kmem_cache_expire;
extern struct list_head spl_kmem_cache_list; extern struct list_head spl_kmem_cache_list;
extern struct rw_semaphore spl_kmem_cache_sem; extern struct rw_semaphore spl_kmem_cache_sem;

View File

@ -57,6 +57,16 @@ EXPORT_SYMBOL(spl_kmem_cache_expire);
module_param(spl_kmem_cache_expire, uint, 0644); module_param(spl_kmem_cache_expire, uint, 0644);
MODULE_PARM_DESC(spl_kmem_cache_expire, "By age (0x1) or low memory (0x2)"); MODULE_PARM_DESC(spl_kmem_cache_expire, "By age (0x1) or low memory (0x2)");
/*
* KMC_RECLAIM_ONCE is set as the default until zfsonlinux/spl#268 is
* definitively resolved. Depending on the system configuration and
* workload this may increase the likelihood of out of memory events.
* For those cases it is advised that this option be set to zero.
*/
unsigned int spl_kmem_cache_reclaim = KMC_RECLAIM_ONCE;
module_param(spl_kmem_cache_reclaim, uint, 0644);
MODULE_PARM_DESC(spl_kmem_cache_reclaim, "Single reclaim pass (0x1)");
unsigned int spl_kmem_cache_obj_per_slab = SPL_KMEM_CACHE_OBJ_PER_SLAB; unsigned int spl_kmem_cache_obj_per_slab = SPL_KMEM_CACHE_OBJ_PER_SLAB;
module_param(spl_kmem_cache_obj_per_slab, uint, 0644); module_param(spl_kmem_cache_obj_per_slab, uint, 0644);
MODULE_PARM_DESC(spl_kmem_cache_obj_per_slab, "Number of objects per slab"); MODULE_PARM_DESC(spl_kmem_cache_obj_per_slab, "Number of objects per slab");
@ -2235,7 +2245,7 @@ __spl_kmem_cache_generic_shrinker(struct shrinker *shrink,
struct shrink_control *sc) struct shrink_control *sc)
{ {
spl_kmem_cache_t *skc; spl_kmem_cache_t *skc;
int unused = 0; int alloc = 0;
down_read(&spl_kmem_cache_sem); down_read(&spl_kmem_cache_sem);
list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) { list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {
@ -2244,24 +2254,25 @@ __spl_kmem_cache_generic_shrinker(struct shrinker *shrink,
MAX(sc->nr_to_scan >> fls64(skc->skc_slab_objs), 1)); MAX(sc->nr_to_scan >> fls64(skc->skc_slab_objs), 1));
/* /*
* Presume everything alloc'ed in reclaimable, this ensures * Presume everything alloc'ed is reclaimable, this ensures
* we are called again with nr_to_scan > 0 so can try and * we are called again with nr_to_scan > 0 so can try and
* reclaim. The exact number is not important either so * reclaim. The exact number is not important either so
* we forgo taking this already highly contented lock. * we forgo taking this already highly contented lock.
*/ */
unused += skc->skc_obj_alloc; alloc += skc->skc_obj_alloc;
} }
up_read(&spl_kmem_cache_sem); up_read(&spl_kmem_cache_sem);
/* /*
* After performing reclaim always return -1 to indicate we cannot * When KMC_RECLAIM_ONCE is set allow only a single reclaim pass.
* perform additional reclaim. This prevents shrink_slabs() from * This functionality only exists to work around a rare issue where
* repeatedly invoking this generic shrinker and potentially spinning. * shrink_slabs() is repeatedly invoked by many cores causing the
* system to thrash.
*/ */
if (sc->nr_to_scan) if ((spl_kmem_cache_reclaim & KMC_RECLAIM_ONCE) && sc->nr_to_scan)
return -1; return (-1);
return unused; return MAX((alloc * sysctl_vfs_cache_pressure) / 100, 0);
} }
SPL_SHRINKER_CALLBACK_WRAPPER(spl_kmem_cache_generic_shrinker); SPL_SHRINKER_CALLBACK_WRAPPER(spl_kmem_cache_generic_shrinker);