Implement per-cpu local caches. This seems to have bought me another

factor of 10x improvement on SMP systems due to reduced lock contention.
This may put me in the ballpark of what is needed.  We can still further
improve things on NUMA systems by creating an additional L3 cache per 
memory node instead of the current global pool.  With luck this won't
be needed.  I should also take another look at the locking now that
everything is working.  There's a good chance I can tighten it up a
little bit and improve things a little more.

   kmem_lock: time (sec)        slabs           objs            hash
   kmem_lock:                   tot/max/calc    tot/max/calc    size/depth
   kmem_lock:  0.000999926      6/6/1           192/192/32      32768/0
   kmem_lock:  0.000999926      4/4/2           128/128/64      32768/0
   kmem_lock:  0.000999926      4/4/4           128/128/128     32768/0
   kmem_lock:  0.000999926      4/4/8           128/128/256     32768/0
   kmem_lock:  0.000999926      4/4/16          128/128/512     32768/0
   kmem_lock:  0.000999926      4/4/32          128/128/1024    32768/0
   kmem_lock:  0.000999926      4/4/64          128/128/2048    32768/0
   kmem_lock:  0.000999926      8/8/128         256/256/4096    32768/0
   kmem_lock:  0.003999704      24/23/256       768/736/8192    32768/1
   kmem_lock:  0.012999038      44/41/512       1408/1312/16384 32768/1
   kmem_lock:  0.051996153      96/93/1024      3072/2976/32768 32768/2
   kmem_lock:  0.181986536      187/184/2048    5984/5888/65536 32768/3
   kmem_lock:  0.655951469      342/339/4096    10944/10848/131072 32768/4



git-svn-id: https://outreach.scidac.gov/svn/spl/trunk@136 7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c
This commit is contained in:
behlendo 2008-06-25 20:57:45 +00:00
parent d46630e0f3
commit 4afaaefa05
5 changed files with 427 additions and 169 deletions

View File

@ -360,6 +360,7 @@ kmem_debugging(void)
extern int kmem_set_warning(int flag); extern int kmem_set_warning(int flag);
#define SKM_MAGIC 0x2e2e2e2e
#define SKO_MAGIC 0x20202020 #define SKO_MAGIC 0x20202020
#define SKS_MAGIC 0x22222222 #define SKS_MAGIC 0x22222222
#define SKC_MAGIC 0x2c2c2c2c #define SKC_MAGIC 0x2c2c2c2c
@ -376,6 +377,15 @@ typedef int (*spl_kmem_ctor_t)(void *, void *, int);
typedef void (*spl_kmem_dtor_t)(void *, void *); typedef void (*spl_kmem_dtor_t)(void *, void *);
typedef void (*spl_kmem_reclaim_t)(void *); typedef void (*spl_kmem_reclaim_t)(void *);
/*
 * Per-CPU object cache ("magazine").  One magazine is allocated for each
 * online CPU by spl_magazine_create() and stored in skc_mag[].  The
 * structure is allocated with room for skm_size trailing object pointers
 * (see spl_magazine_alloc()); skm_objs[0..skm_avail) are the cached objects.
 */
typedef struct spl_kmem_magazine {
uint32_t skm_magic; /* Sanity magic */
uint32_t skm_avail; /* Available objects */
uint32_t skm_size; /* Magazine size */
uint32_t skm_refill; /* Batch refill size */
unsigned long skm_age; /* Last cache access */
void *skm_objs[0]; /* Object pointers */
} spl_kmem_magazine_t;
typedef struct spl_kmem_obj { typedef struct spl_kmem_obj {
uint32_t sko_magic; /* Sanity magic */ uint32_t sko_magic; /* Sanity magic */
uint32_t sko_flags; /* Per object flags */ uint32_t sko_flags; /* Per object flags */
@ -392,13 +402,16 @@ typedef struct spl_kmem_slab {
struct list_head sks_list; /* Slab list linkage */ struct list_head sks_list; /* Slab list linkage */
struct list_head sks_free_list; /* Free object list */ struct list_head sks_free_list; /* Free object list */
unsigned long sks_age; /* Last modify jiffie */ unsigned long sks_age; /* Last modify jiffie */
atomic_t sks_ref; /* Ref count used objects */ uint32_t sks_ref; /* Ref count used objects */
} spl_kmem_slab_t; } spl_kmem_slab_t;
typedef struct spl_kmem_cache { typedef struct spl_kmem_cache {
uint32_t skc_magic; /* Sanity magic */ uint32_t skc_magic; /* Sanity magic */
uint32_t skc_name_size; /* Name length */ uint32_t skc_name_size; /* Name length */
char *skc_name; /* Name string */ char *skc_name; /* Name string */
spl_kmem_magazine_t *skc_mag[NR_CPUS]; /* Per-CPU warm cache */
uint32_t skc_mag_size; /* Magazine size */
uint32_t skc_mag_refill; /* Magazine refill count */
spl_kmem_ctor_t skc_ctor; /* Constructor */ spl_kmem_ctor_t skc_ctor; /* Constructor */
spl_kmem_dtor_t skc_dtor; /* Destructor */ spl_kmem_dtor_t skc_dtor; /* Destructor */
spl_kmem_reclaim_t skc_reclaim; /* Reclaimator */ spl_kmem_reclaim_t skc_reclaim; /* Reclaimator */
@ -427,8 +440,8 @@ typedef struct spl_kmem_cache {
uint64_t skc_obj_total; /* Obj total current */ uint64_t skc_obj_total; /* Obj total current */
uint64_t skc_obj_alloc; /* Obj alloc current */ uint64_t skc_obj_alloc; /* Obj alloc current */
uint64_t skc_obj_max; /* Obj max historic */ uint64_t skc_obj_max; /* Obj max historic */
uint64_t skc_hash_depth; /* Hash depth */ uint64_t skc_hash_depth; /* Lazy hash depth */
uint64_t skc_hash_max; /* Hash depth max */ uint64_t skc_hash_count; /* Hash entries current */
} spl_kmem_cache_t; } spl_kmem_cache_t;
extern spl_kmem_cache_t * extern spl_kmem_cache_t *

View File

@ -109,13 +109,10 @@ EXPORT_SYMBOL(kmem_set_warning);
* small virtual address space on 32bit arches. This will seriously * small virtual address space on 32bit arches. This will seriously
* constrain the size of the slab caches and their performance. * constrain the size of the slab caches and their performance.
* *
* XXX: Refactor the below code in to smaller functions. This works
* for a first pass but each function is doing to much.
*
* XXX: Implement SPL proc interface to export full per cache stats. * XXX: Implement SPL proc interface to export full per cache stats.
* *
* XXX: Implement work requests to keep an eye on each cache and * XXX: Implement work requests to keep an eye on each cache and
* shrink them via slab_reclaim() when they are wasting lots * shrink them via spl_slab_reclaim() when they are wasting lots
* of space. Currently this process is driven by the reapers. * of space. Currently this process is driven by the reapers.
* *
* XXX: Implement proper small cache object support by embedding * XXX: Implement proper small cache object support by embedding
@ -138,6 +135,8 @@ EXPORT_SYMBOL(kmem_set_warning);
* *
* XXX: Slab coloring may also yield performance improvements and would * XXX: Slab coloring may also yield performance improvements and would
* be desirable to implement. * be desirable to implement.
*
* XXX: Proper hardware cache alignment would be good too.
*/ */
/* Ensure the __kmem_cache_create/__kmem_cache_destroy macros are /* Ensure the __kmem_cache_create/__kmem_cache_destroy macros are
@ -155,18 +154,22 @@ static struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */
static kmem_cache_t *spl_slab_cache; /* Cache for slab structs */ static kmem_cache_t *spl_slab_cache; /* Cache for slab structs */
static kmem_cache_t *spl_obj_cache; /* Cache for obj structs */ static kmem_cache_t *spl_obj_cache; /* Cache for obj structs */
static int spl_cache_flush(spl_kmem_cache_t *skc,
spl_kmem_magazine_t *skm, int flush);
#ifdef HAVE_SET_SHRINKER #ifdef HAVE_SET_SHRINKER
static struct shrinker *spl_kmem_cache_shrinker; static struct shrinker *spl_kmem_cache_shrinker;
#else #else
static int kmem_cache_generic_shrinker(int nr_to_scan, unsigned int gfp_mask); static int spl_kmem_cache_generic_shrinker(int nr_to_scan,
unsigned int gfp_mask);
static struct shrinker spl_kmem_cache_shrinker = { static struct shrinker spl_kmem_cache_shrinker = {
.shrink = kmem_cache_generic_shrinker, .shrink = spl_kmem_cache_generic_shrinker,
.seeks = KMC_DEFAULT_SEEKS, .seeks = KMC_DEFAULT_SEEKS,
}; };
#endif #endif
static spl_kmem_slab_t * static spl_kmem_slab_t *
slab_alloc(spl_kmem_cache_t *skc, int flags) { spl_slab_alloc(spl_kmem_cache_t *skc, int flags) {
spl_kmem_slab_t *sks; spl_kmem_slab_t *sks;
spl_kmem_obj_t *sko, *n; spl_kmem_obj_t *sko, *n;
int i; int i;
@ -182,7 +185,7 @@ slab_alloc(spl_kmem_cache_t *skc, int flags) {
sks->sks_cache = skc; sks->sks_cache = skc;
INIT_LIST_HEAD(&sks->sks_list); INIT_LIST_HEAD(&sks->sks_list);
INIT_LIST_HEAD(&sks->sks_free_list); INIT_LIST_HEAD(&sks->sks_free_list);
atomic_set(&sks->sks_ref, 0); sks->sks_ref = 0;
for (i = 0; i < sks->sks_objs; i++) { for (i = 0; i < sks->sks_objs; i++) {
sko = kmem_cache_alloc(spl_obj_cache, flags); sko = kmem_cache_alloc(spl_obj_cache, flags);
@ -224,21 +227,19 @@ out:
* be called with the 'skc->skc_lock' held. * be called with the 'skc->skc_lock' held.
* */ * */
static void static void
slab_free(spl_kmem_slab_t *sks) { spl_slab_free(spl_kmem_slab_t *sks) {
spl_kmem_cache_t *skc; spl_kmem_cache_t *skc;
spl_kmem_obj_t *sko, *n; spl_kmem_obj_t *sko, *n;
int i = 0; int i = 0;
ENTRY; ENTRY;
ASSERT(sks->sks_magic == SKS_MAGIC); ASSERT(sks->sks_magic == SKS_MAGIC);
ASSERT(atomic_read(&sks->sks_ref) == 0); ASSERT(sks->sks_ref == 0);
skc = sks->sks_cache; skc = sks->sks_cache;
skc->skc_obj_total -= sks->sks_objs; skc->skc_obj_total -= sks->sks_objs;
skc->skc_slab_total--; skc->skc_slab_total--;
//#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
ASSERT(spin_is_locked(&skc->skc_lock)); ASSERT(spin_is_locked(&skc->skc_lock));
//#endif
list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) { list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) {
ASSERT(sko->sko_magic == SKO_MAGIC); ASSERT(sko->sko_magic == SKO_MAGIC);
@ -261,15 +262,13 @@ slab_free(spl_kmem_slab_t *sks) {
} }
static int static int
__slab_reclaim(spl_kmem_cache_t *skc) __spl_slab_reclaim(spl_kmem_cache_t *skc)
{ {
spl_kmem_slab_t *sks, *m; spl_kmem_slab_t *sks, *m;
int rc = 0; int rc = 0;
ENTRY; ENTRY;
//#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
ASSERT(spin_is_locked(&skc->skc_lock)); ASSERT(spin_is_locked(&skc->skc_lock));
//#endif
/* /*
* Free empty slabs which have not been touched in skc_delay * Free empty slabs which have not been touched in skc_delay
* seconds. This delay time is important to avoid thrashing. * seconds. This delay time is important to avoid thrashing.
@ -277,11 +276,11 @@ __slab_reclaim(spl_kmem_cache_t *skc)
*/ */
list_for_each_entry_safe_reverse(sks, m, &skc->skc_partial_list, list_for_each_entry_safe_reverse(sks, m, &skc->skc_partial_list,
sks_list) { sks_list) {
if (atomic_read(&sks->sks_ref) > 0) if (sks->sks_ref > 0)
break; break;
if (time_after(jiffies, sks->sks_age + skc->skc_delay * HZ)) { if (time_after(jiffies, sks->sks_age + skc->skc_delay * HZ)) {
slab_free(sks); spl_slab_free(sks);
rc++; rc++;
} }
} }
@ -291,18 +290,110 @@ __slab_reclaim(spl_kmem_cache_t *skc)
} }
static int static int
slab_reclaim(spl_kmem_cache_t *skc) spl_slab_reclaim(spl_kmem_cache_t *skc)
{ {
int rc; int rc;
ENTRY; ENTRY;
spin_lock(&skc->skc_lock); spin_lock(&skc->skc_lock);
rc = __slab_reclaim(skc); rc = __spl_slab_reclaim(skc);
spin_unlock(&skc->skc_lock); spin_unlock(&skc->skc_lock);
RETURN(rc); RETURN(rc);
} }
/*
 * Pick the number of object slots for a per-CPU magazine of this cache.
 * Smaller objects get larger magazines.  The thresholds are static
 * guesses keyed off skc_obj_size; they do not adapt to observed usage.
 *
 * Returns the magazine size in objects (between 1 and 128).
 */
static int
spl_magazine_size(spl_kmem_cache_t *skc)
{
	int size;
	ENTRY;

	/* Walk the thresholds from the smallest objects upwards */
	if (skc->skc_obj_size <= (PAGE_SIZE / 16))
		size = 128;
	else if (skc->skc_obj_size <= (PAGE_SIZE / 4))
		size = 64;
	else if (skc->skc_obj_size <= (PAGE_SIZE))
		size = 32;
	else if (skc->skc_obj_size <= (PAGE_SIZE * 32))
		size = 16;
	else if (skc->skc_obj_size <= (PAGE_SIZE * 256))
		size = 4;
	else
		size = 1;

	RETURN(size);
}
/*
 * Allocate and initialize one empty magazine for 'skc' on memory node
 * 'node'.  The allocation covers the header plus skc_mag_size object
 * pointer slots.
 *
 * Returns the new magazine, or NULL if kmalloc_node() fails.
 */
static spl_kmem_magazine_t *
spl_magazine_alloc(spl_kmem_cache_t *skc, int node)
{
	spl_kmem_magazine_t *skm;
	int size = sizeof(spl_kmem_magazine_t) +
	    sizeof(void *) * skc->skc_mag_size;
	ENTRY;

	skm = kmalloc_node(size, GFP_KERNEL, node);
	if (skm == NULL)
		RETURN(skm);

	/* Start out empty; sizing is copied from the owning cache */
	skm->skm_magic = SKM_MAGIC;
	skm->skm_avail = 0;
	skm->skm_size = skc->skc_mag_size;
	skm->skm_refill = skc->skc_mag_refill;
	skm->skm_age = jiffies;

	RETURN(skm);
}
/*
 * Release a magazine previously returned by spl_magazine_alloc().
 * The magazine must already be empty (skm_avail == 0); any cached
 * objects have to be flushed back to the slabs by the caller first.
 */
static void
spl_magazine_free(spl_kmem_magazine_t *skm)
{
ENTRY;
ASSERT(skm->skm_magic == SKM_MAGIC);
ASSERT(skm->skm_avail == 0);
kfree(skm);
EXIT;
}
/*
 * Size and allocate one magazine per online CPU for 'skc'.
 *
 * The refill batch is half the magazine size, rounded up.  Each magazine
 * is allocated on the memory node of the CPU it serves.
 *
 * Returns 0 on success or -ENOMEM if any magazine could not be allocated,
 * in which case every magazine allocated so far is released again.
 */
static int
spl_magazine_create(spl_kmem_cache_t *skc)
{
	int i, j;
	ENTRY;

	skc->skc_mag_size = spl_magazine_size(skc);
	skc->skc_mag_refill = (skc->skc_mag_size + 1) / 2;

	for_each_online_cpu(i) {
		skc->skc_mag[i] = spl_magazine_alloc(skc, cpu_to_node(i));
		if (skc->skc_mag[i] == NULL) {
			/* Unwind only the slots actually populated above.
			 * The online CPU ids need not be contiguous, so
			 * walking every index below 'i' could hand an
			 * unassigned skc_mag[] entry to spl_magazine_free(). */
			for_each_online_cpu(j) {
				if (j >= i)
					break;

				spl_magazine_free(skc->skc_mag[j]);
				skc->skc_mag[j] = NULL;
			}

			RETURN(-ENOMEM);
		}
	}

	RETURN(0);
}
/*
 * Tear down the per-CPU magazines of 'skc'.  For every online CPU the
 * magazine is fully flushed back to the slabs and then freed.
 */
static void
spl_magazine_destroy(spl_kmem_cache_t *skc)
{
spl_kmem_magazine_t *skm;
int i;
ENTRY;
for_each_online_cpu(i) {
skm = skc->skc_mag[i];
/* Flush every cached object so the skm_avail == 0 assertion
 * in spl_magazine_free() holds. */
(void)spl_cache_flush(skc, skm, skm->skm_avail);
spl_magazine_free(skm);
}
EXIT;
}
spl_kmem_cache_t * spl_kmem_cache_t *
spl_kmem_cache_create(char *name, size_t size, size_t align, spl_kmem_cache_create(char *name, size_t size, size_t align,
spl_kmem_ctor_t ctor, spl_kmem_ctor_t ctor,
@ -311,7 +402,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
void *priv, void *vmp, int flags) void *priv, void *vmp, int flags)
{ {
spl_kmem_cache_t *skc; spl_kmem_cache_t *skc;
int i, kmem_flags = KM_SLEEP; int i, rc, kmem_flags = KM_SLEEP;
ENTRY; ENTRY;
/* We may be called when there is a non-zero preempt_count or /* We may be called when there is a non-zero preempt_count or
@ -326,7 +417,6 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
RETURN(NULL); RETURN(NULL);
skc->skc_magic = SKC_MAGIC; skc->skc_magic = SKC_MAGIC;
skc->skc_name_size = strlen(name) + 1; skc->skc_name_size = strlen(name) + 1;
skc->skc_name = (char *)kmem_alloc(skc->skc_name_size, kmem_flags); skc->skc_name = (char *)kmem_alloc(skc->skc_name_size, kmem_flags);
if (skc->skc_name == NULL) { if (skc->skc_name == NULL) {
@ -355,6 +445,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
if (skc->skc_hash == NULL) { if (skc->skc_hash == NULL) {
kmem_free(skc->skc_name, skc->skc_name_size); kmem_free(skc->skc_name, skc->skc_name_size);
kmem_free(skc, sizeof(*skc)); kmem_free(skc, sizeof(*skc));
RETURN(NULL);
} }
for (i = 0; i < skc->skc_hash_elts; i++) for (i = 0; i < skc->skc_hash_elts; i++)
@ -374,7 +465,15 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
skc->skc_obj_alloc = 0; skc->skc_obj_alloc = 0;
skc->skc_obj_max = 0; skc->skc_obj_max = 0;
skc->skc_hash_depth = 0; skc->skc_hash_depth = 0;
skc->skc_hash_max = 0; skc->skc_hash_count = 0;
rc = spl_magazine_create(skc);
if (rc) {
kmem_free(skc->skc_hash, skc->skc_hash_size);
kmem_free(skc->skc_name, skc->skc_name_size);
kmem_free(skc, sizeof(*skc));
RETURN(NULL);
}
down_write(&spl_kmem_cache_sem); down_write(&spl_kmem_cache_sem);
list_add_tail(&skc->skc_list, &spl_kmem_cache_list); list_add_tail(&skc->skc_list, &spl_kmem_cache_list);
@ -385,8 +484,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
EXPORT_SYMBOL(spl_kmem_cache_create); EXPORT_SYMBOL(spl_kmem_cache_create);
/* The caller must ensure there are no racing calls to /* The caller must ensure there are no racing calls to
* spl_kmem_cache_alloc() for this spl_kmem_cache_t when * spl_kmem_cache_alloc() for this spl_kmem_cache_t.
* it is being destroyed.
*/ */
void void
spl_kmem_cache_destroy(spl_kmem_cache_t *skc) spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
@ -398,20 +496,22 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
list_del_init(&skc->skc_list); list_del_init(&skc->skc_list);
up_write(&spl_kmem_cache_sem); up_write(&spl_kmem_cache_sem);
spl_magazine_destroy(skc);
spin_lock(&skc->skc_lock); spin_lock(&skc->skc_lock);
/* Validate there are no objects in use and free all the /* Validate there are no objects in use and free all the
* spl_kmem_slab_t, spl_kmem_obj_t, and object buffers. * spl_kmem_slab_t, spl_kmem_obj_t, and object buffers. */
*/
ASSERT(list_empty(&skc->skc_complete_list)); ASSERT(list_empty(&skc->skc_complete_list));
ASSERTF(skc->skc_hash_count == 0, "skc->skc_hash_count=%d\n",
skc->skc_hash_count);
list_for_each_entry_safe(sks, m, &skc->skc_partial_list, sks_list) list_for_each_entry_safe(sks, m, &skc->skc_partial_list, sks_list)
slab_free(sks); spl_slab_free(sks);
kmem_free(skc->skc_hash, skc->skc_hash_size); kmem_free(skc->skc_hash, skc->skc_hash_size);
kmem_free(skc->skc_name, skc->skc_name_size); kmem_free(skc->skc_name, skc->skc_name_size);
kmem_free(skc, sizeof(*skc));
spin_unlock(&skc->skc_lock); spin_unlock(&skc->skc_lock);
kmem_free(skc, sizeof(*skc));
EXIT; EXIT;
} }
@ -427,88 +527,92 @@ spl_hash_ptr(void *ptr, unsigned int bits)
return hash_long((unsigned long)ptr >> PAGE_SHIFT, bits); return hash_long((unsigned long)ptr >> PAGE_SHIFT, bits);
} }
#ifndef list_first_entry static spl_kmem_obj_t *
#define list_first_entry(ptr, type, member) \ spl_hash_obj(spl_kmem_cache_t *skc, void *obj)
list_entry((ptr)->next, type, member) {
#endif struct hlist_node *node;
spl_kmem_obj_t *sko = NULL;
unsigned long key = spl_hash_ptr(obj, skc->skc_hash_bits);
int i = 0;
void * ASSERT(spin_is_locked(&skc->skc_lock));
spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
hlist_for_each_entry(sko, node, &skc->skc_hash[key], sko_hlist) {
if (unlikely((++i) > skc->skc_hash_depth))
skc->skc_hash_depth = i;
if (sko->sko_addr == obj) {
ASSERT(sko->sko_magic == SKO_MAGIC);
RETURN(sko);
}
}
RETURN(NULL);
}
static void *
spl_cache_obj(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks)
{
spl_kmem_obj_t *sko;
unsigned long key;
ASSERT(spin_is_locked(&skc->skc_lock));
sko = list_entry((&sks->sks_free_list)->next,spl_kmem_obj_t,sko_list);
ASSERT(sko->sko_magic == SKO_MAGIC);
ASSERT(sko->sko_addr != NULL);
/* Remove from sks_free_list and add to used hash */
list_del_init(&sko->sko_list);
key = spl_hash_ptr(sko->sko_addr, skc->skc_hash_bits);
hlist_add_head(&sko->sko_hlist, &skc->skc_hash[key]);
sks->sks_age = jiffies;
sks->sks_ref++;
skc->skc_obj_alloc++;
skc->skc_hash_count++;
/* Track max obj usage statistics */
if (skc->skc_obj_alloc > skc->skc_obj_max)
skc->skc_obj_max = skc->skc_obj_alloc;
/* Track max slab usage statistics */
if (sks->sks_ref == 1) {
skc->skc_slab_alloc++;
if (skc->skc_slab_alloc > skc->skc_slab_max)
skc->skc_slab_max = skc->skc_slab_alloc;
}
return sko->sko_addr;
}
/* No objects are available, so create a new slab.  Since this is an
 * expensive operation we do it without holding the spinlock
 * and only briefly acquire it when we link in the fully
 * allocated and constructed slab.
 */
static spl_kmem_slab_t *
spl_cache_grow(spl_kmem_cache_t *skc, int flags)
{ {
spl_kmem_slab_t *sks; spl_kmem_slab_t *sks;
spl_kmem_obj_t *sko; spl_kmem_obj_t *sko;
void *obj;
unsigned long key;
ENTRY; ENTRY;
spin_lock(&skc->skc_lock); if (flags & __GFP_WAIT) {
restart: flags |= __GFP_NOFAIL;
/* Check for available objects from the partial slabs */ might_sleep();
if (!list_empty(&skc->skc_partial_list)) { local_irq_enable();
sks = list_first_entry(&skc->skc_partial_list,
spl_kmem_slab_t, sks_list);
ASSERT(sks->sks_magic == SKS_MAGIC);
ASSERT(atomic_read(&sks->sks_ref) < sks->sks_objs);
ASSERT(!list_empty(&sks->sks_free_list));
sko = list_first_entry(&sks->sks_free_list,
spl_kmem_obj_t, sko_list);
ASSERT(sko->sko_magic == SKO_MAGIC);
ASSERT(sko->sko_addr != NULL);
/* Remove from sks_free_list, add to used hash */
list_del_init(&sko->sko_list);
key = spl_hash_ptr(sko->sko_addr, skc->skc_hash_bits);
hlist_add_head(&sko->sko_hlist, &skc->skc_hash[key]);
sks->sks_age = jiffies;
atomic_inc(&sks->sks_ref);
skc->skc_obj_alloc++;
if (skc->skc_obj_alloc > skc->skc_obj_max)
skc->skc_obj_max = skc->skc_obj_alloc;
if (atomic_read(&sks->sks_ref) == 1) {
skc->skc_slab_alloc++;
if (skc->skc_slab_alloc > skc->skc_slab_max)
skc->skc_slab_max = skc->skc_slab_alloc;
}
/* Move slab to skc_complete_list when full */
if (atomic_read(&sks->sks_ref) == sks->sks_objs) {
list_del(&sks->sks_list);
list_add(&sks->sks_list, &skc->skc_complete_list);
}
GOTO(out_lock, obj = sko->sko_addr);
} }
spin_unlock(&skc->skc_lock); sks = spl_slab_alloc(skc, flags);
if (sks == NULL) {
if (flags & __GFP_WAIT)
local_irq_disable();
/* No available objects create a new slab. Since this is an RETURN(NULL);
* expensive operation we do it without holding the semaphore }
* and only briefly aquire it when we link in the fully
* allocated and constructed slab.
*/
/* Under Solaris if the KM_SLEEP flag is passed we may never
* fail, so sleep as long as needed. Additionally, since we are
* using vmem_alloc() KM_NOSLEEP is not an option and we must
* fail. Shifting to allocating our own pages and mapping the
* virtual address space may allow us to bypass this issue.
*/
if (!flags)
flags |= KM_SLEEP;
if (flags & KM_SLEEP)
flags |= __GFP_NOFAIL;
else
GOTO(out, obj = NULL);
sks = slab_alloc(skc, flags);
if (sks == NULL)
GOTO(out, obj = NULL);
/* Run all the constructors now that the slab is fully allocated */ /* Run all the constructors now that the slab is fully allocated */
list_for_each_entry(sko, &sks->sks_free_list, sko_list) { list_for_each_entry(sko, &sks->sks_free_list, sko_list) {
@ -518,18 +622,171 @@ restart:
skc->skc_ctor(sko->sko_addr, skc->skc_private, flags); skc->skc_ctor(sko->sko_addr, skc->skc_private, flags);
} }
/* Link the newly created slab in to the skc_partial_list, if (flags & __GFP_WAIT)
* and retry the allocation which will now succeed. local_irq_disable();
*/
/* Link the new empty slab in to the end of skc_partial_list */
spin_lock(&skc->skc_lock); spin_lock(&skc->skc_lock);
skc->skc_slab_total++; skc->skc_slab_total++;
skc->skc_obj_total += sks->sks_objs; skc->skc_obj_total += sks->sks_objs;
list_add_tail(&sks->sks_list, &skc->skc_partial_list); list_add_tail(&sks->sks_list, &skc->skc_partial_list);
GOTO(restart, obj = NULL); spin_unlock(&skc->skc_lock);
RETURN(sks);
}
/*
 * Refill the magazine 'skm' with up to skm_refill objects taken from the
 * partial slabs of 'skc', growing the cache when no partial slab exists.
 *
 * skc_lock is taken here and dropped around spl_cache_grow().  If the
 * grow fails, or 'skm' is no longer the magazine of the current CPU
 * after the grow, the refill stops early.
 *
 * Returns the number of objects added to the magazine.
 */
static int
spl_cache_refill(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flags)
{
	spl_kmem_slab_t *sks;
	int refill = skm->skm_refill;
	ENTRY;

	/* XXX: Check for refill bouncing by age perhaps */

	spin_lock(&skc->skc_lock);

	/* The skm_size bound guards the skm_objs[] array: the lock (and
	 * possibly interrupts) are dropped while growing, so the magazine
	 * may no longer be as empty as it was when we were called. */
	while ((refill > 0) && (skm->skm_avail < skm->skm_size)) {
		/* No slabs available we must grow the cache */
		if (list_empty(&skc->skc_partial_list)) {
			spin_unlock(&skc->skc_lock);

			sks = spl_cache_grow(skc, flags);
			if (!sks)
				GOTO(out, refill);

			/* Rescheduled to different CPU skm is not local */
			if (skm != skc->skc_mag[smp_processor_id()])
				GOTO(out, refill);

			spin_lock(&skc->skc_lock);
			continue;
		}

		/* Grab the next available slab */
		sks = list_entry((&skc->skc_partial_list)->next,
		    spl_kmem_slab_t, sks_list);
		ASSERT(sks->sks_magic == SKS_MAGIC);
		ASSERT(sks->sks_ref < sks->sks_objs);
		ASSERT(!list_empty(&sks->sks_free_list));

		/* Consume as many objects as needed to refill the requested
		 * cache.  We must be careful to lock here because our local
		 * magazine may not be local anymore due to spl_cache_grow.
		 * 'refill' is decremented only when an object is actually
		 * moved so the count returned below stays exact. */
		while ((sks->sks_ref < sks->sks_objs) && (refill > 0) &&
		    (skm->skm_avail < skm->skm_size)) {
			skm->skm_objs[skm->skm_avail++] = spl_cache_obj(skc, sks);
			refill--;
		}

		/* Move slab to skc_complete_list when full */
		if (sks->sks_ref == sks->sks_objs) {
			list_del(&sks->sks_list);
			list_add(&sks->sks_list, &skc->skc_complete_list);
		}
	}

	spin_unlock(&skc->skc_lock);
out:
	/* Returns the number of entries added to cache */
	RETURN(skm->skm_refill - refill);
}
/*
 * Return the in-use object 'obj' to the slab it belongs to.
 *
 * The object is looked up in the cache's in-use hash, unhashed, and put
 * back on its slab's free list; the slab is then repositioned on the
 * cache lists according to its new reference count.  Must be called
 * with skc->skc_lock held.
 */
static void
spl_cache_shrink(spl_kmem_cache_t *skc, void *obj)
{
	spl_kmem_slab_t *sks = NULL;
	spl_kmem_obj_t *sko = NULL;
	ENTRY;

	ASSERT(spin_is_locked(&skc->skc_lock));

	sko = spl_hash_obj(skc, obj);
	/* skc_hash_count is a uint64_t, print it as such */
	ASSERTF(sko, "Obj %p missing from in-use hash (%llu) for cache %s\n",
	    obj, (unsigned long long)skc->skc_hash_count, skc->skc_name);

	sks = sko->sko_slab;
	ASSERTF(sks, "Obj %p/%p linked to invalid slab for cache %s\n",
	    obj, sko, skc->skc_name);

	ASSERT(sks->sks_cache == skc);
	hlist_del_init(&sko->sko_hlist);
	list_add(&sko->sko_list, &sks->sks_free_list);

	sks->sks_age = jiffies;
	sks->sks_ref--;
	skc->skc_obj_alloc--;
	skc->skc_hash_count--;

	/* Move slab to skc_partial_list when no longer full.  Slabs
	 * are added to the head to keep the partial list in quasi-full
	 * sorted order.  Fuller at the head, emptier at the tail. */
	if (sks->sks_ref == (sks->sks_objs - 1)) {
		list_del(&sks->sks_list);
		list_add(&sks->sks_list, &skc->skc_partial_list);
	}

	/* Move empty slabs to the end of the partial list so
	 * they can be easily found and freed during reclamation. */
	if (sks->sks_ref == 0) {
		list_del(&sks->sks_list);
		list_add_tail(&sks->sks_list, &skc->skc_partial_list);
		skc->skc_slab_alloc--;
	}

	EXIT;
}
/*
 * Push up to 'flush' objects from the magazine 'skm' back to the slabs
 * of 'skc'.  The lowest-indexed entries are released, idle slabs are
 * reclaimed, and the remaining pointers are shifted down to index 0.
 * Takes and releases skc->skc_lock.
 *
 * Returns the number of objects actually flushed.
 */
static int
spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush)
{
	int count = MIN(flush, skm->skm_avail);
	int idx = 0;
	ENTRY;

	spin_lock(&skc->skc_lock);

	/* Hand each flushed object back to its slab */
	while (idx < count) {
		spl_cache_shrink(skc, skm->skm_objs[idx]);
		idx++;
	}

	__spl_slab_reclaim(skc);

	/* Compact the surviving entries to the front of the array */
	skm->skm_avail -= count;
	memmove(skm->skm_objs, &(skm->skm_objs[count]),
	    sizeof(void *) * skm->skm_avail);

	spin_unlock(&skc->skc_lock);

	RETURN(count);
}
void *
spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
{
spl_kmem_magazine_t *skm;
unsigned long irq_flags;
void *obj = NULL;
ENTRY;
ASSERT(flags & KM_SLEEP);
local_irq_save(irq_flags);
restart:
/* Safe to update per-cpu structure without lock, but
* in the restart case we must be careful to reaquire
* the local magazine since this may have changed
* when we need to grow the cache. */
skm = skc->skc_mag[smp_processor_id()];
if (likely(skm->skm_avail)) {
/* Object available in CPU cache, use it */
obj = skm->skm_objs[--skm->skm_avail];
skm->skm_age = jiffies;
} else {
/* Per-CPU cache empty, directly allocate from
* the slab and refill the per-CPU cache. */
(void)spl_cache_refill(skc, skm, flags);
GOTO(restart, obj = NULL);
}
local_irq_restore(irq_flags);
/* Pre-emptively migrate object to CPU L1 cache */
prefetchw(obj);
RETURN(obj); RETURN(obj);
} }
EXPORT_SYMBOL(spl_kmem_cache_alloc); EXPORT_SYMBOL(spl_kmem_cache_alloc);
@ -537,62 +794,33 @@ EXPORT_SYMBOL(spl_kmem_cache_alloc);
void void
spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj) spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
{ {
struct hlist_node *node; spl_kmem_magazine_t *skm;
spl_kmem_slab_t *sks = NULL; unsigned long flags;
spl_kmem_obj_t *sko = NULL;
unsigned long key = spl_hash_ptr(obj, skc->skc_hash_bits);
int i = 0;
ENTRY; ENTRY;
spin_lock(&skc->skc_lock); local_irq_save(flags);
hlist_for_each_entry(sko, node, &skc->skc_hash[key], sko_hlist) { /* Safe to update per-cpu structure without lock, but
* no remote memory allocation tracking is being performed
* it is entirely possible to allocate an object from one
* CPU cache and return it to another. */
skm = skc->skc_mag[smp_processor_id()];
if (unlikely((++i) > skc->skc_hash_depth)) /* Per-CPU cache full, flush it to make space */
skc->skc_hash_depth = i; if (unlikely(skm->skm_avail >= skm->skm_size))
(void)spl_cache_flush(skc, skm, skm->skm_refill);
if (sko->sko_addr == obj) { /* Available space in cache, use it */
ASSERT(sko->sko_magic == SKO_MAGIC); skm->skm_objs[skm->skm_avail++] = obj;
sks = sko->sko_slab;
break;
}
}
ASSERT(sko != NULL); /* Obj must be in hash */ local_irq_restore(flags);
ASSERT(sks != NULL); /* Obj must reference slab */
ASSERT(sks->sks_cache == skc);
hlist_del_init(&sko->sko_hlist);
list_add(&sko->sko_list, &sks->sks_free_list);
sks->sks_age = jiffies; EXIT;
atomic_dec(&sks->sks_ref);
skc->skc_obj_alloc--;
/* Move slab to skc_partial_list when no longer full. Slabs
* are added to the kead to keep the partial list is quasi
* full sorted order. Fuller at the head, emptier at the tail.
*/
if (atomic_read(&sks->sks_ref) == (sks->sks_objs - 1)) {
list_del(&sks->sks_list);
list_add(&sks->sks_list, &skc->skc_partial_list);
}
/* Move emply slabs to the end of the partial list so
* they can be easily found and freed during reclamation.
*/
if (atomic_read(&sks->sks_ref) == 0) {
list_del(&sks->sks_list);
list_add_tail(&sks->sks_list, &skc->skc_partial_list);
skc->skc_slab_alloc--;
}
__slab_reclaim(skc);
spin_unlock(&skc->skc_lock);
} }
EXPORT_SYMBOL(spl_kmem_cache_free); EXPORT_SYMBOL(spl_kmem_cache_free);
static int static int
kmem_cache_generic_shrinker(int nr_to_scan, unsigned int gfp_mask) spl_kmem_cache_generic_shrinker(int nr_to_scan, unsigned int gfp_mask)
{ {
spl_kmem_cache_t *skc; spl_kmem_cache_t *skc;
@ -619,13 +847,24 @@ kmem_cache_generic_shrinker(int nr_to_scan, unsigned int gfp_mask)
void void
spl_kmem_cache_reap_now(spl_kmem_cache_t *skc) spl_kmem_cache_reap_now(spl_kmem_cache_t *skc)
{ {
spl_kmem_magazine_t *skm;
int i;
ENTRY; ENTRY;
ASSERT(skc && skc->skc_magic == SKC_MAGIC); ASSERT(skc && skc->skc_magic == SKC_MAGIC);
if (skc->skc_reclaim) if (skc->skc_reclaim)
skc->skc_reclaim(skc->skc_private); skc->skc_reclaim(skc->skc_private);
slab_reclaim(skc); /* Ensure per-CPU caches which are idle gradually flush */
for_each_online_cpu(i) {
skm = skc->skc_mag[i];
if (time_after(jiffies, skm->skm_age + skc->skc_delay * HZ))
(void)spl_cache_flush(skc, skm, skm->skm_refill);
}
spl_slab_reclaim(skc);
EXIT; EXIT;
} }
EXPORT_SYMBOL(spl_kmem_cache_reap_now); EXPORT_SYMBOL(spl_kmem_cache_reap_now);
@ -633,7 +872,7 @@ EXPORT_SYMBOL(spl_kmem_cache_reap_now);
void void
spl_kmem_reap(void) spl_kmem_reap(void)
{ {
kmem_cache_generic_shrinker(KMC_REAP_CHUNK, GFP_KERNEL); spl_kmem_cache_generic_shrinker(KMC_REAP_CHUNK, GFP_KERNEL);
} }
EXPORT_SYMBOL(spl_kmem_reap); EXPORT_SYMBOL(spl_kmem_reap);
@ -663,7 +902,7 @@ spl_kmem_init(void)
#ifdef HAVE_SET_SHRINKER #ifdef HAVE_SET_SHRINKER
spl_kmem_cache_shrinker = set_shrinker(KMC_DEFAULT_SEEKS, spl_kmem_cache_shrinker = set_shrinker(KMC_DEFAULT_SEEKS,
kmem_cache_generic_shrinker); spl_kmem_cache_generic_shrinker);
if (spl_kmem_cache_shrinker == NULL) if (spl_kmem_cache_shrinker == NULL)
GOTO(out_cache, rc = -ENOMEM); GOTO(out_cache, rc = -ENOMEM);
#else #else
@ -703,7 +942,7 @@ out_cache:
#ifdef DEBUG_KMEM #ifdef DEBUG_KMEM
static char * static char *
sprintf_addr(kmem_debug_t *kd, char *str, int len, int min) spl_sprintf_addr(kmem_debug_t *kd, char *str, int len, int min)
{ {
int size = ((len - 1) < kd->kd_size) ? (len - 1) : kd->kd_size; int size = ((len - 1) < kd->kd_size) ? (len - 1) : kd->kd_size;
int i, flag = 1; int i, flag = 1;
@ -769,7 +1008,7 @@ spl_kmem_fini(void)
list_for_each_entry(kd, &kmem_list, kd_list) list_for_each_entry(kd, &kmem_list, kd_list)
CDEBUG(D_WARNING, "%p %-5d %-16s %s:%d\n", CDEBUG(D_WARNING, "%p %-5d %-16s %s:%d\n",
kd->kd_addr, kd->kd_size, kd->kd_addr, kd->kd_size,
sprintf_addr(kd, str, 17, 8), spl_sprintf_addr(kd, str, 17, 8),
kd->kd_func, kd->kd_line); kd->kd_func, kd->kd_line);
spin_unlock_irqrestore(&kmem_lock, flags); spin_unlock_irqrestore(&kmem_lock, flags);
@ -786,7 +1025,7 @@ spl_kmem_fini(void)
list_for_each_entry(kd, &vmem_list, kd_list) list_for_each_entry(kd, &vmem_list, kd_list)
CDEBUG(D_WARNING, "%p %-5d %-16s %s:%d\n", CDEBUG(D_WARNING, "%p %-5d %-16s %s:%d\n",
kd->kd_addr, kd->kd_size, kd->kd_addr, kd->kd_size,
sprintf_addr(kd, str, 17, 8), spl_sprintf_addr(kd, str, 17, 8),
kd->kd_func, kd->kd_line); kd->kd_func, kd->kd_line);
spin_unlock_irqrestore(&vmem_lock, flags); spin_unlock_irqrestore(&vmem_lock, flags);

View File

@ -913,7 +913,9 @@ out:
if (rc) { if (rc) {
remove_proc_entry("kstat", proc_spl); remove_proc_entry("kstat", proc_spl);
remove_proc_entry("kmem", proc_spl); remove_proc_entry("kmem", proc_spl);
#ifdef DEBUG_MUTEX
remove_proc_entry("stats_per", proc_spl_mutex); remove_proc_entry("stats_per", proc_spl_mutex);
#endif
remove_proc_entry("mutex", proc_spl); remove_proc_entry("mutex", proc_spl);
remove_proc_entry("spl", NULL); remove_proc_entry("spl", NULL);
#ifdef CONFIG_SYSCTL #ifdef CONFIG_SYSCTL
@ -933,7 +935,9 @@ proc_fini(void)
#if defined(DEBUG_MUTEX) || defined(DEBUG_KMEM) || defined(DEBUG_KSTAT) #if defined(DEBUG_MUTEX) || defined(DEBUG_KMEM) || defined(DEBUG_KSTAT)
remove_proc_entry("kstat", proc_spl); remove_proc_entry("kstat", proc_spl);
remove_proc_entry("kmem", proc_spl); remove_proc_entry("kmem", proc_spl);
#ifdef DEBUG_MUTEX
remove_proc_entry("stats_per", proc_spl_mutex); remove_proc_entry("stats_per", proc_spl_mutex);
#endif
remove_proc_entry("mutex", proc_spl); remove_proc_entry("mutex", proc_spl);
remove_proc_entry("spl", NULL); remove_proc_entry("spl", NULL);
#endif /* DEBUG_MUTEX || DEBUG_KMEM || DEBUG_KSTAT */ #endif /* DEBUG_MUTEX || DEBUG_KMEM || DEBUG_KSTAT */

View File

@ -486,7 +486,7 @@ vn_getf(int fd)
spin_unlock(&vn_file_lock); spin_unlock(&vn_file_lock);
/* File was not yet opened create the object and setup */ /* File was not yet opened create the object and setup */
fp = kmem_cache_alloc(vn_file_cache, 0); fp = kmem_cache_alloc(vn_file_cache, KM_SLEEP);
if (fp == NULL) if (fp == NULL)
GOTO(out, rc); GOTO(out, rc);

View File

@ -525,6 +525,9 @@ splat_kmem_test8_thread(void *arg)
objs = vmem_zalloc(count * sizeof(void *), KM_SLEEP); objs = vmem_zalloc(count * sizeof(void *), KM_SLEEP);
if (!objs) { if (!objs) {
splat_vprint(kcp->kcp_file, SPLAT_KMEM_TEST8_NAME,
"Unable to alloc objp array for cache '%s'\n",
kcp->kcp_cache->skc_name);
rc = -ENOMEM; rc = -ENOMEM;
goto out; goto out;
} }
@ -533,14 +536,13 @@ splat_kmem_test8_thread(void *arg)
objs[i] = kmem_cache_alloc(kcp->kcp_cache, KM_SLEEP); objs[i] = kmem_cache_alloc(kcp->kcp_cache, KM_SLEEP);
if (!objs[i]) { if (!objs[i]) {
splat_vprint(kcp->kcp_file, SPLAT_KMEM_TEST8_NAME, splat_vprint(kcp->kcp_file, SPLAT_KMEM_TEST8_NAME,
"Unable to allocate from '%s'\n", "Unable to allocate from cache '%s'\n",
SPLAT_KMEM_CACHE_NAME); kcp->kcp_cache->skc_name);
rc = -ENOMEM; rc = -ENOMEM;
goto out_free; break;
} }
} }
out_free:
for (i = 0; i < count; i++) for (i = 0; i < count; i++)
if (objs[i]) if (objs[i])
kmem_cache_free(kcp->kcp_cache, objs[i]); kmem_cache_free(kcp->kcp_cache, objs[i]);
@ -578,6 +580,7 @@ splat_kmem_test8(struct file *file, void *arg)
kmem_cache_priv_t kcp; kmem_cache_priv_t kcp;
kthread_t *thr; kthread_t *thr;
struct timespec start, stop, delta; struct timespec start, stop, delta;
char cache_name[16];
int alloc, i; int alloc, i;
kcp.kcp_magic = SPLAT_KMEM_TEST_MAGIC; kcp.kcp_magic = SPLAT_KMEM_TEST_MAGIC;
@ -588,7 +591,7 @@ splat_kmem_test8(struct file *file, void *arg)
splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%s", splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%s",
" \ttot/max/calc\ttot/max/calc\tsize/depth\n"); " \ttot/max/calc\ttot/max/calc\tsize/depth\n");
for (alloc = 64; alloc <= 4096; alloc *= 2) { for (alloc = 1; alloc <= 4096; alloc *= 2) {
kcp.kcp_size = 256; kcp.kcp_size = 256;
kcp.kcp_count = 0; kcp.kcp_count = 0;
kcp.kcp_threads = 0; kcp.kcp_threads = 0;
@ -597,9 +600,8 @@ splat_kmem_test8(struct file *file, void *arg)
spin_lock_init(&kcp.kcp_lock); spin_lock_init(&kcp.kcp_lock);
init_waitqueue_head(&kcp.kcp_waitq); init_waitqueue_head(&kcp.kcp_waitq);
sprintf(cache_name, "%s-%d", SPLAT_KMEM_CACHE_NAME, alloc);
kcp.kcp_cache = kmem_cache_create(SPLAT_KMEM_CACHE_NAME, kcp.kcp_cache = kmem_cache_create(cache_name, kcp.kcp_size, 0,
kcp.kcp_size, 0,
splat_kmem_cache_test_constructor, splat_kmem_cache_test_constructor,
splat_kmem_cache_test_destructor, splat_kmem_cache_test_destructor,
NULL, &kcp, NULL, 0); NULL, &kcp, NULL, 0);