The first locking issue was due to the semaphore I used.  I was trying
to be overly clever, and the context switch taken whenever the semaphore
was contended was destroying performance.  Converting to a simple spin
lock bought me a factor of 50 or so.  That said, it is still not good
enough; the test results below still show poor performance and we remain
CPU bound.  The logical fix is to implement per-cpu hot caches to
minimize the SMP contention.  Linux and Solaris both have these, I was
hoping to do without but it looks like that's not to be.
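
For the record, the per-cpu hot caches referred to above are the magazine
layer used by the Solaris kmem and Linux slab allocators: each cpu keeps a
small private stack of objects so the common alloc/free path never touches
the shared skc_lock.  None of that is in this change yet; the fragment below
is only a rough sketch of the shape it might take, and the names
spl_kmem_magazine, SPL_KMEM_MAG_SIZE, skc_mag, spl_kmem_cache_alloc_fast,
and spl_kmem_cache_alloc_slow are made up for illustration.

    /* Illustrative sketch only -- not part of this commit.  A per-cpu
     * magazine caches a handful of objects so the fast path can avoid
     * taking skc->skc_lock (interrupt context ignored for brevity). */
    #include <linux/smp.h>

    #define SPL_KMEM_MAG_SIZE       64      /* Objects cached per cpu (made up) */

    typedef struct spl_kmem_magazine {
            int     skm_avail;                      /* Objects currently cached */
            void    *skm_objs[SPL_KMEM_MAG_SIZE];   /* Cached object pointers */
    } spl_kmem_magazine_t;

    /* Hypothetical fast path: skc->skc_mag[] would be one magazine per cpu,
     * and spl_kmem_cache_alloc_slow() the existing skc_lock-protected path. */
    static void *
    spl_kmem_cache_alloc_fast(spl_kmem_cache_t *skc, int flags)
    {
            spl_kmem_magazine_t *skm;
            void *obj = NULL;

            skm = &skc->skc_mag[get_cpu()];         /* Disables preemption */
            if (skm->skm_avail > 0)
                    obj = skm->skm_objs[--skm->skm_avail];
            put_cpu();

            if (obj == NULL)                        /* Magazine empty, refill */
                    obj = spl_kmem_cache_alloc_slow(skc, flags);

            return (obj);
    }

Freeing would work the same way in reverse, pushing onto the local magazine
and only spilling back to the slab lists under skc_lock once the magazine
fills, which keeps the spin lock and its cache line out of the hot path.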

   kmem_lock:  time (sec)    slabs           objs              hash
   kmem_lock:                tot/max/calc    tot/max/calc      size/depth
   kmem_lock:  0.022000000   7/6/64          224/177/2048      32768/1
   kmem_lock:  0.039000000   13/13/128       416/404/4096      32768/1
   kmem_lock:  0.079000000   23/21/256       736/672/8192      32768/1
   kmem_lock:  0.158000000   48/47/512       1536/1504/16384   32768/1
   kmem_lock:  0.345000000   105/105/1024    3360/3358/32768   32768/2
   kmem_lock:  0.760000000   202/200/2048    6464/6400/65536   32768/3
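
Reading the table: tot and max are the cache's current and high-water
slab/object counts, calc is the total the test itself expects, and hash is
skc_hash_size paired with the deepest hash chain walk recorded in
skc_hash_depth.  The expected values come straight from the
splat_kmem_test8() output code in the diff below: objs calc = kcp_alloc * 32
and slabs calc = (kcp_alloc * 32) / SPL_KMEM_CACHE_OBJ_PER_SLAB.  For the
first row that is 64 * 32 = 2048 objects and, with the 32 objects per slab
the numbers imply, 2048 / 32 = 64 slabs, matching the calc columns.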



git-svn-id: https://outreach.scidac.gov/svn/spl/trunk@135 7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c
behlendo 2008-06-24 17:18:15 +00:00
parent 44b8f1769f
commit d46630e0f3
3 changed files with 41 additions and 35 deletions


@@ -364,7 +364,7 @@ extern int kmem_set_warning(int flag);
 #define SKS_MAGIC                       0x22222222
 #define SKC_MAGIC                       0x2c2c2c2c
-#define SPL_KMEM_CACHE_HASH_BITS        12      /* 4k, sized for 1000's of objs */
+#define SPL_KMEM_CACHE_HASH_BITS        12
 #define SPL_KMEM_CACHE_HASH_ELTS        (1 << SPL_KMEM_CACHE_HASH_BITS)
 #define SPL_KMEM_CACHE_HASH_SIZE        (sizeof(struct hlist_head) * \
                                          SPL_KMEM_CACHE_HASH_ELTS)
@@ -417,16 +417,16 @@ typedef struct spl_kmem_cache {
         struct list_head        skc_list;           /* List of caches linkage */
         struct list_head        skc_complete_list;  /* Completely alloc'ed */
         struct list_head        skc_partial_list;   /* Partially alloc'ed */
-        struct rw_semaphore     skc_sem;            /* Cache semaphore */
+        spinlock_t              skc_lock;           /* Cache lock */
         uint64_t                skc_slab_fail;      /* Slab alloc failures */
         uint64_t                skc_slab_create;    /* Slab creates */
         uint64_t                skc_slab_destroy;   /* Slab destroys */
-        uint64_t                skc_slab_total;     /* Slab total */
-        uint64_t                skc_slab_alloc;     /* Slab alloc */
-        uint64_t                skc_slab_max;       /* Slab max */
-        uint64_t                skc_obj_total;      /* Obj total */
-        uint64_t                skc_obj_alloc;      /* Obj alloc */
-        uint64_t                skc_obj_max;        /* Obj max */
+        uint64_t                skc_slab_total;     /* Slab total current */
+        uint64_t                skc_slab_alloc;     /* Slab alloc current */
+        uint64_t                skc_slab_max;       /* Slab max historic */
+        uint64_t                skc_obj_total;      /* Obj total current */
+        uint64_t                skc_obj_alloc;      /* Obj alloc current */
+        uint64_t                skc_obj_max;        /* Obj max historic */
         uint64_t                skc_hash_depth;     /* Hash depth */
         uint64_t                skc_hash_max;       /* Hash depth max */
 } spl_kmem_cache_t;


@@ -221,7 +221,7 @@ out:
 }

 /* Removes slab from complete or partial list, so it must
- * be called with the 'skc->skc_sem' semaphore held.
+ * be called with the 'skc->skc_lock' held.
  * */
 static void
 slab_free(spl_kmem_slab_t *sks) {
@@ -236,9 +236,9 @@ slab_free(spl_kmem_slab_t *sks) {
         skc->skc_obj_total -= sks->sks_objs;
         skc->skc_slab_total--;

-#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
-        ASSERT(rwsem_is_locked(&skc->skc_sem));
-#endif
+//#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
+        ASSERT(spin_is_locked(&skc->skc_lock));
+//#endif

         list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) {
                 ASSERT(sko->sko_magic == SKO_MAGIC);
@@ -267,9 +267,9 @@ __slab_reclaim(spl_kmem_cache_t *skc)
         int rc = 0;
         ENTRY;

-#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
-        ASSERT(rwsem_is_locked(&skc->skc_sem));
-#endif
+//#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
+        ASSERT(spin_is_locked(&skc->skc_lock));
+//#endif

         /*
          * Free empty slabs which have not been touched in skc_delay
          * seconds.  This delay time is important to avoid thrashing.
@@ -296,9 +296,9 @@ slab_reclaim(spl_kmem_cache_t *skc)
         int rc;
         ENTRY;

-        down_write(&skc->skc_sem);
+        spin_lock(&skc->skc_lock);
         rc = __slab_reclaim(skc);
-        up_write(&skc->skc_sem);
+        spin_unlock(&skc->skc_lock);

         RETURN(rc);
 }
@@ -363,7 +363,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
         INIT_LIST_HEAD(&skc->skc_list);
         INIT_LIST_HEAD(&skc->skc_complete_list);
         INIT_LIST_HEAD(&skc->skc_partial_list);
-        init_rwsem(&skc->skc_sem);
+        spin_lock_init(&skc->skc_lock);
         skc->skc_slab_fail = 0;
         skc->skc_slab_create = 0;
         skc->skc_slab_destroy = 0;
@@ -398,7 +398,7 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
         list_del_init(&skc->skc_list);
         up_write(&spl_kmem_cache_sem);

-        down_write(&skc->skc_sem);
+        spin_lock(&skc->skc_lock);

         /* Validate there are no objects in use and free all the
          * spl_kmem_slab_t, spl_kmem_obj_t, and object buffers.
@@ -411,7 +411,7 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
         kmem_free(skc->skc_hash, skc->skc_hash_size);
         kmem_free(skc->skc_name, skc->skc_name_size);
         kmem_free(skc, sizeof(*skc));
-        up_write(&skc->skc_sem);
+        spin_unlock(&skc->skc_lock);

         EXIT;
 }
@@ -441,7 +441,7 @@ spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
         unsigned long key;
         ENTRY;

-        down_write(&skc->skc_sem);
+        spin_lock(&skc->skc_lock);
 restart:
         /* Check for available objects from the partial slabs */
         if (!list_empty(&skc->skc_partial_list)) {
@@ -459,7 +459,7 @@ restart:
                 /* Remove from sks_free_list, add to used hash */
                 list_del_init(&sko->sko_list);
                 key = spl_hash_ptr(sko->sko_addr, skc->skc_hash_bits);
-                hlist_add_head_rcu(&sko->sko_hlist, &skc->skc_hash[key]);
+                hlist_add_head(&sko->sko_hlist, &skc->skc_hash[key]);

                 sks->sks_age = jiffies;
                 atomic_inc(&sks->sks_ref);
@@ -484,7 +484,7 @@ restart:
                 GOTO(out_lock, obj = sko->sko_addr);
         }

-        up_write(&skc->skc_sem);
+        spin_unlock(&skc->skc_lock);

         /* No available objects create a new slab.  Since this is an
          * expensive operation we do it without holding the semaphore
@@ -521,14 +521,14 @@ restart:
         /* Link the newly created slab in to the skc_partial_list,
          * and retry the allocation which will now succeed.
          */
-        down_write(&skc->skc_sem);
+        spin_lock(&skc->skc_lock);
         skc->skc_slab_total++;
         skc->skc_obj_total += sks->sks_objs;
         list_add_tail(&sks->sks_list, &skc->skc_partial_list);

         GOTO(restart, obj = NULL);
 out_lock:
-        up_write(&skc->skc_sem);
+        spin_unlock(&skc->skc_lock);
 out:
         RETURN(obj);
 }
@@ -537,16 +537,20 @@ EXPORT_SYMBOL(spl_kmem_cache_alloc);
 void
 spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
 {
-        struct hlist_head *head;
         struct hlist_node *node;
         spl_kmem_slab_t *sks = NULL;
         spl_kmem_obj_t *sko = NULL;
+        unsigned long key = spl_hash_ptr(obj, skc->skc_hash_bits);
+        int i = 0;
         ENTRY;

-        down_write(&skc->skc_sem);
-        head = &skc->skc_hash[spl_hash_ptr(obj, skc->skc_hash_bits)];
-        hlist_for_each_entry_rcu(sko, node, head, sko_hlist) {
+        spin_lock(&skc->skc_lock);
+        hlist_for_each_entry(sko, node, &skc->skc_hash[key], sko_hlist) {
+                if (unlikely((++i) > skc->skc_hash_depth))
+                        skc->skc_hash_depth = i;
+
                 if (sko->sko_addr == obj) {
                         ASSERT(sko->sko_magic == SKO_MAGIC);
                         sks = sko->sko_slab;
@@ -583,7 +587,7 @@ spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
         }

         __slab_reclaim(skc);
-        up_write(&skc->skc_sem);
+        spin_unlock(&skc->skc_lock);
 }
 EXPORT_SYMBOL(spl_kmem_cache_free);


@@ -584,11 +584,11 @@ splat_kmem_test8(struct file *file, void *arg)
         kcp.kcp_file = file;

         splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%s",
-                     "time (sec)\tslabs \tobjs\n");
+                     "time (sec)\tslabs \tobjs \thash\n");
         splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%s",
-                     " \ttot/max/calc\ttot/max/calc\n");
+                     " \ttot/max/calc\ttot/max/calc\tsize/depth\n");

-        for (alloc = 64; alloc <= 1024; alloc *= 2) {
+        for (alloc = 64; alloc <= 4096; alloc *= 2) {
                 kcp.kcp_size = 256;
                 kcp.kcp_count = 0;
                 kcp.kcp_threads = 0;
@@ -625,14 +625,16 @@ splat_kmem_test8(struct file *file, void *arg)
                 delta = timespec_sub(stop, start);

                 splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%2ld.%09ld\t"
-                             "%lu/%lu/%lu\t%lu/%lu/%lu\n",
+                             "%lu/%lu/%lu\t%lu/%lu/%lu\t%lu/%lu\n",
                              delta.tv_sec, delta.tv_nsec,
                              (unsigned long)kcp.kcp_cache->skc_slab_total,
                              (unsigned long)kcp.kcp_cache->skc_slab_max,
                              (unsigned long)(kcp.kcp_alloc * 32 / SPL_KMEM_CACHE_OBJ_PER_SLAB),
                              (unsigned long)kcp.kcp_cache->skc_obj_total,
                              (unsigned long)kcp.kcp_cache->skc_obj_max,
-                             (unsigned long)(kcp.kcp_alloc * 32));
+                             (unsigned long)(kcp.kcp_alloc * 32),
+                             (unsigned long)kcp.kcp_cache->skc_hash_size,
+                             (unsigned long)kcp.kcp_cache->skc_hash_depth);

                 kmem_cache_destroy(kcp.kcp_cache);