The first locking issue was due to the semaphore I used. I was trying
to be overly clever, and the context switch taken whenever the semaphore
was busy was destroying performance. Converting to a simple spin lock
bought me a factor of 50 or so. That said, it's still not good enough:
tests still show poor performance, and we are still CPU bound. The
logical fix is to implement per-cpu hot caches to minimize SMP
contention. Linux and Solaris both have these; I was hoping to do
without, but it looks like that's not to be.

kmem_lock: time (sec)        slabs           objs             hash
kmem_lock:                   tot/max/calc    tot/max/calc     size/depth
kmem_lock: 0.022000000         7/6/64         224/177/2048    32768/1
kmem_lock: 0.039000000        13/13/128       416/404/4096    32768/1
kmem_lock: 0.079000000        23/21/256       736/672/8192    32768/1
kmem_lock: 0.158000000        48/47/512      1536/1504/16384  32768/1
kmem_lock: 0.345000000       105/105/1024    3360/3358/32768  32768/2
kmem_lock: 0.760000000       202/200/2048    6464/6400/65536  32768/3

git-svn-id: https://outreach.scidac.gov/svn/spl/trunk@135 7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c
parent 44b8f1769f
commit d46630e0f3
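
For context on the planned fix: a "per-cpu hot cache" is essentially the
magazine layer that the Solaris kmem and Linux slab allocators keep in
front of their global slab lists, so the common alloc/free path never
touches the shared lock. Below is a minimal sketch of that idea; it is
not part of this commit, and every name in it (spl_kmem_magazine_t,
skm_avail, skm_objs, skc_mag, SPL_MAG_SIZE, spl_cache_alloc_fast) is
hypothetical rather than existing SPL code.

#include <linux/smp.h>                  /* get_cpu()/put_cpu() */

#define SPL_MAG_SIZE    16              /* Objects cached per cpu (tunable) */

typedef struct spl_kmem_magazine {
        int     skm_avail;                      /* Objects in this magazine */
        void    *skm_objs[SPL_MAG_SIZE];        /* Cached object pointers */
} spl_kmem_magazine_t;

/* Fast-path alloc: pop an object from this cpu's private magazine.
 * get_cpu() disables preemption, so the magazine needs no lock; the
 * contended skc_lock is only taken on a miss, when we fall back to
 * the global slab lists.  Assumes a hypothetical skc_mag[NR_CPUS]
 * array added to spl_kmem_cache_t. */
static void *
spl_cache_alloc_fast(spl_kmem_cache_t *skc, int flags)
{
        spl_kmem_magazine_t *skm = &skc->skc_mag[get_cpu()];
        void *obj = NULL;

        if (skm->skm_avail > 0)
                obj = skm->skm_objs[--skm->skm_avail];

        put_cpu();

        /* Miss: fall back to the existing locked slow path */
        if (obj == NULL)
                obj = spl_kmem_cache_alloc(skc, flags);

        return obj;
}

The free path would be symmetric: push into the local magazine and take
skc_lock only to flush a full magazine back to its slabs, so steady-state
alloc/free pairs on one cpu never touch the shared lock.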
@@ -364,7 +364,7 @@ extern int kmem_set_warning(int flag);
 #define SKS_MAGIC                       0x22222222
 #define SKC_MAGIC                       0x2c2c2c2c
 
-#define SPL_KMEM_CACHE_HASH_BITS        12 /* 4k, sized for 1000's of objs */
+#define SPL_KMEM_CACHE_HASH_BITS        12
 #define SPL_KMEM_CACHE_HASH_ELTS        (1 << SPL_KMEM_CACHE_HASH_BITS)
 #define SPL_KMEM_CACHE_HASH_SIZE        (sizeof(struct hlist_head) * \
                                          SPL_KMEM_CACHE_HASH_ELTS)
@@ -417,16 +417,16 @@ typedef struct spl_kmem_cache {
         struct list_head        skc_list;           /* List of caches linkage */
         struct list_head        skc_complete_list;  /* Completely alloc'ed */
         struct list_head        skc_partial_list;   /* Partially alloc'ed */
-        struct rw_semaphore     skc_sem;            /* Cache semaphore */
+        spinlock_t              skc_lock;           /* Cache lock */
         uint64_t                skc_slab_fail;      /* Slab alloc failures */
         uint64_t                skc_slab_create;    /* Slab creates */
         uint64_t                skc_slab_destroy;   /* Slab destroys */
-        uint64_t                skc_slab_total;     /* Slab total */
-        uint64_t                skc_slab_alloc;     /* Slab alloc */
-        uint64_t                skc_slab_max;       /* Slab max */
-        uint64_t                skc_obj_total;      /* Obj total */
-        uint64_t                skc_obj_alloc;      /* Obj alloc */
-        uint64_t                skc_obj_max;        /* Obj max */
+        uint64_t                skc_slab_total;     /* Slab total current */
+        uint64_t                skc_slab_alloc;     /* Slab alloc current */
+        uint64_t                skc_slab_max;       /* Slab max historic */
+        uint64_t                skc_obj_total;      /* Obj total current */
+        uint64_t                skc_obj_alloc;      /* Obj alloc current */
+        uint64_t                skc_obj_max;        /* Obj max historic */
         uint64_t                skc_hash_depth;     /* Hash depth */
         uint64_t                skc_hash_max;       /* Hash depth max */
 } spl_kmem_cache_t;
@@ -221,7 +221,7 @@ out:
 }
 
 /* Removes slab from complete or partial list, so it must
- * be called with the 'skc->skc_sem' semaphore held.
+ * be called with the 'skc->skc_lock' held.
  * */
 static void
 slab_free(spl_kmem_slab_t *sks) {
@@ -236,9 +236,9 @@ slab_free(spl_kmem_slab_t *sks) {
         skc->skc_obj_total -= sks->sks_objs;
         skc->skc_slab_total--;
 
-#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
-        ASSERT(rwsem_is_locked(&skc->skc_sem));
-#endif
+//#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
+        ASSERT(spin_is_locked(&skc->skc_lock));
+//#endif
 
         list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) {
                 ASSERT(sko->sko_magic == SKO_MAGIC);
@@ -267,9 +267,9 @@ __slab_reclaim(spl_kmem_cache_t *skc)
         int rc = 0;
         ENTRY;
 
-#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
-        ASSERT(rwsem_is_locked(&skc->skc_sem));
-#endif
+//#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
+        ASSERT(spin_is_locked(&skc->skc_lock));
+//#endif
         /*
          * Free empty slabs which have not been touched in skc_delay
          * seconds.  This delay time is important to avoid thrashing.
@@ -296,9 +296,9 @@ slab_reclaim(spl_kmem_cache_t *skc)
         int rc;
         ENTRY;
 
-        down_write(&skc->skc_sem);
+        spin_lock(&skc->skc_lock);
         rc = __slab_reclaim(skc);
-        up_write(&skc->skc_sem);
+        spin_unlock(&skc->skc_lock);
 
         RETURN(rc);
 }
@@ -363,7 +363,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
         INIT_LIST_HEAD(&skc->skc_list);
         INIT_LIST_HEAD(&skc->skc_complete_list);
         INIT_LIST_HEAD(&skc->skc_partial_list);
-        init_rwsem(&skc->skc_sem);
+        spin_lock_init(&skc->skc_lock);
         skc->skc_slab_fail = 0;
         skc->skc_slab_create = 0;
         skc->skc_slab_destroy = 0;
@@ -398,7 +398,7 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
         list_del_init(&skc->skc_list);
         up_write(&spl_kmem_cache_sem);
 
-        down_write(&skc->skc_sem);
+        spin_lock(&skc->skc_lock);
 
         /* Validate there are no objects in use and free all the
          * spl_kmem_slab_t, spl_kmem_obj_t, and object buffers.
@@ -411,7 +411,7 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
         kmem_free(skc->skc_hash, skc->skc_hash_size);
         kmem_free(skc->skc_name, skc->skc_name_size);
         kmem_free(skc, sizeof(*skc));
-        up_write(&skc->skc_sem);
+        spin_unlock(&skc->skc_lock);
 
         EXIT;
 }
@@ -441,7 +441,7 @@ spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
         unsigned long key;
         ENTRY;
 
-        down_write(&skc->skc_sem);
+        spin_lock(&skc->skc_lock);
 restart:
         /* Check for available objects from the partial slabs */
         if (!list_empty(&skc->skc_partial_list)) {
@@ -459,7 +459,7 @@ restart:
                 /* Remove from sks_free_list, add to used hash */
                 list_del_init(&sko->sko_list);
                 key = spl_hash_ptr(sko->sko_addr, skc->skc_hash_bits);
-                hlist_add_head_rcu(&sko->sko_hlist, &skc->skc_hash[key]);
+                hlist_add_head(&sko->sko_hlist, &skc->skc_hash[key]);
 
                 sks->sks_age = jiffies;
                 atomic_inc(&sks->sks_ref);
@@ -484,7 +484,7 @@ restart:
                 GOTO(out_lock, obj = sko->sko_addr);
         }
 
-        up_write(&skc->skc_sem);
+        spin_unlock(&skc->skc_lock);
 
         /* No available objects create a new slab.  Since this is an
          * expensive operation we do it without holding the semaphore
@@ -521,14 +521,14 @@ restart:
         /* Link the newly created slab in to the skc_partial_list,
          * and retry the allocation which will now succeed.
          */
-        down_write(&skc->skc_sem);
+        spin_lock(&skc->skc_lock);
         skc->skc_slab_total++;
         skc->skc_obj_total += sks->sks_objs;
         list_add_tail(&sks->sks_list, &skc->skc_partial_list);
         GOTO(restart, obj = NULL);
 
 out_lock:
-        up_write(&skc->skc_sem);
+        spin_unlock(&skc->skc_lock);
 out:
         RETURN(obj);
 }
@@ -537,16 +537,20 @@ EXPORT_SYMBOL(spl_kmem_cache_alloc);
 void
 spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
 {
-        struct hlist_head *head;
         struct hlist_node *node;
         spl_kmem_slab_t *sks = NULL;
         spl_kmem_obj_t *sko = NULL;
+        unsigned long key = spl_hash_ptr(obj, skc->skc_hash_bits);
+        int i = 0;
         ENTRY;
 
-        down_write(&skc->skc_sem);
+        spin_lock(&skc->skc_lock);
 
-        head = &skc->skc_hash[spl_hash_ptr(obj, skc->skc_hash_bits)];
-        hlist_for_each_entry_rcu(sko, node, head, sko_hlist) {
+        hlist_for_each_entry(sko, node, &skc->skc_hash[key], sko_hlist) {
+
+                if (unlikely((++i) > skc->skc_hash_depth))
+                        skc->skc_hash_depth = i;
+
                 if (sko->sko_addr == obj) {
                         ASSERT(sko->sko_magic == SKO_MAGIC);
                         sks = sko->sko_slab;
@@ -583,7 +587,7 @@ spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
         }
 
         __slab_reclaim(skc);
-        up_write(&skc->skc_sem);
+        spin_unlock(&skc->skc_lock);
 }
 EXPORT_SYMBOL(spl_kmem_cache_free);
 
@@ -584,11 +584,11 @@ splat_kmem_test8(struct file *file, void *arg)
         kcp.kcp_file = file;
 
         splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%s",
-                     "time (sec)\tslabs       \tobjs\n");
+                     "time (sec)\tslabs       \tobjs        \thash\n");
         splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%s",
-                     "          \ttot/max/calc\ttot/max/calc\n");
+                     "          \ttot/max/calc\ttot/max/calc\tsize/depth\n");
 
-        for (alloc = 64; alloc <= 1024; alloc *= 2) {
+        for (alloc = 64; alloc <= 4096; alloc *= 2) {
                 kcp.kcp_size = 256;
                 kcp.kcp_count = 0;
                 kcp.kcp_threads = 0;
@@ -625,14 +625,16 @@ splat_kmem_test8(struct file *file, void *arg)
                 delta = timespec_sub(stop, start);
 
                 splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%2ld.%09ld\t"
-                             "%lu/%lu/%lu\t%lu/%lu/%lu\n",
+                             "%lu/%lu/%lu\t%lu/%lu/%lu\t%lu/%lu\n",
                              delta.tv_sec, delta.tv_nsec,
                              (unsigned long)kcp.kcp_cache->skc_slab_total,
                              (unsigned long)kcp.kcp_cache->skc_slab_max,
                              (unsigned long)(kcp.kcp_alloc * 32 / SPL_KMEM_CACHE_OBJ_PER_SLAB),
                              (unsigned long)kcp.kcp_cache->skc_obj_total,
                              (unsigned long)kcp.kcp_cache->skc_obj_max,
-                             (unsigned long)(kcp.kcp_alloc * 32));
+                             (unsigned long)(kcp.kcp_alloc * 32),
+                             (unsigned long)kcp.kcp_cache->skc_hash_size,
+                             (unsigned long)kcp.kcp_cache->skc_hash_depth);
 
                 kmem_cache_destroy(kcp.kcp_cache);
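
A note on reading the test output and the table in the commit message
(an inference from the code above, not text from the commit): for both
slabs and objs, "tot" and "max" are the cache's skc_*_total and
skc_*_max counters, "calc" is the ideal count the test expects, and
"size/depth" is skc_hash_size and skc_hash_depth. Objs calc is
kcp.kcp_alloc * 32, and slabs calc divides that by
SPL_KMEM_CACHE_OBJ_PER_SLAB; for the first table row, kcp_alloc = 64
gives 64 * 32 = 2048 objects and 2048 / 32 = 64 slabs, which suggests
SPL_KMEM_CACHE_OBJ_PER_SLAB is 32 in this build.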
|