- Remove hash functionality from slab in favor of direct lookups

based on the spl_kmem_obj_t tacked on the end of each object.
  This actually isn't so bad because we are now allocating large
  chunks for the slab and partitioning it ourselves.  So there's
  not a ton of wasted space.  We may suffer a performance hit
  however due to alignment issues.

- Remove remaining dependencies on the linux slab implementation.
  We're standing on our own now for better or worse.

- Rework slabs to be either kmem or vmem based.  If neither
  KMC_VMEM nor KMC_KMEM is specified we make a decent guess
  about what will work best for them based on the object
  size.  Additionally we provide a kmem_virt() function callers
  can use to see if they have a virtual or physical address.

- Minor fixups in the test suite.



git-svn-id: https://outreach.scidac.gov/svn/spl/trunk@141 7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c
This commit is contained in:
behlendo 2008-07-01 03:28:54 +00:00
parent 1c3832576d
commit a1502d76ae
4 changed files with 241 additions and 358 deletions

View File

@ -403,11 +403,14 @@ kmem_alloc_tryhard(size_t size, size_t *alloc_size, int kmflags)
/* /*
* Slab allocation interfaces * Slab allocation interfaces
*/ */
#undef KMC_NOTOUCH /* XXX: Unsupported */ #define KMC_NOTOUCH 0x00000001
#define KMC_NODEBUG 0x00000000 /* Default behavior */ #define KMC_NODEBUG 0x00000002 /* Default behavior */
#define KMC_NOMAGAZINE /* XXX: Unsupported */ #define KMC_NOMAGAZINE 0x00000004 /* XXX: No disable support available */
#define KMC_NOHASH /* XXX: Unsupported */ #define KMC_NOHASH 0x00000008 /* XXX: No hash available */
#define KMC_QCACHE /* XXX: Unsupported */ #define KMC_QCACHE 0x00000010 /* XXX: Unsupported */
#define KMC_KMEM 0x00000100 /* Use kmem cache */
#define KMC_VMEM 0x00000200 /* Use vmem cache */
#define KMC_OFFSLAB 0x00000400 /* Objects not on slab */
#define KMC_REAP_CHUNK 256 #define KMC_REAP_CHUNK 256
#define KMC_DEFAULT_SEEKS DEFAULT_SEEKS #define KMC_DEFAULT_SEEKS DEFAULT_SEEKS
@ -462,11 +465,6 @@ extern struct rw_semaphore spl_kmem_cache_sem;
#define SKS_MAGIC 0x22222222 #define SKS_MAGIC 0x22222222
#define SKC_MAGIC 0x2c2c2c2c #define SKC_MAGIC 0x2c2c2c2c
#define SPL_KMEM_CACHE_HASH_BITS 12
#define SPL_KMEM_CACHE_HASH_ELTS (1 << SPL_KMEM_CACHE_HASH_BITS)
#define SPL_KMEM_CACHE_HASH_SIZE (sizeof(struct hlist_head) * \
SPL_KMEM_CACHE_HASH_ELTS)
#define SPL_KMEM_CACHE_DELAY 5 #define SPL_KMEM_CACHE_DELAY 5
#define SPL_KMEM_CACHE_OBJ_PER_SLAB 32 #define SPL_KMEM_CACHE_OBJ_PER_SLAB 32
@ -488,7 +486,6 @@ typedef struct spl_kmem_obj {
void *sko_addr; /* Buffer address */ void *sko_addr; /* Buffer address */
struct spl_kmem_slab *sko_slab; /* Owned by slab */ struct spl_kmem_slab *sko_slab; /* Owned by slab */
struct list_head sko_list; /* Free object list linkage */ struct list_head sko_list; /* Free object list linkage */
struct hlist_node sko_hlist; /* Used object hash linkage */
} spl_kmem_obj_t; } spl_kmem_obj_t;
typedef struct spl_kmem_slab { typedef struct spl_kmem_slab {
@ -515,14 +512,9 @@ typedef struct spl_kmem_cache {
void *skc_vmp; /* Unused */ void *skc_vmp; /* Unused */
uint32_t skc_flags; /* Flags */ uint32_t skc_flags; /* Flags */
uint32_t skc_obj_size; /* Object size */ uint32_t skc_obj_size; /* Object size */
uint32_t skc_chunk_size; /* sizeof(*obj) + alignment */ uint32_t skc_slab_objs; /* Objects per slab */
uint32_t skc_slab_size; /* slab size */ uint32_t skc_slab_size; /* Slab size */
uint32_t skc_max_chunks; /* max chunks per slab */
uint32_t skc_delay; /* slab reclaim interval */ uint32_t skc_delay; /* slab reclaim interval */
uint32_t skc_hash_bits; /* Hash table bits */
uint32_t skc_hash_size; /* Hash table size */
uint32_t skc_hash_elts; /* Hash table elements */
struct hlist_head *skc_hash; /* Hash table address */
struct list_head skc_list; /* List of caches linkage */ struct list_head skc_list; /* List of caches linkage */
struct list_head skc_complete_list;/* Completely alloc'ed */ struct list_head skc_complete_list;/* Completely alloc'ed */
struct list_head skc_partial_list; /* Partially alloc'ed */ struct list_head skc_partial_list; /* Partially alloc'ed */
@ -536,8 +528,6 @@ typedef struct spl_kmem_cache {
uint64_t skc_obj_total; /* Obj total current */ uint64_t skc_obj_total; /* Obj total current */
uint64_t skc_obj_alloc; /* Obj alloc current */ uint64_t skc_obj_alloc; /* Obj alloc current */
uint64_t skc_obj_max; /* Obj max historic */ uint64_t skc_obj_max; /* Obj max historic */
uint64_t skc_hash_depth; /* Lazy hash depth */
uint64_t skc_hash_count; /* Hash entries current */
} spl_kmem_cache_t; } spl_kmem_cache_t;
extern spl_kmem_cache_t * extern spl_kmem_cache_t *
@ -561,6 +551,8 @@ void spl_kmem_fini(void);
#define kmem_cache_free(skc, obj) spl_kmem_cache_free(skc, obj) #define kmem_cache_free(skc, obj) spl_kmem_cache_free(skc, obj)
#define kmem_cache_reap_now(skc) spl_kmem_cache_reap_now(skc) #define kmem_cache_reap_now(skc) spl_kmem_cache_reap_now(skc)
#define kmem_reap() spl_kmem_reap() #define kmem_reap() spl_kmem_reap()
/* True when ptr lies in the range [VMALLOC_START, VMALLOC_END).
 * NOTE: ptr is expanded twice, so avoid arguments with side effects. */
#define kmem_virt(ptr) (((ptr) >= (void *)VMALLOC_START) && \
((ptr) < (void *)VMALLOC_END))
#ifdef HAVE_KMEM_CACHE_CREATE_DTOR #ifdef HAVE_KMEM_CACHE_CREATE_DTOR
#define __kmem_cache_create(name, size, align, flags, ctor, dtor) \ #define __kmem_cache_create(name, size, align, flags, ctor, dtor) \

View File

@ -114,10 +114,6 @@ EXPORT_SYMBOL(kmem_set_warning);
* shrink them via spl_slab_reclaim() when they are wasting lots * shrink them via spl_slab_reclaim() when they are wasting lots
* of space. Currently this process is driven by the reapers. * of space. Currently this process is driven by the reapers.
* *
* XXX: Implement a resizable used object hash. Currently the hash
* is statically sized for thousands of objects but it should
* grow based on observed worst case slab depth.
*
* XXX: Improve the partial slab list by carefully maintaining a * XXX: Improve the partial slab list by carefully maintaining a
* strict ordering of fullest to emptiest slabs based on * strict ordering of fullest to emptiest slabs based on
* the slab reference count. This gaurentees the when freeing * the slab reference count. This gaurentees the when freeing
@ -134,20 +130,8 @@ EXPORT_SYMBOL(kmem_set_warning);
* XXX: Proper hardware cache alignment would be good too. * XXX: Proper hardware cache alignment would be good too.
*/ */
/* Ensure the __kmem_cache_create/__kmem_cache_destroy macros are
* removed here to prevent a recursive substitution, we want to call
* the native linux version.
*/
#undef kmem_cache_t
#undef kmem_cache_create
#undef kmem_cache_destroy
#undef kmem_cache_alloc
#undef kmem_cache_free
struct list_head spl_kmem_cache_list; /* List of caches */ struct list_head spl_kmem_cache_list; /* List of caches */
struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */ struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */
static kmem_cache_t *spl_slab_cache; /* Cache for slab structs */
static kmem_cache_t *spl_obj_cache; /* Cache for obj structs */
static int spl_cache_flush(spl_kmem_cache_t *skc, static int spl_cache_flush(spl_kmem_cache_t *skc,
spl_kmem_magazine_t *skm, int flush); spl_kmem_magazine_t *skm, int flush);
@ -163,182 +147,121 @@ static struct shrinker spl_kmem_cache_shrinker = {
}; };
#endif #endif
static void static void *
spl_slab_init(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks) kv_alloc(spl_kmem_cache_t *skc, int size, int flags)
{ {
void *ptr;
if (skc->skc_flags & KMC_KMEM) {
if (size > (2 * PAGE_SIZE)) {
ptr = (void *)__get_free_pages(flags, get_order(size));
} else
ptr = kmem_alloc(size, flags);
} else {
ptr = vmem_alloc(size, flags);
}
return ptr;
}
/*
 * Release a buffer using the release routine selected by the cache
 * flags and the buffer size.
 *
 * skc  - cache whose skc_flags select the allocator family
 * ptr  - buffer to release
 * size - size of the buffer in bytes
 *
 * Caches without KMC_KMEM always go through vmem_free().  KMC_KMEM
 * caches use kmem_free() for buffers of at most two pages and
 * free_pages() for anything larger.
 */
static void
kv_free(spl_kmem_cache_t *skc, void *ptr, int size)
{
	/* Not a kmem backed cache, hand the buffer back to vmem */
	if (!(skc->skc_flags & KMC_KMEM)) {
		vmem_free(ptr, size);
		return;
	}

	/* Small kmem buffers are released with kmem_free() */
	if (size <= (2 * PAGE_SIZE)) {
		kmem_free(ptr, size);
		return;
	}

	/* Anything larger is released by page order */
	free_pages((unsigned long)ptr, get_order(size));
}
static spl_kmem_slab_t *
spl_slab_alloc(spl_kmem_cache_t *skc, int flags)
{
spl_kmem_slab_t *sks;
spl_kmem_obj_t *sko, *n;
void *base, *obj;
int i, size, rc = 0;
/* It's important that we pack the spl_kmem_obj_t structure
* and the actual objects in to one large address space
* to minimize the number of calls to the allocator. It
* is far better to do a few large allocations and then
* subdivide it ourselves. Now which allocator we use
* requires balancing a few trade offs.
*
* For small objects we use kmem_alloc() because as long
* as you are only requesting a small number of pages
* (ideally just one) it's cheap. However, when you start
* requesting multiple pages kmem_alloc() gets increasingly
* expensive since it requires contiguous pages. For this
* reason we shift to vmem_alloc() for slabs of large
* objects which removes the need for contiguous pages.
* We do not use vmem_alloc() in all cases because there
* is significant locking overhead in __get_vm_area_node().
* This function takes a single global lock when acquiring
* an available virtual address range which serializes all
* vmem_alloc()'s for all slab caches. Using slightly
* different allocation functions for small and large
* objects should give us the best of both worlds.
*
* sks struct: sizeof(spl_kmem_slab_t)
* obj data: skc->skc_obj_size
* obj struct: sizeof(spl_kmem_obj_t)
* <N obj data + obj structs>
*
* XXX: It would probably be a good idea to more carefully
* align these data structures in memory.
*/
base = kv_alloc(skc, skc->skc_slab_size, flags);
if (base == NULL)
RETURN(NULL);
sks = (spl_kmem_slab_t *)base;
sks->sks_magic = SKS_MAGIC; sks->sks_magic = SKS_MAGIC;
sks->sks_objs = SPL_KMEM_CACHE_OBJ_PER_SLAB; sks->sks_objs = skc->skc_slab_objs;
sks->sks_age = jiffies; sks->sks_age = jiffies;
sks->sks_cache = skc; sks->sks_cache = skc;
INIT_LIST_HEAD(&sks->sks_list); INIT_LIST_HEAD(&sks->sks_list);
INIT_LIST_HEAD(&sks->sks_free_list); INIT_LIST_HEAD(&sks->sks_free_list);
sks->sks_ref = 0; sks->sks_ref = 0;
} size = sizeof(spl_kmem_obj_t) + skc->skc_obj_size;
static int
spl_slab_alloc_kmem(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks, int flags)
{
spl_kmem_obj_t *sko, *n;
int i, rc = 0;
/* This is based on the linux slab cache for now simply because
* it means I get slab coloring, hardware cache alignment, etc
* for free. There's no reason we can't do this ourselves. And
* we probably should at in the future. For now I'll just
* leverage the existing linux slab here. */
for (i = 0; i < sks->sks_objs; i++) {
sko = kmem_cache_alloc(spl_obj_cache, flags);
if (sko == NULL) {
rc = -ENOMEM;
break;
}
sko->sko_addr = kmem_alloc(skc->skc_obj_size, flags);
if (sko->sko_addr == NULL) {
kmem_cache_free(spl_obj_cache, sko);
rc = -ENOMEM;
break;
}
sko->sko_magic = SKO_MAGIC;
sko->sko_slab = sks;
INIT_LIST_HEAD(&sko->sko_list);
INIT_HLIST_NODE(&sko->sko_hlist);
list_add(&sko->sko_list, &sks->sks_free_list);
}
/* Unable to fully construct slab, unwind everything */
if (rc) {
list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) {
ASSERT(sko->sko_magic == SKO_MAGIC);
kmem_free(sko->sko_addr, skc->skc_obj_size);
list_del(&sko->sko_list);
kmem_cache_free(spl_obj_cache, sko);
}
}
RETURN(rc);
}
static spl_kmem_slab_t *
spl_slab_alloc_vmem(spl_kmem_cache_t *skc, int flags)
{
spl_kmem_slab_t *sks;
spl_kmem_obj_t *sko, *sko_base;
void *slab, *obj, *obj_base;
int i, size;
/* For large vmem_alloc'ed buffers it's important that we pack the
* spl_kmem_obj_t structure and the actual objects in to one large
* virtual address zone to minimize the number of calls to
* vmalloc(). Mapping the virtual address in done under a single
* global lock which walks a list of all virtual zones. So doing
* lots of allocations simply results in lock contention and a
* longer list of mapped addresses. It is far better to do a
* few large allocations and then subdivide it ourselves. The
* large vmem_alloc'ed space is divied as follows:
*
* 1 slab struct: sizeof(spl_kmem_slab_t)
* N obj structs: sizeof(spl_kmem_obj_t) * skc->skc_objs
* N objects: skc->skc_obj_size * skc->skc_objs
*
* XXX: It would probably be a good idea to more carefully
* align the starts of these objects in memory.
*/
size = sizeof(spl_kmem_slab_t) + SPL_KMEM_CACHE_OBJ_PER_SLAB *
(skc->skc_obj_size + sizeof(spl_kmem_obj_t));
slab = vmem_alloc(size, flags);
if (slab == NULL)
RETURN(NULL);
sks = (spl_kmem_slab_t *)slab;
spl_slab_init(skc, sks);
sko_base = (spl_kmem_obj_t *)(slab + sizeof(spl_kmem_slab_t));
obj_base = (void *)sko_base + sizeof(spl_kmem_obj_t) * sks->sks_objs;
for (i = 0; i < sks->sks_objs; i++) { for (i = 0; i < sks->sks_objs; i++) {
sko = &sko_base[i]; if (skc->skc_flags & KMC_OFFSLAB) {
obj = obj_base + skc->skc_obj_size * i; obj = kv_alloc(skc, size, flags);
if (!obj)
GOTO(out, rc = -ENOMEM);
} else {
obj = base + sizeof(spl_kmem_slab_t) + i * size;
}
sko = obj + skc->skc_obj_size;
sko->sko_addr = obj; sko->sko_addr = obj;
sko->sko_magic = SKO_MAGIC; sko->sko_magic = SKO_MAGIC;
sko->sko_slab = sks; sko->sko_slab = sks;
INIT_LIST_HEAD(&sko->sko_list); INIT_LIST_HEAD(&sko->sko_list);
INIT_HLIST_NODE(&sko->sko_hlist);
list_add_tail(&sko->sko_list, &sks->sks_free_list); list_add_tail(&sko->sko_list, &sks->sks_free_list);
} }
RETURN(sks);
}
static spl_kmem_slab_t *
spl_slab_alloc(spl_kmem_cache_t *skc, int flags) {
spl_kmem_slab_t *sks;
spl_kmem_obj_t *sko;
int rc;
ENTRY;
/* Objects less than a page can use kmem_alloc() and avoid
* the locking overhead in __get_vm_area_node() when locking
* for a free address. For objects over a page we use
* vmem_alloc() because it is usually worth paying this
* overhead to avoid the need to find contigeous pages.
* This should give us the best of both worlds. */
if (skc->skc_obj_size <= PAGE_SIZE) {
sks = kmem_cache_alloc(spl_slab_cache, flags);
if (sks == NULL)
GOTO(out, sks = NULL);
spl_slab_init(skc, sks);
rc = spl_slab_alloc_kmem(skc, sks, flags);
if (rc) {
kmem_cache_free(spl_slab_cache, sks);
GOTO(out, sks = NULL);
}
} else {
sks = spl_slab_alloc_vmem(skc, flags);
if (sks == NULL)
GOTO(out, sks = NULL);
}
ASSERT(sks);
list_for_each_entry(sko, &sks->sks_free_list, sko_list) list_for_each_entry(sko, &sks->sks_free_list, sko_list)
if (skc->skc_ctor) if (skc->skc_ctor)
skc->skc_ctor(sko->sko_addr, skc->skc_private, flags); skc->skc_ctor(sko->sko_addr, skc->skc_private, flags);
out: out:
if (rc) {
if (skc->skc_flags & KMC_OFFSLAB)
list_for_each_entry_safe(sko,n,&sks->sks_free_list,sko_list)
kv_free(skc, sko->sko_addr, size);
kv_free(skc, base, skc->skc_slab_size);
sks = NULL;
}
RETURN(sks); RETURN(sks);
} }
static void
spl_slab_free_kmem(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks)
{
spl_kmem_obj_t *sko, *n;
ASSERT(skc->skc_magic == SKC_MAGIC);
ASSERT(sks->sks_magic == SKS_MAGIC);
list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) {
ASSERT(sko->sko_magic == SKO_MAGIC);
kmem_free(sko->sko_addr, skc->skc_obj_size);
list_del(&sko->sko_list);
kmem_cache_free(spl_obj_cache, sko);
}
kmem_cache_free(spl_slab_cache, sks);
}
static void
spl_slab_free_vmem(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks)
{
ASSERT(skc->skc_magic == SKC_MAGIC);
ASSERT(sks->sks_magic == SKS_MAGIC);
vmem_free(sks, SPL_KMEM_CACHE_OBJ_PER_SLAB *
(skc->skc_obj_size + sizeof(spl_kmem_obj_t)));
}
/* Removes slab from complete or partial list, so it must /* Removes slab from complete or partial list, so it must
* be called with the 'skc->skc_lock' held. * be called with the 'skc->skc_lock' held.
*/ */
@ -346,6 +269,7 @@ static void
spl_slab_free(spl_kmem_slab_t *sks) { spl_slab_free(spl_kmem_slab_t *sks) {
spl_kmem_cache_t *skc; spl_kmem_cache_t *skc;
spl_kmem_obj_t *sko, *n; spl_kmem_obj_t *sko, *n;
int size;
ENTRY; ENTRY;
ASSERT(sks->sks_magic == SKS_MAGIC); ASSERT(sks->sks_magic == SKS_MAGIC);
@ -358,17 +282,20 @@ spl_slab_free(spl_kmem_slab_t *sks) {
skc->skc_obj_total -= sks->sks_objs; skc->skc_obj_total -= sks->sks_objs;
skc->skc_slab_total--; skc->skc_slab_total--;
list_del(&sks->sks_list); list_del(&sks->sks_list);
size = sizeof(spl_kmem_obj_t) + skc->skc_obj_size;
/* Run destructors slab is being released */ /* Run destructors slab is being released */
list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) {
ASSERT(sko->sko_magic == SKO_MAGIC);
if (skc->skc_dtor) if (skc->skc_dtor)
skc->skc_dtor(sko->sko_addr, skc->skc_private); skc->skc_dtor(sko->sko_addr, skc->skc_private);
if (skc->skc_obj_size <= PAGE_SIZE) if (skc->skc_flags & KMC_OFFSLAB)
spl_slab_free_kmem(skc, sks); kv_free(skc, sko->sko_addr, size);
else }
spl_slab_free_vmem(skc, sks);
kv_free(skc, sks, skc->skc_slab_size);
EXIT; EXIT;
} }
@ -449,6 +376,7 @@ spl_magazine_alloc(spl_kmem_cache_t *skc, int node)
skm->skm_avail = 0; skm->skm_avail = 0;
skm->skm_size = skc->skc_mag_size; skm->skm_size = skc->skc_mag_size;
skm->skm_refill = skc->skc_mag_refill; skm->skm_refill = skc->skc_mag_refill;
if (!(skc->skc_flags & KMC_NOTOUCH))
skm->skm_age = jiffies; skm->skm_age = jiffies;
} }
@ -511,9 +439,14 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
void *priv, void *vmp, int flags) void *priv, void *vmp, int flags)
{ {
spl_kmem_cache_t *skc; spl_kmem_cache_t *skc;
int i, rc, kmem_flags = KM_SLEEP; uint32_t slab_max, slab_size, slab_objs;
int rc, kmem_flags = KM_SLEEP;
ENTRY; ENTRY;
ASSERTF(!(flags & KMC_NOMAGAZINE), "Bad KMC_NOMAGAZINE (%x)\n", flags);
ASSERTF(!(flags & KMC_NOHASH), "Bad KMC_NOHASH (%x)\n", flags);
ASSERTF(!(flags & KMC_QCACHE), "Bad KMC_QCACHE (%x)\n", flags);
/* We may be called when there is a non-zero preempt_count or /* We may be called when there is a non-zero preempt_count or
* interrupts are disabled is which case we must not sleep. * interrupts are disabled is which case we must not sleep.
*/ */
@ -541,25 +474,8 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
skc->skc_vmp = vmp; skc->skc_vmp = vmp;
skc->skc_flags = flags; skc->skc_flags = flags;
skc->skc_obj_size = size; skc->skc_obj_size = size;
skc->skc_chunk_size = 0; /* XXX: Needed only when implementing */
skc->skc_slab_size = 0; /* small slab object optimizations */
skc->skc_max_chunks = 0; /* which are yet supported. */
skc->skc_delay = SPL_KMEM_CACHE_DELAY; skc->skc_delay = SPL_KMEM_CACHE_DELAY;
skc->skc_hash_bits = SPL_KMEM_CACHE_HASH_BITS;
skc->skc_hash_size = SPL_KMEM_CACHE_HASH_SIZE;
skc->skc_hash_elts = SPL_KMEM_CACHE_HASH_ELTS;
skc->skc_hash = (struct hlist_head *)
vmem_alloc(skc->skc_hash_size, kmem_flags);
if (skc->skc_hash == NULL) {
kmem_free(skc->skc_name, skc->skc_name_size);
kmem_free(skc, sizeof(*skc));
RETURN(NULL);
}
for (i = 0; i < skc->skc_hash_elts; i++)
INIT_HLIST_HEAD(&skc->skc_hash[i]);
INIT_LIST_HEAD(&skc->skc_list); INIT_LIST_HEAD(&skc->skc_list);
INIT_LIST_HEAD(&skc->skc_complete_list); INIT_LIST_HEAD(&skc->skc_complete_list);
INIT_LIST_HEAD(&skc->skc_partial_list); INIT_LIST_HEAD(&skc->skc_partial_list);
@ -573,12 +489,37 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
skc->skc_obj_total = 0; skc->skc_obj_total = 0;
skc->skc_obj_alloc = 0; skc->skc_obj_alloc = 0;
skc->skc_obj_max = 0; skc->skc_obj_max = 0;
skc->skc_hash_depth = 0;
skc->skc_hash_count = 0; /* If none passed select a cache type based on object size */
if (!(skc->skc_flags & (KMC_KMEM | KMC_VMEM))) {
if (skc->skc_obj_size < (PAGE_SIZE / 8)) {
skc->skc_flags |= KMC_KMEM;
} else {
skc->skc_flags |= KMC_VMEM;
}
}
/* Size slabs properly so ensure they are not too large */
slab_max = ((uint64_t)1 << (MAX_ORDER - 1)) * PAGE_SIZE;
if (skc->skc_flags & KMC_OFFSLAB) {
skc->skc_slab_objs = SPL_KMEM_CACHE_OBJ_PER_SLAB;
skc->skc_slab_size = sizeof(spl_kmem_slab_t);
ASSERT(skc->skc_obj_size < slab_max);
} else {
slab_objs = SPL_KMEM_CACHE_OBJ_PER_SLAB + 1;
do {
slab_objs--;
slab_size = sizeof(spl_kmem_slab_t) + slab_objs *
(skc->skc_obj_size+sizeof(spl_kmem_obj_t));
} while (slab_size > slab_max);
skc->skc_slab_objs = slab_objs;
skc->skc_slab_size = slab_size;
}
rc = spl_magazine_create(skc); rc = spl_magazine_create(skc);
if (rc) { if (rc) {
vmem_free(skc->skc_hash, skc->skc_hash_size);
kmem_free(skc->skc_name, skc->skc_name_size); kmem_free(skc->skc_name, skc->skc_name_size);
kmem_free(skc, sizeof(*skc)); kmem_free(skc, sizeof(*skc));
RETURN(NULL); RETURN(NULL);
@ -592,9 +533,6 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
} }
EXPORT_SYMBOL(spl_kmem_cache_create); EXPORT_SYMBOL(spl_kmem_cache_create);
/* The caller must ensure there are no racing calls to
* spl_kmem_cache_alloc() for this spl_kmem_cache_t.
*/
void void
spl_kmem_cache_destroy(spl_kmem_cache_t *skc) spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
{ {
@ -613,13 +551,15 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
/* Validate there are no objects in use and free all the /* Validate there are no objects in use and free all the
* spl_kmem_slab_t, spl_kmem_obj_t, and object buffers. */ * spl_kmem_slab_t, spl_kmem_obj_t, and object buffers. */
ASSERT(list_empty(&skc->skc_complete_list)); ASSERT(list_empty(&skc->skc_complete_list));
ASSERTF(skc->skc_hash_count == 0, "skc->skc_hash_count=%d\n", ASSERT(skc->skc_slab_alloc == 0);
skc->skc_hash_count); ASSERT(skc->skc_obj_alloc == 0);
list_for_each_entry_safe(sks, m, &skc->skc_partial_list, sks_list) list_for_each_entry_safe(sks, m, &skc->skc_partial_list, sks_list)
spl_slab_free(sks); spl_slab_free(sks);
vmem_free(skc->skc_hash, skc->skc_hash_size); ASSERT(skc->skc_slab_total == 0);
ASSERT(skc->skc_obj_total == 0);
kmem_free(skc->skc_name, skc->skc_name_size); kmem_free(skc->skc_name, skc->skc_name_size);
spin_unlock(&skc->skc_lock); spin_unlock(&skc->skc_lock);
@ -629,64 +569,25 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
} }
EXPORT_SYMBOL(spl_kmem_cache_destroy); EXPORT_SYMBOL(spl_kmem_cache_destroy);
/* The kernel provided hash_ptr() function behaves exceptionally badly
* when all the addresses are page aligned which is likely the case
* here. To avoid this issue shift off the low order non-random bits.
*/
static unsigned long
spl_hash_ptr(void *ptr, unsigned int bits)
{
return hash_long((unsigned long)ptr >> PAGE_SHIFT, bits);
}
static spl_kmem_obj_t *
spl_hash_obj(spl_kmem_cache_t *skc, void *obj)
{
struct hlist_node *node;
spl_kmem_obj_t *sko = NULL;
unsigned long key = spl_hash_ptr(obj, skc->skc_hash_bits);
int i = 0;
ASSERT(skc->skc_magic == SKC_MAGIC);
ASSERT(spin_is_locked(&skc->skc_lock));
hlist_for_each_entry(sko, node, &skc->skc_hash[key], sko_hlist) {
if (unlikely((++i) > skc->skc_hash_depth))
skc->skc_hash_depth = i;
if (sko->sko_addr == obj) {
ASSERT(sko->sko_magic == SKO_MAGIC);
RETURN(sko);
}
}
RETURN(NULL);
}
static void * static void *
spl_cache_obj(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks) spl_cache_obj(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks)
{ {
spl_kmem_obj_t *sko; spl_kmem_obj_t *sko;
unsigned long key;
ASSERT(skc->skc_magic == SKC_MAGIC); ASSERT(skc->skc_magic == SKC_MAGIC);
ASSERT(sks->sks_magic == SKS_MAGIC); ASSERT(sks->sks_magic == SKS_MAGIC);
ASSERT(spin_is_locked(&skc->skc_lock)); ASSERT(spin_is_locked(&skc->skc_lock));
sko = list_entry((&sks->sks_free_list)->next,spl_kmem_obj_t,sko_list); sko = list_entry(sks->sks_free_list.next, spl_kmem_obj_t, sko_list);
ASSERT(sko->sko_magic == SKO_MAGIC); ASSERT(sko->sko_magic == SKO_MAGIC);
ASSERT(sko->sko_addr != NULL); ASSERT(sko->sko_addr != NULL);
/* Remove from sks_free_list and add to used hash */ /* Remove from sks_free_list */
list_del_init(&sko->sko_list); list_del_init(&sko->sko_list);
key = spl_hash_ptr(sko->sko_addr, skc->skc_hash_bits);
hlist_add_head(&sko->sko_hlist, &skc->skc_hash[key]);
sks->sks_age = jiffies; sks->sks_age = jiffies;
sks->sks_ref++; sks->sks_ref++;
skc->skc_obj_alloc++; skc->skc_obj_alloc++;
skc->skc_hash_count++;
/* Track max obj usage statistics */ /* Track max obj usage statistics */
if (skc->skc_obj_alloc > skc->skc_obj_max) if (skc->skc_obj_alloc > skc->skc_obj_max)
@ -818,22 +719,17 @@ spl_cache_shrink(spl_kmem_cache_t *skc, void *obj)
ASSERT(skc->skc_magic == SKC_MAGIC); ASSERT(skc->skc_magic == SKC_MAGIC);
ASSERT(spin_is_locked(&skc->skc_lock)); ASSERT(spin_is_locked(&skc->skc_lock));
sko = spl_hash_obj(skc, obj); sko = obj + skc->skc_obj_size;
ASSERTF(sko, "Obj %p missing from in-use hash (%d/%d) for cache %s\n", ASSERT(sko->sko_magic == SKO_MAGIC);
obj, skc->skc_hash_depth, skc->skc_hash_count, skc->skc_name);
sks = sko->sko_slab; sks = sko->sko_slab;
ASSERTF(sks, "Obj %p/%p linked to invalid slab for cache %s\n", ASSERT(sks->sks_magic == SKS_MAGIC);
obj, sko, skc->skc_name);
ASSERT(sks->sks_cache == skc); ASSERT(sks->sks_cache == skc);
hlist_del_init(&sko->sko_hlist);
list_add(&sko->sko_list, &sks->sks_free_list); list_add(&sko->sko_list, &sks->sks_free_list);
sks->sks_age = jiffies; sks->sks_age = jiffies;
sks->sks_ref--; sks->sks_ref--;
skc->skc_obj_alloc--; skc->skc_obj_alloc--;
skc->skc_hash_count--;
/* Move slab to skc_partial_list when no longer full. Slabs /* Move slab to skc_partial_list when no longer full. Slabs
* are added to the head to keep the partial list is quasi-full * are added to the head to keep the partial list is quasi-full
@ -906,6 +802,7 @@ restart:
if (likely(skm->skm_avail)) { if (likely(skm->skm_avail)) {
/* Object available in CPU cache, use it */ /* Object available in CPU cache, use it */
obj = skm->skm_objs[--skm->skm_avail]; obj = skm->skm_objs[--skm->skm_avail];
if (!(skc->skc_flags & KMC_NOTOUCH))
skm->skm_age = jiffies; skm->skm_age = jiffies;
} else { } else {
/* Per-CPU cache empty, directly allocate from /* Per-CPU cache empty, directly allocate from
@ -1012,71 +909,6 @@ spl_kmem_reap(void)
} }
EXPORT_SYMBOL(spl_kmem_reap); EXPORT_SYMBOL(spl_kmem_reap);
int
spl_kmem_init(void)
{
int rc = 0;
ENTRY;
init_rwsem(&spl_kmem_cache_sem);
INIT_LIST_HEAD(&spl_kmem_cache_list);
spl_slab_cache = NULL;
spl_obj_cache = NULL;
spl_slab_cache = __kmem_cache_create("spl_slab_cache",
sizeof(spl_kmem_slab_t),
0, 0, NULL, NULL);
if (spl_slab_cache == NULL)
GOTO(out_cache, rc = -ENOMEM);
spl_obj_cache = __kmem_cache_create("spl_obj_cache",
sizeof(spl_kmem_obj_t),
0, 0, NULL, NULL);
if (spl_obj_cache == NULL)
GOTO(out_cache, rc = -ENOMEM);
#ifdef HAVE_SET_SHRINKER
spl_kmem_cache_shrinker = set_shrinker(KMC_DEFAULT_SEEKS,
spl_kmem_cache_generic_shrinker);
if (spl_kmem_cache_shrinker == NULL)
GOTO(out_cache, rc = -ENOMEM);
#else
register_shrinker(&spl_kmem_cache_shrinker);
#endif
#ifdef DEBUG_KMEM
atomic64_set(&kmem_alloc_used, 0);
atomic64_set(&vmem_alloc_used, 0);
#ifdef DEBUG_KMEM_TRACKING
{ int i;
spin_lock_init(&kmem_lock);
INIT_LIST_HEAD(&kmem_list);
for (i = 0; i < KMEM_TABLE_SIZE; i++)
INIT_HLIST_HEAD(&kmem_table[i]);
spin_lock_init(&vmem_lock);
INIT_LIST_HEAD(&vmem_list);
for (i = 0; i < VMEM_TABLE_SIZE; i++)
INIT_HLIST_HEAD(&vmem_table[i]);
}
#endif
#endif
RETURN(rc);
out_cache:
if (spl_obj_cache)
(void)kmem_cache_destroy(spl_obj_cache);
if (spl_slab_cache)
(void)kmem_cache_destroy(spl_slab_cache);
RETURN(rc);
}
#if defined(DEBUG_KMEM) && defined(DEBUG_KMEM_TRACKING) #if defined(DEBUG_KMEM) && defined(DEBUG_KMEM_TRACKING)
static char * static char *
spl_sprintf_addr(kmem_debug_t *kd, char *str, int len, int min) spl_sprintf_addr(kmem_debug_t *kd, char *str, int len, int min)
@ -1119,12 +951,28 @@ spl_sprintf_addr(kmem_debug_t *kd, char *str, int len, int min)
return str; return str;
} }
/*
 * Initialize the debug tracking state for one allocator: its spin
 * lock, its tracking list head, and the buckets of its hash table.
 *
 * list - tracking list head to initialize (&kmem_list or &vmem_list)
 * lock - spin lock to initialize
 * size - number of hash buckets to initialize
 *
 * Always returns 0.
 */
static int
spl_kmem_init_tracking(struct list_head *list, spinlock_t *lock, int size)
{
	struct hlist_head *table;
	int i;
	ENTRY;

	/* The signature carries no table pointer, so select the hash
	 * table that pairs with the list being initialized.  Using
	 * kmem_table unconditionally never touched vmem_table and
	 * indexed kmem_table with the vmem bucket count. */
	table = (list == &vmem_list) ? vmem_table : kmem_table;

	spin_lock_init(lock);
	INIT_LIST_HEAD(list);

	for (i = 0; i < size; i++)
		INIT_HLIST_HEAD(&table[i]);

	RETURN(0);
}
static void static void
spl_kmem_fini_tracking(struct list_head *list, spinlock_t *lock) spl_kmem_fini_tracking(struct list_head *list, spinlock_t *lock)
{ {
unsigned long flags; unsigned long flags;
kmem_debug_t *kd; kmem_debug_t *kd;
char str[17]; char str[17];
ENTRY;
spin_lock_irqsave(lock, flags); spin_lock_irqsave(lock, flags);
if (!list_empty(list)) if (!list_empty(list))
@ -1138,11 +986,42 @@ spl_kmem_fini_tracking(struct list_head *list, spinlock_t *lock)
kd->kd_func, kd->kd_line); kd->kd_func, kd->kd_line);
spin_unlock_irqrestore(lock, flags); spin_unlock_irqrestore(lock, flags);
EXIT;
} }
#else /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */ #else /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */
#define spl_kmem_init_tracking(list, lock, size)
#define spl_kmem_fini_tracking(list, lock) #define spl_kmem_fini_tracking(list, lock)
#endif /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */ #endif /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */
/*
 * One-time kmem setup: initializes the global cache list and its
 * rw-semaphore, registers the cache shrinker, and under DEBUG_KMEM
 * sets the kmem/vmem usage counters to zero and initializes the
 * tracking state for both allocators.
 *
 * Returns 0 on success, or -ENOMEM when set_shrinker() returns NULL
 * (HAVE_SET_SHRINKER builds only).
 */
int
spl_kmem_init(void)
{
int rc = 0;
ENTRY;
init_rwsem(&spl_kmem_cache_sem);
INIT_LIST_HEAD(&spl_kmem_cache_list);
#ifdef HAVE_SET_SHRINKER
/* set_shrinker() hands back the shrinker handle; NULL is treated
 * as an allocation failure. */
spl_kmem_cache_shrinker = set_shrinker(KMC_DEFAULT_SEEKS,
spl_kmem_cache_generic_shrinker);
if (spl_kmem_cache_shrinker == NULL)
GOTO(out, rc = -ENOMEM);
#else
register_shrinker(&spl_kmem_cache_shrinker);
#endif
#ifdef DEBUG_KMEM
atomic64_set(&kmem_alloc_used, 0);
atomic64_set(&vmem_alloc_used, 0);
/* These expand to nothing unless DEBUG_KMEM_TRACKING is also defined;
 * the return value is ignored here. */
spl_kmem_init_tracking(&kmem_list, &kmem_lock, KMEM_TABLE_SIZE);
spl_kmem_init_tracking(&vmem_list, &vmem_lock, VMEM_TABLE_SIZE);
#endif
/* NOTE(review): 'out' is only referenced when HAVE_SET_SHRINKER is
 * defined; other builds will see an unused-label warning. */
out:
RETURN(rc);
}
void void
spl_kmem_fini(void) spl_kmem_fini(void)
{ {
@ -1171,8 +1050,5 @@ spl_kmem_fini(void)
unregister_shrinker(&spl_kmem_cache_shrinker); unregister_shrinker(&spl_kmem_cache_shrinker);
#endif #endif
(void)kmem_cache_destroy(spl_obj_cache);
(void)kmem_cache_destroy(spl_slab_cache);
EXIT; EXIT;
} }

View File

@ -577,14 +577,10 @@ slab_seq_show(struct seq_file *f, void *p)
spin_lock(&skc->skc_lock); spin_lock(&skc->skc_lock);
seq_printf(f, "%-36s ", skc->skc_name); seq_printf(f, "%-36s ", skc->skc_name);
seq_printf(f, "%u %u %u - %u %u %u - " seq_printf(f, "%u %u %u - %lu %lu %lu - %lu %lu %lu - %lu %lu %lu\n",
"%lu %lu %lu - %lu %lu %lu - %lu %lu %lu - %lu %lu\n",
(unsigned)skc->skc_obj_size, (unsigned)skc->skc_obj_size,
(unsigned)skc->skc_chunk_size, (unsigned)skc->skc_slab_objs,
(unsigned)skc->skc_slab_size, (unsigned)skc->skc_slab_size,
(unsigned)skc->skc_hash_bits,
(unsigned)skc->skc_hash_size,
(unsigned)skc->skc_hash_elts,
(long unsigned)skc->skc_slab_fail, (long unsigned)skc->skc_slab_fail,
(long unsigned)skc->skc_slab_create, (long unsigned)skc->skc_slab_create,
(long unsigned)skc->skc_slab_destroy, (long unsigned)skc->skc_slab_destroy,
@ -593,9 +589,7 @@ slab_seq_show(struct seq_file *f, void *p)
(long unsigned)skc->skc_slab_max, (long unsigned)skc->skc_slab_max,
(long unsigned)skc->skc_obj_total, (long unsigned)skc->skc_obj_total,
(long unsigned)skc->skc_obj_alloc, (long unsigned)skc->skc_obj_alloc,
(long unsigned)skc->skc_obj_max, (long unsigned)skc->skc_obj_max);
(long unsigned)skc->skc_hash_depth,
(long unsigned)skc->skc_hash_count);
spin_unlock(&skc->skc_lock); spin_unlock(&skc->skc_lock);

View File

@ -371,18 +371,40 @@ out_free:
return rc; return rc;
} }
/* Validate small object cache behavior for dynamic/kmem/vmem caches */
static int static int
splat_kmem_test5(struct file *file, void *arg) splat_kmem_test5(struct file *file, void *arg)
{ {
return splat_kmem_cache_size_test(file, arg, SPLAT_KMEM_TEST5_NAME, char *name = SPLAT_KMEM_TEST5_NAME;
sizeof(kmem_cache_data_t) * 1, 0); int rc;
rc = splat_kmem_cache_size_test(file, arg, name, 128, 0);
if (rc)
return rc;
rc = splat_kmem_cache_size_test(file, arg, name, 128, KMC_KMEM);
if (rc)
return rc;
return splat_kmem_cache_size_test(file, arg, name, 128, KMC_VMEM);
} }
/* Validate large object cache behavior for dynamic/kmem/vmem caches */
static int static int
splat_kmem_test6(struct file *file, void *arg) splat_kmem_test6(struct file *file, void *arg)
{ {
return splat_kmem_cache_size_test(file, arg, SPLAT_KMEM_TEST6_NAME, char *name = SPLAT_KMEM_TEST6_NAME;
sizeof(kmem_cache_data_t) * 1024, 0); int rc;
rc = splat_kmem_cache_size_test(file, arg, name, 128 * 1024, 0);
if (rc)
return rc;
rc = splat_kmem_cache_size_test(file, arg, name, 128 * 1024, KMC_KMEM);
if (rc)
return rc;
return splat_kmem_cache_size_test(file, arg, name, 128 * 1028, KMC_VMEM);
} }
static void static void
@ -533,11 +555,12 @@ splat_kmem_test8_thread(void *arg)
vmem_free(objs, count * sizeof(void *)); vmem_free(objs, count * sizeof(void *));
out: out:
spin_lock(&kcp->kcp_lock); spin_lock(&kcp->kcp_lock);
kcp->kcp_threads--;
if (!kcp->kcp_rc) if (!kcp->kcp_rc)
kcp->kcp_rc = rc; kcp->kcp_rc = rc;
if (--kcp->kcp_threads == 0)
wake_up(&kcp->kcp_waitq); wake_up(&kcp->kcp_waitq);
spin_unlock(&kcp->kcp_lock); spin_unlock(&kcp->kcp_lock);
thread_exit(); thread_exit();
@ -573,7 +596,7 @@ splat_kmem_test8_sc(struct file *file, void *arg, int size, int count)
splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%-22s %s", "name", splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%-22s %s", "name",
"time (sec)\tslabs \tobjs \thash\n"); "time (sec)\tslabs \tobjs \thash\n");
splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%-22s %s", "", splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%-22s %s", "",
" \ttot/max/calc\ttot/max/calc\tsize/depth\n"); " \ttot/max/calc\ttot/max/calc\n");
for (i = 1; i <= count; i *= 2) { for (i = 1; i <= count; i *= 2) {
kcp.kcp_size = size; kcp.kcp_size = size;
@ -611,7 +634,7 @@ splat_kmem_test8_sc(struct file *file, void *arg, int size, int count)
delta = timespec_sub(stop, start); delta = timespec_sub(stop, start);
splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%-22s %2ld.%09ld\t" splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%-22s %2ld.%09ld\t"
"%lu/%lu/%lu\t%lu/%lu/%lu\t%lu/%lu\n", "%lu/%lu/%lu\t%lu/%lu/%lu\n",
kcp.kcp_cache->skc_name, kcp.kcp_cache->skc_name,
delta.tv_sec, delta.tv_nsec, delta.tv_sec, delta.tv_nsec,
(unsigned long)kcp.kcp_cache->skc_slab_total, (unsigned long)kcp.kcp_cache->skc_slab_total,
@ -620,9 +643,7 @@ splat_kmem_test8_sc(struct file *file, void *arg, int size, int count)
SPL_KMEM_CACHE_OBJ_PER_SLAB), SPL_KMEM_CACHE_OBJ_PER_SLAB),
(unsigned long)kcp.kcp_cache->skc_obj_total, (unsigned long)kcp.kcp_cache->skc_obj_total,
(unsigned long)kcp.kcp_cache->skc_obj_max, (unsigned long)kcp.kcp_cache->skc_obj_max,
(unsigned long)(kcp.kcp_alloc * threads), (unsigned long)(kcp.kcp_alloc * threads));
(unsigned long)kcp.kcp_cache->skc_hash_size,
(unsigned long)kcp.kcp_cache->skc_hash_depth);
kmem_cache_destroy(kcp.kcp_cache); kmem_cache_destroy(kcp.kcp_cache);