Linux 2.6.31 kmem cache alignment fixes and cleanup.

The big fix here is the removal of kmalloc() in kv_alloc().  It used
to be true in previous kernels that kmallocs over PAGE_SIZE would
always be page aligned.  This is no longer true; at least in 2.6.31
there are no longer any alignment expectations.  Since kv_alloc()
requires the resulting address to be page aligned we now either
directly allocate pages in the KMC_KMEM case, or directly call
__vmalloc(), both of which will always return a page aligned address.
Additionally, to avoid wasting memory, size is always a power of two.

As for cleanup, several helper functions were introduced to calculate
the aligned sizes of various data structures.  This helps ensure no
case is accidentally missed where the alignment needs to be taken
into account.  The helpers now use P2ROUNDUP_TYPED instead of P2ROUNDUP
which is safer since the type will be explicit and we no longer count
on the compiler to automatically promote types the way we expect.

Always enforce minimum (SPL_KMEM_CACHE_ALIGN) and maximum (PAGE_SIZE)
alignment restrictions at cache creation time.

Use SPL_KMEM_CACHE_ALIGN in splat alignment test.
This commit is contained in:
Brian Behlendorf 2009-11-13 11:12:43 -08:00
parent c89fdee4d3
commit 8b45dda2bc
3 changed files with 93 additions and 57 deletions

View File

@ -172,12 +172,8 @@ extern void spl_cleanup(void);
#define P2ALIGN(x, align) ((x) & -(align)) #define P2ALIGN(x, align) ((x) & -(align))
#define P2CROSS(x, y, align) (((x) ^ (y)) > (align) - 1) #define P2CROSS(x, y, align) (((x) ^ (y)) > (align) - 1)
#define P2ROUNDUP(x, align) (-(-(x) & -(align))) #define P2ROUNDUP(x, align) (-(-(x) & -(align)))
#define P2ROUNDUP_TYPED(x, align, type) \
(-(-(type)(x) & -(type)(align)))
#define P2PHASE(x, align) ((x) & ((align) - 1)) #define P2PHASE(x, align) ((x) & ((align) - 1))
#define P2NPHASE(x, align) (-(x) & ((align) - 1)) #define P2NPHASE(x, align) (-(x) & ((align) - 1))
#define P2NPHASE_TYPED(x, align, type) \
(-(type)(x) & ((type)(align) - 1))
#define ISP2(x) (((x) & ((x) - 1)) == 0) #define ISP2(x) (((x) & ((x) - 1)) == 0)
#define IS_P2ALIGNED(v, a) ((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0) #define IS_P2ALIGNED(v, a) ((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0)
#define P2BOUNDARY(off, len, align) \ #define P2BOUNDARY(off, len, align) \

View File

@ -720,14 +720,15 @@ kv_alloc(spl_kmem_cache_t *skc, int size, int flags)
{ {
void *ptr; void *ptr;
if (skc->skc_flags & KMC_KMEM) { ASSERT(ISP2(size));
if (size > (2 * PAGE_SIZE)) {
ptr = (void *)__get_free_pages(flags, get_order(size)); if (skc->skc_flags & KMC_KMEM)
} else ptr = (void *)__get_free_pages(flags, get_order(size));
ptr = kmem_alloc(size, flags); else
} else { ptr = __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL);
ptr = vmem_alloc(size, flags);
} /* Resulting allocated memory will be page aligned */
ASSERT(IS_P2ALIGNED(ptr, PAGE_SIZE));
return ptr; return ptr;
} }
@ -735,14 +736,55 @@ kv_alloc(spl_kmem_cache_t *skc, int size, int flags)
static void static void
kv_free(spl_kmem_cache_t *skc, void *ptr, int size) kv_free(spl_kmem_cache_t *skc, void *ptr, int size)
{ {
if (skc->skc_flags & KMC_KMEM) { ASSERT(IS_P2ALIGNED(ptr, PAGE_SIZE));
if (size > (2 * PAGE_SIZE)) ASSERT(ISP2(size));
free_pages((unsigned long)ptr, get_order(size));
else if (skc->skc_flags & KMC_KMEM)
kmem_free(ptr, size); free_pages((unsigned long)ptr, get_order(size));
} else { else
vmem_free(ptr, size); vfree(ptr);
} }
/*
 * Space consumed by a slab's spl_kmem_slab_t header: the size of the
 * structure rounded up to a multiple of the cache's object alignment.
 */
static inline uint32_t
spl_sks_size(spl_kmem_cache_t *skc)
{
	uint32_t sks_bytes;

	sks_bytes = P2ROUNDUP_TYPED(sizeof(spl_kmem_slab_t),
	    skc->skc_obj_align, uint32_t);

	return sks_bytes;
}
/*
 * Space consumed by one object together with its spl_kmem_obj_t
 * tracking structure.  Each of the two parts is rounded up separately
 * to a multiple of the cache's object alignment before being summed.
 */
static inline uint32_t
spl_obj_size(spl_kmem_cache_t *skc)
{
	uint32_t obj_align = skc->skc_obj_align;
	uint32_t data_bytes, sko_bytes;

	data_bytes = P2ROUNDUP_TYPED(skc->skc_obj_size, obj_align, uint32_t);
	sko_bytes = P2ROUNDUP_TYPED(sizeof(spl_kmem_obj_t), obj_align,
	    uint32_t);

	return data_bytes + sko_bytes;
}
/*
 * Locate the spl_kmem_obj_t belonging to the given object.  It sits
 * directly after the object's data, at an offset equal to the object
 * size rounded up to the cache's object alignment.
 */
static inline spl_kmem_obj_t *
spl_sko_from_obj(spl_kmem_cache_t *skc, void *obj)
{
	uint32_t sko_offset;

	sko_offset = P2ROUNDUP_TYPED(skc->skc_obj_size,
	    skc->skc_obj_align, uint32_t);

	return obj + sko_offset;
}
/*
 * Required space for each offslab object taking into account alignment
 * restrictions and the power-of-two requirement of kv_alloc().
 *
 * The result is a single shifted bit, so it satisfies the ISP2() check
 * performed by kv_alloc() and kv_free().
 *
 * NOTE(review): assuming highbit() returns the 1-based index of the most
 * significant set bit, the extra "+ 1" yields a size more than two and
 * up to four times spl_obj_size() rather than the nearest power of two
 * -- confirm that this headroom is intended.
 * NOTE(review): the shift is done on an unsigned long but the return
 * type is uint32_t; a shift count of 32 or more cannot be represented
 * in the result -- confirm object sizes are bounded well below that.
 */
static inline uint32_t
spl_offslab_size(spl_kmem_cache_t *skc)
{
return 1UL << (highbit(spl_obj_size(skc)) + 1);
} }
/* /*
@ -782,7 +824,8 @@ spl_slab_alloc(spl_kmem_cache_t *skc, int flags)
spl_kmem_slab_t *sks; spl_kmem_slab_t *sks;
spl_kmem_obj_t *sko, *n; spl_kmem_obj_t *sko, *n;
void *base, *obj; void *base, *obj;
int i, align, size, rc = 0; uint32_t obj_size, offslab_size = 0;
int i, rc = 0;
base = kv_alloc(skc, skc->skc_slab_size, flags); base = kv_alloc(skc, skc->skc_slab_size, flags);
if (base == NULL) if (base == NULL)
@ -796,23 +839,22 @@ spl_slab_alloc(spl_kmem_cache_t *skc, int flags)
INIT_LIST_HEAD(&sks->sks_list); INIT_LIST_HEAD(&sks->sks_list);
INIT_LIST_HEAD(&sks->sks_free_list); INIT_LIST_HEAD(&sks->sks_free_list);
sks->sks_ref = 0; sks->sks_ref = 0;
obj_size = spl_obj_size(skc);
align = skc->skc_obj_align; if (skc->skc_flags & KMC_OFFSLAB)
size = P2ROUNDUP(skc->skc_obj_size, align) + offslab_size = spl_offslab_size(skc);
P2ROUNDUP(sizeof(spl_kmem_obj_t), align);
for (i = 0; i < sks->sks_objs; i++) { for (i = 0; i < sks->sks_objs; i++) {
if (skc->skc_flags & KMC_OFFSLAB) { if (skc->skc_flags & KMC_OFFSLAB) {
obj = kv_alloc(skc, size, flags); obj = kv_alloc(skc, offslab_size, flags);
if (!obj) if (!obj)
GOTO(out, rc = -ENOMEM); GOTO(out, rc = -ENOMEM);
} else { } else {
obj = base + obj = base + spl_sks_size(skc) + (i * obj_size);
P2ROUNDUP(sizeof(spl_kmem_slab_t), align) +
(i * size);
} }
sko = obj + P2ROUNDUP(skc->skc_obj_size, align); ASSERT(IS_P2ALIGNED(obj, skc->skc_obj_align));
sko = spl_sko_from_obj(skc, obj);
sko->sko_addr = obj; sko->sko_addr = obj;
sko->sko_magic = SKO_MAGIC; sko->sko_magic = SKO_MAGIC;
sko->sko_slab = sks; sko->sko_slab = sks;
@ -828,7 +870,7 @@ out:
if (skc->skc_flags & KMC_OFFSLAB) if (skc->skc_flags & KMC_OFFSLAB)
list_for_each_entry_safe(sko, n, &sks->sks_free_list, list_for_each_entry_safe(sko, n, &sks->sks_free_list,
sko_list) sko_list)
kv_free(skc, sko->sko_addr, size); kv_free(skc, sko->sko_addr, offslab_size);
kv_free(skc, base, skc->skc_slab_size); kv_free(skc, base, skc->skc_slab_size);
sks = NULL; sks = NULL;
@ -886,7 +928,8 @@ spl_slab_reclaim(spl_kmem_cache_t *skc, int count, int flag)
spl_kmem_obj_t *sko, *n; spl_kmem_obj_t *sko, *n;
LIST_HEAD(sks_list); LIST_HEAD(sks_list);
LIST_HEAD(sko_list); LIST_HEAD(sko_list);
int size = 0, i = 0; uint32_t size = 0;
int i = 0;
ENTRY; ENTRY;
/* /*
@ -922,8 +965,7 @@ spl_slab_reclaim(spl_kmem_cache_t *skc, int count, int flag)
* objects and slabs back to the system. * objects and slabs back to the system.
*/ */
if (skc->skc_flags & KMC_OFFSLAB) if (skc->skc_flags & KMC_OFFSLAB)
size = P2ROUNDUP(skc->skc_obj_size, skc->skc_obj_align) + size = spl_offslab_size(skc);
P2ROUNDUP(sizeof(spl_kmem_obj_t), skc->skc_obj_align);
list_for_each_entry_safe(sko, n, &sko_list, sko_list) { list_for_each_entry_safe(sko, n, &sko_list, sko_list) {
ASSERT(sko->sko_magic == SKO_MAGIC); ASSERT(sko->sko_magic == SKO_MAGIC);
@ -994,7 +1036,7 @@ spl_cache_age(void *data)
} }
/* /*
* Size a slab based on the size of each aliged object plus spl_kmem_obj_t. * Size a slab based on the size of each aligned object plus spl_kmem_obj_t.
* When on-slab we want to target SPL_KMEM_CACHE_OBJ_PER_SLAB. However, * When on-slab we want to target SPL_KMEM_CACHE_OBJ_PER_SLAB. However,
* for very small objects we may end up with more than this so as not * for very small objects we may end up with more than this so as not
* to waste space in the minimal allocation of a single page. Also for * to waste space in the minimal allocation of a single page. Also for
@ -1004,30 +1046,29 @@ spl_cache_age(void *data)
static int static int
spl_slab_size(spl_kmem_cache_t *skc, uint32_t *objs, uint32_t *size) spl_slab_size(spl_kmem_cache_t *skc, uint32_t *objs, uint32_t *size)
{ {
int sks_size, obj_size, max_size, align; uint32_t sks_size, obj_size, max_size;
if (skc->skc_flags & KMC_OFFSLAB) { if (skc->skc_flags & KMC_OFFSLAB) {
*objs = SPL_KMEM_CACHE_OBJ_PER_SLAB; *objs = SPL_KMEM_CACHE_OBJ_PER_SLAB;
*size = sizeof(spl_kmem_slab_t); *size = sizeof(spl_kmem_slab_t);
} else { } else {
align = skc->skc_obj_align; sks_size = spl_sks_size(skc);
sks_size = P2ROUNDUP(sizeof(spl_kmem_slab_t), align); obj_size = spl_obj_size(skc);
obj_size = P2ROUNDUP(skc->skc_obj_size, align) +
P2ROUNDUP(sizeof(spl_kmem_obj_t), align);
if (skc->skc_flags & KMC_KMEM) if (skc->skc_flags & KMC_KMEM)
max_size = ((uint64_t)1 << (MAX_ORDER-1)) * PAGE_SIZE; max_size = ((uint32_t)1 << (MAX_ORDER-1)) * PAGE_SIZE;
else else
max_size = (32 * 1024 * 1024); max_size = (32 * 1024 * 1024);
for (*size = PAGE_SIZE; *size <= max_size; *size += PAGE_SIZE) { /* Power of two sized slab */
for (*size = PAGE_SIZE; *size <= max_size; *size *= 2) {
*objs = (*size - sks_size) / obj_size; *objs = (*size - sks_size) / obj_size;
if (*objs >= SPL_KMEM_CACHE_OBJ_PER_SLAB) if (*objs >= SPL_KMEM_CACHE_OBJ_PER_SLAB)
RETURN(0); RETURN(0);
} }
/* /*
* Unable to satisfy target objets per slab, fallback to * Unable to satisfy target objects per slab, fall back to
* allocating a maximally sized slab and assuming it can * allocating a maximally sized slab and assuming it can
* contain the minimum objects count use it. If not fail. * contain the minimum objects count use it. If not fail.
*/ */
@ -1048,17 +1089,18 @@ spl_slab_size(spl_kmem_cache_t *skc, uint32_t *objs, uint32_t *size)
static int static int
spl_magazine_size(spl_kmem_cache_t *skc) spl_magazine_size(spl_kmem_cache_t *skc)
{ {
int size, align = skc->skc_obj_align; uint32_t obj_size = spl_obj_size(skc);
int size;
ENTRY; ENTRY;
/* Per-magazine sizes below assume a 4Kib page size */ /* Per-magazine sizes below assume a 4Kib page size */
if (P2ROUNDUP(skc->skc_obj_size, align) > (PAGE_SIZE * 256)) if (obj_size > (PAGE_SIZE * 256))
size = 4; /* Minimum 4Mib per-magazine */ size = 4; /* Minimum 4Mib per-magazine */
else if (P2ROUNDUP(skc->skc_obj_size, align) > (PAGE_SIZE * 32)) else if (obj_size > (PAGE_SIZE * 32))
size = 16; /* Minimum 2Mib per-magazine */ size = 16; /* Minimum 2Mib per-magazine */
else if (P2ROUNDUP(skc->skc_obj_size, align) > (PAGE_SIZE)) else if (obj_size > (PAGE_SIZE))
size = 64; /* Minimum 256Kib per-magazine */ size = 64; /* Minimum 256Kib per-magazine */
else if (P2ROUNDUP(skc->skc_obj_size, align) > (PAGE_SIZE / 4)) else if (obj_size > (PAGE_SIZE / 4))
size = 128; /* Minimum 128Kib per-magazine */ size = 128; /* Minimum 128Kib per-magazine */
else else
size = 256; size = 256;
@ -1240,19 +1282,18 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
skc->skc_obj_max = 0; skc->skc_obj_max = 0;
if (align) { if (align) {
ASSERT((align & (align - 1)) == 0); /* Power of two */ VERIFY(ISP2(align));
ASSERT(align >= SPL_KMEM_CACHE_ALIGN); /* Minimum size */ VERIFY3U(align, >=, SPL_KMEM_CACHE_ALIGN); /* Min alignment */
VERIFY3U(align, <=, PAGE_SIZE); /* Max alignment */
skc->skc_obj_align = align; skc->skc_obj_align = align;
} }
/* If none passed select a cache type based on object size */ /* If none passed select a cache type based on object size */
if (!(skc->skc_flags & (KMC_KMEM | KMC_VMEM))) { if (!(skc->skc_flags & (KMC_KMEM | KMC_VMEM))) {
if (P2ROUNDUP(skc->skc_obj_size, skc->skc_obj_align) < if (spl_obj_size(skc) < (PAGE_SIZE / 8))
(PAGE_SIZE / 8)) {
skc->skc_flags |= KMC_KMEM; skc->skc_flags |= KMC_KMEM;
} else { else
skc->skc_flags |= KMC_VMEM; skc->skc_flags |= KMC_VMEM;
}
} }
rc = spl_slab_size(skc, &skc->skc_slab_objs, &skc->skc_slab_size); rc = spl_slab_size(skc, &skc->skc_slab_objs, &skc->skc_slab_size);
@ -1492,9 +1533,8 @@ spl_cache_shrink(spl_kmem_cache_t *skc, void *obj)
ASSERT(skc->skc_magic == SKC_MAGIC); ASSERT(skc->skc_magic == SKC_MAGIC);
ASSERT(spin_is_locked(&skc->skc_lock)); ASSERT(spin_is_locked(&skc->skc_lock));
sko = obj + P2ROUNDUP(skc->skc_obj_size, skc->skc_obj_align); sko = spl_sko_from_obj(skc, obj);
ASSERT(sko->sko_magic == SKO_MAGIC); ASSERT(sko->sko_magic == SKO_MAGIC);
sks = sko->sko_slab; sks = sko->sko_slab;
ASSERT(sks->sks_magic == SKS_MAGIC); ASSERT(sks->sks_magic == SKS_MAGIC);
ASSERT(sks->sks_cache == skc); ASSERT(sks->sks_cache == skc);
@ -1600,7 +1640,7 @@ restart:
local_irq_restore(irq_flags); local_irq_restore(irq_flags);
ASSERT(obj); ASSERT(obj);
ASSERT(((unsigned long)(obj) % skc->skc_obj_align) == 0); ASSERT(IS_P2ALIGNED(obj, skc->skc_obj_align));
/* Pre-emptively migrate object to CPU L1 cache */ /* Pre-emptively migrate object to CPU L1 cache */
prefetchw(obj); prefetchw(obj);

View File

@ -762,7 +762,7 @@ splat_kmem_test7(struct file *file, void *arg)
char *name = SPLAT_KMEM_TEST7_NAME; char *name = SPLAT_KMEM_TEST7_NAME;
int i, rc; int i, rc;
for (i = 8; i <= PAGE_SIZE; i *= 2) { for (i = SPL_KMEM_CACHE_ALIGN; i <= PAGE_SIZE; i *= 2) {
rc = splat_kmem_cache_test(file, arg, name, 157, i, 0); rc = splat_kmem_cache_test(file, arg, name, 157, i, 0);
if (rc) if (rc)
return rc; return rc;