diff --git a/include/sys/kmem.h b/include/sys/kmem.h
index ef401ade4e..ef58763126 100644
--- a/include/sys/kmem.h
+++ b/include/sys/kmem.h
@@ -225,6 +225,7 @@ extern struct rw_semaphore spl_kmem_cache_sem;
 
 #define SPL_KMEM_CACHE_DELAY            5
 #define SPL_KMEM_CACHE_OBJ_PER_SLAB     32
+#define SPL_KMEM_CACHE_ALIGN            8
 
 typedef int (*spl_kmem_ctor_t)(void *, void *, int);
 typedef void (*spl_kmem_dtor_t)(void *, void *);
@@ -270,6 +271,7 @@ typedef struct spl_kmem_cache {
         void                    *skc_vmp;        /* Unused */
         uint32_t                skc_flags;       /* Flags */
         uint32_t                skc_obj_size;    /* Object size */
+        uint32_t                skc_obj_align;   /* Object alignment */
         uint32_t                skc_slab_objs;   /* Objects per slab */
         uint32_t                skc_slab_size;   /* Slab size */
         uint32_t                skc_delay;       /* slab reclaim interval */
diff --git a/module/spl/spl-kmem.c b/module/spl/spl-kmem.c
index c6cd914adc..0c3c2f37ef 100644
--- a/module/spl/spl-kmem.c
+++ b/module/spl/spl-kmem.c
@@ -148,8 +148,6 @@ EXPORT_SYMBOL(kmem_set_warning);
  *
  * XXX: Slab coloring may also yield performance improvements and would
  *      be desirable to implement.
- *
- * XXX: Proper hardware cache alignment would be good too.
  */
 
 struct list_head spl_kmem_cache_list;   /* List of caches */
@@ -573,44 +571,44 @@ kv_free(spl_kmem_cache_t *skc, void *ptr, int size)
         }
 }
 
+/* It's important that we pack the spl_kmem_obj_t structure and the
+ * actual objects into one large address space to minimize the number
+ * of calls to the allocator.  It is far better to do a few large
+ * allocations and then subdivide them ourselves.  Now which allocator
+ * we use requires balancing a few trade-offs.
+ *
+ * For small objects we use kmem_alloc() because as long as you are
+ * only requesting a small number of pages (ideally just one) it's cheap.
+ * However, when you start requesting multiple pages with kmem_alloc()
+ * it gets increasingly expensive since it requires contiguous pages.
+ * For this reason we shift to vmem_alloc() for slabs of large objects,
+ * which removes the need for contiguous pages.  We do not use
+ * vmem_alloc() in all cases because there is significant locking
+ * overhead in __get_vm_area_node().  This function takes a single
+ * global lock when acquiring an available virtual address range, which
+ * serializes all vmem_alloc()'s for all slab caches.  Using slightly
+ * different allocation functions for small and large objects should
+ * give us the best of both worlds.
+ *
+ * KMC_ONSLAB                       KMC_OFFSLAB
+ *
+ * +------------------------+       +-----------------+
+ * | spl_kmem_slab_t --+-+  |       | spl_kmem_slab_t |---+-+
+ * | skc_obj_size    <-+ |  |       +-----------------+   | |
+ * | spl_kmem_obj_t      |  |                             | |
+ * | skc_obj_size    <---+  |       +-----------------+   | |
+ * | spl_kmem_obj_t      |  |       | skc_obj_size    | <-+ |
+ * | ...                 v  |       | spl_kmem_obj_t  |     |
+ * +------------------------+       +-----------------+     v
+ */
 static spl_kmem_slab_t *
 spl_slab_alloc(spl_kmem_cache_t *skc, int flags)
 {
         spl_kmem_slab_t *sks;
         spl_kmem_obj_t *sko, *n;
         void *base, *obj;
-        int i, size, rc = 0;
+        int i, align, size, rc = 0;
 
-        /* It's important that we pack the spl_kmem_obj_t structure
-         * and the actual objects in to one large address space
-         * to minimize the number of calls to the allocator.  It
-         * is far better to do a few large allocations and then
-         * subdivide it ourselves.  Now which allocator we use
-         * requires balancling a few trade offs.
-         *
-         * For small objects we use kmem_alloc() because as long
-         * as you are only requesting a small number of pages
-         * (ideally just one) its cheap.  However, when you start
-         * requesting multiple pages kmem_alloc() get increasingly
-         * expensive since it requires contigeous pages.  For this
-         * reason we shift to vmem_alloc() for slabs of large
-         * objects which removes the need for contigeous pages.
-         * We do not use vmem_alloc() in all cases because there
-         * is significant locking overhead in __get_vm_area_node().
-         * This function takes a single global lock when aquiring
-         * an available virtual address range which serialize all
-         * vmem_alloc()'s for all slab caches.  Using slightly
-         * different allocation functions for small and large
-         * objects should give us the best of both worlds.
-         *
-         * sks struct:  sizeof(spl_kmem_slab_t)
-         * obj data:    skc->skc_obj_size
-         * obj struct:  sizeof(spl_kmem_obj_t)
-         *
-         *
-         * XXX: It would probably be a good idea to more carefully
-         *      align these data structures in memory.
-         */
         base = kv_alloc(skc, skc->skc_slab_size, flags);
         if (base == NULL)
                 RETURN(NULL);
@@ -623,7 +621,10 @@ spl_slab_alloc(spl_kmem_cache_t *skc, int flags)
         INIT_LIST_HEAD(&sks->sks_list);
         INIT_LIST_HEAD(&sks->sks_free_list);
         sks->sks_ref = 0;
-        size = sizeof(spl_kmem_obj_t) + skc->skc_obj_size;
+
+        align = skc->skc_obj_align;
+        size = P2ROUNDUP(skc->skc_obj_size, align) +
+               P2ROUNDUP(sizeof(spl_kmem_obj_t), align);
 
         for (i = 0; i < sks->sks_objs; i++) {
                 if (skc->skc_flags & KMC_OFFSLAB) {
@@ -631,10 +632,12 @@ spl_slab_alloc(spl_kmem_cache_t *skc, int flags)
                         if (!obj)
                                 GOTO(out, rc = -ENOMEM);
                 } else {
-                        obj = base + sizeof(spl_kmem_slab_t) + i * size;
+                        obj = base +
+                              P2ROUNDUP(sizeof(spl_kmem_slab_t), align) +
+                              (i * size);
                 }
 
-                sko = obj + skc->skc_obj_size;
+                sko = obj + P2ROUNDUP(skc->skc_obj_size, align);
                 sko->sko_addr = obj;
                 sko->sko_magic = SKO_MAGIC;
                 sko->sko_slab = sks;
@@ -648,7 +651,8 @@ spl_slab_alloc(spl_kmem_cache_t *skc, int flags)
 out:
         if (rc) {
                 if (skc->skc_flags & KMC_OFFSLAB)
-                        list_for_each_entry_safe(sko,n,&sks->sks_free_list,sko_list)
+                        list_for_each_entry_safe(sko, n, &sks->sks_free_list,
+                                                 sko_list)
                                 kv_free(skc, sko->sko_addr, size);
 
                 kv_free(skc, base, skc->skc_slab_size);
@@ -678,7 +682,8 @@ spl_slab_free(spl_kmem_slab_t *sks) {
         skc->skc_obj_total -= sks->sks_objs;
         skc->skc_slab_total--;
         list_del(&sks->sks_list);
-        size = sizeof(spl_kmem_obj_t) + skc->skc_obj_size;
+        size = P2ROUNDUP(skc->skc_obj_size, skc->skc_obj_align) +
+               P2ROUNDUP(sizeof(spl_kmem_obj_t), skc->skc_obj_align);
 
         /* Run destructors slab is being released */
         list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) {
@@ -736,21 +741,48 @@ spl_slab_reclaim(spl_kmem_cache_t *skc)
         RETURN(rc);
 }
 
+/* Size slabs properly to ensure they are not too large */
+static int
+spl_slab_size(spl_kmem_cache_t *skc, uint32_t *objs, uint32_t *size)
+{
+        int max = ((uint64_t)1 << (MAX_ORDER - 1)) * PAGE_SIZE;
+        int align = skc->skc_obj_align;
+
+        *objs = SPL_KMEM_CACHE_OBJ_PER_SLAB;
+
+        if (skc->skc_flags & KMC_OFFSLAB) {
+                *size = sizeof(spl_kmem_slab_t);
+        } else {
+resize:
+                *size = P2ROUNDUP(sizeof(spl_kmem_slab_t), align) +
+                        *objs * (P2ROUNDUP(skc->skc_obj_size, align) +
+                        P2ROUNDUP(sizeof(spl_kmem_obj_t), align));
+
+                if (*size > max)
+                        GOTO(resize, *objs = *objs - 1);
+
+                ASSERT(*objs > 0);
+        }
+
+        ASSERTF(*size <= max, "%d < %d\n", *size, max);
+        RETURN(0);
+}
+
 static int
 spl_magazine_size(spl_kmem_cache_t *skc)
 {
-        int size;
+        int size, align = skc->skc_obj_align;
         ENTRY;
 
         /* Guesses for reasonable magazine sizes, they
          * should really adapt based on observed usage. */
-        if (skc->skc_obj_size > (PAGE_SIZE * 256))
+        if (P2ROUNDUP(skc->skc_obj_size, align) > (PAGE_SIZE * 256))
                 size = 4;
-        else if (skc->skc_obj_size > (PAGE_SIZE * 32))
+        else if (P2ROUNDUP(skc->skc_obj_size, align) > (PAGE_SIZE * 32))
                 size = 16;
-        else if (skc->skc_obj_size > (PAGE_SIZE))
+        else if (P2ROUNDUP(skc->skc_obj_size, align) > (PAGE_SIZE))
                 size = 64;
-        else if (skc->skc_obj_size > (PAGE_SIZE / 4))
+        else if (P2ROUNDUP(skc->skc_obj_size, align) > (PAGE_SIZE / 4))
                 size = 128;
         else
                 size = 512;
@@ -839,13 +871,13 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
                       void *priv, void *vmp, int flags)
 {
         spl_kmem_cache_t *skc;
-        uint32_t slab_max, slab_size, slab_objs;
         int rc, kmem_flags = KM_SLEEP;
         ENTRY;
 
         ASSERTF(!(flags & KMC_NOMAGAZINE), "Bad KMC_NOMAGAZINE (%x)\n", flags);
         ASSERTF(!(flags & KMC_NOHASH), "Bad KMC_NOHASH (%x)\n", flags);
         ASSERTF(!(flags & KMC_QCACHE), "Bad KMC_QCACHE (%x)\n", flags);
+        ASSERT(vmp == NULL);
 
         /* We may be called when there is a non-zero preempt_count or
          * interrupts are disabled is which case we must not sleep.
@@ -874,6 +906,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
         skc->skc_vmp = vmp;
         skc->skc_flags = flags;
         skc->skc_obj_size = size;
+        skc->skc_obj_align = SPL_KMEM_CACHE_ALIGN;
         skc->skc_delay = SPL_KMEM_CACHE_DELAY;
 
         INIT_LIST_HEAD(&skc->skc_list);
@@ -890,46 +923,39 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
         skc->skc_obj_alloc = 0;
         skc->skc_obj_max = 0;
 
+        if (align) {
+                ASSERT((align & (align - 1)) == 0);    /* Power of two */
+                ASSERT(align >= SPL_KMEM_CACHE_ALIGN); /* Minimum size */
+                skc->skc_obj_align = align;
+        }
+
         /* If none passed select a cache type based on object size */
         if (!(skc->skc_flags & (KMC_KMEM | KMC_VMEM))) {
-                if (skc->skc_obj_size < (PAGE_SIZE / 8)) {
+                if (P2ROUNDUP(skc->skc_obj_size, skc->skc_obj_align) <
+                    (PAGE_SIZE / 8)) {
                         skc->skc_flags |= KMC_KMEM;
                 } else {
                         skc->skc_flags |= KMC_VMEM;
                 }
         }
 
-        /* Size slabs properly so ensure they are not too large */
-        slab_max = ((uint64_t)1 << (MAX_ORDER - 1)) * PAGE_SIZE;
-        if (skc->skc_flags & KMC_OFFSLAB) {
-                skc->skc_slab_objs = SPL_KMEM_CACHE_OBJ_PER_SLAB;
-                skc->skc_slab_size = sizeof(spl_kmem_slab_t);
-                ASSERT(skc->skc_obj_size < slab_max);
-        } else {
-                slab_objs = SPL_KMEM_CACHE_OBJ_PER_SLAB + 1;
-
-                do {
-                        slab_objs--;
-                        slab_size = sizeof(spl_kmem_slab_t) + slab_objs *
-                                (skc->skc_obj_size+sizeof(spl_kmem_obj_t));
-                } while (slab_size > slab_max);
-
-                skc->skc_slab_objs = slab_objs;
-                skc->skc_slab_size = slab_size;
-        }
+        rc = spl_slab_size(skc, &skc->skc_slab_objs, &skc->skc_slab_size);
+        if (rc)
+                GOTO(out, rc);
 
         rc = spl_magazine_create(skc);
-        if (rc) {
-                kmem_free(skc->skc_name, skc->skc_name_size);
-                kmem_free(skc, sizeof(*skc));
-                RETURN(NULL);
-        }
+        if (rc)
+                GOTO(out, rc);
 
         down_write(&spl_kmem_cache_sem);
         list_add_tail(&skc->skc_list, &spl_kmem_cache_list);
         up_write(&spl_kmem_cache_sem);
 
         RETURN(skc);
+out:
+        kmem_free(skc->skc_name, skc->skc_name_size);
+        kmem_free(skc, sizeof(*skc));
+        RETURN(NULL);
 }
 EXPORT_SYMBOL(spl_kmem_cache_create);
 
@@ -1119,7 +1145,7 @@ spl_cache_shrink(spl_kmem_cache_t *skc, void *obj)
         ASSERT(skc->skc_magic == SKC_MAGIC);
         ASSERT(spin_is_locked(&skc->skc_lock));
 
-        sko = obj + skc->skc_obj_size;
+        sko = obj + P2ROUNDUP(skc->skc_obj_size, skc->skc_obj_align);
         ASSERT(sko->sko_magic == SKO_MAGIC);
 
         sks = sko->sko_slab;
@@ -1213,6 +1239,7 @@ restart:
 
         local_irq_restore(irq_flags);
         ASSERT(obj);
+        ASSERT(((unsigned long)(obj) % skc->skc_obj_align) == 0);
 
         /* Pre-emptively migrate object to CPU L1 cache */
         prefetchw(obj);
diff --git a/module/splat/splat-kmem.c b/module/splat/splat-kmem.c
index 31499dde32..9b96fce90e 100644
--- a/module/splat/splat-kmem.c
+++ b/module/splat/splat-kmem.c
@@ -47,11 +47,11 @@
 #define SPLAT_KMEM_TEST4_DESC           "Memory allocation test (vmem_zalloc)"
 
 #define SPLAT_KMEM_TEST5_ID             0x0105
-#define SPLAT_KMEM_TEST5_NAME           "kmem_cache1"
+#define SPLAT_KMEM_TEST5_NAME           "kmem_small"
 #define SPLAT_KMEM_TEST5_DESC           "Slab ctor/dtor test (small)"
 
 #define SPLAT_KMEM_TEST6_ID             0x0106
-#define SPLAT_KMEM_TEST6_NAME           "kmem_cache2"
+#define SPLAT_KMEM_TEST6_NAME           "kmem_large"
 #define SPLAT_KMEM_TEST6_DESC           "Slab ctor/dtor test (large)"
 
 #define SPLAT_KMEM_TEST7_ID             0x0107
@@ -62,6 +62,10 @@
 #define SPLAT_KMEM_TEST8_NAME           "kmem_lock"
 #define SPLAT_KMEM_TEST8_DESC           "Slab locking test"
 
+#define SPLAT_KMEM_TEST9_ID             0x0109
+#define SPLAT_KMEM_TEST9_NAME           "kmem_align"
+#define SPLAT_KMEM_TEST9_DESC           "Slab alignment test"
+
 #define SPLAT_KMEM_ALLOC_COUNT          10
 #define SPLAT_VMEM_ALLOC_COUNT          10
 
@@ -250,6 +254,7 @@ typedef struct kmem_cache_priv {
         spinlock_t kcp_lock;
         wait_queue_head_t kcp_waitq;
         int kcp_size;
+        int kcp_align;
         int kcp_count;
         int kcp_threads;
         int kcp_alloc;
@@ -289,8 +294,8 @@ splat_kmem_cache_test_destructor(void *ptr, void *priv)
 }
 
 static int
-splat_kmem_cache_size_test(struct file *file, void *arg,
-                           char *name, int size, int flags)
+splat_kmem_cache_test(struct file *file, void *arg, char *name,
+                      int size, int align, int flags)
 {
         kmem_cache_t *cache = NULL;
         kmem_cache_data_t *kcd = NULL;
@@ -300,10 +305,12 @@ splat_kmem_cache_size_test(struct file *file, void *arg,
         kcp.kcp_magic = SPLAT_KMEM_TEST_MAGIC;
         kcp.kcp_file = file;
         kcp.kcp_size = size;
+        kcp.kcp_align = align;
         kcp.kcp_count = 0;
         kcp.kcp_rc = 0;
 
-        cache = kmem_cache_create(SPLAT_KMEM_CACHE_NAME, kcp.kcp_size, 0,
+        cache = kmem_cache_create(SPLAT_KMEM_CACHE_NAME,
+                                  kcp.kcp_size, kcp.kcp_align,
                                   splat_kmem_cache_test_constructor,
                                   splat_kmem_cache_test_destructor,
                                   NULL, &kcp, NULL, flags);
@@ -373,15 +380,15 @@ splat_kmem_test5(struct file *file, void *arg)
         char *name = SPLAT_KMEM_TEST5_NAME;
         int rc;
 
-        rc = splat_kmem_cache_size_test(file, arg, name, 128, 0);
+        rc = splat_kmem_cache_test(file, arg, name, 128, 0, 0);
         if (rc)
                 return rc;
 
-        rc = splat_kmem_cache_size_test(file, arg, name, 128, KMC_KMEM);
+        rc = splat_kmem_cache_test(file, arg, name, 128, 0, KMC_KMEM);
         if (rc)
                 return rc;
 
-        return splat_kmem_cache_size_test(file, arg, name, 128, KMC_VMEM);
+        return splat_kmem_cache_test(file, arg, name, 128, 0, KMC_VMEM);
 }
 
 /* Validate large object cache behavior for dynamic/kmem/vmem caches */
@@ -391,15 +398,15 @@ splat_kmem_test6(struct file *file, void *arg)
         char *name = SPLAT_KMEM_TEST6_NAME;
         int rc;
 
-        rc = splat_kmem_cache_size_test(file, arg, name, 128 * 1024, 0);
+        rc = splat_kmem_cache_test(file, arg, name, 128*1024, 0, 0);
         if (rc)
                 return rc;
 
-        rc = splat_kmem_cache_size_test(file, arg, name, 128 * 1024, KMC_KMEM);
+        rc = splat_kmem_cache_test(file, arg, name, 128*1024, 0, KMC_KMEM);
         if (rc)
                 return rc;
 
-        return splat_kmem_cache_size_test(file, arg, name, 128 * 1028, KMC_VMEM);
+        return splat_kmem_cache_test(file, arg, name, 128*1028, 0, KMC_VMEM);
 }
 
 static void
@@ -675,6 +682,22 @@ splat_kmem_test8(struct file *file, void *arg)
         return rc;
 }
 
+/* Validate object alignment cache behavior for caches */
+static int
+splat_kmem_test9(struct file *file, void *arg)
+{
+        char *name = SPLAT_KMEM_TEST9_NAME;
+        int i, rc;
+
+        for (i = 8; i <= PAGE_SIZE; i *= 2) {
+                rc = splat_kmem_cache_test(file, arg, name, 157, i, 0);
+                if (rc)
+                        return rc;
+        }
+
+        return rc;
+}
+
 splat_subsystem_t *
 splat_kmem_init(void)
 {
@@ -708,6 +731,8 @@ splat_kmem_init(void)
                       SPLAT_KMEM_TEST7_ID, splat_kmem_test7);
         SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST8_NAME, SPLAT_KMEM_TEST8_DESC,
                       SPLAT_KMEM_TEST8_ID, splat_kmem_test8);
+        SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST9_NAME, SPLAT_KMEM_TEST9_DESC,
+                      SPLAT_KMEM_TEST9_ID, splat_kmem_test9);
 
         return sub;
 }
@@ -716,6 +741,7 @@ void
 splat_kmem_fini(splat_subsystem_t *sub)
 {
         ASSERT(sub);
+        SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST9_ID);
         SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST8_ID);
         SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST7_ID);
         SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST6_ID);
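Editorial note, not part of the patch: the alignment math in spl_slab_alloc() and spl_slab_size() is easiest to see in isolation. The user-space sketch below mimics that offset arithmetic, assuming the usual Solaris P2ROUNDUP() definition; the slab-header and object-descriptor sizes are hypothetical stand-ins, since only the rounding behaviour matters here.

    /*
     * Illustrative sketch only -- NOT part of the patch.  The slab header,
     * each object, and each trailing spl_kmem_obj_t are rounded up to the
     * cache alignment, so every object offset is a multiple of the
     * alignment (mirroring the new ASSERT on the allocated object above).
     */
    #include <stdio.h>
    #include <stdint.h>

    #define P2ROUNDUP(x, align)     (-(-(x) & -(align)))

    int
    main(void)
    {
            uint32_t obj_size = 157;  /* object size, as in the kmem_align test */
            uint32_t align    = 64;   /* skc_obj_align: must be a power of two */
            uint32_t slab_hdr = 56;   /* stand-in for sizeof(spl_kmem_slab_t) */
            uint32_t sko_size = 32;   /* stand-in for sizeof(spl_kmem_obj_t) */
            uint32_t stride   = P2ROUNDUP(obj_size, align) +
                                P2ROUNDUP(sko_size, align);
            uint32_t i;

            for (i = 0; i < 4; i++) {
                    /* Same arithmetic as the patched obj/sko assignments */
                    uint32_t obj = P2ROUNDUP(slab_hdr, align) + i * stride;
                    uint32_t sko = obj + P2ROUNDUP(obj_size, align);

                    printf("obj %u: offset %4u (%% %u == %u), sko at %4u\n",
                           i, obj, align, obj % align, sko);
            }

            return (0);
    }

Because both the rounded object size and the rounded descriptor size are multiples of the alignment, each per-object stride preserves the alignment established by rounding up the slab header, which is why the simple modulo check on every allocated object holds.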