zfs/module/splat/splat-kmem.c

1411 lines
37 KiB
C
Raw Normal View History

/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*****************************************************************************
* Solaris Porting LAyer Tests (SPLAT) Kmem Tests.
*/
#include <sys/kmem.h>
#include <sys/kmem_cache.h>
#include <sys/vmem.h>
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
#include <sys/random.h>
#include <sys/thread.h>
#include <sys/vmsystm.h>
#include "splat-internal.h"
#define SPLAT_KMEM_NAME "kmem"
#define SPLAT_KMEM_DESC "Kernel Malloc/Slab Tests"
#define SPLAT_KMEM_TEST1_ID 0x0101
#define SPLAT_KMEM_TEST1_NAME "kmem_alloc"
#define SPLAT_KMEM_TEST1_DESC "Memory allocation test (kmem_alloc)"
#define SPLAT_KMEM_TEST2_ID 0x0102
#define SPLAT_KMEM_TEST2_NAME "kmem_zalloc"
#define SPLAT_KMEM_TEST2_DESC "Memory allocation test (kmem_zalloc)"
#define SPLAT_KMEM_TEST3_ID 0x0103
#define SPLAT_KMEM_TEST3_NAME "vmem_alloc"
#define SPLAT_KMEM_TEST3_DESC "Memory allocation test (vmem_alloc)"
#define SPLAT_KMEM_TEST4_ID 0x0104
#define SPLAT_KMEM_TEST4_NAME "vmem_zalloc"
#define SPLAT_KMEM_TEST4_DESC "Memory allocation test (vmem_zalloc)"
#define SPLAT_KMEM_TEST5_ID 0x0105
#define SPLAT_KMEM_TEST5_NAME "slab_small"
#define SPLAT_KMEM_TEST5_DESC "Slab ctor/dtor test (small)"
#define SPLAT_KMEM_TEST6_ID 0x0106
#define SPLAT_KMEM_TEST6_NAME "slab_large"
#define SPLAT_KMEM_TEST6_DESC "Slab ctor/dtor test (large)"
#define SPLAT_KMEM_TEST7_ID 0x0107
#define SPLAT_KMEM_TEST7_NAME "slab_align"
#define SPLAT_KMEM_TEST7_DESC "Slab alignment test"
#define SPLAT_KMEM_TEST8_ID 0x0108
#define SPLAT_KMEM_TEST8_NAME "slab_reap"
#define SPLAT_KMEM_TEST8_DESC "Slab reaping test"
#define SPLAT_KMEM_TEST9_ID 0x0109
#define SPLAT_KMEM_TEST9_NAME "slab_age"
#define SPLAT_KMEM_TEST9_DESC "Slab aging test"
#define SPLAT_KMEM_TEST10_ID 0x010a
#define SPLAT_KMEM_TEST10_NAME "slab_lock"
#define SPLAT_KMEM_TEST10_DESC "Slab locking test"
#if 0
#define SPLAT_KMEM_TEST11_ID 0x010b
#define SPLAT_KMEM_TEST11_NAME "slab_overcommit"
#define SPLAT_KMEM_TEST11_DESC "Slab memory overcommit test"
#endif
#define SPLAT_KMEM_TEST13_ID 0x010d
#define SPLAT_KMEM_TEST13_NAME "slab_reclaim"
#define SPLAT_KMEM_TEST13_DESC "Slab direct memory reclaim test"
#define SPLAT_KMEM_ALLOC_COUNT 10
#define SPLAT_VMEM_ALLOC_COUNT 10
static int
splat_kmem_test1(struct file *file, void *arg)
{
void *ptr[SPLAT_KMEM_ALLOC_COUNT];
int size = PAGE_SIZE;
int i, count, rc = 0;
Refactor generic memory allocation interfaces This patch achieves the following goals: 1. It replaces the preprocessor kmem flag to gfp flag mapping with proper translation logic. This eliminates the potential for surprises that were previously possible where kmem flags were mapped to gfp flags. 2. It maps vmem_alloc() allocations to kmem_alloc() for allocations sized less than or equal to the newly-added spl_kmem_alloc_max parameter. This ensures that small allocations will not contend on a single global lock, large allocations can still be handled, and potentially limited virtual address space will not be squandered. This behavior is entirely different than under Illumos due to different memory management strategies employed by the respective kernels. However, this functionally provides the semantics required. 3. The --disable-debug-kmem, --enable-debug-kmem (default), and --enable-debug-kmem-tracking allocators have been unified in to a single spl_kmem_alloc_impl() allocation function. This was done to simplify the code and make it more maintainable. 4. Improve portability by exposing an implementation of the memory allocations functions that can be safely used in the same way they are used on Illumos. Specifically, callers may safely use KM_SLEEP in contexts which perform filesystem IO. This allows us to eliminate an entire class of Linux specific changes which were previously required to avoid deadlocking the system. This change will be largely transparent to existing callers but there are a few caveats: 1. Because the headers were refactored and extraneous includes removed callers may find they need to explicitly add additional #includes. In particular, kmem_cache.h must now be explicitly includes to access the SPL's kmem cache implementation. This behavior is different from Illumos but it was done to avoid always masking the Linux slab functions when kmem.h is included. 2. Callers, like Lustre, which made assumptions about the definitions of KM_SLEEP, KM_NOSLEEP, and KM_PUSHPAGE will need to be updated. Other callers such as ZFS which did not will not require changes. 3. KM_PUSHPAGE is no longer overloaded to imply GFP_NOIO. It retains its original meaning of allowing allocations to access reserved memory. KM_PUSHPAGE callers can be converted back to KM_SLEEP. 4. The KM_NODEBUG flags has been retired and the default warning threshold increased to 32k. 5. The kmem_virt() functions has been removed. For callers which need to distinguish between a physical and virtual address use is_vmalloc_addr(). Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-08 20:37:14 +00:00
while ((!rc) && (size <= spl_kmem_alloc_warn)) {
count = 0;
for (i = 0; i < SPLAT_KMEM_ALLOC_COUNT; i++) {
Refactor generic memory allocation interfaces This patch achieves the following goals: 1. It replaces the preprocessor kmem flag to gfp flag mapping with proper translation logic. This eliminates the potential for surprises that were previously possible where kmem flags were mapped to gfp flags. 2. It maps vmem_alloc() allocations to kmem_alloc() for allocations sized less than or equal to the newly-added spl_kmem_alloc_max parameter. This ensures that small allocations will not contend on a single global lock, large allocations can still be handled, and potentially limited virtual address space will not be squandered. This behavior is entirely different than under Illumos due to different memory management strategies employed by the respective kernels. However, this functionally provides the semantics required. 3. The --disable-debug-kmem, --enable-debug-kmem (default), and --enable-debug-kmem-tracking allocators have been unified in to a single spl_kmem_alloc_impl() allocation function. This was done to simplify the code and make it more maintainable. 4. Improve portability by exposing an implementation of the memory allocations functions that can be safely used in the same way they are used on Illumos. Specifically, callers may safely use KM_SLEEP in contexts which perform filesystem IO. This allows us to eliminate an entire class of Linux specific changes which were previously required to avoid deadlocking the system. This change will be largely transparent to existing callers but there are a few caveats: 1. Because the headers were refactored and extraneous includes removed callers may find they need to explicitly add additional #includes. In particular, kmem_cache.h must now be explicitly includes to access the SPL's kmem cache implementation. This behavior is different from Illumos but it was done to avoid always masking the Linux slab functions when kmem.h is included. 2. Callers, like Lustre, which made assumptions about the definitions of KM_SLEEP, KM_NOSLEEP, and KM_PUSHPAGE will need to be updated. Other callers such as ZFS which did not will not require changes. 3. KM_PUSHPAGE is no longer overloaded to imply GFP_NOIO. It retains its original meaning of allowing allocations to access reserved memory. KM_PUSHPAGE callers can be converted back to KM_SLEEP. 4. The KM_NODEBUG flags has been retired and the default warning threshold increased to 32k. 5. The kmem_virt() functions has been removed. For callers which need to distinguish between a physical and virtual address use is_vmalloc_addr(). Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-08 20:37:14 +00:00
ptr[i] = kmem_alloc(size, KM_SLEEP);
if (ptr[i])
count++;
}
for (i = 0; i < SPLAT_KMEM_ALLOC_COUNT; i++)
if (ptr[i])
kmem_free(ptr[i], size);
splat_vprint(file, SPLAT_KMEM_TEST1_NAME,
"%d byte allocations, %d/%d successful\n",
size, count, SPLAT_KMEM_ALLOC_COUNT);
if (count != SPLAT_KMEM_ALLOC_COUNT)
rc = -ENOMEM;
size *= 2;
}
return rc;
}
static int
splat_kmem_test2(struct file *file, void *arg)
{
void *ptr[SPLAT_KMEM_ALLOC_COUNT];
int size = PAGE_SIZE;
int i, j, count, rc = 0;
Refactor generic memory allocation interfaces This patch achieves the following goals: 1. It replaces the preprocessor kmem flag to gfp flag mapping with proper translation logic. This eliminates the potential for surprises that were previously possible where kmem flags were mapped to gfp flags. 2. It maps vmem_alloc() allocations to kmem_alloc() for allocations sized less than or equal to the newly-added spl_kmem_alloc_max parameter. This ensures that small allocations will not contend on a single global lock, large allocations can still be handled, and potentially limited virtual address space will not be squandered. This behavior is entirely different than under Illumos due to different memory management strategies employed by the respective kernels. However, this functionally provides the semantics required. 3. The --disable-debug-kmem, --enable-debug-kmem (default), and --enable-debug-kmem-tracking allocators have been unified in to a single spl_kmem_alloc_impl() allocation function. This was done to simplify the code and make it more maintainable. 4. Improve portability by exposing an implementation of the memory allocations functions that can be safely used in the same way they are used on Illumos. Specifically, callers may safely use KM_SLEEP in contexts which perform filesystem IO. This allows us to eliminate an entire class of Linux specific changes which were previously required to avoid deadlocking the system. This change will be largely transparent to existing callers but there are a few caveats: 1. Because the headers were refactored and extraneous includes removed callers may find they need to explicitly add additional #includes. In particular, kmem_cache.h must now be explicitly includes to access the SPL's kmem cache implementation. This behavior is different from Illumos but it was done to avoid always masking the Linux slab functions when kmem.h is included. 2. Callers, like Lustre, which made assumptions about the definitions of KM_SLEEP, KM_NOSLEEP, and KM_PUSHPAGE will need to be updated. Other callers such as ZFS which did not will not require changes. 3. KM_PUSHPAGE is no longer overloaded to imply GFP_NOIO. It retains its original meaning of allowing allocations to access reserved memory. KM_PUSHPAGE callers can be converted back to KM_SLEEP. 4. The KM_NODEBUG flags has been retired and the default warning threshold increased to 32k. 5. The kmem_virt() functions has been removed. For callers which need to distinguish between a physical and virtual address use is_vmalloc_addr(). Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-08 20:37:14 +00:00
while ((!rc) && (size <= spl_kmem_alloc_warn)) {
count = 0;
for (i = 0; i < SPLAT_KMEM_ALLOC_COUNT; i++) {
Refactor generic memory allocation interfaces This patch achieves the following goals: 1. It replaces the preprocessor kmem flag to gfp flag mapping with proper translation logic. This eliminates the potential for surprises that were previously possible where kmem flags were mapped to gfp flags. 2. It maps vmem_alloc() allocations to kmem_alloc() for allocations sized less than or equal to the newly-added spl_kmem_alloc_max parameter. This ensures that small allocations will not contend on a single global lock, large allocations can still be handled, and potentially limited virtual address space will not be squandered. This behavior is entirely different than under Illumos due to different memory management strategies employed by the respective kernels. However, this functionally provides the semantics required. 3. The --disable-debug-kmem, --enable-debug-kmem (default), and --enable-debug-kmem-tracking allocators have been unified in to a single spl_kmem_alloc_impl() allocation function. This was done to simplify the code and make it more maintainable. 4. Improve portability by exposing an implementation of the memory allocations functions that can be safely used in the same way they are used on Illumos. Specifically, callers may safely use KM_SLEEP in contexts which perform filesystem IO. This allows us to eliminate an entire class of Linux specific changes which were previously required to avoid deadlocking the system. This change will be largely transparent to existing callers but there are a few caveats: 1. Because the headers were refactored and extraneous includes removed callers may find they need to explicitly add additional #includes. In particular, kmem_cache.h must now be explicitly includes to access the SPL's kmem cache implementation. This behavior is different from Illumos but it was done to avoid always masking the Linux slab functions when kmem.h is included. 2. Callers, like Lustre, which made assumptions about the definitions of KM_SLEEP, KM_NOSLEEP, and KM_PUSHPAGE will need to be updated. Other callers such as ZFS which did not will not require changes. 3. KM_PUSHPAGE is no longer overloaded to imply GFP_NOIO. It retains its original meaning of allowing allocations to access reserved memory. KM_PUSHPAGE callers can be converted back to KM_SLEEP. 4. The KM_NODEBUG flags has been retired and the default warning threshold increased to 32k. 5. The kmem_virt() functions has been removed. For callers which need to distinguish between a physical and virtual address use is_vmalloc_addr(). Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-08 20:37:14 +00:00
ptr[i] = kmem_zalloc(size, KM_SLEEP);
if (ptr[i])
count++;
}
/* Ensure buffer has been zero filled */
for (i = 0; i < SPLAT_KMEM_ALLOC_COUNT; i++) {
for (j = 0; j < size; j++) {
if (((char *)ptr[i])[j] != '\0') {
splat_vprint(file,SPLAT_KMEM_TEST2_NAME,
"%d-byte allocation was "
"not zeroed\n", size);
rc = -EFAULT;
}
}
}
for (i = 0; i < SPLAT_KMEM_ALLOC_COUNT; i++)
if (ptr[i])
kmem_free(ptr[i], size);
splat_vprint(file, SPLAT_KMEM_TEST2_NAME,
"%d byte allocations, %d/%d successful\n",
size, count, SPLAT_KMEM_ALLOC_COUNT);
if (count != SPLAT_KMEM_ALLOC_COUNT)
rc = -ENOMEM;
size *= 2;
}
return rc;
}
static int
splat_kmem_test3(struct file *file, void *arg)
{
void *ptr[SPLAT_VMEM_ALLOC_COUNT];
int size = PAGE_SIZE;
int i, count, rc = 0;
Refactor generic memory allocation interfaces This patch achieves the following goals: 1. It replaces the preprocessor kmem flag to gfp flag mapping with proper translation logic. This eliminates the potential for surprises that were previously possible where kmem flags were mapped to gfp flags. 2. It maps vmem_alloc() allocations to kmem_alloc() for allocations sized less than or equal to the newly-added spl_kmem_alloc_max parameter. This ensures that small allocations will not contend on a single global lock, large allocations can still be handled, and potentially limited virtual address space will not be squandered. This behavior is entirely different than under Illumos due to different memory management strategies employed by the respective kernels. However, this functionally provides the semantics required. 3. The --disable-debug-kmem, --enable-debug-kmem (default), and --enable-debug-kmem-tracking allocators have been unified in to a single spl_kmem_alloc_impl() allocation function. This was done to simplify the code and make it more maintainable. 4. Improve portability by exposing an implementation of the memory allocations functions that can be safely used in the same way they are used on Illumos. Specifically, callers may safely use KM_SLEEP in contexts which perform filesystem IO. This allows us to eliminate an entire class of Linux specific changes which were previously required to avoid deadlocking the system. This change will be largely transparent to existing callers but there are a few caveats: 1. Because the headers were refactored and extraneous includes removed callers may find they need to explicitly add additional #includes. In particular, kmem_cache.h must now be explicitly includes to access the SPL's kmem cache implementation. This behavior is different from Illumos but it was done to avoid always masking the Linux slab functions when kmem.h is included. 2. Callers, like Lustre, which made assumptions about the definitions of KM_SLEEP, KM_NOSLEEP, and KM_PUSHPAGE will need to be updated. Other callers such as ZFS which did not will not require changes. 3. KM_PUSHPAGE is no longer overloaded to imply GFP_NOIO. It retains its original meaning of allowing allocations to access reserved memory. KM_PUSHPAGE callers can be converted back to KM_SLEEP. 4. The KM_NODEBUG flags has been retired and the default warning threshold increased to 32k. 5. The kmem_virt() functions has been removed. For callers which need to distinguish between a physical and virtual address use is_vmalloc_addr(). Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-08 20:37:14 +00:00
/*
* Test up to 4x the maximum kmem_alloc() size to ensure both
* the kmem_alloc() and vmem_alloc() call paths are used.
*/
while ((!rc) && (size <= (4 * spl_kmem_alloc_max))) {
count = 0;
for (i = 0; i < SPLAT_VMEM_ALLOC_COUNT; i++) {
ptr[i] = vmem_alloc(size, KM_SLEEP);
if (ptr[i])
count++;
}
for (i = 0; i < SPLAT_VMEM_ALLOC_COUNT; i++)
if (ptr[i])
vmem_free(ptr[i], size);
splat_vprint(file, SPLAT_KMEM_TEST3_NAME,
"%d byte allocations, %d/%d successful\n",
size, count, SPLAT_VMEM_ALLOC_COUNT);
if (count != SPLAT_VMEM_ALLOC_COUNT)
rc = -ENOMEM;
size *= 2;
}
return rc;
}
static int
splat_kmem_test4(struct file *file, void *arg)
{
void *ptr[SPLAT_VMEM_ALLOC_COUNT];
int size = PAGE_SIZE;
int i, j, count, rc = 0;
Refactor generic memory allocation interfaces This patch achieves the following goals: 1. It replaces the preprocessor kmem flag to gfp flag mapping with proper translation logic. This eliminates the potential for surprises that were previously possible where kmem flags were mapped to gfp flags. 2. It maps vmem_alloc() allocations to kmem_alloc() for allocations sized less than or equal to the newly-added spl_kmem_alloc_max parameter. This ensures that small allocations will not contend on a single global lock, large allocations can still be handled, and potentially limited virtual address space will not be squandered. This behavior is entirely different than under Illumos due to different memory management strategies employed by the respective kernels. However, this functionally provides the semantics required. 3. The --disable-debug-kmem, --enable-debug-kmem (default), and --enable-debug-kmem-tracking allocators have been unified in to a single spl_kmem_alloc_impl() allocation function. This was done to simplify the code and make it more maintainable. 4. Improve portability by exposing an implementation of the memory allocations functions that can be safely used in the same way they are used on Illumos. Specifically, callers may safely use KM_SLEEP in contexts which perform filesystem IO. This allows us to eliminate an entire class of Linux specific changes which were previously required to avoid deadlocking the system. This change will be largely transparent to existing callers but there are a few caveats: 1. Because the headers were refactored and extraneous includes removed callers may find they need to explicitly add additional #includes. In particular, kmem_cache.h must now be explicitly includes to access the SPL's kmem cache implementation. This behavior is different from Illumos but it was done to avoid always masking the Linux slab functions when kmem.h is included. 2. Callers, like Lustre, which made assumptions about the definitions of KM_SLEEP, KM_NOSLEEP, and KM_PUSHPAGE will need to be updated. Other callers such as ZFS which did not will not require changes. 3. KM_PUSHPAGE is no longer overloaded to imply GFP_NOIO. It retains its original meaning of allowing allocations to access reserved memory. KM_PUSHPAGE callers can be converted back to KM_SLEEP. 4. The KM_NODEBUG flags has been retired and the default warning threshold increased to 32k. 5. The kmem_virt() functions has been removed. For callers which need to distinguish between a physical and virtual address use is_vmalloc_addr(). Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-08 20:37:14 +00:00
/*
* Test up to 4x the maximum kmem_zalloc() size to ensure both
* the kmem_zalloc() and vmem_zalloc() call paths are used.
*/
while ((!rc) && (size <= (4 * spl_kmem_alloc_max))) {
count = 0;
for (i = 0; i < SPLAT_VMEM_ALLOC_COUNT; i++) {
ptr[i] = vmem_zalloc(size, KM_SLEEP);
if (ptr[i])
count++;
}
/* Ensure buffer has been zero filled */
for (i = 0; i < SPLAT_VMEM_ALLOC_COUNT; i++) {
for (j = 0; j < size; j++) {
if (((char *)ptr[i])[j] != '\0') {
splat_vprint(file, SPLAT_KMEM_TEST4_NAME,
"%d-byte allocation was "
"not zeroed\n", size);
rc = -EFAULT;
}
}
}
for (i = 0; i < SPLAT_VMEM_ALLOC_COUNT; i++)
if (ptr[i])
vmem_free(ptr[i], size);
splat_vprint(file, SPLAT_KMEM_TEST4_NAME,
"%d byte allocations, %d/%d successful\n",
size, count, SPLAT_VMEM_ALLOC_COUNT);
if (count != SPLAT_VMEM_ALLOC_COUNT)
rc = -ENOMEM;
size *= 2;
}
return rc;
}
#define SPLAT_KMEM_TEST_MAGIC 0x004488CCUL
#define SPLAT_KMEM_CACHE_NAME "kmem_test"
#define SPLAT_KMEM_OBJ_COUNT 1024
#define SPLAT_KMEM_OBJ_RECLAIM 32 /* objects */
#define SPLAT_KMEM_THREADS 32
#define KCP_FLAG_READY 0x01
typedef struct kmem_cache_data {
unsigned long kcd_magic;
struct list_head kcd_node;
int kcd_flag;
char kcd_buf[0];
} kmem_cache_data_t;
typedef struct kmem_cache_thread {
spinlock_t kct_lock;
int kct_id;
struct list_head kct_list;
} kmem_cache_thread_t;
typedef struct kmem_cache_priv {
unsigned long kcp_magic;
struct file *kcp_file;
kmem_cache_t *kcp_cache;
spinlock_t kcp_lock;
spl_wait_queue_head_t kcp_ctl_waitq;
spl_wait_queue_head_t kcp_thr_waitq;
int kcp_flags;
int kcp_kct_count;
kmem_cache_thread_t *kcp_kct[SPLAT_KMEM_THREADS];
int kcp_size;
int kcp_align;
int kcp_count;
int kcp_alloc;
int kcp_rc;
} kmem_cache_priv_t;
static kmem_cache_priv_t *
splat_kmem_cache_test_kcp_alloc(struct file *file, char *name,
int size, int align, int alloc)
{
kmem_cache_priv_t *kcp;
kcp = kmem_zalloc(sizeof(kmem_cache_priv_t), KM_SLEEP);
if (!kcp)
return NULL;
kcp->kcp_magic = SPLAT_KMEM_TEST_MAGIC;
kcp->kcp_file = file;
kcp->kcp_cache = NULL;
spin_lock_init(&kcp->kcp_lock);
init_waitqueue_head(&kcp->kcp_ctl_waitq);
init_waitqueue_head(&kcp->kcp_thr_waitq);
kcp->kcp_flags = 0;
kcp->kcp_kct_count = -1;
kcp->kcp_size = size;
kcp->kcp_align = align;
kcp->kcp_count = 0;
kcp->kcp_alloc = alloc;
kcp->kcp_rc = 0;
return kcp;
}
static void
splat_kmem_cache_test_kcp_free(kmem_cache_priv_t *kcp)
{
kmem_free(kcp, sizeof(kmem_cache_priv_t));
}
static kmem_cache_thread_t *
splat_kmem_cache_test_kct_alloc(kmem_cache_priv_t *kcp, int id)
{
kmem_cache_thread_t *kct;
ASSERT3S(id, <, SPLAT_KMEM_THREADS);
ASSERT(kcp->kcp_kct[id] == NULL);
kct = kmem_zalloc(sizeof(kmem_cache_thread_t), KM_SLEEP);
if (!kct)
return NULL;
spin_lock_init(&kct->kct_lock);
kct->kct_id = id;
INIT_LIST_HEAD(&kct->kct_list);
spin_lock(&kcp->kcp_lock);
kcp->kcp_kct[id] = kct;
spin_unlock(&kcp->kcp_lock);
return kct;
}
static void
splat_kmem_cache_test_kct_free(kmem_cache_priv_t *kcp,
kmem_cache_thread_t *kct)
{
spin_lock(&kcp->kcp_lock);
kcp->kcp_kct[kct->kct_id] = NULL;
spin_unlock(&kcp->kcp_lock);
kmem_free(kct, sizeof(kmem_cache_thread_t));
}
static void
splat_kmem_cache_test_kcd_free(kmem_cache_priv_t *kcp,
kmem_cache_thread_t *kct)
{
kmem_cache_data_t *kcd;
spin_lock(&kct->kct_lock);
while (!list_empty(&kct->kct_list)) {
kcd = list_entry(kct->kct_list.next,
kmem_cache_data_t, kcd_node);
list_del(&kcd->kcd_node);
spin_unlock(&kct->kct_lock);
kmem_cache_free(kcp->kcp_cache, kcd);
spin_lock(&kct->kct_lock);
}
spin_unlock(&kct->kct_lock);
}
static int
splat_kmem_cache_test_kcd_alloc(kmem_cache_priv_t *kcp,
kmem_cache_thread_t *kct, int count)
{
kmem_cache_data_t *kcd;
int i;
for (i = 0; i < count; i++) {
kcd = kmem_cache_alloc(kcp->kcp_cache, KM_SLEEP);
if (kcd == NULL) {
splat_kmem_cache_test_kcd_free(kcp, kct);
return -ENOMEM;
}
spin_lock(&kct->kct_lock);
list_add_tail(&kcd->kcd_node, &kct->kct_list);
spin_unlock(&kct->kct_lock);
}
return 0;
}
static void
splat_kmem_cache_test_debug(struct file *file, char *name,
kmem_cache_priv_t *kcp)
{
int j;
Add KMC_SLAB cache type For small objects the Linux slab allocator has several advantages over its counterpart in the SPL. These include: 1) It is more memory-efficient and packs objects more tightly. 2) It is continually tuned to maximize performance. Therefore it makes sense to layer the SPLs slab allocator on top of the Linux slab allocator. This allows us to leverage the advantages above while preserving the Illumos semantics we depend on. However, there are some things we need to be careful of: 1) The Linux slab allocator was never designed to work well with large objects. Because the SPL slab must still handle this use case a cut off limit was added to transition from Linux slab backed objects to kmem or vmem backed slabs. spl_kmem_cache_slab_limit - Objects less than or equal to this size in bytes will be backed by the Linux slab. By default this value is zero which disables the Linux slab functionality. Reasonable values for this cut off limit are in the range of 4096-16386 bytes. spl_kmem_cache_kmem_limit - Objects less than or equal to this size in bytes will be backed by a kmem slab. Objects over this size will be vmem backed instead. This value defaults to 1/8 a page, or 512 bytes on an x86_64 architecture. 2) Be aware that using the Linux slab may inadvertently introduce new deadlocks. Care has been taken previously to ensure that all allocations which occur in the write path use GFP_NOIO. However, there may be internal allocations performed in the Linux slab which do not honor these flags. If this is the case a deadlock may occur. The path forward is definitely to start relying on the Linux slab. But for that to happen we need to start building confidence that there aren't any unexpected surprises lurking for us. And ideally need to move completely away from using the SPLs slab for large memory allocations. This patch is a first step. NOTES: 1) The KMC_NOMAGAZINE flag was leveraged to support the Linux slab backed caches but it is not supported for kmem/vmem backed caches. 2) Regardless of the spl_kmem_cache_*_limit settings a cache may be explicitly set to a given type by passed the KMC_KMEM, KMC_VMEM, or KMC_SLAB flags during cache creation. 3) The constructors, destructors, and reclaim callbacks are all functional and will be called regardless of the cache type. 4) KMC_SLAB caches will not appear in /proc/spl/kmem/slab due to the issues involved in presenting correct object accounting. Instead they will appear in /proc/slabinfo under the same names. 5) Several kmem SPLAT tests needed to be fixed because they relied incorrectly on internal kmem slab accounting. With the updated test cases all the SPLAT tests pass as expected. 6) An autoconf test was added to ensure that the __GFP_COMP flag was correctly added to the default flags used when allocating a slab. This is required to ensure all pages in higher order slabs are properly refcounted, see ae16ed9. 7) When using the SLUB allocator there is no need to attempt to set the __GFP_COMP flag. This has been the default behavior for the SLUB since Linux 2.6.25. 8) When using the SLUB it may be desirable to set the slub_nomerge kernel parameter to prevent caches from being merged. Original-patch-by: DHE <git@dehacked.net> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Prakash Surya <surya1@llnl.gov> Signed-off-by: Tim Chase <tim@chase2k.com> Signed-off-by: DHE <git@dehacked.net> Signed-off-by: Chunwei Chen <tuxoko@gmail.com> Closes #356
2013-12-08 22:01:45 +00:00
splat_vprint(file, name, "%s cache objects %d",
kcp->kcp_cache->skc_name, kcp->kcp_count);
if (kcp->kcp_cache->skc_flags & (KMC_KMEM | KMC_VMEM)) {
splat_vprint(file, name, ", slabs %u/%u objs %u/%u",
(unsigned)kcp->kcp_cache->skc_slab_alloc,
(unsigned)kcp->kcp_cache->skc_slab_total,
(unsigned)kcp->kcp_cache->skc_obj_alloc,
(unsigned)kcp->kcp_cache->skc_obj_total);
Add KMC_SLAB cache type For small objects the Linux slab allocator has several advantages over its counterpart in the SPL. These include: 1) It is more memory-efficient and packs objects more tightly. 2) It is continually tuned to maximize performance. Therefore it makes sense to layer the SPLs slab allocator on top of the Linux slab allocator. This allows us to leverage the advantages above while preserving the Illumos semantics we depend on. However, there are some things we need to be careful of: 1) The Linux slab allocator was never designed to work well with large objects. Because the SPL slab must still handle this use case a cut off limit was added to transition from Linux slab backed objects to kmem or vmem backed slabs. spl_kmem_cache_slab_limit - Objects less than or equal to this size in bytes will be backed by the Linux slab. By default this value is zero which disables the Linux slab functionality. Reasonable values for this cut off limit are in the range of 4096-16386 bytes. spl_kmem_cache_kmem_limit - Objects less than or equal to this size in bytes will be backed by a kmem slab. Objects over this size will be vmem backed instead. This value defaults to 1/8 a page, or 512 bytes on an x86_64 architecture. 2) Be aware that using the Linux slab may inadvertently introduce new deadlocks. Care has been taken previously to ensure that all allocations which occur in the write path use GFP_NOIO. However, there may be internal allocations performed in the Linux slab which do not honor these flags. If this is the case a deadlock may occur. The path forward is definitely to start relying on the Linux slab. But for that to happen we need to start building confidence that there aren't any unexpected surprises lurking for us. And ideally need to move completely away from using the SPLs slab for large memory allocations. This patch is a first step. NOTES: 1) The KMC_NOMAGAZINE flag was leveraged to support the Linux slab backed caches but it is not supported for kmem/vmem backed caches. 2) Regardless of the spl_kmem_cache_*_limit settings a cache may be explicitly set to a given type by passed the KMC_KMEM, KMC_VMEM, or KMC_SLAB flags during cache creation. 3) The constructors, destructors, and reclaim callbacks are all functional and will be called regardless of the cache type. 4) KMC_SLAB caches will not appear in /proc/spl/kmem/slab due to the issues involved in presenting correct object accounting. Instead they will appear in /proc/slabinfo under the same names. 5) Several kmem SPLAT tests needed to be fixed because they relied incorrectly on internal kmem slab accounting. With the updated test cases all the SPLAT tests pass as expected. 6) An autoconf test was added to ensure that the __GFP_COMP flag was correctly added to the default flags used when allocating a slab. This is required to ensure all pages in higher order slabs are properly refcounted, see ae16ed9. 7) When using the SLUB allocator there is no need to attempt to set the __GFP_COMP flag. This has been the default behavior for the SLUB since Linux 2.6.25. 8) When using the SLUB it may be desirable to set the slub_nomerge kernel parameter to prevent caches from being merged. Original-patch-by: DHE <git@dehacked.net> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Prakash Surya <surya1@llnl.gov> Signed-off-by: Tim Chase <tim@chase2k.com> Signed-off-by: DHE <git@dehacked.net> Signed-off-by: Chunwei Chen <tuxoko@gmail.com> Closes #356
2013-12-08 22:01:45 +00:00
if (!(kcp->kcp_cache->skc_flags & KMC_NOMAGAZINE)) {
splat_vprint(file, name, "%s", "mags");
for_each_online_cpu(j)
splat_print(file, "%u/%u ",
kcp->kcp_cache->skc_mag[j]->skm_avail,
kcp->kcp_cache->skc_mag[j]->skm_size);
}
}
splat_print(file, "%s\n", "");
}
static int
splat_kmem_cache_test_constructor(void *ptr, void *priv, int flags)
{
kmem_cache_priv_t *kcp = (kmem_cache_priv_t *)priv;
kmem_cache_data_t *kcd = (kmem_cache_data_t *)ptr;
if (kcd && kcp) {
kcd->kcd_magic = kcp->kcp_magic;
INIT_LIST_HEAD(&kcd->kcd_node);
kcd->kcd_flag = 1;
memset(kcd->kcd_buf, 0xaa, kcp->kcp_size - (sizeof *kcd));
kcp->kcp_count++;
}
return 0;
}
static void
splat_kmem_cache_test_destructor(void *ptr, void *priv)
{
kmem_cache_priv_t *kcp = (kmem_cache_priv_t *)priv;
kmem_cache_data_t *kcd = (kmem_cache_data_t *)ptr;
if (kcd && kcp) {
kcd->kcd_magic = 0;
kcd->kcd_flag = 0;
memset(kcd->kcd_buf, 0xbb, kcp->kcp_size - (sizeof *kcd));
kcp->kcp_count--;
}
return;
}
/*
* Generic reclaim function which assumes that all objects may
* be reclaimed at any time. We free a small percentage of the
* objects linked off the kcp or kct[] every time we are called.
*/
static void
splat_kmem_cache_test_reclaim(void *priv)
{
kmem_cache_priv_t *kcp = (kmem_cache_priv_t *)priv;
kmem_cache_thread_t *kct;
kmem_cache_data_t *kcd;
LIST_HEAD(reclaim);
int i, count;
ASSERT(kcp->kcp_magic == SPLAT_KMEM_TEST_MAGIC);
/* For each kct thread reclaim some objects */
spin_lock(&kcp->kcp_lock);
for (i = 0; i < SPLAT_KMEM_THREADS; i++) {
kct = kcp->kcp_kct[i];
if (!kct)
continue;
spin_unlock(&kcp->kcp_lock);
spin_lock(&kct->kct_lock);
count = SPLAT_KMEM_OBJ_RECLAIM;
while (count > 0 && !list_empty(&kct->kct_list)) {
kcd = list_entry(kct->kct_list.next,
kmem_cache_data_t, kcd_node);
list_del(&kcd->kcd_node);
list_add(&kcd->kcd_node, &reclaim);
count--;
}
spin_unlock(&kct->kct_lock);
spin_lock(&kcp->kcp_lock);
}
spin_unlock(&kcp->kcp_lock);
/* Freed outside the spin lock */
while (!list_empty(&reclaim)) {
kcd = list_entry(reclaim.next, kmem_cache_data_t, kcd_node);
list_del(&kcd->kcd_node);
kmem_cache_free(kcp->kcp_cache, kcd);
}
return;
}
static int
splat_kmem_cache_test_threads(kmem_cache_priv_t *kcp, int threads)
{
int rc;
spin_lock(&kcp->kcp_lock);
rc = (kcp->kcp_kct_count == threads);
spin_unlock(&kcp->kcp_lock);
return rc;
}
static int
splat_kmem_cache_test_flags(kmem_cache_priv_t *kcp, int flags)
{
int rc;
spin_lock(&kcp->kcp_lock);
rc = (kcp->kcp_flags & flags);
spin_unlock(&kcp->kcp_lock);
return rc;
}
static void
splat_kmem_cache_test_thread(void *arg)
{
kmem_cache_priv_t *kcp = (kmem_cache_priv_t *)arg;
kmem_cache_thread_t *kct;
int rc = 0, id;
ASSERT(kcp->kcp_magic == SPLAT_KMEM_TEST_MAGIC);
/* Assign thread ids */
spin_lock(&kcp->kcp_lock);
if (kcp->kcp_kct_count == -1)
kcp->kcp_kct_count = 0;
id = kcp->kcp_kct_count;
kcp->kcp_kct_count++;
spin_unlock(&kcp->kcp_lock);
kct = splat_kmem_cache_test_kct_alloc(kcp, id);
if (!kct) {
rc = -ENOMEM;
goto out;
}
/* Wait for all threads to have started and report they are ready */
if (kcp->kcp_kct_count == SPLAT_KMEM_THREADS)
wake_up(&kcp->kcp_ctl_waitq);
wait_event(kcp->kcp_thr_waitq,
splat_kmem_cache_test_flags(kcp, KCP_FLAG_READY));
/* Create and destroy objects */
rc = splat_kmem_cache_test_kcd_alloc(kcp, kct, kcp->kcp_alloc);
splat_kmem_cache_test_kcd_free(kcp, kct);
out:
if (kct)
splat_kmem_cache_test_kct_free(kcp, kct);
spin_lock(&kcp->kcp_lock);
if (!kcp->kcp_rc)
kcp->kcp_rc = rc;
if ((--kcp->kcp_kct_count) == 0)
wake_up(&kcp->kcp_ctl_waitq);
spin_unlock(&kcp->kcp_lock);
thread_exit();
}
static int
splat_kmem_cache_test(struct file *file, void *arg, char *name,
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
int size, int align, int flags)
{
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
kmem_cache_priv_t *kcp = NULL;
kmem_cache_data_t **kcd = NULL;
int i, rc = 0, objs = 0;
/* Limit size for low memory machines (1/128 of memory) */
size = MIN(size, (physmem * PAGE_SIZE) >> 7);
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
splat_vprint(file, name,
"Testing size=%d, align=%d, flags=0x%04x\n",
size, align, flags);
kcp = splat_kmem_cache_test_kcp_alloc(file, name, size, align, 0);
if (!kcp) {
splat_vprint(file, name, "Unable to create '%s'\n", "kcp");
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
return (-ENOMEM);
}
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
kcp->kcp_cache = kmem_cache_create(SPLAT_KMEM_CACHE_NAME,
kcp->kcp_size, kcp->kcp_align,
splat_kmem_cache_test_constructor,
splat_kmem_cache_test_destructor,
NULL, kcp, NULL, flags);
if (kcp->kcp_cache == NULL) {
splat_vprint(file, name, "Unable to create "
"name='%s', size=%d, align=%d, flags=0x%x\n",
SPLAT_KMEM_CACHE_NAME, size, align, flags);
rc = -ENOMEM;
goto out_free;
}
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
/*
* Allocate several slabs worth of objects to verify functionality.
* However, on 32-bit systems with limited address space constrain
* it to a single slab for the purposes of this test.
*/
#ifdef _LP64
objs = kcp->kcp_cache->skc_slab_objs * 4;
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
#else
objs = 1;
#endif
kcd = kmem_zalloc(sizeof (kmem_cache_data_t *) * objs, KM_SLEEP);
if (kcd == NULL) {
splat_vprint(file, name, "Unable to allocate pointers "
"for %d objects\n", objs);
rc = -ENOMEM;
goto out_free;
}
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
for (i = 0; i < objs; i++) {
kcd[i] = kmem_cache_alloc(kcp->kcp_cache, KM_SLEEP);
if (kcd[i] == NULL) {
splat_vprint(file, name, "Unable to allocate "
"from '%s'\n", SPLAT_KMEM_CACHE_NAME);
rc = -EINVAL;
goto out_free;
}
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
if (!kcd[i]->kcd_flag) {
splat_vprint(file, name, "Failed to run constructor "
"for '%s'\n", SPLAT_KMEM_CACHE_NAME);
rc = -EINVAL;
goto out_free;
}
if (kcd[i]->kcd_magic != kcp->kcp_magic) {
splat_vprint(file, name,
"Failed to pass private data to constructor "
"for '%s'\n", SPLAT_KMEM_CACHE_NAME);
rc = -EINVAL;
goto out_free;
}
}
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
for (i = 0; i < objs; i++) {
kmem_cache_free(kcp->kcp_cache, kcd[i]);
/* Destructors are run for every kmem_cache_free() */
if (kcd[i]->kcd_flag) {
splat_vprint(file, name,
"Failed to run destructor for '%s'\n",
SPLAT_KMEM_CACHE_NAME);
rc = -EINVAL;
goto out_free;
}
}
if (kcp->kcp_count) {
splat_vprint(file, name,
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
"Failed to run destructor on all slab objects for '%s'\n",
SPLAT_KMEM_CACHE_NAME);
rc = -EINVAL;
}
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
kmem_free(kcd, sizeof (kmem_cache_data_t *) * objs);
kmem_cache_destroy(kcp->kcp_cache);
splat_kmem_cache_test_kcp_free(kcp);
splat_vprint(file, name,
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
"Success ran alloc'd/free'd %d objects of size %d\n",
objs, size);
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
return (rc);
out_free:
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
if (kcd) {
for (i = 0; i < objs; i++) {
if (kcd[i] != NULL)
kmem_cache_free(kcp->kcp_cache, kcd[i]);
}
kmem_free(kcd, sizeof (kmem_cache_data_t *) * objs);
}
if (kcp->kcp_cache)
kmem_cache_destroy(kcp->kcp_cache);
splat_kmem_cache_test_kcp_free(kcp);
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
return (rc);
}
static int
splat_kmem_cache_thread_test(struct file *file, void *arg, char *name,
2009-01-31 05:24:42 +00:00
int size, int alloc, int max_time)
{
kmem_cache_priv_t *kcp;
kthread_t *thr;
struct timespec start, stop, delta;
char cache_name[32];
int i, rc = 0;
kcp = splat_kmem_cache_test_kcp_alloc(file, name, size, 0, alloc);
if (!kcp) {
splat_vprint(file, name, "Unable to create '%s'\n", "kcp");
return -ENOMEM;
}
(void)snprintf(cache_name, 32, "%s-%d-%d",
SPLAT_KMEM_CACHE_NAME, size, alloc);
kcp->kcp_cache =
kmem_cache_create(cache_name, kcp->kcp_size, 0,
splat_kmem_cache_test_constructor,
splat_kmem_cache_test_destructor,
splat_kmem_cache_test_reclaim,
kcp, NULL, 0);
if (!kcp->kcp_cache) {
splat_vprint(file, name, "Unable to create '%s'\n", cache_name);
rc = -ENOMEM;
goto out_kcp;
}
getnstimeofday(&start);
for (i = 0; i < SPLAT_KMEM_THREADS; i++) {
thr = thread_create(NULL, 0,
splat_kmem_cache_test_thread,
kcp, 0, &p0, TS_RUN, defclsyspri);
if (thr == NULL) {
rc = -ESRCH;
goto out_cache;
}
}
/* Sleep until all threads have started, then set the ready
* flag and wake them all up for maximum concurrency. */
wait_event(kcp->kcp_ctl_waitq,
splat_kmem_cache_test_threads(kcp, SPLAT_KMEM_THREADS));
spin_lock(&kcp->kcp_lock);
kcp->kcp_flags |= KCP_FLAG_READY;
spin_unlock(&kcp->kcp_lock);
wake_up_all(&kcp->kcp_thr_waitq);
/* Sleep until all thread have finished */
wait_event(kcp->kcp_ctl_waitq, splat_kmem_cache_test_threads(kcp, 0));
getnstimeofday(&stop);
delta = timespec_sub(stop, start);
splat_vprint(file, name,
"%-22s %2ld.%09ld\t"
"%lu/%lu/%lu\t%lu/%lu/%lu\n",
kcp->kcp_cache->skc_name,
delta.tv_sec, delta.tv_nsec,
(unsigned long)kcp->kcp_cache->skc_slab_total,
(unsigned long)kcp->kcp_cache->skc_slab_max,
(unsigned long)(kcp->kcp_alloc *
SPLAT_KMEM_THREADS /
SPL_KMEM_CACHE_OBJ_PER_SLAB),
(unsigned long)kcp->kcp_cache->skc_obj_total,
(unsigned long)kcp->kcp_cache->skc_obj_max,
(unsigned long)(kcp->kcp_alloc *
SPLAT_KMEM_THREADS));
2009-01-31 05:24:42 +00:00
if (delta.tv_sec >= max_time)
rc = -ETIME;
if (!rc && kcp->kcp_rc)
rc = kcp->kcp_rc;
out_cache:
kmem_cache_destroy(kcp->kcp_cache);
out_kcp:
splat_kmem_cache_test_kcp_free(kcp);
return rc;
}
/* Validate small object cache behavior for dynamic/kmem/vmem caches */
static int
splat_kmem_test5(struct file *file, void *arg)
{
char *name = SPLAT_KMEM_TEST5_NAME;
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
int i, rc = 0;
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
/* Randomly pick small object sizes and alignments. */
for (i = 0; i < 100; i++) {
int size, align, flags = 0;
uint32_t rnd;
/* Evenly distribute tests over all value cache types */
get_random_bytes((void *)&rnd, sizeof (uint32_t));
switch (rnd & 0x03) {
default:
case 0x00:
flags = 0;
break;
case 0x01:
flags = KMC_KMEM;
break;
case 0x02:
flags = KMC_VMEM;
break;
case 0x03:
flags = KMC_SLAB;
break;
}
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
/* The following flags are set with a 1/10 chance */
flags |= ((((rnd >> 8) % 10) == 0) ? KMC_OFFSLAB : 0);
flags |= ((((rnd >> 16) % 10) == 0) ? KMC_NOEMERGENCY : 0);
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
/* 32b - PAGE_SIZE */
get_random_bytes((void *)&rnd, sizeof (uint32_t));
size = MAX(rnd % (PAGE_SIZE + 1), 32);
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
/* 2^N where (3 <= N <= PAGE_SHIFT) */
get_random_bytes((void *)&rnd, sizeof (uint32_t));
align = (1 << MAX(3, rnd % (PAGE_SHIFT + 1)));
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
rc = splat_kmem_cache_test(file, arg, name, size, align, flags);
if (rc)
return (rc);
}
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
return (rc);
}
/*
* Validate large object cache behavior for dynamic/kmem/vmem caches
*/
static int
splat_kmem_test6(struct file *file, void *arg)
{
char *name = SPLAT_KMEM_TEST6_NAME;
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
int i, max_size, rc = 0;
/* Randomly pick large object sizes and alignments. */
for (i = 0; i < 100; i++) {
int size, align, flags = 0;
uint32_t rnd;
/* Evenly distribute tests over all value cache types */
get_random_bytes((void *)&rnd, sizeof (uint32_t));
switch (rnd & 0x03) {
default:
case 0x00:
flags = 0;
max_size = (SPL_KMEM_CACHE_MAX_SIZE * 1024 * 1024) / 2;
break;
case 0x01:
flags = KMC_KMEM;
max_size = (SPL_MAX_ORDER_NR_PAGES - 2) * PAGE_SIZE;
break;
case 0x02:
flags = KMC_VMEM;
max_size = (SPL_KMEM_CACHE_MAX_SIZE * 1024 * 1024) / 2;
break;
case 0x03:
flags = KMC_SLAB;
max_size = SPL_MAX_KMEM_ORDER_NR_PAGES * PAGE_SIZE;
break;
}
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
/* The following flags are set with a 1/10 chance */
flags |= ((((rnd >> 8) % 10) == 0) ? KMC_OFFSLAB : 0);
flags |= ((((rnd >> 16) % 10) == 0) ? KMC_NOEMERGENCY : 0);
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
/* PAGE_SIZE - max_size */
get_random_bytes((void *)&rnd, sizeof (uint32_t));
size = MAX(rnd % (max_size + 1), PAGE_SIZE),
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
/* 2^N where (3 <= N <= PAGE_SHIFT) */
get_random_bytes((void *)&rnd, sizeof (uint32_t));
align = (1 << MAX(3, rnd % (PAGE_SHIFT + 1)));
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
rc = splat_kmem_cache_test(file, arg, name, size, align, flags);
if (rc)
return (rc);
}
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
return (rc);
}
/*
* Validate object alignment cache behavior for caches
*/
static int
splat_kmem_test7(struct file *file, void *arg)
{
char *name = SPLAT_KMEM_TEST7_NAME;
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
int max_size = (SPL_KMEM_CACHE_MAX_SIZE * 1024 * 1024) / 2;
int i, rc;
for (i = SPL_KMEM_CACHE_ALIGN; i <= PAGE_SIZE; i *= 2) {
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
uint32_t size;
get_random_bytes((void *)&size, sizeof (uint32_t));
size = MAX(size % (max_size + 1), 32);
rc = splat_kmem_cache_test(file, arg, name, size, i, 0);
if (rc)
return rc;
Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2014-12-15 22:06:18 +00:00
rc = splat_kmem_cache_test(file, arg, name, size, i,
KMC_OFFSLAB);
if (rc)
return rc;
}
return rc;
}
/*
* Validate kmem_cache_reap() by requesting the slab cache free any objects
* it can. For a few reasons this may not immediately result in more free
* memory even if objects are freed. First off, due to fragmentation we
* may not be able to reclaim any slabs. Secondly, even if we do we fully
* clear some slabs we will not want to immediately reclaim all of them
* because we may contend with cache allocations and thrash. What we want
* to see is the slab size decrease more gradually as it becomes clear they
* will not be needed. This should be achievable in less than a minute.
* If it takes longer than this something has gone wrong.
*/
static int
splat_kmem_test8(struct file *file, void *arg)
{
kmem_cache_priv_t *kcp;
kmem_cache_thread_t *kct;
unsigned int spl_kmem_cache_expire_old;
int i, rc = 0;
/* Enable cache aging just for this test if it is disabled */
spl_kmem_cache_expire_old = spl_kmem_cache_expire;
spl_kmem_cache_expire = KMC_EXPIRE_AGE;
kcp = splat_kmem_cache_test_kcp_alloc(file, SPLAT_KMEM_TEST8_NAME,
256, 0, 0);
if (!kcp) {
splat_vprint(file, SPLAT_KMEM_TEST8_NAME,
"Unable to create '%s'\n", "kcp");
rc = -ENOMEM;
goto out;
}
kcp->kcp_cache =
kmem_cache_create(SPLAT_KMEM_CACHE_NAME, kcp->kcp_size, 0,
splat_kmem_cache_test_constructor,
splat_kmem_cache_test_destructor,
splat_kmem_cache_test_reclaim,
kcp, NULL, 0);
if (!kcp->kcp_cache) {
splat_vprint(file, SPLAT_KMEM_TEST8_NAME,
"Unable to create '%s'\n", SPLAT_KMEM_CACHE_NAME);
rc = -ENOMEM;
goto out_kcp;
}
kct = splat_kmem_cache_test_kct_alloc(kcp, 0);
if (!kct) {
splat_vprint(file, SPLAT_KMEM_TEST8_NAME,
"Unable to create '%s'\n", "kct");
rc = -ENOMEM;
goto out_cache;
}
rc = splat_kmem_cache_test_kcd_alloc(kcp, kct, SPLAT_KMEM_OBJ_COUNT);
if (rc) {
splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "Unable to "
"allocate from '%s'\n", SPLAT_KMEM_CACHE_NAME);
goto out_kct;
}
/* Force reclaim every 1/10 a second for 60 seconds. */
for (i = 0; i < 600; i++) {
kmem_cache_reap_now(kcp->kcp_cache);
splat_kmem_cache_test_debug(file, SPLAT_KMEM_TEST8_NAME, kcp);
Add KMC_SLAB cache type For small objects the Linux slab allocator has several advantages over its counterpart in the SPL. These include: 1) It is more memory-efficient and packs objects more tightly. 2) It is continually tuned to maximize performance. Therefore it makes sense to layer the SPLs slab allocator on top of the Linux slab allocator. This allows us to leverage the advantages above while preserving the Illumos semantics we depend on. However, there are some things we need to be careful of: 1) The Linux slab allocator was never designed to work well with large objects. Because the SPL slab must still handle this use case a cut off limit was added to transition from Linux slab backed objects to kmem or vmem backed slabs. spl_kmem_cache_slab_limit - Objects less than or equal to this size in bytes will be backed by the Linux slab. By default this value is zero which disables the Linux slab functionality. Reasonable values for this cut off limit are in the range of 4096-16386 bytes. spl_kmem_cache_kmem_limit - Objects less than or equal to this size in bytes will be backed by a kmem slab. Objects over this size will be vmem backed instead. This value defaults to 1/8 a page, or 512 bytes on an x86_64 architecture. 2) Be aware that using the Linux slab may inadvertently introduce new deadlocks. Care has been taken previously to ensure that all allocations which occur in the write path use GFP_NOIO. However, there may be internal allocations performed in the Linux slab which do not honor these flags. If this is the case a deadlock may occur. The path forward is definitely to start relying on the Linux slab. But for that to happen we need to start building confidence that there aren't any unexpected surprises lurking for us. And ideally need to move completely away from using the SPLs slab for large memory allocations. This patch is a first step. NOTES: 1) The KMC_NOMAGAZINE flag was leveraged to support the Linux slab backed caches but it is not supported for kmem/vmem backed caches. 2) Regardless of the spl_kmem_cache_*_limit settings a cache may be explicitly set to a given type by passed the KMC_KMEM, KMC_VMEM, or KMC_SLAB flags during cache creation. 3) The constructors, destructors, and reclaim callbacks are all functional and will be called regardless of the cache type. 4) KMC_SLAB caches will not appear in /proc/spl/kmem/slab due to the issues involved in presenting correct object accounting. Instead they will appear in /proc/slabinfo under the same names. 5) Several kmem SPLAT tests needed to be fixed because they relied incorrectly on internal kmem slab accounting. With the updated test cases all the SPLAT tests pass as expected. 6) An autoconf test was added to ensure that the __GFP_COMP flag was correctly added to the default flags used when allocating a slab. This is required to ensure all pages in higher order slabs are properly refcounted, see ae16ed9. 7) When using the SLUB allocator there is no need to attempt to set the __GFP_COMP flag. This has been the default behavior for the SLUB since Linux 2.6.25. 8) When using the SLUB it may be desirable to set the slub_nomerge kernel parameter to prevent caches from being merged. Original-patch-by: DHE <git@dehacked.net> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Prakash Surya <surya1@llnl.gov> Signed-off-by: Tim Chase <tim@chase2k.com> Signed-off-by: DHE <git@dehacked.net> Signed-off-by: Chunwei Chen <tuxoko@gmail.com> Closes #356
2013-12-08 22:01:45 +00:00
if (kcp->kcp_count == 0)
break;
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(HZ / 10);
}
Add KMC_SLAB cache type For small objects the Linux slab allocator has several advantages over its counterpart in the SPL. These include: 1) It is more memory-efficient and packs objects more tightly. 2) It is continually tuned to maximize performance. Therefore it makes sense to layer the SPLs slab allocator on top of the Linux slab allocator. This allows us to leverage the advantages above while preserving the Illumos semantics we depend on. However, there are some things we need to be careful of: 1) The Linux slab allocator was never designed to work well with large objects. Because the SPL slab must still handle this use case a cut off limit was added to transition from Linux slab backed objects to kmem or vmem backed slabs. spl_kmem_cache_slab_limit - Objects less than or equal to this size in bytes will be backed by the Linux slab. By default this value is zero which disables the Linux slab functionality. Reasonable values for this cut off limit are in the range of 4096-16386 bytes. spl_kmem_cache_kmem_limit - Objects less than or equal to this size in bytes will be backed by a kmem slab. Objects over this size will be vmem backed instead. This value defaults to 1/8 a page, or 512 bytes on an x86_64 architecture. 2) Be aware that using the Linux slab may inadvertently introduce new deadlocks. Care has been taken previously to ensure that all allocations which occur in the write path use GFP_NOIO. However, there may be internal allocations performed in the Linux slab which do not honor these flags. If this is the case a deadlock may occur. The path forward is definitely to start relying on the Linux slab. But for that to happen we need to start building confidence that there aren't any unexpected surprises lurking for us. And ideally need to move completely away from using the SPLs slab for large memory allocations. This patch is a first step. NOTES: 1) The KMC_NOMAGAZINE flag was leveraged to support the Linux slab backed caches but it is not supported for kmem/vmem backed caches. 2) Regardless of the spl_kmem_cache_*_limit settings a cache may be explicitly set to a given type by passed the KMC_KMEM, KMC_VMEM, or KMC_SLAB flags during cache creation. 3) The constructors, destructors, and reclaim callbacks are all functional and will be called regardless of the cache type. 4) KMC_SLAB caches will not appear in /proc/spl/kmem/slab due to the issues involved in presenting correct object accounting. Instead they will appear in /proc/slabinfo under the same names. 5) Several kmem SPLAT tests needed to be fixed because they relied incorrectly on internal kmem slab accounting. With the updated test cases all the SPLAT tests pass as expected. 6) An autoconf test was added to ensure that the __GFP_COMP flag was correctly added to the default flags used when allocating a slab. This is required to ensure all pages in higher order slabs are properly refcounted, see ae16ed9. 7) When using the SLUB allocator there is no need to attempt to set the __GFP_COMP flag. This has been the default behavior for the SLUB since Linux 2.6.25. 8) When using the SLUB it may be desirable to set the slub_nomerge kernel parameter to prevent caches from being merged. Original-patch-by: DHE <git@dehacked.net> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Prakash Surya <surya1@llnl.gov> Signed-off-by: Tim Chase <tim@chase2k.com> Signed-off-by: DHE <git@dehacked.net> Signed-off-by: Chunwei Chen <tuxoko@gmail.com> Closes #356
2013-12-08 22:01:45 +00:00
if (kcp->kcp_count == 0) {
splat_vprint(file, SPLAT_KMEM_TEST8_NAME,
"Successfully created %d objects "
"in cache %s and reclaimed them\n",
SPLAT_KMEM_OBJ_COUNT, SPLAT_KMEM_CACHE_NAME);
} else {
splat_vprint(file, SPLAT_KMEM_TEST8_NAME,
"Failed to reclaim %u/%d objects from cache %s\n",
Add KMC_SLAB cache type For small objects the Linux slab allocator has several advantages over its counterpart in the SPL. These include: 1) It is more memory-efficient and packs objects more tightly. 2) It is continually tuned to maximize performance. Therefore it makes sense to layer the SPLs slab allocator on top of the Linux slab allocator. This allows us to leverage the advantages above while preserving the Illumos semantics we depend on. However, there are some things we need to be careful of: 1) The Linux slab allocator was never designed to work well with large objects. Because the SPL slab must still handle this use case a cut off limit was added to transition from Linux slab backed objects to kmem or vmem backed slabs. spl_kmem_cache_slab_limit - Objects less than or equal to this size in bytes will be backed by the Linux slab. By default this value is zero which disables the Linux slab functionality. Reasonable values for this cut off limit are in the range of 4096-16386 bytes. spl_kmem_cache_kmem_limit - Objects less than or equal to this size in bytes will be backed by a kmem slab. Objects over this size will be vmem backed instead. This value defaults to 1/8 a page, or 512 bytes on an x86_64 architecture. 2) Be aware that using the Linux slab may inadvertently introduce new deadlocks. Care has been taken previously to ensure that all allocations which occur in the write path use GFP_NOIO. However, there may be internal allocations performed in the Linux slab which do not honor these flags. If this is the case a deadlock may occur. The path forward is definitely to start relying on the Linux slab. But for that to happen we need to start building confidence that there aren't any unexpected surprises lurking for us. And ideally need to move completely away from using the SPLs slab for large memory allocations. This patch is a first step. NOTES: 1) The KMC_NOMAGAZINE flag was leveraged to support the Linux slab backed caches but it is not supported for kmem/vmem backed caches. 2) Regardless of the spl_kmem_cache_*_limit settings a cache may be explicitly set to a given type by passed the KMC_KMEM, KMC_VMEM, or KMC_SLAB flags during cache creation. 3) The constructors, destructors, and reclaim callbacks are all functional and will be called regardless of the cache type. 4) KMC_SLAB caches will not appear in /proc/spl/kmem/slab due to the issues involved in presenting correct object accounting. Instead they will appear in /proc/slabinfo under the same names. 5) Several kmem SPLAT tests needed to be fixed because they relied incorrectly on internal kmem slab accounting. With the updated test cases all the SPLAT tests pass as expected. 6) An autoconf test was added to ensure that the __GFP_COMP flag was correctly added to the default flags used when allocating a slab. This is required to ensure all pages in higher order slabs are properly refcounted, see ae16ed9. 7) When using the SLUB allocator there is no need to attempt to set the __GFP_COMP flag. This has been the default behavior for the SLUB since Linux 2.6.25. 8) When using the SLUB it may be desirable to set the slub_nomerge kernel parameter to prevent caches from being merged. Original-patch-by: DHE <git@dehacked.net> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Prakash Surya <surya1@llnl.gov> Signed-off-by: Tim Chase <tim@chase2k.com> Signed-off-by: DHE <git@dehacked.net> Signed-off-by: Chunwei Chen <tuxoko@gmail.com> Closes #356
2013-12-08 22:01:45 +00:00
(unsigned)kcp->kcp_count,
SPLAT_KMEM_OBJ_COUNT, SPLAT_KMEM_CACHE_NAME);
rc = -ENOMEM;
}
/* Cleanup our mess (for failure case of time expiring) */
splat_kmem_cache_test_kcd_free(kcp, kct);
out_kct:
splat_kmem_cache_test_kct_free(kcp, kct);
out_cache:
kmem_cache_destroy(kcp->kcp_cache);
out_kcp:
splat_kmem_cache_test_kcp_free(kcp);
out:
spl_kmem_cache_expire = spl_kmem_cache_expire_old;
return rc;
}
/* Test cache aging, we have allocated a large number of objects thus
* creating a large number of slabs and then free'd them all. However,
* since there should be little memory pressure at the moment those
* slabs have not been freed. What we want to see is the slab size
* decrease gradually as it becomes clear they will not be be needed.
* This should be achievable in less than minute. If it takes longer
* than this something has gone wrong.
*/
static int
splat_kmem_test9(struct file *file, void *arg)
{
kmem_cache_priv_t *kcp;
kmem_cache_thread_t *kct;
unsigned int spl_kmem_cache_expire_old;
int i, rc = 0, count = SPLAT_KMEM_OBJ_COUNT * 128;
/* Enable cache aging just for this test if it is disabled */
spl_kmem_cache_expire_old = spl_kmem_cache_expire;
spl_kmem_cache_expire = KMC_EXPIRE_AGE;
kcp = splat_kmem_cache_test_kcp_alloc(file, SPLAT_KMEM_TEST9_NAME,
256, 0, 0);
if (!kcp) {
splat_vprint(file, SPLAT_KMEM_TEST9_NAME,
"Unable to create '%s'\n", "kcp");
rc = -ENOMEM;
goto out;
}
kcp->kcp_cache =
kmem_cache_create(SPLAT_KMEM_CACHE_NAME, kcp->kcp_size, 0,
splat_kmem_cache_test_constructor,
splat_kmem_cache_test_destructor,
NULL, kcp, NULL, 0);
if (!kcp->kcp_cache) {
splat_vprint(file, SPLAT_KMEM_TEST9_NAME,
"Unable to create '%s'\n", SPLAT_KMEM_CACHE_NAME);
rc = -ENOMEM;
goto out_kcp;
}
kct = splat_kmem_cache_test_kct_alloc(kcp, 0);
if (!kct) {
splat_vprint(file, SPLAT_KMEM_TEST8_NAME,
"Unable to create '%s'\n", "kct");
rc = -ENOMEM;
goto out_cache;
}
rc = splat_kmem_cache_test_kcd_alloc(kcp, kct, count);
if (rc) {
splat_vprint(file, SPLAT_KMEM_TEST9_NAME, "Unable to "
"allocate from '%s'\n", SPLAT_KMEM_CACHE_NAME);
goto out_kct;
}
splat_kmem_cache_test_kcd_free(kcp, kct);
for (i = 0; i < 60; i++) {
splat_kmem_cache_test_debug(file, SPLAT_KMEM_TEST9_NAME, kcp);
Add KMC_SLAB cache type For small objects the Linux slab allocator has several advantages over its counterpart in the SPL. These include: 1) It is more memory-efficient and packs objects more tightly. 2) It is continually tuned to maximize performance. Therefore it makes sense to layer the SPLs slab allocator on top of the Linux slab allocator. This allows us to leverage the advantages above while preserving the Illumos semantics we depend on. However, there are some things we need to be careful of: 1) The Linux slab allocator was never designed to work well with large objects. Because the SPL slab must still handle this use case a cut off limit was added to transition from Linux slab backed objects to kmem or vmem backed slabs. spl_kmem_cache_slab_limit - Objects less than or equal to this size in bytes will be backed by the Linux slab. By default this value is zero which disables the Linux slab functionality. Reasonable values for this cut off limit are in the range of 4096-16386 bytes. spl_kmem_cache_kmem_limit - Objects less than or equal to this size in bytes will be backed by a kmem slab. Objects over this size will be vmem backed instead. This value defaults to 1/8 a page, or 512 bytes on an x86_64 architecture. 2) Be aware that using the Linux slab may inadvertently introduce new deadlocks. Care has been taken previously to ensure that all allocations which occur in the write path use GFP_NOIO. However, there may be internal allocations performed in the Linux slab which do not honor these flags. If this is the case a deadlock may occur. The path forward is definitely to start relying on the Linux slab. But for that to happen we need to start building confidence that there aren't any unexpected surprises lurking for us. And ideally need to move completely away from using the SPLs slab for large memory allocations. This patch is a first step. NOTES: 1) The KMC_NOMAGAZINE flag was leveraged to support the Linux slab backed caches but it is not supported for kmem/vmem backed caches. 2) Regardless of the spl_kmem_cache_*_limit settings a cache may be explicitly set to a given type by passed the KMC_KMEM, KMC_VMEM, or KMC_SLAB flags during cache creation. 3) The constructors, destructors, and reclaim callbacks are all functional and will be called regardless of the cache type. 4) KMC_SLAB caches will not appear in /proc/spl/kmem/slab due to the issues involved in presenting correct object accounting. Instead they will appear in /proc/slabinfo under the same names. 5) Several kmem SPLAT tests needed to be fixed because they relied incorrectly on internal kmem slab accounting. With the updated test cases all the SPLAT tests pass as expected. 6) An autoconf test was added to ensure that the __GFP_COMP flag was correctly added to the default flags used when allocating a slab. This is required to ensure all pages in higher order slabs are properly refcounted, see ae16ed9. 7) When using the SLUB allocator there is no need to attempt to set the __GFP_COMP flag. This has been the default behavior for the SLUB since Linux 2.6.25. 8) When using the SLUB it may be desirable to set the slub_nomerge kernel parameter to prevent caches from being merged. Original-patch-by: DHE <git@dehacked.net> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Prakash Surya <surya1@llnl.gov> Signed-off-by: Tim Chase <tim@chase2k.com> Signed-off-by: DHE <git@dehacked.net> Signed-off-by: Chunwei Chen <tuxoko@gmail.com> Closes #356
2013-12-08 22:01:45 +00:00
if (kcp->kcp_count == 0)
break;
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(HZ);
}
Add KMC_SLAB cache type For small objects the Linux slab allocator has several advantages over its counterpart in the SPL. These include: 1) It is more memory-efficient and packs objects more tightly. 2) It is continually tuned to maximize performance. Therefore it makes sense to layer the SPLs slab allocator on top of the Linux slab allocator. This allows us to leverage the advantages above while preserving the Illumos semantics we depend on. However, there are some things we need to be careful of: 1) The Linux slab allocator was never designed to work well with large objects. Because the SPL slab must still handle this use case a cut off limit was added to transition from Linux slab backed objects to kmem or vmem backed slabs. spl_kmem_cache_slab_limit - Objects less than or equal to this size in bytes will be backed by the Linux slab. By default this value is zero which disables the Linux slab functionality. Reasonable values for this cut off limit are in the range of 4096-16386 bytes. spl_kmem_cache_kmem_limit - Objects less than or equal to this size in bytes will be backed by a kmem slab. Objects over this size will be vmem backed instead. This value defaults to 1/8 a page, or 512 bytes on an x86_64 architecture. 2) Be aware that using the Linux slab may inadvertently introduce new deadlocks. Care has been taken previously to ensure that all allocations which occur in the write path use GFP_NOIO. However, there may be internal allocations performed in the Linux slab which do not honor these flags. If this is the case a deadlock may occur. The path forward is definitely to start relying on the Linux slab. But for that to happen we need to start building confidence that there aren't any unexpected surprises lurking for us. And ideally need to move completely away from using the SPLs slab for large memory allocations. This patch is a first step. NOTES: 1) The KMC_NOMAGAZINE flag was leveraged to support the Linux slab backed caches but it is not supported for kmem/vmem backed caches. 2) Regardless of the spl_kmem_cache_*_limit settings a cache may be explicitly set to a given type by passed the KMC_KMEM, KMC_VMEM, or KMC_SLAB flags during cache creation. 3) The constructors, destructors, and reclaim callbacks are all functional and will be called regardless of the cache type. 4) KMC_SLAB caches will not appear in /proc/spl/kmem/slab due to the issues involved in presenting correct object accounting. Instead they will appear in /proc/slabinfo under the same names. 5) Several kmem SPLAT tests needed to be fixed because they relied incorrectly on internal kmem slab accounting. With the updated test cases all the SPLAT tests pass as expected. 6) An autoconf test was added to ensure that the __GFP_COMP flag was correctly added to the default flags used when allocating a slab. This is required to ensure all pages in higher order slabs are properly refcounted, see ae16ed9. 7) When using the SLUB allocator there is no need to attempt to set the __GFP_COMP flag. This has been the default behavior for the SLUB since Linux 2.6.25. 8) When using the SLUB it may be desirable to set the slub_nomerge kernel parameter to prevent caches from being merged. Original-patch-by: DHE <git@dehacked.net> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Prakash Surya <surya1@llnl.gov> Signed-off-by: Tim Chase <tim@chase2k.com> Signed-off-by: DHE <git@dehacked.net> Signed-off-by: Chunwei Chen <tuxoko@gmail.com> Closes #356
2013-12-08 22:01:45 +00:00
if (kcp->kcp_count == 0) {
splat_vprint(file, SPLAT_KMEM_TEST9_NAME,
"Successfully created %d objects "
"in cache %s and reclaimed them\n",
count, SPLAT_KMEM_CACHE_NAME);
} else {
splat_vprint(file, SPLAT_KMEM_TEST9_NAME,
"Failed to reclaim %u/%d objects from cache %s\n",
Add KMC_SLAB cache type For small objects the Linux slab allocator has several advantages over its counterpart in the SPL. These include: 1) It is more memory-efficient and packs objects more tightly. 2) It is continually tuned to maximize performance. Therefore it makes sense to layer the SPLs slab allocator on top of the Linux slab allocator. This allows us to leverage the advantages above while preserving the Illumos semantics we depend on. However, there are some things we need to be careful of: 1) The Linux slab allocator was never designed to work well with large objects. Because the SPL slab must still handle this use case a cut off limit was added to transition from Linux slab backed objects to kmem or vmem backed slabs. spl_kmem_cache_slab_limit - Objects less than or equal to this size in bytes will be backed by the Linux slab. By default this value is zero which disables the Linux slab functionality. Reasonable values for this cut off limit are in the range of 4096-16386 bytes. spl_kmem_cache_kmem_limit - Objects less than or equal to this size in bytes will be backed by a kmem slab. Objects over this size will be vmem backed instead. This value defaults to 1/8 a page, or 512 bytes on an x86_64 architecture. 2) Be aware that using the Linux slab may inadvertently introduce new deadlocks. Care has been taken previously to ensure that all allocations which occur in the write path use GFP_NOIO. However, there may be internal allocations performed in the Linux slab which do not honor these flags. If this is the case a deadlock may occur. The path forward is definitely to start relying on the Linux slab. But for that to happen we need to start building confidence that there aren't any unexpected surprises lurking for us. And ideally need to move completely away from using the SPLs slab for large memory allocations. This patch is a first step. NOTES: 1) The KMC_NOMAGAZINE flag was leveraged to support the Linux slab backed caches but it is not supported for kmem/vmem backed caches. 2) Regardless of the spl_kmem_cache_*_limit settings a cache may be explicitly set to a given type by passed the KMC_KMEM, KMC_VMEM, or KMC_SLAB flags during cache creation. 3) The constructors, destructors, and reclaim callbacks are all functional and will be called regardless of the cache type. 4) KMC_SLAB caches will not appear in /proc/spl/kmem/slab due to the issues involved in presenting correct object accounting. Instead they will appear in /proc/slabinfo under the same names. 5) Several kmem SPLAT tests needed to be fixed because they relied incorrectly on internal kmem slab accounting. With the updated test cases all the SPLAT tests pass as expected. 6) An autoconf test was added to ensure that the __GFP_COMP flag was correctly added to the default flags used when allocating a slab. This is required to ensure all pages in higher order slabs are properly refcounted, see ae16ed9. 7) When using the SLUB allocator there is no need to attempt to set the __GFP_COMP flag. This has been the default behavior for the SLUB since Linux 2.6.25. 8) When using the SLUB it may be desirable to set the slub_nomerge kernel parameter to prevent caches from being merged. Original-patch-by: DHE <git@dehacked.net> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Prakash Surya <surya1@llnl.gov> Signed-off-by: Tim Chase <tim@chase2k.com> Signed-off-by: DHE <git@dehacked.net> Signed-off-by: Chunwei Chen <tuxoko@gmail.com> Closes #356
2013-12-08 22:01:45 +00:00
(unsigned)kcp->kcp_count, count,
SPLAT_KMEM_CACHE_NAME);
rc = -ENOMEM;
}
out_kct:
splat_kmem_cache_test_kct_free(kcp, kct);
out_cache:
kmem_cache_destroy(kcp->kcp_cache);
out_kcp:
splat_kmem_cache_test_kcp_free(kcp);
out:
spl_kmem_cache_expire = spl_kmem_cache_expire_old;
return rc;
}
/*
* This test creates N threads with a shared kmem cache. They then all
* concurrently allocate and free from the cache to stress the locking and
* concurrent cache performance. If any one test takes longer than 5
* seconds to complete it is treated as a failure and may indicate a
* performance regression. On my test system no one test takes more
* than 1 second to complete so a 5x slowdown likely a problem.
*/
static int
splat_kmem_test10(struct file *file, void *arg)
{
uint64_t size, alloc, maxsize, limit, rc = 0;
#if defined(CONFIG_64BIT)
maxsize = (1024 * 1024);
#else
maxsize = (128 * 1024);
#endif
for (size = 32; size <= maxsize; size *= 2) {
splat_vprint(file, SPLAT_KMEM_TEST10_NAME, "%-22s %s", "name",
"time (sec)\tslabs \tobjs \thash\n");
splat_vprint(file, SPLAT_KMEM_TEST10_NAME, "%-22s %s", "",
" \ttot/max/calc\ttot/max/calc\n");
for (alloc = 1; alloc <= 1024; alloc *= 2) {
/* Skip tests which exceed 1/2 of memory. */
limit = MIN(physmem * PAGE_SIZE,
vmem_size(NULL, VMEM_ALLOC | VMEM_FREE)) / 2;
if (size * alloc * SPLAT_KMEM_THREADS > limit)
continue;
rc = splat_kmem_cache_thread_test(file, arg,
2009-01-31 05:24:42 +00:00
SPLAT_KMEM_TEST10_NAME, size, alloc, 5);
if (rc)
break;
}
}
return rc;
}
#if 0
/*
* This test creates N threads with a shared kmem cache which overcommits
* memory by 4x. This makes it impossible for the slab to satify the
* thread requirements without having its reclaim hook run which will
* free objects back for use. This behavior is triggered by the linum VM
* detecting a low memory condition on the node and invoking the shrinkers.
* This should allow all the threads to complete while avoiding deadlock
* and for the most part out of memory events. This is very tough on the
* system so it is possible the test app may get oom'ed. This particular
* test has proven troublesome on 32-bit archs with limited virtual
* address space so it only run on 64-bit systems.
*/
static int
splat_kmem_test11(struct file *file, void *arg)
{
uint64_t size, alloc, rc;
size = 8 * 1024;
FC10/i686 Compatibility Update (2.6.27.19-170.2.35.fc10.i686) In the interests of portability I have added a FC10/i686 box to my list of development platforms. The hope is this will allow me to keep current with upstream kernel API changes, and at the same time ensure I don't accidentally break x86 support. This patch resolves all remaining issues observed under that environment. 1) SPL_AC_ZONE_STAT_ITEM_FIA autoconf check added. As of 2.6.21 the kernel added a clean API for modules to get the global count for free, inactive, and active pages. The SPL attempts to detect if this API is available and directly map spl_global_page_state() to global_page_state(). If the full API is not available then spl_global_page_state() is implemented as a thin layer to get these values via get_zone_counts() if that symbol is available. 2) New kmem:vmem_size regression test added to validate correct vmem_size() functionality. The test case acquires the current global vmem state, allocates from the vmem region, then verifies the allocation is correctly reflected in the vmem_size() stats. 3) Change splat_kmem_cache_thread_test() to always use KMC_KMEM based memory. On x86 systems with limited virtual address space failures resulted due to exhaustig the address space. The tests really need to problem exhausting all memory on the system thus we need to use the physical address space. 4) Change kmem:slab_lock to cap it's memory usage at availrmem instead of using the native linux nr_free_pages(). This provides additional test coverage of the SPL Linux VM integration. 5) Change kmem:slab_overcommit to perform allocation of 256K instead of 1M. On x86 based systems it is not possible to create a kmem backed slab with entires of that size. To compensate for this the number of allocations performed in increased by 4x. 6) Additional autoconf documentation for proposed upstream API changes to make additional symbols available to modules. 7) Console error messages added when spl_kallsyms_lookup_name() fails to locate an expected symbol. This causes the module to fail to load and we need to know exactly which symbol was not available.
2009-03-17 19:16:31 +00:00
alloc = ((4 * physmem * PAGE_SIZE) / size) / SPLAT_KMEM_THREADS;
FC10/i686 Compatibility Update (2.6.27.19-170.2.35.fc10.i686) In the interests of portability I have added a FC10/i686 box to my list of development platforms. The hope is this will allow me to keep current with upstream kernel API changes, and at the same time ensure I don't accidentally break x86 support. This patch resolves all remaining issues observed under that environment. 1) SPL_AC_ZONE_STAT_ITEM_FIA autoconf check added. As of 2.6.21 the kernel added a clean API for modules to get the global count for free, inactive, and active pages. The SPL attempts to detect if this API is available and directly map spl_global_page_state() to global_page_state(). If the full API is not available then spl_global_page_state() is implemented as a thin layer to get these values via get_zone_counts() if that symbol is available. 2) New kmem:vmem_size regression test added to validate correct vmem_size() functionality. The test case acquires the current global vmem state, allocates from the vmem region, then verifies the allocation is correctly reflected in the vmem_size() stats. 3) Change splat_kmem_cache_thread_test() to always use KMC_KMEM based memory. On x86 systems with limited virtual address space failures resulted due to exhaustig the address space. The tests really need to problem exhausting all memory on the system thus we need to use the physical address space. 4) Change kmem:slab_lock to cap it's memory usage at availrmem instead of using the native linux nr_free_pages(). This provides additional test coverage of the SPL Linux VM integration. 5) Change kmem:slab_overcommit to perform allocation of 256K instead of 1M. On x86 based systems it is not possible to create a kmem backed slab with entires of that size. To compensate for this the number of allocations performed in increased by 4x. 6) Additional autoconf documentation for proposed upstream API changes to make additional symbols available to modules. 7) Console error messages added when spl_kallsyms_lookup_name() fails to locate an expected symbol. This causes the module to fail to load and we need to know exactly which symbol was not available.
2009-03-17 19:16:31 +00:00
splat_vprint(file, SPLAT_KMEM_TEST11_NAME, "%-22s %s", "name",
"time (sec)\tslabs \tobjs \thash\n");
FC10/i686 Compatibility Update (2.6.27.19-170.2.35.fc10.i686) In the interests of portability I have added a FC10/i686 box to my list of development platforms. The hope is this will allow me to keep current with upstream kernel API changes, and at the same time ensure I don't accidentally break x86 support. This patch resolves all remaining issues observed under that environment. 1) SPL_AC_ZONE_STAT_ITEM_FIA autoconf check added. As of 2.6.21 the kernel added a clean API for modules to get the global count for free, inactive, and active pages. The SPL attempts to detect if this API is available and directly map spl_global_page_state() to global_page_state(). If the full API is not available then spl_global_page_state() is implemented as a thin layer to get these values via get_zone_counts() if that symbol is available. 2) New kmem:vmem_size regression test added to validate correct vmem_size() functionality. The test case acquires the current global vmem state, allocates from the vmem region, then verifies the allocation is correctly reflected in the vmem_size() stats. 3) Change splat_kmem_cache_thread_test() to always use KMC_KMEM based memory. On x86 systems with limited virtual address space failures resulted due to exhaustig the address space. The tests really need to problem exhausting all memory on the system thus we need to use the physical address space. 4) Change kmem:slab_lock to cap it's memory usage at availrmem instead of using the native linux nr_free_pages(). This provides additional test coverage of the SPL Linux VM integration. 5) Change kmem:slab_overcommit to perform allocation of 256K instead of 1M. On x86 based systems it is not possible to create a kmem backed slab with entires of that size. To compensate for this the number of allocations performed in increased by 4x. 6) Additional autoconf documentation for proposed upstream API changes to make additional symbols available to modules. 7) Console error messages added when spl_kallsyms_lookup_name() fails to locate an expected symbol. This causes the module to fail to load and we need to know exactly which symbol was not available.
2009-03-17 19:16:31 +00:00
splat_vprint(file, SPLAT_KMEM_TEST11_NAME, "%-22s %s", "",
" \ttot/max/calc\ttot/max/calc\n");
rc = splat_kmem_cache_thread_test(file, arg,
2009-01-31 05:24:42 +00:00
SPLAT_KMEM_TEST11_NAME, size, alloc, 60);
return rc;
}
#endif
typedef struct dummy_page {
struct list_head dp_list;
char dp_pad[PAGE_SIZE - sizeof(struct list_head)];
} dummy_page_t;
/*
* This test is designed to verify that direct reclaim is functioning as
* expected. We allocate a large number of objects thus creating a large
* number of slabs. We then apply memory pressure and expect that the
* direct reclaim path can easily recover those slabs. The registered
* reclaim function will free the objects and the slab shrinker will call
* it repeatedly until at least a single slab can be freed.
*
* Note it may not be possible to reclaim every last slab via direct reclaim
* without a failure because the shrinker_rwsem may be contended. For this
* reason, quickly reclaiming 3/4 of the slabs is considered a success.
*
* This should all be possible within 10 seconds. For reference, on a
* system with 2G of memory this test takes roughly 0.2 seconds to run.
* It may take longer on larger memory systems but should still easily
* complete in the alloted 10 seconds.
*/
static int
splat_kmem_test13(struct file *file, void *arg)
{
kmem_cache_priv_t *kcp;
kmem_cache_thread_t *kct;
dummy_page_t *dp;
struct list_head list;
struct timespec start, stop, delta = { 0, 0 };
int size, count, slabs, fails = 0;
int i, rc = 0, max_time = 10;
size = 128 * 1024;
count = MIN(physmem * PAGE_SIZE, vmem_size(NULL,
VMEM_ALLOC | VMEM_FREE)) / 4 / size;
kcp = splat_kmem_cache_test_kcp_alloc(file, SPLAT_KMEM_TEST13_NAME,
size, 0, 0);
if (!kcp) {
splat_vprint(file, SPLAT_KMEM_TEST13_NAME,
"Unable to create '%s'\n", "kcp");
rc = -ENOMEM;
goto out;
}
kcp->kcp_cache =
kmem_cache_create(SPLAT_KMEM_CACHE_NAME, kcp->kcp_size, 0,
splat_kmem_cache_test_constructor,
splat_kmem_cache_test_destructor,
splat_kmem_cache_test_reclaim,
kcp, NULL, 0);
if (!kcp->kcp_cache) {
splat_vprint(file, SPLAT_KMEM_TEST13_NAME,
"Unable to create '%s'\n", SPLAT_KMEM_CACHE_NAME);
rc = -ENOMEM;
goto out_kcp;
}
kct = splat_kmem_cache_test_kct_alloc(kcp, 0);
if (!kct) {
splat_vprint(file, SPLAT_KMEM_TEST13_NAME,
"Unable to create '%s'\n", "kct");
rc = -ENOMEM;
goto out_cache;
}
rc = splat_kmem_cache_test_kcd_alloc(kcp, kct, count);
if (rc) {
splat_vprint(file, SPLAT_KMEM_TEST13_NAME, "Unable to "
"allocate from '%s'\n", SPLAT_KMEM_CACHE_NAME);
goto out_kct;
}
i = 0;
slabs = kcp->kcp_cache->skc_slab_total;
INIT_LIST_HEAD(&list);
getnstimeofday(&start);
/* Apply memory pressure */
while (kcp->kcp_cache->skc_slab_total > (slabs >> 2)) {
if ((i % 10000) == 0)
splat_kmem_cache_test_debug(
file, SPLAT_KMEM_TEST13_NAME, kcp);
getnstimeofday(&stop);
delta = timespec_sub(stop, start);
if (delta.tv_sec >= max_time) {
splat_vprint(file, SPLAT_KMEM_TEST13_NAME,
"Failed to reclaim 3/4 of cache in %ds, "
"%u/%u slabs remain\n", max_time,
(unsigned)kcp->kcp_cache->skc_slab_total,
slabs);
rc = -ETIME;
break;
}
dp = (dummy_page_t *)__get_free_page(GFP_KERNEL);
if (!dp) {
fails++;
splat_vprint(file, SPLAT_KMEM_TEST13_NAME,
"Failed (%d) to allocate page with %u "
"slabs still in the cache\n", fails,
(unsigned)kcp->kcp_cache->skc_slab_total);
continue;
}
list_add(&dp->dp_list, &list);
i++;
}
if (rc == 0)
splat_vprint(file, SPLAT_KMEM_TEST13_NAME,
"Successfully created %u slabs and with %d alloc "
"failures reclaimed 3/4 of them in %d.%03ds\n",
slabs, fails,
(int)delta.tv_sec, (int)delta.tv_nsec / 1000000);
/* Release memory pressure pages */
while (!list_empty(&list)) {
dp = list_entry(list.next, dummy_page_t, dp_list);
list_del_init(&dp->dp_list);
free_page((unsigned long)dp);
}
/* Release remaining kmem cache objects */
splat_kmem_cache_test_kcd_free(kcp, kct);
out_kct:
splat_kmem_cache_test_kct_free(kcp, kct);
out_cache:
kmem_cache_destroy(kcp->kcp_cache);
out_kcp:
splat_kmem_cache_test_kcp_free(kcp);
out:
return rc;
}
splat_subsystem_t *
splat_kmem_init(void)
{
splat_subsystem_t *sub;
sub = kmalloc(sizeof(*sub), GFP_KERNEL);
if (sub == NULL)
return NULL;
memset(sub, 0, sizeof(*sub));
strncpy(sub->desc.name, SPLAT_KMEM_NAME, SPLAT_NAME_SIZE);
strncpy(sub->desc.desc, SPLAT_KMEM_DESC, SPLAT_DESC_SIZE);
INIT_LIST_HEAD(&sub->subsystem_list);
INIT_LIST_HEAD(&sub->test_list);
spin_lock_init(&sub->test_lock);
sub->desc.id = SPLAT_SUBSYSTEM_KMEM;
splat_test_init(sub, SPLAT_KMEM_TEST1_NAME, SPLAT_KMEM_TEST1_DESC,
SPLAT_KMEM_TEST1_ID, splat_kmem_test1);
splat_test_init(sub, SPLAT_KMEM_TEST2_NAME, SPLAT_KMEM_TEST2_DESC,
SPLAT_KMEM_TEST2_ID, splat_kmem_test2);
splat_test_init(sub, SPLAT_KMEM_TEST3_NAME, SPLAT_KMEM_TEST3_DESC,
SPLAT_KMEM_TEST3_ID, splat_kmem_test3);
splat_test_init(sub, SPLAT_KMEM_TEST4_NAME, SPLAT_KMEM_TEST4_DESC,
SPLAT_KMEM_TEST4_ID, splat_kmem_test4);
splat_test_init(sub, SPLAT_KMEM_TEST5_NAME, SPLAT_KMEM_TEST5_DESC,
SPLAT_KMEM_TEST5_ID, splat_kmem_test5);
splat_test_init(sub, SPLAT_KMEM_TEST6_NAME, SPLAT_KMEM_TEST6_DESC,
SPLAT_KMEM_TEST6_ID, splat_kmem_test6);
splat_test_init(sub, SPLAT_KMEM_TEST7_NAME, SPLAT_KMEM_TEST7_DESC,
SPLAT_KMEM_TEST7_ID, splat_kmem_test7);
splat_test_init(sub, SPLAT_KMEM_TEST8_NAME, SPLAT_KMEM_TEST8_DESC,
SPLAT_KMEM_TEST8_ID, splat_kmem_test8);
splat_test_init(sub, SPLAT_KMEM_TEST9_NAME, SPLAT_KMEM_TEST9_DESC,
SPLAT_KMEM_TEST9_ID, splat_kmem_test9);
splat_test_init(sub, SPLAT_KMEM_TEST10_NAME, SPLAT_KMEM_TEST10_DESC,
SPLAT_KMEM_TEST10_ID, splat_kmem_test10);
#if 0
splat_test_init(sub, SPLAT_KMEM_TEST11_NAME, SPLAT_KMEM_TEST11_DESC,
SPLAT_KMEM_TEST11_ID, splat_kmem_test11);
#endif
splat_test_init(sub, SPLAT_KMEM_TEST13_NAME, SPLAT_KMEM_TEST13_DESC,
SPLAT_KMEM_TEST13_ID, splat_kmem_test13);
return sub;
}
void
splat_kmem_fini(splat_subsystem_t *sub)
{
ASSERT(sub);
splat_test_fini(sub, SPLAT_KMEM_TEST13_ID);
#if 0
splat_test_fini(sub, SPLAT_KMEM_TEST11_ID);
#endif
splat_test_fini(sub, SPLAT_KMEM_TEST10_ID);
splat_test_fini(sub, SPLAT_KMEM_TEST9_ID);
splat_test_fini(sub, SPLAT_KMEM_TEST8_ID);
splat_test_fini(sub, SPLAT_KMEM_TEST7_ID);
splat_test_fini(sub, SPLAT_KMEM_TEST6_ID);
splat_test_fini(sub, SPLAT_KMEM_TEST5_ID);
splat_test_fini(sub, SPLAT_KMEM_TEST4_ID);
splat_test_fini(sub, SPLAT_KMEM_TEST3_ID);
splat_test_fini(sub, SPLAT_KMEM_TEST2_ID);
splat_test_fini(sub, SPLAT_KMEM_TEST1_ID);
kfree(sub);
}
int
splat_kmem_id(void) {
return SPLAT_SUBSYSTEM_KMEM;
}