Make use of kvmalloc if available and fix vmem_alloc implementation
This patch uses kvmalloc for GFP_KERNEL allocations when kvmalloc is available as a public kernel interface (since v4.12), which may increase performance if the allocator is able to allocate physical memory. Otherwise it simply falls back to virtual memory (vmalloc). It also fixes the vmem_alloc implementation, which could lead to slow allocations because the first attempt with kmalloc did not use the noretry flag and instead told the Linux kernel to retry several times before failing. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Matt Ahrens <matt@delphix.com> Signed-off-by: Sebastian Gottschall <s.gottschall@dd-wrt.com> Signed-off-by: Michael Niewöhner <foss@mniewoehner.de> Closes #9034
This commit is contained in:
parent
c025008df5
commit
66955885e2
|
@ -56,3 +56,27 @@ AC_DEFUN([SPL_AC_DEBUG_KMEM_TRACKING], [
|
||||||
AC_MSG_CHECKING([whether detailed kmem tracking is enabled])
|
AC_MSG_CHECKING([whether detailed kmem tracking is enabled])
|
||||||
AC_MSG_RESULT([$enable_debug_kmem_tracking])
|
AC_MSG_RESULT([$enable_debug_kmem_tracking])
|
||||||
])
|
])
|
||||||
|
|
||||||
|
dnl #
|
||||||
|
dnl # 4.12 API,
|
||||||
|
dnl # Added kvmalloc allocation strategy
|
||||||
|
dnl #
|
||||||
|
AC_DEFUN([ZFS_AC_KERNEL_SRC_KVMALLOC], [
|
||||||
|
ZFS_LINUX_TEST_SRC([kvmalloc], [
|
||||||
|
#include <linux/mm.h>
|
||||||
|
],[
|
||||||
|
void *p __attribute__ ((unused));
|
||||||
|
|
||||||
|
p = kvmalloc(0, GFP_KERNEL);
|
||||||
|
])
|
||||||
|
])
|
||||||
|
|
||||||
|
AC_DEFUN([ZFS_AC_KERNEL_KVMALLOC], [
|
||||||
|
AC_MSG_CHECKING([whether kvmalloc(ptr, flags) is available])
|
||||||
|
ZFS_LINUX_TEST_RESULT([kvmalloc], [
|
||||||
|
AC_MSG_RESULT(yes)
|
||||||
|
AC_DEFINE(HAVE_KVMALLOC, 1, [kvmalloc exists])
|
||||||
|
],[
|
||||||
|
AC_MSG_RESULT(no)
|
||||||
|
])
|
||||||
|
])
|
||||||
|
|
|
@ -44,6 +44,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
|
||||||
ZFS_AC_KERNEL_SRC_SCHED
|
ZFS_AC_KERNEL_SRC_SCHED
|
||||||
ZFS_AC_KERNEL_SRC_USLEEP_RANGE
|
ZFS_AC_KERNEL_SRC_USLEEP_RANGE
|
||||||
ZFS_AC_KERNEL_SRC_KMEM_CACHE
|
ZFS_AC_KERNEL_SRC_KMEM_CACHE
|
||||||
|
ZFS_AC_KERNEL_SRC_KVMALLOC
|
||||||
ZFS_AC_KERNEL_SRC_WAIT
|
ZFS_AC_KERNEL_SRC_WAIT
|
||||||
ZFS_AC_KERNEL_SRC_INODE_TIMES
|
ZFS_AC_KERNEL_SRC_INODE_TIMES
|
||||||
ZFS_AC_KERNEL_SRC_INODE_LOCK
|
ZFS_AC_KERNEL_SRC_INODE_LOCK
|
||||||
|
@ -137,6 +138,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
|
||||||
ZFS_AC_KERNEL_SCHED
|
ZFS_AC_KERNEL_SCHED
|
||||||
ZFS_AC_KERNEL_USLEEP_RANGE
|
ZFS_AC_KERNEL_USLEEP_RANGE
|
||||||
ZFS_AC_KERNEL_KMEM_CACHE
|
ZFS_AC_KERNEL_KMEM_CACHE
|
||||||
|
ZFS_AC_KERNEL_KVMALLOC
|
||||||
ZFS_AC_KERNEL_WAIT
|
ZFS_AC_KERNEL_WAIT
|
||||||
ZFS_AC_KERNEL_INODE_TIMES
|
ZFS_AC_KERNEL_INODE_TIMES
|
||||||
ZFS_AC_KERNEL_INODE_LOCK
|
ZFS_AC_KERNEL_INODE_LOCK
|
||||||
|
|
|
@ -28,6 +28,8 @@
|
||||||
#include <sys/debug.h>
|
#include <sys/debug.h>
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
#include <linux/sched.h>
|
#include <linux/sched.h>
|
||||||
|
#include <linux/mm.h>
|
||||||
|
#include <linux/vmalloc.h>
|
||||||
|
|
||||||
extern int kmem_debugging(void);
|
extern int kmem_debugging(void);
|
||||||
extern char *kmem_vasprintf(const char *fmt, va_list ap);
|
extern char *kmem_vasprintf(const char *fmt, va_list ap);
|
||||||
|
@ -47,6 +49,7 @@ extern void kmem_strfree(char *str);
|
||||||
#define KM_PUBLIC_MASK (KM_SLEEP | KM_NOSLEEP | KM_PUSHPAGE)
|
#define KM_PUBLIC_MASK (KM_SLEEP | KM_NOSLEEP | KM_PUSHPAGE)
|
||||||
|
|
||||||
static int spl_fstrans_check(void);
|
static int spl_fstrans_check(void);
|
||||||
|
void *spl_kvmalloc(size_t size, gfp_t flags);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Convert a KM_* flags mask to its Linux GFP_* counterpart. The conversion
|
* Convert a KM_* flags mask to its Linux GFP_* counterpart. The conversion
|
||||||
|
|
|
@ -203,7 +203,23 @@ kv_alloc(spl_kmem_cache_t *skc, int size, int flags)
|
||||||
ASSERT(ISP2(size));
|
ASSERT(ISP2(size));
|
||||||
ptr = (void *)__get_free_pages(lflags, get_order(size));
|
ptr = (void *)__get_free_pages(lflags, get_order(size));
|
||||||
} else {
|
} else {
|
||||||
ptr = __vmalloc(size, lflags | __GFP_HIGHMEM, PAGE_KERNEL);
|
/*
|
||||||
|
* GFP_KERNEL allocations can safely use kvmalloc which may
|
||||||
|
* improve performance by avoiding a) high latency caused by
|
||||||
|
* vmalloc's on-access allocation, b) performance loss due to
|
||||||
|
* MMU memory address mapping and c) vmalloc locking overhead.
|
||||||
|
* This has the side-effect that the slab statistics will
|
||||||
|
* incorrectly report this as a vmem allocation, but that is
|
||||||
|
* purely cosmetic.
|
||||||
|
*
|
||||||
|
* For non-GFP_KERNEL allocations we stick to __vmalloc.
|
||||||
|
*/
|
||||||
|
if ((lflags & GFP_KERNEL) == GFP_KERNEL) {
|
||||||
|
ptr = spl_kvmalloc(size, lflags);
|
||||||
|
} else {
|
||||||
|
ptr = __vmalloc(size, lflags | __GFP_HIGHMEM,
|
||||||
|
PAGE_KERNEL);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Resulting allocated memory will be page aligned */
|
/* Resulting allocated memory will be page aligned */
|
||||||
|
@ -231,7 +247,7 @@ kv_free(spl_kmem_cache_t *skc, void *ptr, int size)
|
||||||
ASSERT(ISP2(size));
|
ASSERT(ISP2(size));
|
||||||
free_pages((unsigned long)ptr, get_order(size));
|
free_pages((unsigned long)ptr, get_order(size));
|
||||||
} else {
|
} else {
|
||||||
vfree(ptr);
|
spl_kmem_free_impl(ptr, size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -133,6 +133,73 @@ kmem_strfree(char *str)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(kmem_strfree);
|
EXPORT_SYMBOL(kmem_strfree);
|
||||||
|
|
||||||
|
/* Kernel compatibility for <4.13 */
|
||||||
|
#ifndef __GFP_RETRY_MAYFAIL
|
||||||
|
#define __GFP_RETRY_MAYFAIL __GFP_REPEAT
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void *
|
||||||
|
spl_kvmalloc(size_t size, gfp_t lflags)
|
||||||
|
{
|
||||||
|
#ifdef HAVE_KVMALLOC
|
||||||
|
/*
|
||||||
|
* GFP_KERNEL allocations can safely use kvmalloc which may
|
||||||
|
* improve performance by avoiding a) high latency caused by
|
||||||
|
* vmalloc's on-access allocation, b) performance loss due to
|
||||||
|
* MMU memory address mapping and c) vmalloc locking overhead.
|
||||||
|
* This has the side-effect that the slab statistics will
|
||||||
|
* incorrectly report this as a vmem allocation, but that is
|
||||||
|
* purely cosmetic.
|
||||||
|
*/
|
||||||
|
if ((lflags & GFP_KERNEL) == GFP_KERNEL)
|
||||||
|
return (kvmalloc(size, lflags));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
gfp_t kmalloc_lflags = lflags;
|
||||||
|
|
||||||
|
if (size > PAGE_SIZE) {
|
||||||
|
/*
|
||||||
|
* We need to set __GFP_NOWARN here since spl_kvmalloc is not
|
||||||
|
* only called by spl_kmem_alloc_impl but can be called
|
||||||
|
* directly with custom lflags, too. In that case
|
||||||
|
* kmem_flags_convert does not get called, which would
|
||||||
|
* implicitly set __GFP_NOWARN.
|
||||||
|
*/
|
||||||
|
kmalloc_lflags |= __GFP_NOWARN;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* N.B. __GFP_RETRY_MAYFAIL is supported only for large
|
||||||
|
* (>32kB) allocations.
|
||||||
|
*
|
||||||
|
* We have to override __GFP_RETRY_MAYFAIL by __GFP_NORETRY
|
||||||
|
* for !costly requests because there is no other way to tell
|
||||||
|
* the allocator that we want to fail rather than retry
|
||||||
|
* endlessly.
|
||||||
|
*/
|
||||||
|
if (!(kmalloc_lflags & __GFP_RETRY_MAYFAIL) ||
|
||||||
|
(size <= PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
|
||||||
|
kmalloc_lflags |= __GFP_NORETRY;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We first try kmalloc - even for big sizes - and fall back to
|
||||||
|
* __vmalloc if that fails.
|
||||||
|
*
|
||||||
|
* For non-GFP_KERNEL allocations we always stick to kmalloc_node,
|
||||||
|
* and fail when kmalloc is not successful (returns NULL).
|
||||||
|
* We cannot fall back to __vmalloc in this case because __vmalloc
|
||||||
|
* internally uses GFP_KERNEL allocations.
|
||||||
|
*/
|
||||||
|
void *ptr = kmalloc_node(size, kmalloc_lflags, NUMA_NO_NODE);
|
||||||
|
if (ptr || size <= PAGE_SIZE ||
|
||||||
|
(lflags & GFP_KERNEL) != GFP_KERNEL) {
|
||||||
|
return (ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (__vmalloc(size, lflags | __GFP_HIGHMEM, PAGE_KERNEL));
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* General purpose unified implementation of kmem_alloc(). It is an
|
* General purpose unified implementation of kmem_alloc(). It is an
|
||||||
* amalgamation of Linux and Illumos allocator design. It should never be
|
* amalgamation of Linux and Illumos allocator design. It should never be
|
||||||
|
@ -144,7 +211,6 @@ inline void *
|
||||||
spl_kmem_alloc_impl(size_t size, int flags, int node)
|
spl_kmem_alloc_impl(size_t size, int flags, int node)
|
||||||
{
|
{
|
||||||
gfp_t lflags = kmem_flags_convert(flags);
|
gfp_t lflags = kmem_flags_convert(flags);
|
||||||
int use_vmem = 0;
|
|
||||||
void *ptr;
|
void *ptr;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -178,28 +244,30 @@ spl_kmem_alloc_impl(size_t size, int flags, int node)
|
||||||
* impact performance so frequently manipulating the virtual
|
* impact performance so frequently manipulating the virtual
|
||||||
* address space is strongly discouraged.
|
* address space is strongly discouraged.
|
||||||
*/
|
*/
|
||||||
if ((size > spl_kmem_alloc_max) || use_vmem) {
|
if (size > spl_kmem_alloc_max) {
|
||||||
if (flags & KM_VMEM) {
|
if (flags & KM_VMEM) {
|
||||||
ptr = __vmalloc(size, lflags | __GFP_HIGHMEM,
|
ptr = __vmalloc(size, lflags | __GFP_HIGHMEM,
|
||||||
PAGE_KERNEL);
|
PAGE_KERNEL);
|
||||||
} else {
|
} else {
|
||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
if (flags & KM_VMEM) {
|
||||||
|
ptr = spl_kvmalloc(size, lflags);
|
||||||
} else {
|
} else {
|
||||||
ptr = kmalloc_node(size, lflags, node);
|
ptr = kmalloc_node(size, lflags, node);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (likely(ptr) || (flags & KM_NOSLEEP))
|
if (likely(ptr) || (flags & KM_NOSLEEP))
|
||||||
return (ptr);
|
return (ptr);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For vmem_alloc() and vmem_zalloc() callers retry immediately
|
* Try hard to satisfy the allocation. However, when progress
|
||||||
* using __vmalloc() which is unlikely to fail.
|
* cannot be made, the allocation is allowed to fail.
|
||||||
*/
|
*/
|
||||||
if ((flags & KM_VMEM) && (use_vmem == 0)) {
|
if ((lflags & GFP_KERNEL) == GFP_KERNEL)
|
||||||
use_vmem = 1;
|
lflags |= __GFP_RETRY_MAYFAIL;
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Use cond_resched() instead of congestion_wait() to avoid
|
* Use cond_resched() instead of congestion_wait() to avoid
|
||||||
|
|
Loading…
Reference in New Issue