From d4b5517ef95948d22a26376768442a59116fa4e7 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Tue, 30 Jul 2024 14:40:47 -0400 Subject: [PATCH] Linux: Report reclaimable memory to kernel as such (#16385) Linux provides SLAB_RECLAIM_ACCOUNT and __GFP_RECLAIMABLE flags to mark memory allocations that can be freed via shinker calls. It should allow kernel to tune and group such allocations for lower memory fragmentation and better reclamation under pressure. This patch marks as reclaimable most of ARC memory, directly evictable via ZFS shrinker, plus also dnode/znode/sa memory, indirectly evictable via kernel's superblock shrinker. Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Reviewed-by: Tony Hutter Reviewed-by: Allan Jude --- include/os/freebsd/spl/sys/kmem.h | 1 + include/os/linux/spl/sys/kmem_cache.h | 5 ++--- include/sys/zfs_context.h | 1 + module/os/freebsd/zfs/abd_os.c | 2 +- module/os/freebsd/zfs/zfs_znode.c | 2 +- module/os/linux/spl/spl-kmem-cache.c | 8 ++++++++ module/os/linux/spl/spl-zlib.c | 2 +- module/os/linux/zfs/abd_os.c | 6 +++--- module/os/linux/zfs/zfs_znode.c | 3 ++- module/zfs/arc.c | 2 +- module/zfs/dnode.c | 2 +- module/zfs/lz4_zfs.c | 3 ++- module/zfs/sa.c | 2 +- module/zfs/zio.c | 4 ++++ 14 files changed, 29 insertions(+), 14 deletions(-) diff --git a/include/os/freebsd/spl/sys/kmem.h b/include/os/freebsd/spl/sys/kmem.h index c633799318..ae786f0e20 100644 --- a/include/os/freebsd/spl/sys/kmem.h +++ b/include/os/freebsd/spl/sys/kmem.h @@ -49,6 +49,7 @@ MALLOC_DECLARE(M_SOLARIS); #define KM_NOSLEEP M_NOWAIT #define KM_NORMALPRI 0 #define KMC_NODEBUG UMA_ZONE_NODUMP +#define KMC_RECLAIMABLE 0x0 typedef struct vmem vmem_t; diff --git a/include/os/linux/spl/sys/kmem_cache.h b/include/os/linux/spl/sys/kmem_cache.h index 905ff57a14..2b4f120e64 100644 --- a/include/os/linux/spl/sys/kmem_cache.h +++ b/include/os/linux/spl/sys/kmem_cache.h @@ -45,6 +45,7 @@ typedef enum kmc_bit { KMC_BIT_TOTAL = 18, /* Proc handler helper bit */ KMC_BIT_ALLOC = 19, /* Proc handler helper bit */ KMC_BIT_MAX = 20, /* Proc handler helper bit */ + KMC_BIT_RECLAIMABLE = 21, /* Can be freed by shrinker */ } kmc_bit_t; /* kmem move callback return values */ @@ -66,9 +67,7 @@ typedef enum kmem_cbrc { #define KMC_TOTAL (1 << KMC_BIT_TOTAL) #define KMC_ALLOC (1 << KMC_BIT_ALLOC) #define KMC_MAX (1 << KMC_BIT_MAX) - -#define KMC_REAP_CHUNK INT_MAX -#define KMC_DEFAULT_SEEKS 1 +#define KMC_RECLAIMABLE (1 << KMC_BIT_RECLAIMABLE) extern struct list_head spl_kmem_cache_list; extern struct rw_semaphore spl_kmem_cache_sem; diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index e4711ce419..998eaa5dd8 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -413,6 +413,7 @@ void procfs_list_add(procfs_list_t *procfs_list, void *p); #define KM_NORMALPRI 0 /* not needed with UMEM_DEFAULT */ #define KMC_NODEBUG UMC_NODEBUG #define KMC_KVMEM 0x0 +#define KMC_RECLAIMABLE 0x0 #define kmem_alloc(_s, _f) umem_alloc(_s, _f) #define kmem_zalloc(_s, _f) umem_zalloc(_s, _f) #define kmem_free(_b, _s) umem_free(_b, _s) diff --git a/module/os/freebsd/zfs/abd_os.c b/module/os/freebsd/zfs/abd_os.c index 3b812271f9..fb5c46ecf7 100644 --- a/module/os/freebsd/zfs/abd_os.c +++ b/module/os/freebsd/zfs/abd_os.c @@ -300,7 +300,7 @@ void abd_init(void) { abd_chunk_cache = kmem_cache_create("abd_chunk", PAGE_SIZE, 0, - NULL, NULL, NULL, NULL, 0, KMC_NODEBUG); + NULL, NULL, NULL, NULL, 0, KMC_NODEBUG | KMC_RECLAIMABLE); wmsum_init(&abd_sums.abdstat_struct_size, 0); wmsum_init(&abd_sums.abdstat_scatter_cnt, 0); diff --git a/module/os/freebsd/zfs/zfs_znode.c b/module/os/freebsd/zfs/zfs_znode.c index 0eea2a8494..afbea9798c 100644 --- a/module/os/freebsd/zfs/zfs_znode.c +++ b/module/os/freebsd/zfs/zfs_znode.c @@ -236,7 +236,7 @@ zfs_znode_init(void) ASSERT3P(znode_cache, ==, NULL); znode_cache = kmem_cache_create("zfs_znode_cache", sizeof (znode_t), 0, zfs_znode_cache_constructor, - zfs_znode_cache_destructor, NULL, NULL, NULL, 0); + zfs_znode_cache_destructor, NULL, NULL, NULL, KMC_RECLAIMABLE); } static znode_t * diff --git a/module/os/linux/spl/spl-kmem-cache.c b/module/os/linux/spl/spl-kmem-cache.c index 737c2e063f..16412bc9e6 100644 --- a/module/os/linux/spl/spl-kmem-cache.c +++ b/module/os/linux/spl/spl-kmem-cache.c @@ -144,6 +144,8 @@ kv_alloc(spl_kmem_cache_t *skc, int size, int flags) gfp_t lflags = kmem_flags_convert(flags); void *ptr; + if (skc->skc_flags & KMC_RECLAIMABLE) + lflags |= __GFP_RECLAIMABLE; ptr = spl_vmalloc(size, lflags | __GFP_HIGHMEM); /* Resulting allocated memory will be page aligned */ @@ -424,6 +426,8 @@ spl_emergency_alloc(spl_kmem_cache_t *skc, int flags, void **obj) if (!empty) return (-EEXIST); + if (skc->skc_flags & KMC_RECLAIMABLE) + lflags |= __GFP_RECLAIMABLE; ske = kmalloc(sizeof (*ske), lflags); if (ske == NULL) return (-ENOMEM); @@ -663,6 +667,7 @@ spl_magazine_destroy(spl_kmem_cache_t *skc) * KMC_KVMEM Force kvmem backed SPL cache * KMC_SLAB Force Linux slab backed cache * KMC_NODEBUG Disable debugging (unsupported) + * KMC_RECLAIMABLE Memory can be freed under pressure */ spl_kmem_cache_t * spl_kmem_cache_create(const char *name, size_t size, size_t align, @@ -780,6 +785,9 @@ spl_kmem_cache_create(const char *name, size_t size, size_t align, if (size > spl_kmem_cache_slab_limit) goto out; + if (skc->skc_flags & KMC_RECLAIMABLE) + slabflags |= SLAB_RECLAIM_ACCOUNT; + #if defined(SLAB_USERCOPY) /* * Required for PAX-enabled kernels if the slab is to be diff --git a/module/os/linux/spl/spl-zlib.c b/module/os/linux/spl/spl-zlib.c index 8c6282ee5d..a7b6c14ee1 100644 --- a/module/os/linux/spl/spl-zlib.c +++ b/module/os/linux/spl/spl-zlib.c @@ -202,7 +202,7 @@ spl_zlib_init(void) zlib_workspace_cache = kmem_cache_create( "spl_zlib_workspace_cache", size, 0, NULL, NULL, NULL, NULL, NULL, - KMC_KVMEM); + KMC_KVMEM | KMC_RECLAIMABLE); if (!zlib_workspace_cache) return (-ENOMEM); diff --git a/module/os/linux/zfs/abd_os.c b/module/os/linux/zfs/abd_os.c index 4bf9eaf771..f7af20c619 100644 --- a/module/os/linux/zfs/abd_os.c +++ b/module/os/linux/zfs/abd_os.c @@ -281,7 +281,7 @@ abd_alloc_chunks(abd_t *abd, size_t size) struct sg_table table; struct scatterlist *sg; struct page *page, *tmp_page = NULL; - gfp_t gfp = __GFP_NOWARN | GFP_NOIO; + gfp_t gfp = __GFP_RECLAIMABLE | __GFP_NOWARN | GFP_NOIO; gfp_t gfp_comp = (gfp | __GFP_NORETRY | __GFP_COMP) & ~__GFP_RECLAIM; unsigned int max_order = MIN(zfs_abd_scatter_max_order, ABD_MAX_ORDER - 1); @@ -403,7 +403,7 @@ abd_alloc_chunks(abd_t *abd, size_t size) struct scatterlist *sg = NULL; struct sg_table table; struct page *page; - gfp_t gfp = __GFP_NOWARN | GFP_NOIO; + gfp_t gfp = __GFP_RECLAIMABLE | __GFP_NOWARN | GFP_NOIO; int nr_pages = abd_chunkcnt_for_bytes(size); int i = 0; @@ -762,7 +762,7 @@ abd_init(void) int i; abd_cache = kmem_cache_create("abd_t", sizeof (abd_t), - 0, NULL, NULL, NULL, NULL, NULL, 0); + 0, NULL, NULL, NULL, NULL, NULL, KMC_RECLAIMABLE); wmsum_init(&abd_sums.abdstat_struct_size, 0); wmsum_init(&abd_sums.abdstat_linear_cnt, 0); diff --git a/module/os/linux/zfs/zfs_znode.c b/module/os/linux/zfs/zfs_znode.c index b99df188c6..265153e011 100644 --- a/module/os/linux/zfs/zfs_znode.c +++ b/module/os/linux/zfs/zfs_znode.c @@ -194,7 +194,8 @@ zfs_znode_init(void) ASSERT(znode_cache == NULL); znode_cache = kmem_cache_create("zfs_znode_cache", sizeof (znode_t), 0, zfs_znode_cache_constructor, - zfs_znode_cache_destructor, NULL, NULL, NULL, KMC_SLAB); + zfs_znode_cache_destructor, NULL, NULL, NULL, + KMC_SLAB | KMC_RECLAIMABLE); ASSERT(znode_hold_cache == NULL); znode_hold_cache = kmem_cache_create("zfs_znode_hold_cache", diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 6605243bac..d01bf0947d 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -1258,7 +1258,7 @@ retry: } hdr_full_cache = kmem_cache_create("arc_buf_hdr_t_full", HDR_FULL_SIZE, - 0, hdr_full_cons, hdr_full_dest, NULL, NULL, NULL, 0); + 0, hdr_full_cons, hdr_full_dest, NULL, NULL, NULL, KMC_RECLAIMABLE); hdr_l2only_cache = kmem_cache_create("arc_buf_hdr_t_l2only", HDR_L2ONLY_SIZE, 0, hdr_l2only_cons, hdr_l2only_dest, NULL, NULL, NULL, 0); diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index b6cc512cbb..ecc6761f8f 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -306,7 +306,7 @@ dnode_init(void) { ASSERT(dnode_cache == NULL); dnode_cache = kmem_cache_create("dnode_t", sizeof (dnode_t), - 0, dnode_cons, dnode_dest, NULL, NULL, NULL, 0); + 0, dnode_cons, dnode_dest, NULL, NULL, NULL, KMC_RECLAIMABLE); kmem_cache_set_move(dnode_cache, dnode_move); wmsum_init(&dnode_sums.dnode_hold_dbuf_hold, 0); diff --git a/module/zfs/lz4_zfs.c b/module/zfs/lz4_zfs.c index e28215cf35..a3b9e70703 100644 --- a/module/zfs/lz4_zfs.c +++ b/module/zfs/lz4_zfs.c @@ -867,7 +867,8 @@ void lz4_init(void) { lz4_cache = kmem_cache_create("lz4_cache", - sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0); + sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, + KMC_RECLAIMABLE); } void diff --git a/module/zfs/sa.c b/module/zfs/sa.c index 32ac287eb7..a1b2ffa870 100644 --- a/module/zfs/sa.c +++ b/module/zfs/sa.c @@ -236,7 +236,7 @@ sa_cache_init(void) { sa_cache = kmem_cache_create("sa_cache", sizeof (sa_handle_t), 0, sa_cache_constructor, - sa_cache_destructor, NULL, NULL, NULL, 0); + sa_cache_destructor, NULL, NULL, NULL, KMC_RECLAIMABLE); } void diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 00c93e76ae..26ffc597f1 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -194,6 +194,10 @@ zio_init(void) cflags = (zio_exclude_metadata || size > zio_buf_debug_limit) ? KMC_NODEBUG : 0; data_cflags = KMC_NODEBUG; + if (abd_size_alloc_linear(size)) { + cflags |= KMC_RECLAIMABLE; + data_cflags |= KMC_RECLAIMABLE; + } if (cflags == data_cflags) { /* * Resulting kmem caches would be identical.