diff --git a/include/os/linux/spl/sys/kmem_cache.h b/include/os/linux/spl/sys/kmem_cache.h
index ce18eb4743..30d2dd5edf 100644
--- a/include/os/linux/spl/sys/kmem_cache.h
+++ b/include/os/linux/spl/sys/kmem_cache.h
@@ -124,7 +124,6 @@ extern struct rw_semaphore spl_kmem_cache_sem;
 
 typedef int (*spl_kmem_ctor_t)(void *, void *, int);
 typedef void (*spl_kmem_dtor_t)(void *, void *);
-typedef void (*spl_kmem_reclaim_t)(void *);
 
 typedef struct spl_kmem_magazine {
 	uint32_t		skm_magic;	/* Sanity magic */
@@ -174,7 +173,6 @@ typedef struct spl_kmem_cache {
 	uint32_t		skc_mag_refill;	/* Magazine refill count */
 	spl_kmem_ctor_t		skc_ctor;	/* Constructor */
 	spl_kmem_dtor_t		skc_dtor;	/* Destructor */
-	spl_kmem_reclaim_t	skc_reclaim;	/* Reclaimator */
 	void			*skc_private;	/* Private data */
 	void			*skc_vmp;	/* Unused */
 	struct kmem_cache	*skc_linux_cache;	/* Linux slab cache if used */
@@ -210,7 +208,7 @@ typedef struct spl_kmem_cache {
 
 extern spl_kmem_cache_t *spl_kmem_cache_create(char *name, size_t size,
     size_t align, spl_kmem_ctor_t ctor, spl_kmem_dtor_t dtor,
-    spl_kmem_reclaim_t reclaim, void *priv, void *vmp, int flags);
+    void *reclaim, void *priv, void *vmp, int flags);
 extern void spl_kmem_cache_set_move(spl_kmem_cache_t *,
     kmem_cbrc_t (*)(void *, void *, size_t, void *));
 extern void spl_kmem_cache_destroy(spl_kmem_cache_t *skc);
diff --git a/module/os/linux/spl/spl-kmem-cache.c b/module/os/linux/spl/spl-kmem-cache.c
index 0347274101..76b89b2546 100644
--- a/module/os/linux/spl/spl-kmem-cache.c
+++ b/module/os/linux/spl/spl-kmem-cache.c
@@ -25,7 +25,6 @@
 #include <linux/percpu_compat.h>
 #include <sys/kmem.h>
 #include <sys/kmem_cache.h>
-#include <sys/shrinker.h>
 #include <sys/taskq.h>
 #include <sys/timer.h>
 #include <sys/vmem.h>
@@ -883,7 +882,7 @@ spl_magazine_destroy(spl_kmem_cache_t *skc)
  */
 spl_kmem_cache_t *
 spl_kmem_cache_create(char *name, size_t size, size_t align,
-    spl_kmem_ctor_t ctor, spl_kmem_dtor_t dtor, spl_kmem_reclaim_t reclaim,
+    spl_kmem_ctor_t ctor, spl_kmem_dtor_t dtor, void *reclaim,
     void *priv, void *vmp, int flags)
 {
 	gfp_t lflags = kmem_flags_convert(KM_SLEEP);
@@ -897,6 +896,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
 	ASSERT0(flags & KMC_NOHASH);
 	ASSERT0(flags & KMC_QCACHE);
 	ASSERT(vmp == NULL);
+	ASSERT(reclaim == NULL);
 
 	might_sleep();
 
@@ -915,7 +915,6 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
 
 	skc->skc_ctor = ctor;
 	skc->skc_dtor = dtor;
-	skc->skc_reclaim = reclaim;
 	skc->skc_private = priv;
 	skc->skc_vmp = vmp;
 	skc->skc_linux_cache = NULL;
@@ -1606,78 +1605,11 @@ spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
 EXPORT_SYMBOL(spl_kmem_cache_free);
 
 /*
- * The generic shrinker function for all caches.  Under Linux a shrinker
- * may not be tightly coupled with a slab cache.  In fact Linux always
- * systematically tries calling all registered shrinker callbacks which
- * report that they contain unused objects.  Because of this we only
- * register one shrinker function in the shim layer for all slab caches.
- * We always attempt to shrink all caches when this generic shrinker
- * is called.
- *
- * The _count() function returns the number of free-able objects.
- * The _scan() function returns the number of objects that were freed.
- */
-static unsigned long
-spl_kmem_cache_shrinker_count(struct shrinker *shrink,
-    struct shrink_control *sc)
-{
-	spl_kmem_cache_t *skc = NULL;
-	int alloc = 0;
-
-	down_read(&spl_kmem_cache_sem);
-	list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {
-		alloc += skc->skc_obj_alloc;
-	}
-	up_read(&spl_kmem_cache_sem);
-
-	return (MAX(alloc, 0));
-}
-
-static unsigned long
-spl_kmem_cache_shrinker_scan(struct shrinker *shrink,
-    struct shrink_control *sc)
-{
-	spl_kmem_cache_t *skc = NULL;
-	int alloc = 0;
-
-	/*
-	 * No shrinking in a transaction context.  Can cause deadlocks.
-	 */
-	if (spl_fstrans_check())
-		return (SHRINK_STOP);
-
-	down_read(&spl_kmem_cache_sem);
-	list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {
-		uint64_t oldalloc = skc->skc_obj_alloc;
-		spl_kmem_cache_reap_now(skc);
-		if (oldalloc > skc->skc_obj_alloc)
-			alloc += oldalloc - skc->skc_obj_alloc;
-	}
-	up_read(&spl_kmem_cache_sem);
-
-	/*
-	 * When KMC_RECLAIM_ONCE is set allow only a single reclaim pass.
-	 * This functionality only exists to work around a rare issue where
-	 * shrink_slabs() is repeatedly invoked by many cores causing the
-	 * system to thrash.
-	 */
-	if (spl_kmem_cache_reclaim & KMC_RECLAIM_ONCE)
-		return (SHRINK_STOP);
-
-	return (MAX(alloc, 0));
-}
-
-SPL_SHRINKER_DECLARE(spl_kmem_cache_shrinker,
-    spl_kmem_cache_shrinker_count, spl_kmem_cache_shrinker_scan,
-    KMC_DEFAULT_SEEKS);
-
-/*
- * Call the registered reclaim function for a cache.  Depending on how
- * many and which objects are released it may simply repopulate the
- * local magazine which will then need to age-out.  Objects which cannot
- * fit in the magazine we will be released back to their slabs which will
- * also need to age out before being release.  This is all just best
- * effort and we do not want to thrash creating and destroying slabs.
+ * Depending on how many and which objects are released it may simply
+ * repopulate the local magazine which will then need to age-out.  Objects
+ * which cannot fit in the magazine will be released back to their slabs
+ * which will also need to age out before being released.  This is all just
+ * best effort and we do not want to thrash creating and destroying slabs.
  */
 void
 spl_kmem_cache_reap_now(spl_kmem_cache_t *skc)
@@ -1685,16 +1617,10 @@ spl_kmem_cache_reap_now(spl_kmem_cache_t *skc)
 	ASSERT(skc->skc_magic == SKC_MAGIC);
 	ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
 
-	atomic_inc(&skc->skc_ref);
+	if (skc->skc_flags & KMC_SLAB)
+		return;
 
-	/*
-	 * Execute the registered reclaim callback if it exists.
-	 */
-	if (skc->skc_flags & KMC_SLAB) {
-		if (skc->skc_reclaim)
-			skc->skc_reclaim(skc->skc_private);
-		goto out;
-	}
+	atomic_inc(&skc->skc_ref);
 
 	/*
 	 * Prevent concurrent cache reaping when contended.
@@ -1702,39 +1628,6 @@ spl_kmem_cache_reap_now(spl_kmem_cache_t *skc)
 	if (test_and_set_bit(KMC_BIT_REAPING, &skc->skc_flags))
 		goto out;
 
-	/*
-	 * When a reclaim function is available it may be invoked repeatedly
-	 * until at least a single slab can be freed.  This ensures that we
-	 * do free memory back to the system.  This helps minimize the chance
-	 * of an OOM event when the bulk of memory is used by the slab.
-	 *
-	 * When free slabs are already available the reclaim callback will be
-	 * skipped.  Additionally, if no forward progress is detected despite
-	 * a reclaim function the cache will be skipped to avoid deadlock.
-	 *
-	 * Longer term this would be the correct place to add the code which
-	 * repacks the slabs in order minimize fragmentation.
-	 */
-	if (skc->skc_reclaim) {
-		uint64_t objects = UINT64_MAX;
-		int do_reclaim;
-
-		do {
-			spin_lock(&skc->skc_lock);
-			do_reclaim =
-			    (skc->skc_slab_total > 0) &&
-			    ((skc->skc_slab_total-skc->skc_slab_alloc) == 0) &&
-			    (skc->skc_obj_alloc < objects);
-
-			objects = skc->skc_obj_alloc;
-			spin_unlock(&skc->skc_lock);
-
-			if (do_reclaim)
-				skc->skc_reclaim(skc->skc_private);
-
-		} while (do_reclaim);
-	}
-
 	/* Reclaim from the magazine and free all now empty slabs. */
 	if (spl_kmem_cache_expire & KMC_EXPIRE_MEM) {
 		spl_kmem_magazine_t *skm;
@@ -1773,12 +1666,13 @@ EXPORT_SYMBOL(spl_kmem_cache_reap_active);
 void
 spl_kmem_reap(void)
 {
-	struct shrink_control sc;
+	spl_kmem_cache_t *skc = NULL;
 
-	sc.nr_to_scan = KMC_REAP_CHUNK;
-	sc.gfp_mask = GFP_KERNEL;
-
-	(void) spl_kmem_cache_shrinker_scan(NULL, &sc);
+	down_read(&spl_kmem_cache_sem);
+	list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {
+		spl_kmem_cache_reap_now(skc);
+	}
+	up_read(&spl_kmem_cache_sem);
 }
 EXPORT_SYMBOL(spl_kmem_reap);
 
@@ -1791,7 +1685,6 @@ spl_kmem_cache_init(void)
 	    spl_kmem_cache_kmem_threads, maxclsyspri,
 	    spl_kmem_cache_kmem_threads * 8, INT_MAX,
 	    TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
-	spl_register_shrinker(&spl_kmem_cache_shrinker);
 
 	return (0);
 }
@@ -1799,6 +1692,5 @@ spl_kmem_cache_init(void)
 void
 spl_kmem_cache_fini(void)
 {
-	spl_unregister_shrinker(&spl_kmem_cache_shrinker);
 	taskq_destroy(spl_kmem_cache_taskq);
 }
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index ea22686ccd..5b9df43d23 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -380,11 +380,6 @@ static int arc_min_prescient_prefetch_ms;
  */
 int arc_lotsfree_percent = 10;
 
-/*
- * hdr_recl() uses this to determine if the arc is up and running.
- */
-static boolean_t arc_initialized;
-
 /*
  * The arc has filled available memory and has now warmed up.
  */
@@ -1198,22 +1193,6 @@ buf_dest(void *vbuf, void *unused)
 	arc_space_return(sizeof (arc_buf_t), ARC_SPACE_HDRS);
 }
 
-/*
- * Reclaim callback -- invoked when memory is low.
- */
-/* ARGSUSED */
-static void
-hdr_recl(void *unused)
-{
-	dprintf("hdr_recl called\n");
-	/*
-	 * umem calls the reclaim func when we destroy the buf cache,
-	 * which is after we do arc_fini().
-	 */
-	if (arc_initialized)
-		zthr_wakeup(arc_reap_zthr);
-}
-
 static void
 buf_init(void)
 {
@@ -1249,12 +1228,12 @@ retry:
 	}
 
 	hdr_full_cache = kmem_cache_create("arc_buf_hdr_t_full", HDR_FULL_SIZE,
-	    0, hdr_full_cons, hdr_full_dest, hdr_recl, NULL, NULL, 0);
+	    0, hdr_full_cons, hdr_full_dest, NULL, NULL, NULL, 0);
 	hdr_full_crypt_cache = kmem_cache_create("arc_buf_hdr_t_full_crypt",
 	    HDR_FULL_CRYPT_SIZE, 0, hdr_full_crypt_cons, hdr_full_crypt_dest,
-	    hdr_recl, NULL, NULL, 0);
+	    NULL, NULL, NULL, 0);
 	hdr_l2only_cache = kmem_cache_create("arc_buf_hdr_t_l2only",
-	    HDR_L2ONLY_SIZE, 0, hdr_l2only_cons, hdr_l2only_dest, hdr_recl,
+	    HDR_L2ONLY_SIZE, 0, hdr_l2only_cons, hdr_l2only_dest, NULL,
 	    NULL, NULL, 0);
 	buf_cache = kmem_cache_create("arc_buf_t", sizeof (arc_buf_t),
 	    0, buf_cons, buf_dest, NULL, NULL, NULL, 0);
@@ -4688,9 +4667,6 @@ arc_kmem_reap_soon(void)
 static boolean_t
 arc_adjust_cb_check(void *arg, zthr_t *zthr)
 {
-	if (!arc_initialized)
-		return (B_FALSE);
-
 	/*
 	 * This is necessary so that any changes which may have been made to
 	 * many of the zfs_arc_* module parameters will be propagated to
@@ -4778,9 +4754,6 @@ arc_adjust_cb(void *arg, zthr_t *zthr)
 static boolean_t
 arc_reap_cb_check(void *arg, zthr_t *zthr)
 {
-	if (!arc_initialized)
-		return (B_FALSE);
-
 	int64_t free_memory = arc_available_memory();
 
 	/*
@@ -7348,12 +7321,6 @@ arc_init(void)
 
 	arc_state_init();
 
-	/*
-	 * The arc must be "uninitialized", so that hdr_recl() (which is
-	 * registered by buf_init()) will not access arc_reap_zthr before
-	 * it is created.
-	 */
-	ASSERT(!arc_initialized);
 	buf_init();
 
 	list_create(&arc_prune_list, sizeof (arc_prune_t),
@@ -7377,7 +7344,6 @@ arc_init(void)
 	arc_reap_zthr = zthr_create_timer(arc_reap_cb_check,
 	    arc_reap_cb, NULL, SEC2NSEC(1));
 
-	arc_initialized = B_TRUE;
 	arc_warm = B_FALSE;
 
 	/*
@@ -7412,8 +7378,6 @@ arc_fini(void)
 	/* Use B_TRUE to ensure *all* buffers are evicted */
 	arc_flush(NULL, B_TRUE);
 
-	arc_initialized = B_FALSE;
-
 	if (arc_ksp != NULL) {
 		kstat_delete(arc_ksp);
 		arc_ksp = NULL;