@@ -20,10 +20,10 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2018, Joyent, Inc.
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright (c) 2014 by Saso Kiselkov. All rights reserved.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright 2017 Nexenta Systems, Inc. All rights reserved.
*/

/*
@@ -299,7 +299,7 @@
#endif
#include <sys/callb.h>
#include <sys/kstat.h>
#include <sys/dmu_tx.h>
#include <sys/zthr.h>
#include <zfs_fletcher.h>
#include <sys/arc_impl.h>
#include <sys/trace_arc.h>
@@ -311,10 +311,22 @@
boolean_t arc_watch = B_FALSE;
#endif

static kmutex_t arc_reclaim_lock;
static kcondvar_t arc_reclaim_thread_cv;
static boolean_t arc_reclaim_thread_exit;
static kcondvar_t arc_reclaim_waiters_cv;
/*
* This thread's job is to keep enough free memory in the system, by
* calling arc_kmem_reap_soon() plus arc_reduce_target_size(), which improves
* arc_available_memory().
*/
static zthr_t *arc_reap_zthr;

/*
* This thread's job is to keep arc_size under arc_c, by calling
* arc_adjust(), which improves arc_is_overflowing().
*/
static zthr_t *arc_adjust_zthr;

static kmutex_t arc_adjust_lock;
static kcondvar_t arc_adjust_waiters_cv;
static boolean_t arc_adjust_needed = B_FALSE;

/*
* The number of headers to evict in arc_evict_state_impl() before
@@ -328,11 +340,16 @@ int zfs_arc_evict_batch_limit = 10;
/* number of seconds before growing cache again */
static int arc_grow_retry = 5;

/*
* Minimum time between calls to arc_kmem_reap_soon().
*/
int arc_kmem_cache_reap_retry_ms = 1000;

/* shift of arc_c for calculating overflow limit in arc_get_data_impl */
int zfs_arc_overflow_shift = 8;

/* shift of arc_c for calculating both min and max arc_p */
static int arc_p_min_shift = 4;
int arc_p_min_shift = 4;

/* log2(fraction of arc to reclaim) */
static int arc_shrink_shift = 7;
@@ -366,7 +383,10 @@ static int arc_min_prescient_prefetch_ms;
*/
int arc_lotsfree_percent = 10;

static int arc_dead;
/*
* hdr_recl() uses this to determine if the arc is up and running.
*/
static boolean_t arc_initialized;

/*
* The arc has filled available memory and has now warmed up.
@@ -906,6 +926,7 @@ aggsum_t astat_bonus_size;
aggsum_t astat_hdr_size;
aggsum_t astat_l2_hdr_size;

static hrtime_t arc_growtime;
static list_t arc_prune_list;
static kmutex_t arc_prune_mtx;
static taskq_t *arc_prune_taskq;
@@ -1380,8 +1401,8 @@ hdr_recl(void *unused)
* umem calls the reclaim func when we destroy the buf cache,
* which is after we do arc_fini().
*/
if (!arc_dead)
cv_signal(&arc_reclaim_thread_cv);
if (arc_initialized)
zthr_wakeup(arc_reap_zthr);
}

static void
@@ -4097,13 +4118,14 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
* function should proceed in this case).
*
* If threads are left sleeping, due to not
* using cv_broadcast, they will be woken up
* just before arc_reclaim_thread() sleeps.
* using cv_broadcast here, they will be woken
* up via cv_broadcast in arc_adjust_cb() just
* before arc_adjust_zthr sleeps.
*/
mutex_enter(&arc_reclaim_lock);
mutex_enter(&arc_adjust_lock);
if (!arc_is_overflowing())
cv_signal(&arc_reclaim_waiters_cv);
mutex_exit(&arc_reclaim_lock);
cv_signal(&arc_adjust_waiters_cv);
mutex_exit(&arc_adjust_lock);
} else {
ARCSTAT_BUMP(arcstat_mutex_miss);
}
@@ -4763,8 +4785,8 @@ arc_flush(spa_t *spa, boolean_t retry)
(void) arc_flush_state(arc_mfu_ghost, guid, ARC_BUFC_METADATA, retry);
}

void
arc_shrink(int64_t to_free)
static void
arc_reduce_target_size(int64_t to_free)
{
uint64_t asize = aggsum_value(&arc_size);
uint64_t c = arc_c;
@@ -4782,10 +4804,14 @@ arc_shrink(int64_t to_free)
arc_c = arc_c_min;
}

if (asize > arc_c)
(void) arc_adjust();
if (asize > arc_c) {
/* See comment in arc_adjust_cb_check() on why lock+flag */
mutex_enter(&arc_adjust_lock);
arc_adjust_needed = B_TRUE;
mutex_exit(&arc_adjust_lock);
zthr_wakeup(arc_adjust_zthr);
}
}

/*
* Return maximum amount of memory that we could possibly use. Reduced
* to half of all memory in user space which is primarily used for testing.
@@ -4989,7 +5015,7 @@ arc_reclaim_needed(void)
}

static void
arc_kmem_reap_now(void)
arc_kmem_reap_soon(void)
{
size_t i;
kmem_cache_t *prev_cache = NULL;
@@ -5044,79 +5070,155 @@ arc_kmem_reap_now(void)
}
}

/*
* Threads can block in arc_get_data_impl() waiting for this thread to evict
* enough data and signal them to proceed. When this happens, the threads in
* arc_get_data_impl() are sleeping while holding the hash lock for their
* particular arc header. Thus, we must be careful to never sleep on a
* hash lock in this thread. This is to prevent the following deadlock:
*
* - Thread A sleeps on CV in arc_get_data_impl() holding hash lock "L",
* waiting for the reclaim thread to signal it.
*
* - arc_reclaim_thread() tries to acquire hash lock "L" using mutex_enter,
* fails, and goes to sleep forever.
*
* This possible deadlock is avoided by always acquiring a hash lock
* using mutex_tryenter() from arc_reclaim_thread().
*/
/* ARGSUSED */
static void
arc_reclaim_thread(void *unused)
static boolean_t
arc_adjust_cb_check(void *arg, zthr_t *zthr)
{
fstrans_cookie_t cookie = spl_fstrans_mark();
hrtime_t growtime = 0;
callb_cpr_t cpr;

CALLB_CPR_INIT(&cpr, &arc_reclaim_lock, callb_generic_cpr, FTAG);

mutex_enter(&arc_reclaim_lock);
while (!arc_reclaim_thread_exit) {
uint64_t evicted = 0;
uint64_t need_free = arc_need_free;
arc_tuning_update();
/*
* This is necessary in order for the mdb ::arc dcmd to
* show up to date information. Since the ::arc command
* does not call the kstat's update function, without
* this call, the command may show stale stats for the
* This is necessary in order to keep the kstat information
* up to date for tools that display kstat data such as the
* mdb ::arc dcmd and the Linux crash utility. These tools
* typically do not call kstat's update function, but simply
* dump out stats from the most recent update. Without
* this call, these commands may show stale stats for the
* anon, mru, mru_ghost, mfu, and mfu_ghost lists. Even
* with this change, the data might be up to 1 second
* out of date; but that should suffice. The arc_state_t
* structures can be queried directly if more accurate
* information is needed.
* out of date (the arc_adjust_zthr has a maximum sleep
* time of 1 second); but that should suffice. The
* arc_state_t structures can be queried directly if more
* accurate information is needed.
*/
#ifndef __linux__
if (arc_ksp != NULL)
arc_ksp->ks_update(arc_ksp, KSTAT_READ);
#endif
mutex_exit(&arc_reclaim_lock);

/*
* We call arc_adjust() before (possibly) calling
* arc_kmem_reap_now(), so that we can wake up
* arc_get_data_buf() sooner.
* We have to rely on arc_get_data_impl() to tell us when to adjust,
* rather than checking if we are overflowing here, so that we are
* sure to not leave arc_get_data_impl() waiting on
* arc_adjust_waiters_cv. If we have become "not overflowing" since
* arc_get_data_impl() checked, we need to wake it up. We could
* broadcast the CV here, but arc_get_data_impl() may have not yet
* gone to sleep. We would need to use a mutex to ensure that this
* function doesn't broadcast until arc_get_data_impl() has gone to
* sleep (e.g. the arc_adjust_lock). However, the lock ordering of
* such a lock would necessarily be incorrect with respect to the
* zthr_lock, which is held before this function is called, and is
* held by arc_get_data_impl() when it calls zthr_wakeup().
*/
return (arc_adjust_needed);
}
/*
* Keep arc_size under arc_c by running arc_adjust which evicts data
* from the ARC.
*/
/* ARGSUSED */
static int
arc_adjust_cb(void *arg, zthr_t *zthr)
{
uint64_t evicted = 0;
fstrans_cookie_t cookie = spl_fstrans_mark();

/* Evict from cache */
evicted = arc_adjust();

/*
* If evicted is zero, we couldn't evict anything
* via arc_adjust(). This could be due to hash lock
* collisions, but more likely due to the majority of
* arc buffers being unevictable. Therefore, even if
* arc_size is above arc_c, another pass is unlikely to
* be helpful and could potentially cause us to enter an
* infinite loop. Additionally, zthr_iscancelled() is
* checked here so that if the arc is shutting down, the
* broadcast will wake any remaining arc adjust waiters.
*/
mutex_enter(&arc_adjust_lock);
arc_adjust_needed = !zthr_iscancelled(arc_adjust_zthr) &&
evicted > 0 && aggsum_compare(&arc_size, arc_c) > 0;
if (!arc_adjust_needed) {
/*
* We're either no longer overflowing, or we
* can't evict anything more, so we should wake
* arc_get_data_impl() sooner.
*/
cv_broadcast(&arc_adjust_waiters_cv);
arc_need_free = 0;
}
mutex_exit(&arc_adjust_lock);
spl_fstrans_unmark(cookie);

return (0);
}
/* ARGSUSED */
static boolean_t
arc_reap_cb_check(void *arg, zthr_t *zthr)
{
int64_t free_memory = arc_available_memory();
if (free_memory < 0) {

/*
* If a kmem reap is already active, don't schedule more. We must
* check for this because kmem_cache_reap_soon() won't actually
* block on the cache being reaped (this is to prevent callers from
* becoming implicitly blocked by a system-wide kmem reap -- which,
* on a system with many, many full magazines, can take minutes).
*/
if (!kmem_cache_reap_active() && free_memory < 0) {

arc_no_grow = B_TRUE;
arc_warm = B_TRUE;

/*
* Wait at least zfs_grow_retry (default 5) seconds
* before considering growing.
*/
growtime = gethrtime() + SEC2NSEC(arc_grow_retry);
arc_growtime = gethrtime() + SEC2NSEC(arc_grow_retry);
return (B_TRUE);
} else if (free_memory < arc_c >> arc_no_grow_shift) {
arc_no_grow = B_TRUE;
} else if (gethrtime() >= arc_growtime) {
arc_no_grow = B_FALSE;
}

arc_kmem_reap_now();
return (B_FALSE);
}

/*
* If we are still low on memory, shrink the ARC
* so that we have arc_shrink_min free space.
* Keep enough free memory in the system by reaping the ARC's kmem
* caches. To cause more slabs to be reapable, we may reduce the
* target size of the cache (arc_c), causing the arc_adjust_cb()
* to free more buffers.
*/
/* ARGSUSED */
static int
arc_reap_cb(void *arg, zthr_t *zthr)
{
int64_t free_memory;
fstrans_cookie_t cookie = spl_fstrans_mark();

/*
* Kick off asynchronous kmem_reap()'s of all our caches.
*/
arc_kmem_reap_soon();

/*
* Wait at least arc_kmem_cache_reap_retry_ms between
* arc_kmem_reap_soon() calls. Without this check it is possible to
* end up in a situation where we spend lots of time reaping
* caches, while we're near arc_c_min. Waiting here also gives the
* subsequent free memory check a chance of finding that the
* asynchronous reap has already freed enough memory, and we don't
* need to call arc_reduce_target_size().
*/
delay((hz * arc_kmem_cache_reap_retry_ms + 999) / 1000);

/*
* Reduce the target size as needed to maintain the amount of free
* memory in the system at a fraction of the arc_size (1/128th by
* default). If oversubscribed (free_memory < 0) then reduce the
* target arc_size by the deficit amount plus the fractional
* amount. If free memory is positive but less than the fractional
* amount, reduce by what is needed to hit the fractional amount.
*/
free_memory = arc_available_memory();
@@ -5124,55 +5226,13 @@ arc_reclaim_thread(void *unused)
(arc_c >> arc_shrink_shift) - free_memory;
if (to_free > 0) {
#ifdef _KERNEL
to_free = MAX(to_free, need_free);
to_free = MAX(to_free, arc_need_free);
#endif
arc_shrink(to_free);
arc_reduce_target_size(to_free);
}
} else if (free_memory < arc_c >> arc_no_grow_shift) {
arc_no_grow = B_TRUE;
} else if (gethrtime() >= growtime) {
arc_no_grow = B_FALSE;
}

mutex_enter(&arc_reclaim_lock);

/*
* If evicted is zero, we couldn't evict anything via
* arc_adjust(). This could be due to hash lock
* collisions, but more likely due to the majority of
* arc buffers being unevictable. Therefore, even if
* arc_size is above arc_c, another pass is unlikely to
* be helpful and could potentially cause us to enter an
* infinite loop.
*/
if (aggsum_compare(&arc_size, arc_c) <= 0 || evicted == 0) {
/*
* We're either no longer overflowing, or we
* can't evict anything more, so we should wake
* up any threads before we go to sleep and remove
* the bytes we were working on from arc_need_free
* since nothing more will be done here.
*/
cv_broadcast(&arc_reclaim_waiters_cv);
ARCSTAT_INCR(arcstat_need_free, -need_free);

/*
* Block until signaled, or after one second (we
* might need to perform arc_kmem_reap_now()
* even if we aren't being signalled)
*/
CALLB_CPR_SAFE_BEGIN(&cpr);
(void) cv_timedwait_sig_hires(&arc_reclaim_thread_cv,
&arc_reclaim_lock, SEC2NSEC(1), MSEC2NSEC(1), 0);
CALLB_CPR_SAFE_END(&cpr, &arc_reclaim_lock);
}
}

arc_reclaim_thread_exit = B_FALSE;
cv_broadcast(&arc_reclaim_thread_cv);
CALLB_CPR_EXIT(&cpr); /* drops arc_reclaim_lock */
spl_fstrans_unmark(cookie);
thread_exit();

return (0);
}

#ifdef _KERNEL
@@ -5276,21 +5336,21 @@ __arc_shrinker_func(struct shrinker *shrink, struct shrink_control *sc)
return (SHRINK_STOP);

/* Reclaim in progress */
if (mutex_tryenter(&arc_reclaim_lock) == 0) {
if (mutex_tryenter(&arc_adjust_lock) == 0) {
ARCSTAT_INCR(arcstat_need_free, ptob(sc->nr_to_scan));
return (0);
}

mutex_exit(&arc_reclaim_lock);
mutex_exit(&arc_adjust_lock);

/*
* Evict the requested number of pages by shrinking arc_c the
* requested amount.
*/
if (pages > 0) {
arc_shrink(ptob(sc->nr_to_scan));
arc_reduce_target_size(ptob(sc->nr_to_scan));
if (current_is_kswapd())
arc_kmem_reap_now();
arc_kmem_reap_soon();
#ifdef HAVE_SPLIT_SHRINKER_CALLBACK
pages = MAX((int64_t)pages -
(int64_t)btop(arc_evictable_memory()), 0);
@@ -5300,7 +5360,7 @@ __arc_shrinker_func(struct shrinker *shrink, struct shrink_control *sc)
/*
* We've shrunk what we can, wake up threads.
*/
cv_broadcast(&arc_reclaim_waiters_cv);
cv_broadcast(&arc_adjust_waiters_cv);
} else
pages = SHRINK_STOP;
@@ -5315,7 +5375,7 @@ __arc_shrinker_func(struct shrinker *shrink, struct shrink_control *sc)
ARCSTAT_BUMP(arcstat_memory_indirect_count);
} else {
arc_no_grow = B_TRUE;
arc_kmem_reap_now();
arc_kmem_reap_soon();
ARCSTAT_BUMP(arcstat_memory_direct_count);
}
@@ -5369,8 +5429,11 @@ arc_adapt(int bytes, arc_state_t *state)
}
ASSERT((int64_t)arc_p >= 0);

/*
* Wake reap thread if we do not have any available memory
*/
if (arc_reclaim_needed()) {
cv_signal(&arc_reclaim_thread_cv);
zthr_wakeup(arc_reap_zthr);
return;
}
@@ -5478,7 +5541,7 @@ arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag)
* overflowing; thus we don't use a while loop here.
*/
if (arc_is_overflowing()) {
mutex_enter(&arc_reclaim_lock);
mutex_enter(&arc_adjust_lock);

/*
* Now that we've acquired the lock, we may no longer be
@@ -5492,11 +5555,12 @@ arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag)
* shouldn't cause any harm.
*/
if (arc_is_overflowing()) {
cv_signal(&arc_reclaim_thread_cv);
cv_wait(&arc_reclaim_waiters_cv, &arc_reclaim_lock);
arc_adjust_needed = B_TRUE;
zthr_wakeup(arc_adjust_zthr);
(void) cv_wait(&arc_adjust_waiters_cv,
&arc_adjust_lock);
}

mutex_exit(&arc_reclaim_lock);
mutex_exit(&arc_adjust_lock);
}

VERIFY3U(hdr->b_type, ==, type);
@@ -7687,10 +7751,8 @@ void
arc_init(void)
{
uint64_t percent, allmem = arc_all_memory();

mutex_init(&arc_reclaim_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&arc_reclaim_thread_cv, NULL, CV_DEFAULT, NULL);
cv_init(&arc_reclaim_waiters_cv, NULL, CV_DEFAULT, NULL);
mutex_init(&arc_adjust_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&arc_adjust_waiters_cv, NULL, CV_DEFAULT, NULL);

arc_min_prefetch_ms = 1000;
arc_min_prescient_prefetch_ms = 6000;
@@ -7750,6 +7812,13 @@ arc_init(void)
arc_c = arc_c_min;

arc_state_init();

/*
* The arc must be "uninitialized", so that hdr_recl() (which is
* registered by buf_init()) will not access arc_reap_zthr before
* it is created.
*/
ASSERT(!arc_initialized);
buf_init();

list_create(&arc_prune_list, sizeof (arc_prune_t),
@@ -7759,8 +7828,6 @@ arc_init(void)
arc_prune_taskq = taskq_create("arc_prune", max_ncpus, defclsyspri,
max_ncpus, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);

arc_reclaim_thread_exit = B_FALSE;

arc_ksp = kstat_create("zfs", 0, "arcstats", "misc", KSTAT_TYPE_NAMED,
sizeof (arc_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
@@ -7770,10 +7837,12 @@ arc_init(void)
kstat_install(arc_ksp);
}

(void) thread_create(NULL, 0, arc_reclaim_thread, NULL, 0, &p0,
TS_RUN, defclsyspri);
arc_adjust_zthr = zthr_create(arc_adjust_cb_check,
arc_adjust_cb, NULL);
arc_reap_zthr = zthr_create_timer(arc_reap_cb_check,
arc_reap_cb, NULL, SEC2NSEC(1));

arc_dead = B_FALSE;
arc_initialized = B_TRUE;
arc_warm = B_FALSE;

/*
@@ -7805,22 +7874,10 @@ arc_fini(void)
spl_unregister_shrinker(&arc_shrinker);
#endif /* _KERNEL */

mutex_enter(&arc_reclaim_lock);
arc_reclaim_thread_exit = B_TRUE;
/*
* The reclaim thread will set arc_reclaim_thread_exit back to
* B_FALSE when it is finished exiting; we're waiting for that.
*/
while (arc_reclaim_thread_exit) {
cv_signal(&arc_reclaim_thread_cv);
cv_wait(&arc_reclaim_thread_cv, &arc_reclaim_lock);
}
mutex_exit(&arc_reclaim_lock);

/* Use B_TRUE to ensure *all* buffers are evicted */
arc_flush(NULL, B_TRUE);

arc_dead = B_TRUE;
arc_initialized = B_FALSE;

if (arc_ksp != NULL) {
kstat_delete(arc_ksp);
@@ -7841,9 +7898,14 @@ arc_fini(void)

list_destroy(&arc_prune_list);
mutex_destroy(&arc_prune_mtx);
mutex_destroy(&arc_reclaim_lock);
cv_destroy(&arc_reclaim_thread_cv);
cv_destroy(&arc_reclaim_waiters_cv);
(void) zthr_cancel(arc_adjust_zthr);
zthr_destroy(arc_adjust_zthr);

(void) zthr_cancel(arc_reap_zthr);
zthr_destroy(arc_reap_zthr);

mutex_destroy(&arc_adjust_lock);
cv_destroy(&arc_adjust_waiters_cv);

/*
* buf_fini() must precede arc_state_fini() because buf_fini() may