|
|
|
@@ -26,7 +26,7 @@
|
|
|
|
|
* Copyright (c) 2017, Nexenta Systems, Inc. All rights reserved.
|
|
|
|
|
* Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
|
|
|
|
|
* Copyright (c) 2020, George Amanakis. All rights reserved.
|
|
|
|
|
* Copyright (c) 2019, 2023, Klara Inc.
|
|
|
|
|
* Copyright (c) 2019, 2024, Klara Inc.
|
|
|
|
|
* Copyright (c) 2019, Allan Jude
|
|
|
|
|
* Copyright (c) 2020, The FreeBSD Foundation [1]
|
|
|
|
|
*
|
|
|
|
@@ -464,6 +464,9 @@ static uint_t zfs_arc_lotsfree_percent = 10;
|
|
|
|
|
*/
|
|
|
|
|
static int zfs_arc_prune_task_threads = 1;
|
|
|
|
|
|
|
|
|
|
/* Used by spa_export/spa_destroy to flush the arc asynchronously */
|
|
|
|
|
static taskq_t *arc_flush_taskq;
|
|
|
|
|
|
|
|
|
|
/* The 7 states: */
|
|
|
|
|
arc_state_t ARC_anon;
|
|
|
|
|
arc_state_t ARC_mru;
|
|
|
|
@@ -772,6 +775,23 @@ static buf_hash_table_t buf_hash_table;
|
|
|
|
|
|
|
|
|
|
uint64_t zfs_crc64_table[256];
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Asynchronous ARC flush
|
|
|
|
|
*
|
|
|
|
|
* We track these in a list so that arc_async_flush_guid_inuse() can
* check whether a spa load guid still has a flush in progress.
|
|
|
|
|
*/
|
|
|
|
|
static list_t arc_async_flush_list;
|
|
|
|
|
static kmutex_t arc_async_flush_lock;
|
|
|
|
|
|
|
|
|
|
typedef struct arc_async_flush {
|
|
|
|
|
uint64_t af_spa_guid;
|
|
|
|
|
taskqid_t af_task_id;
|
|
|
|
|
list_node_t af_node;
|
|
|
|
|
} arc_async_flush_t;
|
|
|
|
|
|
|
|
|
|
static unsigned int arc_async_flush_init_spa_list(uint64_t spa_list[],
|
|
|
|
|
unsigned int list_len);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Level 2 ARC
|
|
|
|
|
*/
|
|
|
|
@@ -1718,6 +1738,8 @@ arc_buf_alloc_l2only(size_t size, arc_buf_contents_t type, l2arc_dev_t *dev,
|
|
|
|
|
arc_buf_hdr_t *hdr;
|
|
|
|
|
|
|
|
|
|
ASSERT(size != 0);
|
|
|
|
|
ASSERT(dev->l2ad_vdev != NULL);
|
|
|
|
|
|
|
|
|
|
hdr = kmem_cache_alloc(hdr_l2only_cache, KM_SLEEP);
|
|
|
|
|
hdr->b_birth = birth;
|
|
|
|
|
hdr->b_type = type;
|
|
|
|
@@ -1725,6 +1747,7 @@ arc_buf_alloc_l2only(size_t size, arc_buf_contents_t type, l2arc_dev_t *dev,
|
|
|
|
|
arc_hdr_set_flags(hdr, arc_bufc_to_flags(type) | ARC_FLAG_HAS_L2HDR);
|
|
|
|
|
HDR_SET_LSIZE(hdr, size);
|
|
|
|
|
HDR_SET_PSIZE(hdr, psize);
|
|
|
|
|
HDR_SET_ASIZE(hdr, vdev_psize_to_asize(dev->l2ad_vdev, psize));
|
|
|
|
|
arc_hdr_set_compress(hdr, compress);
|
|
|
|
|
hdr->b_complevel = complevel;
|
|
|
|
|
if (protected)
|
|
|
|
@@ -3508,16 +3531,17 @@ static void
|
|
|
|
|
l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr,
|
|
|
|
|
boolean_t state_only)
|
|
|
|
|
{
|
|
|
|
|
l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr;
|
|
|
|
|
l2arc_dev_t *dev = l2hdr->b_dev;
|
|
|
|
|
uint64_t lsize = HDR_GET_LSIZE(hdr);
|
|
|
|
|
uint64_t psize = HDR_GET_PSIZE(hdr);
|
|
|
|
|
uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev, psize);
|
|
|
|
|
uint64_t asize = HDR_GET_ASIZE(hdr);
|
|
|
|
|
arc_buf_contents_t type = hdr->b_type;
|
|
|
|
|
int64_t lsize_s;
|
|
|
|
|
int64_t psize_s;
|
|
|
|
|
int64_t asize_s;
|
|
|
|
|
|
|
|
|
|
/* For L2 we expect the header's b_asize to be valid */
|
|
|
|
|
ASSERT3U(asize, >=, psize);
|
|
|
|
|
|
|
|
|
|
if (incr) {
|
|
|
|
|
lsize_s = lsize;
|
|
|
|
|
psize_s = psize;
|
|
|
|
@@ -3579,8 +3603,6 @@ arc_hdr_l2hdr_destroy(arc_buf_hdr_t *hdr)
|
|
|
|
|
{
|
|
|
|
|
l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr;
|
|
|
|
|
l2arc_dev_t *dev = l2hdr->b_dev;
|
|
|
|
|
uint64_t psize = HDR_GET_PSIZE(hdr);
|
|
|
|
|
uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev, psize);
|
|
|
|
|
|
|
|
|
|
ASSERT(MUTEX_HELD(&dev->l2ad_mtx));
|
|
|
|
|
ASSERT(HDR_HAS_L2HDR(hdr));
|
|
|
|
@@ -3588,7 +3610,10 @@ arc_hdr_l2hdr_destroy(arc_buf_hdr_t *hdr)
|
|
|
|
|
list_remove(&dev->l2ad_buflist, hdr);
|
|
|
|
|
|
|
|
|
|
l2arc_hdr_arcstats_decrement(hdr);
|
|
|
|
|
vdev_space_update(dev->l2ad_vdev, -asize, 0, 0);
|
|
|
|
|
if (dev->l2ad_vdev != NULL) {
|
|
|
|
|
uint64_t asize = HDR_GET_ASIZE(hdr);
|
|
|
|
|
vdev_space_update(dev->l2ad_vdev, -asize, 0, 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
(void) zfs_refcount_remove_many(&dev->l2ad_alloc, arc_hdr_size(hdr),
|
|
|
|
|
hdr);
|
|
|
|
@@ -3854,9 +3879,20 @@ arc_set_need_free(void)
|
|
|
|
|
}
|
|
|
|
|
}
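/*
 * Helper for arc_evict_state_impl(): returns B_TRUE if spa_guid matches
 * one of the spa_cnt entries in spa_list[].
 */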
|
|
|
|
|
|
|
|
|
|
static boolean_t
|
|
|
|
|
arc_spa_is_list_member(uint64_t spa_guid, uint64_t spa_list[],
|
|
|
|
|
unsigned int spa_cnt)
|
|
|
|
|
{
|
|
|
|
|
for (int i = 0; i < spa_cnt; i++) {
|
|
|
|
|
if (spa_list[i] == spa_guid)
|
|
|
|
|
return (B_TRUE);
|
|
|
|
|
}
|
|
|
|
|
return (B_FALSE);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static uint64_t
|
|
|
|
|
arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
|
|
|
|
|
uint64_t spa, uint64_t bytes)
|
|
|
|
|
uint64_t bytes, uint64_t spa_list[], unsigned int spa_cnt)
|
|
|
|
|
{
|
|
|
|
|
multilist_sublist_t *mls;
|
|
|
|
|
uint64_t bytes_evicted = 0, real_evicted = 0;
|
|
|
|
@ -3898,8 +3934,13 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
|
|
|
|
|
if (hdr->b_spa == 0)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
/* we're only interested in evicting buffers of a certain spa */
|
|
|
|
|
if (spa != 0 && hdr->b_spa != spa) {
|
|
|
|
|
/*
|
|
|
|
|
* Check if we're only interested in evicting buffers from
|
|
|
|
|
* a specific list of spas. This would typically be from
|
|
|
|
|
* spas that are being unloaded.
|
|
|
|
|
*/
|
|
|
|
|
if (spa_cnt > 0 &&
|
|
|
|
|
!arc_spa_is_list_member(hdr->b_spa, spa_list, spa_cnt)) {
|
|
|
|
|
ARCSTAT_BUMP(arcstat_evict_skip);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
@@ -4035,8 +4076,8 @@ arc_state_free_markers(arc_buf_hdr_t **markers, int count)
|
|
|
|
|
* the given arc state; which is used by arc_flush().
|
|
|
|
|
*/
|
|
|
|
|
static uint64_t
|
|
|
|
|
arc_evict_state(arc_state_t *state, arc_buf_contents_t type, uint64_t spa,
|
|
|
|
|
uint64_t bytes)
|
|
|
|
|
arc_evict_state(arc_state_t *state, arc_buf_contents_t type, uint64_t bytes,
|
|
|
|
|
uint64_t spa_list[], unsigned int spa_cnt)
|
|
|
|
|
{
|
|
|
|
|
uint64_t total_evicted = 0;
|
|
|
|
|
multilist_t *ml = &state->arcs_list[type];
|
|
|
|
@@ -4091,7 +4132,8 @@ arc_evict_state(arc_state_t *state, arc_buf_contents_t type, uint64_t spa,
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
bytes_evicted = arc_evict_state_impl(ml, sublist_idx,
|
|
|
|
|
markers[sublist_idx], spa, bytes_remaining);
|
|
|
|
|
markers[sublist_idx], bytes_remaining, spa_list,
|
|
|
|
|
spa_cnt);
|
|
|
|
|
|
|
|
|
|
scan_evicted += bytes_evicted;
|
|
|
|
|
total_evicted += bytes_evicted;
|
|
|
|
@@ -4156,9 +4198,11 @@ arc_flush_state(arc_state_t *state, uint64_t spa, arc_buf_contents_t type,
|
|
|
|
|
boolean_t retry)
|
|
|
|
|
{
|
|
|
|
|
uint64_t evicted = 0;
|
|
|
|
|
uint64_t spa_list[1] = {spa};
|
|
|
|
|
|
|
|
|
|
while (zfs_refcount_count(&state->arcs_esize[type]) != 0) {
|
|
|
|
|
evicted += arc_evict_state(state, type, spa, ARC_EVICT_ALL);
|
|
|
|
|
evicted += arc_evict_state(state, type, ARC_EVICT_ALL,
|
|
|
|
|
spa_list, spa == 0 ? 0 : 1);
|
|
|
|
|
|
|
|
|
|
if (!retry)
|
|
|
|
|
break;
|
|
|
|
@@ -4182,7 +4226,15 @@ arc_evict_impl(arc_state_t *state, arc_buf_contents_t type, int64_t bytes)
|
|
|
|
|
if (bytes > 0 && zfs_refcount_count(&state->arcs_esize[type]) > 0) {
|
|
|
|
|
delta = MIN(zfs_refcount_count(&state->arcs_esize[type]),
|
|
|
|
|
bytes);
|
|
|
|
|
return (arc_evict_state(state, type, 0, delta));
|
|
|
|
|
/*
|
|
|
|
|
* Create a list of guids from any active ARC async flushes.
|
|
|
|
|
* The underlying arc_evict_state() function will target
|
|
|
|
|
* only spa guids from this list when it is not empty.
|
|
|
|
|
*/
|
|
|
|
|
uint64_t spa_list[16];
|
|
|
|
|
unsigned int spa_cnt =
|
|
|
|
|
arc_async_flush_init_spa_list(spa_list, 16);
|
|
|
|
|
return (arc_evict_state(state, type, delta, spa_list, spa_cnt));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
@@ -4375,20 +4427,10 @@ arc_evict(void)
|
|
|
|
|
return (total_evicted);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
arc_flush(spa_t *spa, boolean_t retry)
|
|
|
|
|
static void
|
|
|
|
|
arc_flush_impl(uint64_t guid, boolean_t retry)
|
|
|
|
|
{
|
|
|
|
|
uint64_t guid = 0;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* If retry is B_TRUE, a spa must not be specified since we have
|
|
|
|
|
* no good way to determine if all of a spa's buffers have been
|
|
|
|
|
* evicted from an arc state.
|
|
|
|
|
*/
|
|
|
|
|
ASSERT(!retry || spa == NULL);
|
|
|
|
|
|
|
|
|
|
if (spa != NULL)
|
|
|
|
|
guid = spa_load_guid(spa);
|
|
|
|
|
ASSERT(!retry || guid == 0);
|
|
|
|
|
|
|
|
|
|
(void) arc_flush_state(arc_mru, guid, ARC_BUFC_DATA, retry);
|
|
|
|
|
(void) arc_flush_state(arc_mru, guid, ARC_BUFC_METADATA, retry);
|
|
|
|
@@ -4406,6 +4448,143 @@ arc_flush(spa_t *spa, boolean_t retry)
|
|
|
|
|
(void) arc_flush_state(arc_uncached, guid, ARC_BUFC_METADATA, retry);
|
|
|
|
|
}
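/*
 * Evict all ARC buffers belonging to the given spa, or all buffers when
 * spa is NULL.
 */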
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
arc_flush(spa_t *spa, boolean_t retry)
|
|
|
|
|
{
|
|
|
|
|
/*
|
|
|
|
|
* If retry is B_TRUE, a spa must not be specified since we have
|
|
|
|
|
* no good way to determine if all of a spa's buffers have been
|
|
|
|
|
* evicted from an arc state.
|
|
|
|
|
*/
|
|
|
|
|
ASSERT(!retry || spa == NULL);
|
|
|
|
|
|
|
|
|
|
arc_flush_impl(spa != NULL ? spa_load_guid(spa) : 0, retry);
|
|
|
|
|
}
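/*
 * Allocate a tracking structure for an asynchronous flush of the given
 * spa load guid and add it to the global arc_async_flush_list.
 */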
|
|
|
|
|
|
|
|
|
|
static arc_async_flush_t *
|
|
|
|
|
arc_async_flush_add(uint64_t spa_guid, taskqid_t task_id)
|
|
|
|
|
{
|
|
|
|
|
arc_async_flush_t *af = kmem_alloc(sizeof (*af), KM_SLEEP);
|
|
|
|
|
af->af_spa_guid = spa_guid;
|
|
|
|
|
af->af_task_id = task_id;
|
|
|
|
|
list_link_init(&af->af_node);
|
|
|
|
|
|
|
|
|
|
mutex_enter(&arc_async_flush_lock);
|
|
|
|
|
list_insert_tail(&arc_async_flush_list, af);
|
|
|
|
|
mutex_exit(&arc_async_flush_lock);
|
|
|
|
|
|
|
|
|
|
return (af);
|
|
|
|
|
}
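/*
 * Remove (and free) the async flush entry matching the given spa load
 * guid and task id, if it is still on the list.
 */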
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
arc_async_flush_remove(uint64_t spa_guid, taskqid_t task_id)
|
|
|
|
|
{
|
|
|
|
|
mutex_enter(&arc_async_flush_lock);
|
|
|
|
|
for (arc_async_flush_t *af = list_head(&arc_async_flush_list);
|
|
|
|
|
af != NULL; af = list_next(&arc_async_flush_list, af)) {
|
|
|
|
|
if (af->af_spa_guid == spa_guid && af->af_task_id == task_id) {
|
|
|
|
|
list_remove(&arc_async_flush_list, af);
|
|
|
|
|
kmem_free(af, sizeof (*af));
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
mutex_exit(&arc_async_flush_lock);
|
|
|
|
|
}
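/*
 * Taskq callback: flush the spa's ARC buffers, then drop the flush from
 * the in-flight list.
 */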
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
arc_flush_task(void *arg)
|
|
|
|
|
{
|
|
|
|
|
arc_async_flush_t *af = arg;
|
|
|
|
|
hrtime_t start_time = gethrtime();
|
|
|
|
|
uint64_t spa_guid = af->af_spa_guid;
|
|
|
|
|
|
|
|
|
|
arc_flush_impl(spa_guid, B_FALSE);
|
|
|
|
|
arc_async_flush_remove(spa_guid, af->af_task_id);
|
|
|
|
|
|
|
|
|
|
uint64_t elapsed = NSEC2MSEC(gethrtime() - start_time);
|
|
|
|
|
if (elapsed > 0) {
|
|
|
|
|
zfs_dbgmsg("spa %llu arc flushed in %llu ms",
|
|
|
|
|
(u_longlong_t)spa_guid, (u_longlong_t)elapsed);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* ARC buffers use the spa's load guid and can continue to exist after
|
|
|
|
|
* the spa_t is gone (exported). The blocks are orphaned since each
|
|
|
|
|
* spa import has a different load guid.
|
|
|
|
|
*
|
|
|
|
|
* It's OK if the spa is re-imported while this asynchronous flush is
|
|
|
|
|
* still in progress. The new spa_load_guid will be different.
|
|
|
|
|
*
|
|
|
|
|
* Also, arc_fini will wait for any arc_flush_task to finish.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
arc_flush_async(spa_t *spa)
|
|
|
|
|
{
|
|
|
|
|
uint64_t spa_guid = spa_load_guid(spa);
|
|
|
|
|
arc_async_flush_t *af = arc_async_flush_add(spa_guid, TASKQID_INVALID);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* arc_flush_task() needs arc_async_flush_lock to remove the af list
* node, so holding the lock here prevents the task from freeing af
* before we record its task id below.
|
|
|
|
|
*/
|
|
|
|
|
mutex_enter(&arc_async_flush_lock);
|
|
|
|
|
taskqid_t tid = af->af_task_id = taskq_dispatch(arc_flush_taskq,
|
|
|
|
|
arc_flush_task, af, TQ_SLEEP);
|
|
|
|
|
mutex_exit(&arc_async_flush_lock);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Unlikely, but if the dispatch failed, fall back to an inline flush.
|
|
|
|
|
*/
|
|
|
|
|
if (tid == TASKQID_INVALID) {
|
|
|
|
|
arc_flush_impl(spa_guid, B_FALSE);
|
|
|
|
|
arc_async_flush_remove(spa_guid, TASKQID_INVALID);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Check if a guid is still in-use as part of an async teardown task
|
|
|
|
|
*/
|
|
|
|
|
boolean_t
|
|
|
|
|
arc_async_flush_guid_inuse(uint64_t spa_guid)
|
|
|
|
|
{
|
|
|
|
|
mutex_enter(&arc_async_flush_lock);
|
|
|
|
|
for (arc_async_flush_t *af = list_head(&arc_async_flush_list);
|
|
|
|
|
af != NULL; af = list_next(&arc_async_flush_list, af)) {
|
|
|
|
|
if (af->af_spa_guid == spa_guid) {
|
|
|
|
|
mutex_exit(&arc_async_flush_lock);
|
|
|
|
|
return (B_TRUE);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
mutex_exit(&arc_async_flush_lock);
|
|
|
|
|
return (B_FALSE);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Initialize a list of spa guids that are being flushed.
|
|
|
|
|
*
|
|
|
|
|
* Used by arc_evict_state() to target headers belonging to spas on this list.
|
|
|
|
|
*/
|
|
|
|
|
static unsigned int
|
|
|
|
|
arc_async_flush_init_spa_list(uint64_t spa_list[], unsigned int list_len)
|
|
|
|
|
{
|
|
|
|
|
unsigned int init_cnt = 0;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Iterate until the end of the list or array slots are full.
|
|
|
|
|
*/
|
|
|
|
|
mutex_enter(&arc_async_flush_lock);
|
|
|
|
|
for (arc_async_flush_t *af = list_head(&arc_async_flush_list);
|
|
|
|
|
init_cnt < list_len && af != NULL;
|
|
|
|
|
af = list_next(&arc_async_flush_list, af)) {
|
|
|
|
|
spa_list[init_cnt++] = af->af_spa_guid;
|
|
|
|
|
}
|
|
|
|
|
mutex_exit(&arc_async_flush_lock);
|
|
|
|
|
|
|
|
|
|
return (init_cnt);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint64_t
|
|
|
|
|
arc_reduce_target_size(uint64_t to_free)
|
|
|
|
|
{
|
|
|
|
@@ -7744,6 +7923,12 @@ arc_init(void)
|
|
|
|
|
arc_prune_taskq = taskq_create("arc_prune", zfs_arc_prune_task_threads,
|
|
|
|
|
defclsyspri, 100, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
|
|
|
|
|
|
|
|
|
|
list_create(&arc_async_flush_list, sizeof (arc_async_flush_t),
|
|
|
|
|
offsetof(arc_async_flush_t, af_node));
|
|
|
|
|
mutex_init(&arc_async_flush_lock, NULL, MUTEX_DEFAULT, NULL);
|
|
|
|
|
arc_flush_taskq = taskq_create("arc_flush", 75, defclsyspri,
|
|
|
|
|
1, INT_MAX, TASKQ_DYNAMIC | TASKQ_THREADS_CPU_PCT);
|
|
|
|
|
|
|
|
|
|
arc_ksp = kstat_create("zfs", 0, "arcstats", "misc", KSTAT_TYPE_NAMED,
|
|
|
|
|
sizeof (arc_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
|
|
|
|
|
|
|
|
|
@@ -7809,6 +7994,10 @@ arc_fini(void)
|
|
|
|
|
arc_lowmem_fini();
|
|
|
|
|
#endif /* _KERNEL */
|
|
|
|
|
|
|
|
|
|
/* Wait for any background flushes */
|
|
|
|
|
taskq_wait(arc_flush_taskq);
|
|
|
|
|
taskq_destroy(arc_flush_taskq);
|
|
|
|
|
|
|
|
|
|
/* Use B_TRUE to ensure *all* buffers are evicted */
|
|
|
|
|
arc_flush(NULL, B_TRUE);
|
|
|
|
|
|
|
|
|
@@ -7820,6 +8009,9 @@ arc_fini(void)
|
|
|
|
|
taskq_wait(arc_prune_taskq);
|
|
|
|
|
taskq_destroy(arc_prune_taskq);
|
|
|
|
|
|
|
|
|
|
list_destroy(&arc_async_flush_list);
|
|
|
|
|
mutex_destroy(&arc_async_flush_lock);
|
|
|
|
|
|
|
|
|
|
mutex_enter(&arc_prune_mtx);
|
|
|
|
|
while ((p = list_remove_head(&arc_prune_list)) != NULL) {
|
|
|
|
|
(void) zfs_refcount_remove(&p->p_refcnt, &arc_prune_list);
|
|
|
|
@@ -8191,6 +8383,18 @@ l2arc_write_interval(clock_t began, uint64_t wanted, uint64_t wrote)
|
|
|
|
|
return (next);
|
|
|
|
|
}
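/*
 * Returns B_TRUE for L2ARC devices that cannot be used right now, so
 * l2arc_dev_get_next() and the feed thread can skip them.
 */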
|
|
|
|
|
|
|
|
|
|
static boolean_t
|
|
|
|
|
l2arc_dev_invalid(const l2arc_dev_t *dev)
|
|
|
|
|
{
|
|
|
|
|
/*
|
|
|
|
|
* We want to skip devices that are being rebuilt, trimmed,
|
|
|
|
|
* removed, or that belong to a spa that is being exported.
|
|
|
|
|
*/
|
|
|
|
|
return (dev->l2ad_vdev == NULL || vdev_is_dead(dev->l2ad_vdev) ||
|
|
|
|
|
dev->l2ad_rebuild || dev->l2ad_trim_all ||
|
|
|
|
|
dev->l2ad_spa == NULL || dev->l2ad_spa->spa_is_exporting);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Cycle through L2ARC devices. This is how L2ARC load balances.
|
|
|
|
|
* If a device is returned, this also returns holding the spa config lock.
|
|
|
|
@@ -8231,12 +8435,10 @@ l2arc_dev_get_next(void)
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
ASSERT3P(next, !=, NULL);
|
|
|
|
|
} while (vdev_is_dead(next->l2ad_vdev) || next->l2ad_rebuild ||
|
|
|
|
|
next->l2ad_trim_all || next->l2ad_spa->spa_is_exporting);
|
|
|
|
|
} while (l2arc_dev_invalid(next));
|
|
|
|
|
|
|
|
|
|
/* if we were unable to find any usable vdevs, return NULL */
|
|
|
|
|
if (vdev_is_dead(next->l2ad_vdev) || next->l2ad_rebuild ||
|
|
|
|
|
next->l2ad_trim_all || next->l2ad_spa->spa_is_exporting)
|
|
|
|
|
if (l2arc_dev_invalid(next))
|
|
|
|
|
next = NULL;
|
|
|
|
|
|
|
|
|
|
l2arc_dev_last = next;
|
|
|
|
@@ -8366,6 +8568,8 @@ top:
|
|
|
|
|
uint64_t psize = HDR_GET_PSIZE(hdr);
|
|
|
|
|
l2arc_hdr_arcstats_decrement(hdr);
|
|
|
|
|
|
|
|
|
|
ASSERT(dev->l2ad_vdev != NULL);
|
|
|
|
|
|
|
|
|
|
bytes_dropped +=
|
|
|
|
|
vdev_psize_to_asize(dev->l2ad_vdev, psize);
|
|
|
|
|
(void) zfs_refcount_remove_many(&dev->l2ad_alloc,
|
|
|
|
@@ -8747,6 +8951,8 @@ l2arc_log_blk_overhead(uint64_t write_sz, l2arc_dev_t *dev)
|
|
|
|
|
if (dev->l2ad_log_entries == 0) {
|
|
|
|
|
return (0);
|
|
|
|
|
} else {
|
|
|
|
|
ASSERT(dev->l2ad_vdev != NULL);
|
|
|
|
|
|
|
|
|
|
uint64_t log_entries = write_sz >> SPA_MINBLOCKSHIFT;
|
|
|
|
|
|
|
|
|
|
uint64_t log_blocks = (log_entries +
|
|
|
|
@@ -8775,6 +8981,9 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)
|
|
|
|
|
vdev_t *vd = dev->l2ad_vdev;
|
|
|
|
|
boolean_t rerun;
|
|
|
|
|
|
|
|
|
|
ASSERT(vd != NULL || all);
|
|
|
|
|
ASSERT(dev->l2ad_spa != NULL || all);
|
|
|
|
|
|
|
|
|
|
buflist = &dev->l2ad_buflist;
|
|
|
|
|
|
|
|
|
|
top:
|
|
|
|
@@ -8867,7 +9076,8 @@ retry:
|
|
|
|
|
if (!all && l2arc_log_blkptr_valid(dev, lb_ptr_buf->lb_ptr)) {
|
|
|
|
|
break;
|
|
|
|
|
} else {
|
|
|
|
|
vdev_space_update(vd, -asize, 0, 0);
|
|
|
|
|
if (vd != NULL)
|
|
|
|
|
vdev_space_update(vd, -asize, 0, 0);
|
|
|
|
|
ARCSTAT_INCR(arcstat_l2_log_blk_asize, -asize);
|
|
|
|
|
ARCSTAT_BUMPDOWN(arcstat_l2_log_blk_count);
|
|
|
|
|
zfs_refcount_remove_many(&dev->l2ad_lb_asize, asize,
|
|
|
|
@@ -9281,6 +9491,8 @@ skip:
|
|
|
|
|
hdr->b_l2hdr.b_hits = 0;
|
|
|
|
|
hdr->b_l2hdr.b_arcs_state =
|
|
|
|
|
hdr->b_l1hdr.b_state->arcs_state;
|
|
|
|
|
/* l2arc_hdr_arcstats_update() expects a valid asize */
|
|
|
|
|
HDR_SET_ASIZE(hdr, asize);
|
|
|
|
|
mutex_enter(&dev->l2ad_mtx);
|
|
|
|
|
if (pio == NULL) {
|
|
|
|
|
/*
|
|
|
|
@@ -9446,8 +9658,10 @@ l2arc_feed_thread(void *unused)
|
|
|
|
|
* held to prevent device removal. l2arc_dev_get_next()
|
|
|
|
|
* will grab and release l2arc_dev_mtx.
|
|
|
|
|
*/
|
|
|
|
|
if ((dev = l2arc_dev_get_next()) == NULL)
|
|
|
|
|
if ((dev = l2arc_dev_get_next()) == NULL ||
|
|
|
|
|
dev->l2ad_spa == NULL) {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
spa = dev->l2ad_spa;
|
|
|
|
|
ASSERT3P(spa, !=, NULL);
|
|
|
|
@@ -9532,6 +9746,12 @@ l2arc_rebuild_dev(l2arc_dev_t *dev, boolean_t reopen)
|
|
|
|
|
uint64_t l2dhdr_asize = dev->l2ad_dev_hdr_asize;
|
|
|
|
|
spa_t *spa = dev->l2ad_spa;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* After an l2arc_remove_vdev(), the spa_t will no longer be valid.
|
|
|
|
|
*/
|
|
|
|
|
if (spa == NULL)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* The L2ARC has to hold at least the payload of one log block for
|
|
|
|
|
* them to be restored (persistent L2ARC). The payload of a log block
|
|
|
|
@@ -9699,39 +9919,19 @@ l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen)
|
|
|
|
|
l2arc_rebuild_dev(dev, reopen);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Remove a vdev from the L2ARC.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
l2arc_remove_vdev(vdev_t *vd)
|
|
|
|
|
typedef struct {
|
|
|
|
|
l2arc_dev_t *rva_l2arc_dev;
|
|
|
|
|
uint64_t rva_spa_gid;
|
|
|
|
|
uint64_t rva_vdev_gid;
|
|
|
|
|
taskqid_t rva_task_id;
|
|
|
|
|
} remove_vdev_args_t;
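/*
 * Final teardown of an L2ARC device: clear its buflists and ARC
 * references, free the device state, and drop the corresponding entry
 * from the async flush list. Runs from the arc_flush taskq when a pool
 * is exported or destroyed, or inline otherwise.
 */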
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
l2arc_device_teardown(void *arg)
|
|
|
|
|
{
|
|
|
|
|
l2arc_dev_t *remdev = NULL;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Find the device by vdev
|
|
|
|
|
*/
|
|
|
|
|
remdev = l2arc_vdev_get(vd);
|
|
|
|
|
ASSERT3P(remdev, !=, NULL);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Cancel any ongoing or scheduled rebuild.
|
|
|
|
|
*/
|
|
|
|
|
mutex_enter(&l2arc_rebuild_thr_lock);
|
|
|
|
|
if (remdev->l2ad_rebuild_began == B_TRUE) {
|
|
|
|
|
remdev->l2ad_rebuild_cancel = B_TRUE;
|
|
|
|
|
while (remdev->l2ad_rebuild == B_TRUE)
|
|
|
|
|
cv_wait(&l2arc_rebuild_thr_cv, &l2arc_rebuild_thr_lock);
|
|
|
|
|
}
|
|
|
|
|
mutex_exit(&l2arc_rebuild_thr_lock);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Remove device from global list
|
|
|
|
|
*/
|
|
|
|
|
mutex_enter(&l2arc_dev_mtx);
|
|
|
|
|
list_remove(l2arc_dev_list, remdev);
|
|
|
|
|
l2arc_dev_last = NULL; /* may have been invalidated */
|
|
|
|
|
atomic_dec_64(&l2arc_ndev);
|
|
|
|
|
mutex_exit(&l2arc_dev_mtx);
|
|
|
|
|
remove_vdev_args_t *rva = arg;
|
|
|
|
|
l2arc_dev_t *remdev = rva->rva_l2arc_dev;
|
|
|
|
|
hrtime_t start_time = gethrtime();
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Clear all buflists and ARC references. L2ARC device flush.
|
|
|
|
@@ -9746,6 +9946,93 @@ l2arc_remove_vdev(vdev_t *vd)
|
|
|
|
|
zfs_refcount_destroy(&remdev->l2ad_lb_count);
|
|
|
|
|
kmem_free(remdev->l2ad_dev_hdr, remdev->l2ad_dev_hdr_asize);
|
|
|
|
|
vmem_free(remdev, sizeof (l2arc_dev_t));
|
|
|
|
|
|
|
|
|
|
uint64_t elapsed = NSEC2MSEC(gethrtime() - start_time);
|
|
|
|
|
if (elapsed > 0) {
|
|
|
|
|
zfs_dbgmsg("spa %llu, vdev %llu removed in %llu ms",
|
|
|
|
|
(u_longlong_t)rva->rva_spa_gid,
|
|
|
|
|
(u_longlong_t)rva->rva_vdev_gid,
|
|
|
|
|
(u_longlong_t)elapsed);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
arc_async_flush_remove(rva->rva_spa_gid, rva->rva_task_id);
|
|
|
|
|
|
|
|
|
|
kmem_free(rva, sizeof (remove_vdev_args_t));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Remove a vdev from the L2ARC. When the pool is being exported or
* destroyed, the device teardown is dispatched to the arc_flush taskq so
* the export does not have to wait for it.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
l2arc_remove_vdev(vdev_t *vd)
|
|
|
|
|
{
|
|
|
|
|
spa_t *spa = vd->vdev_spa;
|
|
|
|
|
boolean_t asynchronous = spa->spa_state == POOL_STATE_EXPORTED ||
|
|
|
|
|
spa->spa_state == POOL_STATE_DESTROYED;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Find the device by vdev
|
|
|
|
|
*/
|
|
|
|
|
l2arc_dev_t *remdev = l2arc_vdev_get(vd);
|
|
|
|
|
ASSERT3P(remdev, !=, NULL);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Save info for final teardown
|
|
|
|
|
*/
|
|
|
|
|
remove_vdev_args_t *rva = kmem_alloc(sizeof (remove_vdev_args_t),
|
|
|
|
|
KM_SLEEP);
|
|
|
|
|
rva->rva_l2arc_dev = remdev;
|
|
|
|
|
rva->rva_spa_gid = spa_guid(remdev->l2ad_spa);
|
|
|
|
|
rva->rva_vdev_gid = remdev->l2ad_vdev->vdev_guid;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Cancel any ongoing or scheduled rebuild.
|
|
|
|
|
*/
|
|
|
|
|
mutex_enter(&l2arc_rebuild_thr_lock);
|
|
|
|
|
if (remdev->l2ad_rebuild_began == B_TRUE) {
|
|
|
|
|
remdev->l2ad_rebuild_cancel = B_TRUE;
|
|
|
|
|
while (remdev->l2ad_rebuild == B_TRUE)
|
|
|
|
|
cv_wait(&l2arc_rebuild_thr_cv, &l2arc_rebuild_thr_lock);
|
|
|
|
|
} else if (remdev->l2ad_rebuild == B_TRUE) {
|
|
|
|
|
/* Rebuild hasn't started yet so skip asynchronous teardown */
|
|
|
|
|
asynchronous = B_FALSE;
|
|
|
|
|
}
|
|
|
|
|
mutex_exit(&l2arc_rebuild_thr_lock);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Remove device from global list
|
|
|
|
|
*/
|
|
|
|
|
ASSERT(spa_config_held(spa, SCL_L2ARC, RW_WRITER) & SCL_L2ARC);
|
|
|
|
|
mutex_enter(&l2arc_dev_mtx);
|
|
|
|
|
list_remove(l2arc_dev_list, remdev);
|
|
|
|
|
l2arc_dev_last = NULL; /* may have been invalidated */
|
|
|
|
|
atomic_dec_64(&l2arc_ndev);
|
|
|
|
|
|
|
|
|
|
/* During a pool export, the spa and vdev will no longer be valid */
|
|
|
|
|
if (asynchronous) {
|
|
|
|
|
remdev->l2ad_spa = NULL;
|
|
|
|
|
remdev->l2ad_vdev = NULL;
|
|
|
|
|
}
|
|
|
|
|
mutex_exit(&l2arc_dev_mtx);
|
|
|
|
|
|
|
|
|
|
if (!asynchronous) {
|
|
|
|
|
l2arc_device_teardown(rva);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint64_t spa_guid = spa_load_guid(spa);
|
|
|
|
|
arc_async_flush_t *af = arc_async_flush_add(spa_guid, TASKQID_INVALID);
|
|
|
|
|
|
|
|
|
|
mutex_enter(&arc_async_flush_lock);
|
|
|
|
|
taskqid_t tid = taskq_dispatch(arc_flush_taskq, l2arc_device_teardown,
|
|
|
|
|
rva, TQ_SLEEP);
|
|
|
|
|
rva->rva_task_id = af->af_task_id = tid;
|
|
|
|
|
mutex_exit(&arc_async_flush_lock);
|
|
|
|
|
|
|
|
|
|
if (tid == TASKQID_INVALID) {
|
|
|
|
|
l2arc_device_teardown(rva);
|
|
|
|
|
arc_async_flush_remove(spa_guid, TASKQID_INVALID);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
@@ -10003,6 +10290,7 @@ l2arc_rebuild(l2arc_dev_t *dev)
|
|
|
|
|
mutex_enter(&l2arc_rebuild_thr_lock);
|
|
|
|
|
if (dev->l2ad_rebuild_cancel) {
|
|
|
|
|
dev->l2ad_rebuild = B_FALSE;
|
|
|
|
|
/* After signaling, the spa & vdev go away */
|
|
|
|
|
cv_signal(&l2arc_rebuild_thr_cv);
|
|
|
|
|
mutex_exit(&l2arc_rebuild_thr_lock);
|
|
|
|
|
err = SET_ERROR(ECANCELED);
|
|
|
|
@@ -10042,7 +10330,15 @@ out:
|
|
|
|
|
vmem_free(this_lb, sizeof (*this_lb));
|
|
|
|
|
vmem_free(next_lb, sizeof (*next_lb));
|
|
|
|
|
|
|
|
|
|
if (!l2arc_rebuild_enabled) {
|
|
|
|
|
if (err == ECANCELED) {
|
|
|
|
|
/*
|
|
|
|
|
* In case the rebuild was canceled do not log to spa history
|
|
|
|
|
* log as the pool may be in the process of being removed.
|
|
|
|
|
*/
|
|
|
|
|
zfs_dbgmsg("L2ARC rebuild aborted, restored %llu blocks",
|
|
|
|
|
(u_longlong_t)zfs_refcount_count(&dev->l2ad_lb_count));
|
|
|
|
|
return (err);
|
|
|
|
|
} else if (!l2arc_rebuild_enabled) {
|
|
|
|
|
spa_history_log_internal(spa, "L2ARC rebuild", NULL,
|
|
|
|
|
"disabled");
|
|
|
|
|
} else if (err == 0 && zfs_refcount_count(&dev->l2ad_lb_count) > 0) {
|
|
|
|
@@ -10060,13 +10356,6 @@ out:
|
|
|
|
|
"no valid log blocks");
|
|
|
|
|
memset(l2dhdr, 0, dev->l2ad_dev_hdr_asize);
|
|
|
|
|
l2arc_dev_hdr_update(dev);
|
|
|
|
|
} else if (err == ECANCELED) {
|
|
|
|
|
/*
|
|
|
|
|
* In case the rebuild was canceled do not log to spa history
|
|
|
|
|
* log as the pool may be in the process of being removed.
|
|
|
|
|
*/
|
|
|
|
|
zfs_dbgmsg("L2ARC rebuild aborted, restored %llu blocks",
|
|
|
|
|
(u_longlong_t)zfs_refcount_count(&dev->l2ad_lb_count));
|
|
|
|
|
} else if (err != 0) {
|
|
|
|
|
spa_history_log_internal(spa, "L2ARC rebuild", NULL,
|
|
|
|
|
"aborted, restored %llu blocks",
|
|
|
|
@@ -10354,6 +10643,7 @@ l2arc_hdr_restore(const l2arc_log_ent_phys_t *le, l2arc_dev_t *dev)
|
|
|
|
|
L2BLK_GET_STATE((le)->le_prop));
|
|
|
|
|
asize = vdev_psize_to_asize(dev->l2ad_vdev,
|
|
|
|
|
L2BLK_GET_PSIZE((le)->le_prop));
|
|
|
|
|
ASSERT3U(asize, ==, HDR_GET_ASIZE(hdr));
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* vdev_space_update() has to be called before arc_hdr_destroy() to
|
|
|
|
@@ -10383,6 +10673,8 @@ l2arc_hdr_restore(const l2arc_log_ent_phys_t *le, l2arc_dev_t *dev)
|
|
|
|
|
exists->b_l2hdr.b_daddr = le->le_daddr;
|
|
|
|
|
exists->b_l2hdr.b_arcs_state =
|
|
|
|
|
L2BLK_GET_STATE((le)->le_prop);
|
|
|
|
|
/* l2arc_hdr_arcstats_update() expects a valid asize */
|
|
|
|
|
HDR_SET_ASIZE(exists, asize);
|
|
|
|
|
mutex_enter(&dev->l2ad_mtx);
|
|
|
|
|
list_insert_tail(&dev->l2ad_buflist, exists);
|
|
|
|
|
(void) zfs_refcount_add_many(&dev->l2ad_alloc,
|
|
|
|
|