Include scatter_chunk_waste in arc_size
The ARC caches data in scatter ABD's, which are collections of pages, which are typically 4K. Therefore, the space used to cache each block is rounded up to a multiple of 4K. The ABD subsystem tracks this wasted memory in the `scatter_chunk_waste` kstat. However, the ARC's `size` is not aware of the memory used by this round-up; it only accounts for the size that it requested from the ABD subsystem. Therefore, the ARC is effectively using more memory than it is aware of, due to the `scatter_chunk_waste`. This impacts observability, e.g. `arcstat` will show that the ARC is using less memory than it effectively is. It also impacts how the ARC responds to memory pressure. As the amount of `scatter_chunk_waste` changes, it appears to the ARC as memory pressure, so it needs to resize `arc_c`. If the sector size (`1<<ashift`) is the same as the page size (or larger), there won't be any waste. If the (compressed) block size is relatively large compared to the page size, the amount of `scatter_chunk_waste` will be small, so the problematic effects are minimal. However, if using 512B sectors (`ashift=9`), and the (compressed) block size is small (e.g. `compression=on` with the default `volblocksize=8k` or a decreased `recordsize`), the amount of `scatter_chunk_waste` can be very large. On a production system, with `arc_size` at a constant 50% of memory, `scatter_chunk_waste` has been observed to be 10-30% of memory. This commit adds `scatter_chunk_waste` to `arc_size`, and adds a new `waste` field to `arcstat`. As a result, the ARC's memory usage is more observable, and `arc_c` does not need to be adjusted as frequently. Reviewed-by: Pavel Zakharov <pavel.zakharov@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: George Wilson <gwilson@delphix.com> Reviewed-by: Ryan Moeller <ryan@iXsystems.com> Signed-off-by: Matthew Ahrens <mahrens@delphix.com> Closes #10701
This commit is contained in:
parent
994de7e4b7
commit
85ec5cbae2
|
@ -103,6 +103,7 @@ cols = {
|
||||||
"need": [4, 1024, "ARC reclaim need"],
|
"need": [4, 1024, "ARC reclaim need"],
|
||||||
"free": [4, 1024, "ARC free memory"],
|
"free": [4, 1024, "ARC free memory"],
|
||||||
"avail": [5, 1024, "ARC available memory"],
|
"avail": [5, 1024, "ARC available memory"],
|
||||||
|
"waste": [5, 1024, "Wasted memory due to round up to pagesize"],
|
||||||
}
|
}
|
||||||
|
|
||||||
v = {}
|
v = {}
|
||||||
|
@ -452,6 +453,7 @@ def calculate():
|
||||||
v["need"] = cur["arc_need_free"]
|
v["need"] = cur["arc_need_free"]
|
||||||
v["free"] = cur["memory_free_bytes"]
|
v["free"] = cur["memory_free_bytes"]
|
||||||
v["avail"] = cur["memory_available_bytes"]
|
v["avail"] = cur["memory_available_bytes"]
|
||||||
|
v["waste"] = cur["abd_chunk_waste_size"]
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
|
@ -203,6 +203,7 @@ typedef enum arc_space_type {
|
||||||
ARC_SPACE_DBUF,
|
ARC_SPACE_DBUF,
|
||||||
ARC_SPACE_DNODE,
|
ARC_SPACE_DNODE,
|
||||||
ARC_SPACE_BONUS,
|
ARC_SPACE_BONUS,
|
||||||
|
ARC_SPACE_ABD_CHUNK_WASTE,
|
||||||
ARC_SPACE_NUMTYPES
|
ARC_SPACE_NUMTYPES
|
||||||
} arc_space_type_t;
|
} arc_space_type_t;
|
||||||
|
|
||||||
|
|
|
@ -844,6 +844,7 @@ typedef struct arc_stats {
|
||||||
kstat_named_t arcstat_sys_free;
|
kstat_named_t arcstat_sys_free;
|
||||||
kstat_named_t arcstat_raw_size;
|
kstat_named_t arcstat_raw_size;
|
||||||
kstat_named_t arcstat_cached_only_in_progress;
|
kstat_named_t arcstat_cached_only_in_progress;
|
||||||
|
kstat_named_t arcstat_abd_chunk_waste_size;
|
||||||
} arc_stats_t;
|
} arc_stats_t;
|
||||||
|
|
||||||
typedef struct arc_evict_waiter {
|
typedef struct arc_evict_waiter {
|
||||||
|
|
|
@ -131,16 +131,17 @@ abd_update_scatter_stats(abd_t *abd, abd_stats_op_t op)
|
||||||
{
|
{
|
||||||
size_t n = abd_scatter_chunkcnt(abd);
|
size_t n = abd_scatter_chunkcnt(abd);
|
||||||
ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
|
ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
|
||||||
|
int waste = n * zfs_abd_chunk_size - abd->abd_size;
|
||||||
if (op == ABDSTAT_INCR) {
|
if (op == ABDSTAT_INCR) {
|
||||||
ABDSTAT_BUMP(abdstat_scatter_cnt);
|
ABDSTAT_BUMP(abdstat_scatter_cnt);
|
||||||
ABDSTAT_INCR(abdstat_scatter_data_size, abd->abd_size);
|
ABDSTAT_INCR(abdstat_scatter_data_size, abd->abd_size);
|
||||||
ABDSTAT_INCR(abdstat_scatter_chunk_waste,
|
ABDSTAT_INCR(abdstat_scatter_chunk_waste, waste);
|
||||||
n * zfs_abd_chunk_size - abd->abd_size);
|
arc_space_consume(waste, ARC_SPACE_ABD_CHUNK_WASTE);
|
||||||
} else {
|
} else {
|
||||||
ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
|
ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
|
||||||
ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size);
|
ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size);
|
||||||
ABDSTAT_INCR(abdstat_scatter_chunk_waste,
|
ABDSTAT_INCR(abdstat_scatter_chunk_waste, -waste);
|
||||||
abd->abd_size - n * zfs_abd_chunk_size);
|
arc_space_return(waste, ARC_SPACE_ABD_CHUNK_WASTE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -54,6 +54,7 @@
|
||||||
#include <sys/abd_impl.h>
|
#include <sys/abd_impl.h>
|
||||||
#include <sys/param.h>
|
#include <sys/param.h>
|
||||||
#include <sys/zio.h>
|
#include <sys/zio.h>
|
||||||
|
#include <sys/arc.h>
|
||||||
#include <sys/zfs_context.h>
|
#include <sys/zfs_context.h>
|
||||||
#include <sys/zfs_znode.h>
|
#include <sys/zfs_znode.h>
|
||||||
#ifdef _KERNEL
|
#ifdef _KERNEL
|
||||||
|
@ -631,17 +632,17 @@ void
|
||||||
abd_update_scatter_stats(abd_t *abd, abd_stats_op_t op)
|
abd_update_scatter_stats(abd_t *abd, abd_stats_op_t op)
|
||||||
{
|
{
|
||||||
ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
|
ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
|
||||||
|
int waste = P2ROUNDUP(abd->abd_size, PAGESIZE) - abd->abd_size;
|
||||||
if (op == ABDSTAT_INCR) {
|
if (op == ABDSTAT_INCR) {
|
||||||
ABDSTAT_BUMP(abdstat_scatter_cnt);
|
ABDSTAT_BUMP(abdstat_scatter_cnt);
|
||||||
ABDSTAT_INCR(abdstat_scatter_data_size, abd->abd_size);
|
ABDSTAT_INCR(abdstat_scatter_data_size, abd->abd_size);
|
||||||
ABDSTAT_INCR(abdstat_scatter_chunk_waste,
|
ABDSTAT_INCR(abdstat_scatter_chunk_waste, waste);
|
||||||
P2ROUNDUP(abd->abd_size, PAGESIZE) - abd->abd_size);
|
arc_space_consume(waste, ARC_SPACE_ABD_CHUNK_WASTE);
|
||||||
} else {
|
} else {
|
||||||
ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
|
ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
|
||||||
ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size);
|
ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size);
|
||||||
ABDSTAT_INCR(abdstat_scatter_chunk_waste,
|
ABDSTAT_INCR(abdstat_scatter_chunk_waste, -waste);
|
||||||
(int)abd->abd_size
|
arc_space_return(waste, ARC_SPACE_ABD_CHUNK_WASTE);
|
||||||
-(int)P2ROUNDUP(abd->abd_size, PAGESIZE));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -583,6 +583,7 @@ arc_stats_t arc_stats = {
|
||||||
{ "arc_sys_free", KSTAT_DATA_UINT64 },
|
{ "arc_sys_free", KSTAT_DATA_UINT64 },
|
||||||
{ "arc_raw_size", KSTAT_DATA_UINT64 },
|
{ "arc_raw_size", KSTAT_DATA_UINT64 },
|
||||||
{ "cached_only_in_progress", KSTAT_DATA_UINT64 },
|
{ "cached_only_in_progress", KSTAT_DATA_UINT64 },
|
||||||
|
{ "abd_chunk_waste_size", KSTAT_DATA_UINT64 },
|
||||||
};
|
};
|
||||||
|
|
||||||
#define ARCSTAT_MAX(stat, val) { \
|
#define ARCSTAT_MAX(stat, val) { \
|
||||||
|
@ -685,6 +686,7 @@ aggsum_t astat_dnode_size;
|
||||||
aggsum_t astat_bonus_size;
|
aggsum_t astat_bonus_size;
|
||||||
aggsum_t astat_hdr_size;
|
aggsum_t astat_hdr_size;
|
||||||
aggsum_t astat_l2_hdr_size;
|
aggsum_t astat_l2_hdr_size;
|
||||||
|
aggsum_t astat_abd_chunk_waste_size;
|
||||||
|
|
||||||
hrtime_t arc_growtime;
|
hrtime_t arc_growtime;
|
||||||
list_t arc_prune_list;
|
list_t arc_prune_list;
|
||||||
|
@ -2611,9 +2613,18 @@ arc_space_consume(uint64_t space, arc_space_type_t type)
|
||||||
case ARC_SPACE_L2HDRS:
|
case ARC_SPACE_L2HDRS:
|
||||||
aggsum_add(&astat_l2_hdr_size, space);
|
aggsum_add(&astat_l2_hdr_size, space);
|
||||||
break;
|
break;
|
||||||
|
case ARC_SPACE_ABD_CHUNK_WASTE:
|
||||||
|
/*
|
||||||
|
* Note: this includes space wasted by all scatter ABD's, not
|
||||||
|
* just those allocated by the ARC. But the vast majority of
|
||||||
|
* scatter ABD's come from the ARC, because other users are
|
||||||
|
* very short-lived.
|
||||||
|
*/
|
||||||
|
aggsum_add(&astat_abd_chunk_waste_size, space);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (type != ARC_SPACE_DATA)
|
if (type != ARC_SPACE_DATA && type != ARC_SPACE_ABD_CHUNK_WASTE)
|
||||||
aggsum_add(&arc_meta_used, space);
|
aggsum_add(&arc_meta_used, space);
|
||||||
|
|
||||||
aggsum_add(&arc_size, space);
|
aggsum_add(&arc_size, space);
|
||||||
|
@ -2648,9 +2659,12 @@ arc_space_return(uint64_t space, arc_space_type_t type)
|
||||||
case ARC_SPACE_L2HDRS:
|
case ARC_SPACE_L2HDRS:
|
||||||
aggsum_add(&astat_l2_hdr_size, -space);
|
aggsum_add(&astat_l2_hdr_size, -space);
|
||||||
break;
|
break;
|
||||||
|
case ARC_SPACE_ABD_CHUNK_WASTE:
|
||||||
|
aggsum_add(&astat_abd_chunk_waste_size, -space);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (type != ARC_SPACE_DATA) {
|
if (type != ARC_SPACE_DATA && type != ARC_SPACE_ABD_CHUNK_WASTE) {
|
||||||
ASSERT(aggsum_compare(&arc_meta_used, space) >= 0);
|
ASSERT(aggsum_compare(&arc_meta_used, space) >= 0);
|
||||||
/*
|
/*
|
||||||
* We use the upper bound here rather than the precise value
|
* We use the upper bound here rather than the precise value
|
||||||
|
@ -7090,6 +7104,8 @@ arc_kstat_update(kstat_t *ksp, int rw)
|
||||||
ARCSTAT(arcstat_dbuf_size) = aggsum_value(&astat_dbuf_size);
|
ARCSTAT(arcstat_dbuf_size) = aggsum_value(&astat_dbuf_size);
|
||||||
ARCSTAT(arcstat_dnode_size) = aggsum_value(&astat_dnode_size);
|
ARCSTAT(arcstat_dnode_size) = aggsum_value(&astat_dnode_size);
|
||||||
ARCSTAT(arcstat_bonus_size) = aggsum_value(&astat_bonus_size);
|
ARCSTAT(arcstat_bonus_size) = aggsum_value(&astat_bonus_size);
|
||||||
|
ARCSTAT(arcstat_abd_chunk_waste_size) =
|
||||||
|
aggsum_value(&astat_abd_chunk_waste_size);
|
||||||
|
|
||||||
as->arcstat_memory_all_bytes.value.ui64 =
|
as->arcstat_memory_all_bytes.value.ui64 =
|
||||||
arc_all_memory();
|
arc_all_memory();
|
||||||
|
@ -7329,6 +7345,7 @@ arc_state_init(void)
|
||||||
aggsum_init(&astat_bonus_size, 0);
|
aggsum_init(&astat_bonus_size, 0);
|
||||||
aggsum_init(&astat_dnode_size, 0);
|
aggsum_init(&astat_dnode_size, 0);
|
||||||
aggsum_init(&astat_dbuf_size, 0);
|
aggsum_init(&astat_dbuf_size, 0);
|
||||||
|
aggsum_init(&astat_abd_chunk_waste_size, 0);
|
||||||
|
|
||||||
arc_anon->arcs_state = ARC_STATE_ANON;
|
arc_anon->arcs_state = ARC_STATE_ANON;
|
||||||
arc_mru->arcs_state = ARC_STATE_MRU;
|
arc_mru->arcs_state = ARC_STATE_MRU;
|
||||||
|
@ -7381,6 +7398,7 @@ arc_state_fini(void)
|
||||||
aggsum_fini(&astat_bonus_size);
|
aggsum_fini(&astat_bonus_size);
|
||||||
aggsum_fini(&astat_dnode_size);
|
aggsum_fini(&astat_dnode_size);
|
||||||
aggsum_fini(&astat_dbuf_size);
|
aggsum_fini(&astat_dbuf_size);
|
||||||
|
aggsum_fini(&astat_abd_chunk_waste_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t
|
uint64_t
|
||||||
|
|
Loading…
Reference in New Issue