From 85ec5cbae228defb4332da4cf0ebb64d53aea157 Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Mon, 17 Aug 2020 20:04:04 -0700 Subject: [PATCH] Include scatter_chunk_waste in arc_size The ARC caches data in scatter ABD's, which are collections of pages, which are typically 4K. Therefore, the space used to cache each block is rounded up to a multiple of 4K. The ABD subsystem tracks this wasted memory in the `scatter_chunk_waste` kstat. However, the ARC's `size` is not aware of the memory used by this round-up; it only accounts for the size that it requested from the ABD subsystem. Therefore, the ARC is effectively using more memory than it is aware of, due to the `scatter_chunk_waste`. This impacts observability, e.g. `arcstat` will show that the ARC is using less memory than it effectively is. It also impacts how the ARC responds to memory pressure. As the amount of `scatter_chunk_waste` changes, it appears to the ARC as memory pressure, so it needs to resize `arc_c`. If the sector size (`1<<ashift`) is at least the page size, block sizes are already multiples of the page size and there is no such waste; with smaller sectors and small blocks, the waste can be significant. This commit therefore includes `scatter_chunk_waste` in `arc_size` and adds a `waste` column to `arcstat`. Reviewed-by: Brian Behlendorf Reviewed-by: George Wilson Reviewed-by: Ryan Moeller Signed-off-by: Matthew Ahrens Closes #10701 --- cmd/arcstat/arcstat.in | 2 ++ include/sys/arc.h | 1 + include/sys/arc_impl.h | 1 + module/os/freebsd/zfs/abd_os.c | 9 +++++---- module/os/linux/zfs/abd_os.c | 11 ++++++----- module/zfs/arc.c | 22 ++++++++++++++++++++-- 6 files changed, 35 insertions(+), 11 deletions(-) diff --git a/cmd/arcstat/arcstat.in b/cmd/arcstat/arcstat.in index b230e4b852..c83a1c7459 100755 --- a/cmd/arcstat/arcstat.in +++ b/cmd/arcstat/arcstat.in @@ -103,6 +103,7 @@ cols = { "need": [4, 1024, "ARC reclaim need"], "free": [4, 1024, "ARC free memory"], "avail": [5, 1024, "ARC available memory"], + "waste": [5, 1024, "Wasted memory due to round up to pagesize"], } v = {} @@ -452,6 +453,7 @@ def calculate(): v["need"] = cur["arc_need_free"] v["free"] = cur["memory_free_bytes"] v["avail"] = cur["memory_available_bytes"] + v["waste"] = cur["abd_chunk_waste_size"] def main(): 
diff --git a/include/sys/arc.h b/include/sys/arc.h index f322d6328f..3fdf36e2a2 100644 --- a/include/sys/arc.h +++ b/include/sys/arc.h @@ -203,6 +203,7 @@ typedef enum arc_space_type { ARC_SPACE_DBUF, ARC_SPACE_DNODE, ARC_SPACE_BONUS, + ARC_SPACE_ABD_CHUNK_WASTE, ARC_SPACE_NUMTYPES } arc_space_type_t; diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h index bb9163ba79..d07791d07d 100644 --- a/include/sys/arc_impl.h +++ b/include/sys/arc_impl.h @@ -844,6 +844,7 @@ typedef struct arc_stats { kstat_named_t arcstat_sys_free; kstat_named_t arcstat_raw_size; kstat_named_t arcstat_cached_only_in_progress; + kstat_named_t arcstat_abd_chunk_waste_size; } arc_stats_t; typedef struct arc_evict_waiter { diff --git a/module/os/freebsd/zfs/abd_os.c b/module/os/freebsd/zfs/abd_os.c index 6fb43d6bbf..a7bda509bf 100644 --- a/module/os/freebsd/zfs/abd_os.c +++ b/module/os/freebsd/zfs/abd_os.c @@ -131,16 +131,17 @@ abd_update_scatter_stats(abd_t *abd, abd_stats_op_t op) { size_t n = abd_scatter_chunkcnt(abd); ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR); + int waste = n * zfs_abd_chunk_size - abd->abd_size; if (op == ABDSTAT_INCR) { ABDSTAT_BUMP(abdstat_scatter_cnt); ABDSTAT_INCR(abdstat_scatter_data_size, abd->abd_size); - ABDSTAT_INCR(abdstat_scatter_chunk_waste, - n * zfs_abd_chunk_size - abd->abd_size); + ABDSTAT_INCR(abdstat_scatter_chunk_waste, waste); + arc_space_consume(waste, ARC_SPACE_ABD_CHUNK_WASTE); } else { ABDSTAT_BUMPDOWN(abdstat_scatter_cnt); ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size); - ABDSTAT_INCR(abdstat_scatter_chunk_waste, - abd->abd_size - n * zfs_abd_chunk_size); + ABDSTAT_INCR(abdstat_scatter_chunk_waste, -waste); + arc_space_return(waste, ARC_SPACE_ABD_CHUNK_WASTE); } } diff --git a/module/os/linux/zfs/abd_os.c b/module/os/linux/zfs/abd_os.c index 1754ce032f..c2281449ed 100644 --- a/module/os/linux/zfs/abd_os.c +++ b/module/os/linux/zfs/abd_os.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #ifdef 
_KERNEL @@ -631,17 +632,17 @@ void abd_update_scatter_stats(abd_t *abd, abd_stats_op_t op) { ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR); + int waste = P2ROUNDUP(abd->abd_size, PAGESIZE) - abd->abd_size; if (op == ABDSTAT_INCR) { ABDSTAT_BUMP(abdstat_scatter_cnt); ABDSTAT_INCR(abdstat_scatter_data_size, abd->abd_size); - ABDSTAT_INCR(abdstat_scatter_chunk_waste, - P2ROUNDUP(abd->abd_size, PAGESIZE) - abd->abd_size); + ABDSTAT_INCR(abdstat_scatter_chunk_waste, waste); + arc_space_consume(waste, ARC_SPACE_ABD_CHUNK_WASTE); } else { ABDSTAT_BUMPDOWN(abdstat_scatter_cnt); ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size); - ABDSTAT_INCR(abdstat_scatter_chunk_waste, - (int)abd->abd_size - -(int)P2ROUNDUP(abd->abd_size, PAGESIZE)); + ABDSTAT_INCR(abdstat_scatter_chunk_waste, -waste); + arc_space_return(waste, ARC_SPACE_ABD_CHUNK_WASTE); } } diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 06c2d5fac9..f63f92b86a 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -583,6 +583,7 @@ arc_stats_t arc_stats = { { "arc_sys_free", KSTAT_DATA_UINT64 }, { "arc_raw_size", KSTAT_DATA_UINT64 }, { "cached_only_in_progress", KSTAT_DATA_UINT64 }, + { "abd_chunk_waste_size", KSTAT_DATA_UINT64 }, }; #define ARCSTAT_MAX(stat, val) { \ @@ -685,6 +686,7 @@ aggsum_t astat_dnode_size; aggsum_t astat_bonus_size; aggsum_t astat_hdr_size; aggsum_t astat_l2_hdr_size; +aggsum_t astat_abd_chunk_waste_size; hrtime_t arc_growtime; list_t arc_prune_list; @@ -2611,9 +2613,18 @@ arc_space_consume(uint64_t space, arc_space_type_t type) case ARC_SPACE_L2HDRS: aggsum_add(&astat_l2_hdr_size, space); break; + case ARC_SPACE_ABD_CHUNK_WASTE: + /* + * Note: this includes space wasted by all scatter ABD's, not + * just those allocated by the ARC. But the vast majority of + * scatter ABD's come from the ARC, because other users are + * very short-lived. 
+ */ + aggsum_add(&astat_abd_chunk_waste_size, space); + break; } - if (type != ARC_SPACE_DATA) + if (type != ARC_SPACE_DATA && type != ARC_SPACE_ABD_CHUNK_WASTE) aggsum_add(&arc_meta_used, space); aggsum_add(&arc_size, space); @@ -2648,9 +2659,12 @@ arc_space_return(uint64_t space, arc_space_type_t type) case ARC_SPACE_L2HDRS: aggsum_add(&astat_l2_hdr_size, -space); break; + case ARC_SPACE_ABD_CHUNK_WASTE: + aggsum_add(&astat_abd_chunk_waste_size, -space); + break; } - if (type != ARC_SPACE_DATA) { + if (type != ARC_SPACE_DATA && type != ARC_SPACE_ABD_CHUNK_WASTE) { ASSERT(aggsum_compare(&arc_meta_used, space) >= 0); /* * We use the upper bound here rather than the precise value @@ -7090,6 +7104,8 @@ arc_kstat_update(kstat_t *ksp, int rw) ARCSTAT(arcstat_dbuf_size) = aggsum_value(&astat_dbuf_size); ARCSTAT(arcstat_dnode_size) = aggsum_value(&astat_dnode_size); ARCSTAT(arcstat_bonus_size) = aggsum_value(&astat_bonus_size); + ARCSTAT(arcstat_abd_chunk_waste_size) = + aggsum_value(&astat_abd_chunk_waste_size); as->arcstat_memory_all_bytes.value.ui64 = arc_all_memory(); @@ -7329,6 +7345,7 @@ arc_state_init(void) aggsum_init(&astat_bonus_size, 0); aggsum_init(&astat_dnode_size, 0); aggsum_init(&astat_dbuf_size, 0); + aggsum_init(&astat_abd_chunk_waste_size, 0); arc_anon->arcs_state = ARC_STATE_ANON; arc_mru->arcs_state = ARC_STATE_MRU; @@ -7381,6 +7398,7 @@ arc_state_fini(void) aggsum_fini(&astat_bonus_size); aggsum_fini(&astat_dnode_size); aggsum_fini(&astat_dbuf_size); + aggsum_fini(&astat_abd_chunk_waste_size); } uint64_t