Update arc_available_memory() to check freemem
While Linux doesn't provide detailed information about the state of the VM, it does provide us with the total number of free pages. This information should be incorporated into the arc_available_memory() calculation rather than solely relying on a signal from direct reclaim. Conceptually this brings arc_available_memory() back in sync with illumos. It is also desirable that the target amount of free memory be tunable on a system. While the default values are expected to work well for most workloads, there may be cases where custom values are needed. The zfs_arc_sys_free module option was added for this purpose. zfs_arc_sys_free - The target number of bytes the ARC should leave as free memory on the system. This value can be checked in /proc/spl/kstat/zfs/arcstats, and setting this module option will override the default value. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #3637
This commit is contained in:
parent
6339c1b9dc
commit
11f552fa90
|
@ -566,6 +566,19 @@ log2(fraction of arc to reclaim)
|
|||
Default value: \fB5\fR.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fBzfs_arc_sys_free\fR (ulong)
|
||||
.ad
|
||||
.RS 12n
|
||||
The target number of bytes the ARC should leave as free memory on the system.
|
||||
Defaults to the larger of 1/64 of physical memory or 512K. Setting this
|
||||
option to a non-zero value will override the default.
|
||||
.sp
|
||||
Default value: \fB0\fR.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
|
|
|
@ -240,6 +240,7 @@ int zfs_arc_average_blocksize = 8 * 1024; /* 8KB */
|
|||
/*
|
||||
* These tunables are Linux specific
|
||||
*/
|
||||
unsigned long zfs_arc_sys_free = 0;
|
||||
int zfs_arc_memory_throttle_disable = 1;
|
||||
int zfs_arc_min_prefetch_lifespan = 0;
|
||||
int zfs_arc_p_aggressive_disable = 1;
|
||||
|
@ -473,6 +474,8 @@ typedef struct arc_stats {
|
|||
kstat_named_t arcstat_meta_limit;
|
||||
kstat_named_t arcstat_meta_max;
|
||||
kstat_named_t arcstat_meta_min;
|
||||
kstat_named_t arcstat_need_free;
|
||||
kstat_named_t arcstat_sys_free;
|
||||
} arc_stats_t;
|
||||
|
||||
static arc_stats_t arc_stats = {
|
||||
|
@ -564,7 +567,9 @@ static arc_stats_t arc_stats = {
|
|||
{ "arc_meta_used", KSTAT_DATA_UINT64 },
|
||||
{ "arc_meta_limit", KSTAT_DATA_UINT64 },
|
||||
{ "arc_meta_max", KSTAT_DATA_UINT64 },
|
||||
{ "arc_meta_min", KSTAT_DATA_UINT64 }
|
||||
{ "arc_meta_min", KSTAT_DATA_UINT64 },
|
||||
{ "arc_need_free", KSTAT_DATA_UINT64 },
|
||||
{ "arc_sys_free", KSTAT_DATA_UINT64 }
|
||||
};
|
||||
|
||||
#define ARCSTAT(stat) (arc_stats.stat.value.ui64)
|
||||
|
@ -633,6 +638,8 @@ static arc_state_t *arc_l2c_only;
|
|||
#define arc_meta_min ARCSTAT(arcstat_meta_min) /* min size for metadata */
|
||||
#define arc_meta_used ARCSTAT(arcstat_meta_used) /* size of metadata */
|
||||
#define arc_meta_max ARCSTAT(arcstat_meta_max) /* max size of metadata */
|
||||
#define arc_need_free ARCSTAT(arcstat_need_free) /* bytes to be freed */
|
||||
#define arc_sys_free ARCSTAT(arcstat_sys_free) /* target system free bytes */
|
||||
|
||||
#define L2ARC_IS_VALID_COMPRESS(_c_) \
|
||||
((_c_) == ZIO_COMPRESS_LZ4 || (_c_) == ZIO_COMPRESS_EMPTY)
|
||||
|
@ -3222,12 +3229,6 @@ int64_t last_free_memory;
|
|||
free_memory_reason_t last_free_reason;
|
||||
|
||||
#ifdef _KERNEL
|
||||
#ifdef __linux__
|
||||
/*
|
||||
* expiration time for arc_no_grow set by direct memory reclaim.
|
||||
*/
|
||||
static clock_t arc_grow_time = 0;
|
||||
#else
|
||||
/*
|
||||
* Additional reserve of pages for pp_reserve.
|
||||
*/
|
||||
|
@ -3237,7 +3238,6 @@ int64_t arc_pages_pp_reserve = 64;
|
|||
* Additional reserve of pages for swapfs.
|
||||
*/
|
||||
int64_t arc_swapfs_reserve = 64;
|
||||
#endif
|
||||
#endif /* _KERNEL */
|
||||
|
||||
/*
|
||||
|
@ -3250,26 +3250,14 @@ arc_available_memory(void)
|
|||
{
|
||||
int64_t lowest = INT64_MAX;
|
||||
free_memory_reason_t r = FMR_UNKNOWN;
|
||||
|
||||
#ifdef _KERNEL
|
||||
#ifdef __linux__
|
||||
/*
|
||||
* Under Linux we are not allowed to directly interrogate the global
|
||||
* memory state. Instead rely on observing that direct reclaim has
|
||||
* recently occurred therefore the system must be low on memory. The
|
||||
* exact values returned are not critical but should be small.
|
||||
*/
|
||||
if (ddi_time_after_eq(ddi_get_lbolt(), arc_grow_time))
|
||||
lowest = PAGE_SIZE;
|
||||
else
|
||||
lowest = -PAGE_SIZE;
|
||||
#else
|
||||
int64_t n;
|
||||
#ifdef __linux__
|
||||
pgcnt_t needfree = btop(arc_need_free);
|
||||
pgcnt_t lotsfree = btop(arc_sys_free);
|
||||
pgcnt_t desfree = 0;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Platforms like illumos have greater visibility into the memory
|
||||
* subsystem and can return a more detailed analysis of memory.
|
||||
*/
|
||||
if (needfree > 0) {
|
||||
n = PAGESIZE * (-needfree);
|
||||
if (n < lowest) {
|
||||
|
@ -3291,6 +3279,7 @@ arc_available_memory(void)
|
|||
r = FMR_LOTSFREE;
|
||||
}
|
||||
|
||||
#ifndef __linux__
|
||||
/*
|
||||
* check to make sure that swapfs has enough space so that anon
|
||||
* reservations can still succeed. anon_resvmem() checks that the
|
||||
|
@ -3319,6 +3308,7 @@ arc_available_memory(void)
|
|||
lowest = n;
|
||||
r = FMR_PAGES_PP_MAXIMUM;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__i386)
|
||||
/*
|
||||
|
@ -3357,12 +3347,11 @@ arc_available_memory(void)
|
|||
r = FMR_ZIO_ARENA;
|
||||
}
|
||||
}
|
||||
#endif /* __linux__ */
|
||||
#else
|
||||
#else /* _KERNEL */
|
||||
/* Every 100 calls, free a small amount */
|
||||
if (spa_get_random(100) == 0)
|
||||
lowest = -1024;
|
||||
#endif
|
||||
#endif /* _KERNEL */
|
||||
|
||||
last_free_memory = lowest;
|
||||
last_free_reason = r;
|
||||
|
@ -3480,7 +3469,7 @@ arc_reclaim_thread(void)
|
|||
to_free = (arc_c >> arc_shrink_shift) - free_memory;
|
||||
if (to_free > 0) {
|
||||
#ifdef _KERNEL
|
||||
to_free = MAX(to_free, ptob(needfree));
|
||||
to_free = MAX(to_free, arc_need_free);
|
||||
#endif
|
||||
arc_shrink(to_free);
|
||||
}
|
||||
|
@ -3507,9 +3496,11 @@ arc_reclaim_thread(void)
|
|||
/*
|
||||
* We're either no longer overflowing, or we
|
||||
* can't evict anything more, so we should wake
|
||||
* up any threads before we go to sleep.
|
||||
* up any threads before we go to sleep and clear
|
||||
* arc_need_free since nothing more can be done.
|
||||
*/
|
||||
cv_broadcast(&arc_reclaim_waiters_cv);
|
||||
arc_need_free = 0;
|
||||
|
||||
/*
|
||||
* Block until signaled, or after one second (we
|
||||
|
@ -3713,7 +3704,7 @@ __arc_shrinker_func(struct shrinker *shrink, struct shrink_control *sc)
|
|||
ARCSTAT_BUMP(arcstat_memory_indirect_count);
|
||||
} else {
|
||||
arc_no_grow = B_TRUE;
|
||||
arc_grow_time = ddi_get_lbolt() + (zfs_arc_grow_retry * hz);
|
||||
arc_need_free = ptob(sc->nr_to_scan);
|
||||
ARCSTAT_BUMP(arcstat_memory_direct_count);
|
||||
}
|
||||
|
||||
|
@ -5288,6 +5279,10 @@ arc_tuning_update(void)
|
|||
/* Valid range: 1 - N ticks */
|
||||
if (zfs_arc_min_prefetch_lifespan)
|
||||
arc_min_prefetch_lifespan = zfs_arc_min_prefetch_lifespan;
|
||||
|
||||
/* Valid range: 0 - <all physical memory> */
|
||||
if ((zfs_arc_sys_free) && (zfs_arc_sys_free != arc_sys_free))
|
||||
arc_sys_free = MIN(MAX(zfs_arc_sys_free, 0), ptob(physmem));
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -5329,6 +5324,10 @@ arc_init(void)
|
|||
* swapping out pages when it is preferable to shrink the arc.
|
||||
*/
|
||||
spl_register_shrinker(&arc_shrinker);
|
||||
|
||||
/* Set to 1/64 of all memory or a minimum of 512K */
|
||||
arc_sys_free = MAX(ptob(physmem / 64), (512 * 1024));
|
||||
arc_need_free = 0;
|
||||
#endif
|
||||
|
||||
/* Set min cache to allow safe operation of arc_adapt() */
|
||||
|
@ -7064,4 +7063,7 @@ MODULE_PARM_DESC(l2arc_feed_again, "Turbo L2ARC warmup");
|
|||
module_param(l2arc_norw, int, 0644);
|
||||
MODULE_PARM_DESC(l2arc_norw, "No reads during writes");
|
||||
|
||||
module_param(zfs_arc_sys_free, ulong, 0644);
|
||||
MODULE_PARM_DESC(zfs_arc_sys_free, "System free memory target size in bytes");
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue