Update arc_available_memory() to check freemem

While Linux doesn't provide detailed information about the state of
the VM it does provide us total free pages.  This information should
be incorporated in to the arc_available_memory() calculation rather
than solely relying on a signal from direct reclaim.  Conceptually
this brings arc_available_memory() back in sync with illumos.

It is also desirable that the target amount of free memory be tunable
on a system.  While the default values are expected to work well
for most workloads there may be cases where custom values are needed.
The zfs_arc_sys_free module option was added for this purpose.

zfs_arc_sys_free - The target number of bytes the ARC should leave
                   as free memory on the system.  This value can
                   checked in /proc/spl/kstat/zfs/arcstats and
                   setting this module option will override the
                   default value.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #3637
This commit is contained in:
Brian Behlendorf 2015-07-27 13:17:32 -07:00
parent 6339c1b9dc
commit 11f552fa90
2 changed files with 46 additions and 31 deletions

View File

@ -566,6 +566,19 @@ log2(fraction of arc to reclaim)
Default value: \fB5\fR.
.RE
.sp
.ne 2
.na
\fBzfs_arc_sys_free\fR (ulong)
.ad
.RS 12n
The target number of bytes the ARC should leave as free memory on the system.
Defaults to the larger of 1/64 of physical memory or 512K. Setting this
option to a non-zero value will override the default.
.sp
Default value: \fB0\fR.
.RE
.sp
.ne 2
.na

View File

@ -240,6 +240,7 @@ int zfs_arc_average_blocksize = 8 * 1024; /* 8KB */
/*
* These tunables are Linux specific
*/
unsigned long zfs_arc_sys_free = 0;
int zfs_arc_memory_throttle_disable = 1;
int zfs_arc_min_prefetch_lifespan = 0;
int zfs_arc_p_aggressive_disable = 1;
@ -473,6 +474,8 @@ typedef struct arc_stats {
kstat_named_t arcstat_meta_limit;
kstat_named_t arcstat_meta_max;
kstat_named_t arcstat_meta_min;
kstat_named_t arcstat_need_free;
kstat_named_t arcstat_sys_free;
} arc_stats_t;
static arc_stats_t arc_stats = {
@ -564,7 +567,9 @@ static arc_stats_t arc_stats = {
{ "arc_meta_used", KSTAT_DATA_UINT64 },
{ "arc_meta_limit", KSTAT_DATA_UINT64 },
{ "arc_meta_max", KSTAT_DATA_UINT64 },
{ "arc_meta_min", KSTAT_DATA_UINT64 }
{ "arc_meta_min", KSTAT_DATA_UINT64 },
{ "arc_need_free", KSTAT_DATA_UINT64 },
{ "arc_sys_free", KSTAT_DATA_UINT64 }
};
#define ARCSTAT(stat) (arc_stats.stat.value.ui64)
@ -633,6 +638,8 @@ static arc_state_t *arc_l2c_only;
#define arc_meta_min ARCSTAT(arcstat_meta_min) /* min size for metadata */
#define arc_meta_used ARCSTAT(arcstat_meta_used) /* size of metadata */
#define arc_meta_max ARCSTAT(arcstat_meta_max) /* max size of metadata */
#define arc_need_free ARCSTAT(arcstat_need_free) /* bytes to be freed */
#define arc_sys_free ARCSTAT(arcstat_sys_free) /* target system free bytes */
#define L2ARC_IS_VALID_COMPRESS(_c_) \
((_c_) == ZIO_COMPRESS_LZ4 || (_c_) == ZIO_COMPRESS_EMPTY)
@ -3222,12 +3229,6 @@ int64_t last_free_memory;
free_memory_reason_t last_free_reason;
#ifdef _KERNEL
#ifdef __linux__
/*
* expiration time for arc_no_grow set by direct memory reclaim.
*/
static clock_t arc_grow_time = 0;
#else
/*
* Additional reserve of pages for pp_reserve.
*/
@ -3237,7 +3238,6 @@ int64_t arc_pages_pp_reserve = 64;
* Additional reserve of pages for swapfs.
*/
int64_t arc_swapfs_reserve = 64;
#endif
#endif /* _KERNEL */
/*
@ -3250,26 +3250,14 @@ arc_available_memory(void)
{
int64_t lowest = INT64_MAX;
free_memory_reason_t r = FMR_UNKNOWN;
#ifdef _KERNEL
#ifdef __linux__
/*
* Under Linux we are not allowed to directly interrogate the global
* memory state. Instead rely on observing that direct reclaim has
* recently occurred therefore the system must be low on memory. The
* exact values returned are not critical but should be small.
*/
if (ddi_time_after_eq(ddi_get_lbolt(), arc_grow_time))
lowest = PAGE_SIZE;
else
lowest = -PAGE_SIZE;
#else
int64_t n;
#ifdef __linux__
pgcnt_t needfree = btop(arc_need_free);
pgcnt_t lotsfree = btop(arc_sys_free);
pgcnt_t desfree = 0;
#endif
/*
* Platforms like illumos have greater visibility in to the memory
* subsystem and can return a more detailed analysis of memory.
*/
if (needfree > 0) {
n = PAGESIZE * (-needfree);
if (n < lowest) {
@ -3291,6 +3279,7 @@ arc_available_memory(void)
r = FMR_LOTSFREE;
}
#ifndef __linux__
/*
* check to make sure that swapfs has enough space so that anon
* reservations can still succeed. anon_resvmem() checks that the
@ -3319,6 +3308,7 @@ arc_available_memory(void)
lowest = n;
r = FMR_PAGES_PP_MAXIMUM;
}
#endif
#if defined(__i386)
/*
@ -3357,12 +3347,11 @@ arc_available_memory(void)
r = FMR_ZIO_ARENA;
}
}
#endif /* __linux__ */
#else
#else /* _KERNEL */
/* Every 100 calls, free a small amount */
if (spa_get_random(100) == 0)
lowest = -1024;
#endif
#endif /* _KERNEL */
last_free_memory = lowest;
last_free_reason = r;
@ -3480,7 +3469,7 @@ arc_reclaim_thread(void)
to_free = (arc_c >> arc_shrink_shift) - free_memory;
if (to_free > 0) {
#ifdef _KERNEL
to_free = MAX(to_free, ptob(needfree));
to_free = MAX(to_free, arc_need_free);
#endif
arc_shrink(to_free);
}
@ -3507,9 +3496,11 @@ arc_reclaim_thread(void)
/*
* We're either no longer overflowing, or we
* can't evict anything more, so we should wake
* up any threads before we go to sleep.
* up any threads before we go to sleep and clear
* arc_need_free since nothing more can be done.
*/
cv_broadcast(&arc_reclaim_waiters_cv);
arc_need_free = 0;
/*
* Block until signaled, or after one second (we
@ -3713,7 +3704,7 @@ __arc_shrinker_func(struct shrinker *shrink, struct shrink_control *sc)
ARCSTAT_BUMP(arcstat_memory_indirect_count);
} else {
arc_no_grow = B_TRUE;
arc_grow_time = ddi_get_lbolt() + (zfs_arc_grow_retry * hz);
arc_need_free = ptob(sc->nr_to_scan);
ARCSTAT_BUMP(arcstat_memory_direct_count);
}
@ -5288,6 +5279,10 @@ arc_tuning_update(void)
/* Valid range: 1 - N ticks */
if (zfs_arc_min_prefetch_lifespan)
arc_min_prefetch_lifespan = zfs_arc_min_prefetch_lifespan;
/* Valid range: 0 - <all physical memory> */
if ((zfs_arc_sys_free) && (zfs_arc_sys_free != arc_sys_free))
arc_sys_free = MIN(MAX(zfs_arc_sys_free, 0), ptob(physmem));
}
void
@ -5329,6 +5324,10 @@ arc_init(void)
* swapping out pages when it is preferable to shrink the arc.
*/
spl_register_shrinker(&arc_shrinker);
/* Set to 1/64 of all memory or a minimum of 512K */
arc_sys_free = MAX(ptob(physmem / 64), (512 * 1024));
arc_need_free = 0;
#endif
/* Set min cache to allow safe operation of arc_adapt() */
@ -7064,4 +7063,7 @@ MODULE_PARM_DESC(l2arc_feed_again, "Turbo L2ARC warmup");
module_param(l2arc_norw, int, 0644);
MODULE_PARM_DESC(l2arc_norw, "No reads during writes");
module_param(zfs_arc_sys_free, ulong, 0644);
MODULE_PARM_DESC(zfs_arc_sys_free, "System free memory target size in bytes");
#endif