Add zfs_arc_meta_limit_percent tunable

ARC will evict meta buffers that exceed the arc_meta_limit. Pending further
investigation into whether meta buffers should receive special protection,
this tunable makes arc_meta_limit adjustable for different workloads.

Users can set zfs_arc_meta_limit_percent to any value when loading zfs.ko
with insmod, so a range check is added to guarantee a suitable arc_meta_limit.

As suggested by Tim Chase, zfs_arc_dnode_limit is changed to a percent-style
tunable as well.

Signed-off-by: GeLiXin <ge.lixin@zte.com.cn>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #4957
This commit is contained in:
GeLiXin 2016-08-11 11:15:37 +08:00 committed by Brian Behlendorf
parent 3e635ac15c
commit 9907cc1cc8
2 changed files with 71 additions and 12 deletions

41
man/man5/zfs-module-parameters.5 Normal file → Executable file
View File

@ -402,14 +402,30 @@ Default value: \fB2\fR.
.RS 12n .RS 12n
When the number of bytes consumed by dnodes in the ARC exceeds this number of When the number of bytes consumed by dnodes in the ARC exceeds this number of
bytes, try to unpin some of it in response to demand for non-metadata. This bytes, try to unpin some of it in response to demand for non-metadata. This
value acts as a floor to the amount of dnode metadata. value acts as a floor to the amount of dnode metadata, and defaults to 0 which
indicates that a percentage of the ARC meta buffers, set by
\fBzfs_arc_dnode_limit_percent\fR, may be used for dnodes.
See also \fBzfs_arc_meta_prune\fR which serves a similar purpose but is used See also \fBzfs_arc_meta_prune\fR which serves a similar purpose but is used
when the amount of metadata in the ARC exceeds \fBzfs_arc_meta_limit\fR rather when the amount of metadata in the ARC exceeds \fBzfs_arc_meta_limit\fR rather
than in response to overall demand for non-metadata. than in response to overall demand for non-metadata.
.sp .sp
Default value: \fB10% of zfs_arc_meta_limit\fR. Default value: \fB0\fR.
.RE
.sp
.ne 2
.na
\fBzfs_arc_dnode_limit_percent\fR (ulong)
.ad
.RS 12n
Percentage of ARC meta buffers that can be consumed by dnodes.
.sp
See also \fBzfs_arc_dnode_limit\fR which serves a similar purpose but has a
higher priority if set to a nonzero value.
.sp
Default value: \fB10\fR.
.RE .RE
.sp .sp
@ -503,15 +519,30 @@ Default value: \fB0\fR.
The maximum allowed size in bytes that meta data buffers are allowed to The maximum allowed size in bytes that meta data buffers are allowed to
consume in the ARC. When this limit is reached meta data buffers will consume in the ARC. When this limit is reached meta data buffers will
be reclaimed even if the overall arc_c_max has not been reached. This be reclaimed even if the overall arc_c_max has not been reached. This
value defaults to 0 which indicates that 3/4 of the ARC may be used value defaults to 0 which indicates that a percentage of the ARC, set by
for meta data. \fBzfs_arc_meta_limit_percent\fR, may be used for meta data.
.sp .sp
This value may be changed dynamically except that it cannot be set back to 0 This value may be changed dynamically except that it cannot be set back to 0
for 3/4 of the ARC; it must be set to an explicit value. for a specific percent of the ARC; it must be set to an explicit value.
.sp .sp
Default value: \fB0\fR. Default value: \fB0\fR.
.RE .RE
.sp
.ne 2
.na
\fBzfs_arc_meta_limit_percent\fR (ulong)
.ad
.RS 12n
Percentage of ARC buffers that can be used for meta data.
See also \fBzfs_arc_meta_limit\fR which serves a similar purpose but has a
higher priority if set to a nonzero value.
.sp
Default value: \fB75\fR.
.RE
.sp .sp
.ne 2 .ne 2
.na .na

40
module/zfs/arc.c Normal file → Executable file
View File

@ -239,6 +239,17 @@ int zfs_arc_p_min_shift = 0;
int zfs_disable_dup_eviction = 0; int zfs_disable_dup_eviction = 0;
int zfs_arc_average_blocksize = 8 * 1024; /* 8KB */ int zfs_arc_average_blocksize = 8 * 1024; /* 8KB */
/*
* ARC will evict meta buffers that exceed arc_meta_limit. This
* tunable makes arc_meta_limit adjustable for different workloads.
*/
unsigned long zfs_arc_meta_limit_percent = 75;
/*
* Percentage of ARC meta buffers that can be consumed by dnodes.
*/
unsigned long zfs_arc_dnode_limit_percent = 10;
/* /*
* These tunables are Linux specific * These tunables are Linux specific
*/ */
@ -5357,6 +5368,7 @@ arc_state_multilist_index_func(multilist_t *ml, void *obj)
static void static void
arc_tuning_update(void) arc_tuning_update(void)
{ {
uint64_t percent;
/* Valid range: 64M - <all physical memory> */ /* Valid range: 64M - <all physical memory> */
if ((zfs_arc_max) && (zfs_arc_max != arc_c_max) && if ((zfs_arc_max) && (zfs_arc_max != arc_c_max) &&
(zfs_arc_max > 64 << 20) && (zfs_arc_max < ptob(physmem)) && (zfs_arc_max > 64 << 20) && (zfs_arc_max < ptob(physmem)) &&
@ -5364,8 +5376,11 @@ arc_tuning_update(void)
arc_c_max = zfs_arc_max; arc_c_max = zfs_arc_max;
arc_c = arc_c_max; arc_c = arc_c_max;
arc_p = (arc_c >> 1); arc_p = (arc_c >> 1);
arc_meta_limit = (3 * arc_c_max) / 4; /* Valid range of arc_meta_limit: arc_meta_min - arc_c_max */
arc_dnode_limit = arc_meta_limit / 10; percent = MIN(zfs_arc_meta_limit_percent, 100);
arc_meta_limit = MAX(arc_meta_min, (percent * arc_c_max) / 100);
percent = MIN(zfs_arc_dnode_limit_percent, 100);
arc_dnode_limit = (percent * arc_meta_limit) / 100;
} }
/* Valid range: 32M - <arc_c_max> */ /* Valid range: 32M - <arc_c_max> */
@ -5437,6 +5452,7 @@ arc_init(void)
#else #else
uint64_t allmem = (physmem * PAGESIZE) / 2; uint64_t allmem = (physmem * PAGESIZE) / 2;
#endif #endif
uint64_t percent;
mutex_init(&arc_reclaim_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&arc_reclaim_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&arc_reclaim_thread_cv, NULL, CV_DEFAULT, NULL); cv_init(&arc_reclaim_thread_cv, NULL, CV_DEFAULT, NULL);
@ -5493,10 +5509,14 @@ arc_init(void)
arc_meta_min = 1ULL << SPA_MAXBLOCKSHIFT; arc_meta_min = 1ULL << SPA_MAXBLOCKSHIFT;
/* Initialize maximum observed usage to zero */ /* Initialize maximum observed usage to zero */
arc_meta_max = 0; arc_meta_max = 0;
/* Set limit to 3/4 of arc_c_max with a floor of arc_meta_min */ /*
arc_meta_limit = MAX((3 * arc_c_max) / 4, arc_meta_min); * Set arc_meta_limit to a percent of arc_c_max with a floor of
/* Default dnode limit is 10% of overall meta limit */ * arc_meta_min, and a ceiling of arc_c_max.
arc_dnode_limit = arc_meta_limit / 10; */
percent = MIN(zfs_arc_meta_limit_percent, 100);
arc_meta_limit = MAX(arc_meta_min, (percent * arc_c_max) / 100);
percent = MIN(zfs_arc_dnode_limit_percent, 100);
arc_dnode_limit = (percent * arc_meta_limit) / 100;
/* Apply user specified tunings */ /* Apply user specified tunings */
arc_tuning_update(); arc_tuning_update();
@ -7169,6 +7189,10 @@ MODULE_PARM_DESC(zfs_arc_max, "Max arc size");
module_param(zfs_arc_meta_limit, ulong, 0644); module_param(zfs_arc_meta_limit, ulong, 0644);
MODULE_PARM_DESC(zfs_arc_meta_limit, "Meta limit for arc size"); MODULE_PARM_DESC(zfs_arc_meta_limit, "Meta limit for arc size");
module_param(zfs_arc_meta_limit_percent, ulong, 0644);
MODULE_PARM_DESC(zfs_arc_meta_limit_percent,
"Percent of arc size for arc meta limit");
module_param(zfs_arc_meta_min, ulong, 0644); module_param(zfs_arc_meta_min, ulong, 0644);
MODULE_PARM_DESC(zfs_arc_meta_min, "Min arc metadata"); MODULE_PARM_DESC(zfs_arc_meta_min, "Min arc metadata");
@ -7253,6 +7277,10 @@ MODULE_PARM_DESC(zfs_arc_sys_free, "System free memory target size in bytes");
module_param(zfs_arc_dnode_limit, ulong, 0644); module_param(zfs_arc_dnode_limit, ulong, 0644);
MODULE_PARM_DESC(zfs_arc_dnode_limit, "Minimum bytes of dnodes in arc"); MODULE_PARM_DESC(zfs_arc_dnode_limit, "Minimum bytes of dnodes in arc");
module_param(zfs_arc_dnode_limit_percent, ulong, 0644);
MODULE_PARM_DESC(zfs_arc_dnode_limit_percent,
"Percent of ARC meta buffers for dnodes");
module_param(zfs_arc_dnode_reduce_percent, ulong, 0644); module_param(zfs_arc_dnode_reduce_percent, ulong, 0644);
MODULE_PARM_DESC(zfs_arc_dnode_reduce_percent, MODULE_PARM_DESC(zfs_arc_dnode_reduce_percent,
"Percentage of excess dnodes to try to unpin"); "Percentage of excess dnodes to try to unpin");