zfs_arc_num_sublists_per_state should be common to all multilists

The global tunable zfs_arc_num_sublists_per_state is used by the ARC and
the dbuf cache, and other users are planned. We should change this
tunable to be common to all multilists.  This tuning may be overridden
on a per-multilist basis.

Reviewed-by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed-by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Matthew Ahrens <mahrens@delphix.com>
Closes #5764
This commit is contained in:
Matthew Ahrens 2017-02-15 15:49:33 -08:00 committed by Brian Behlendorf
parent 3d3fe9f9bb
commit c30e58c462
6 changed files with 58 additions and 37 deletions

View File

@ -70,8 +70,6 @@ extern int zfs_arc_average_blocksize;
arc_done_func_t arc_bcopy_func; arc_done_func_t arc_bcopy_func;
arc_done_func_t arc_getbuf_func; arc_done_func_t arc_getbuf_func;
extern int zfs_arc_num_sublists_per_state;
/* generic arc_prune_func_t wrapper for callbacks */ /* generic arc_prune_func_t wrapper for callbacks */
struct arc_prune { struct arc_prune {
arc_prune_func_t *p_pfunc; arc_prune_func_t *p_pfunc;

View File

@ -13,7 +13,7 @@
* CDDL HEADER END * CDDL HEADER END
*/ */
/* /*
* Copyright (c) 2013, 2014 by Delphix. All rights reserved. * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
*/ */
#ifndef _SYS_MULTILIST_H #ifndef _SYS_MULTILIST_H
@ -72,7 +72,7 @@ struct multilist {
}; };
void multilist_destroy(multilist_t *); void multilist_destroy(multilist_t *);
void multilist_create(multilist_t *, size_t, size_t, unsigned int, void multilist_create(multilist_t *, size_t, size_t,
multilist_sublist_index_func_t *); multilist_sublist_index_func_t *);
void multilist_insert(multilist_t *, void *); void multilist_insert(multilist_t *, void *);

View File

@ -622,15 +622,16 @@ Default value: \fB0\fR.
.sp .sp
.ne 2 .ne 2
.na .na
\fBzfs_arc_num_sublists_per_state\fR (int) \fBzfs_multilist_num_sublists\fR (int)
.ad .ad
.RS 12n .RS 12n
To allow more fine-grained locking, each ARC state contains a series To allow more fine-grained locking, each ARC state contains a series
of lists for both data and meta data objects. Locking is performed at of lists for both data and meta data objects. Locking is performed at
the level of these "sub-lists". This parameter controls the number of the level of these "sub-lists". This parameter controls the number of
sub-lists per ARC state. sub-lists per ARC state, and also applies to other uses of the
multilist data structure.
.sp .sp
Default value: \fB1\fR or the number of online CPUs, whichever is greater Default value: \fB4\fR or the number of online CPUs, whichever is greater
.RE .RE
.sp .sp

View File

@ -21,7 +21,7 @@
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2011, 2016 by Delphix. All rights reserved. * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2014 by Saso Kiselkov. All rights reserved. * Copyright (c) 2014 by Saso Kiselkov. All rights reserved.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/ */
@ -307,13 +307,6 @@ static kcondvar_t arc_reclaim_waiters_cv;
*/ */
int zfs_arc_evict_batch_limit = 10; int zfs_arc_evict_batch_limit = 10;
/*
* The number of sublists used for each of the arc state lists. If this
* is not set to a suitable value by the user, it will be configured to
* the number of CPUs on the system in arc_init().
*/
int zfs_arc_num_sublists_per_state = 0;
/* number of seconds before growing cache again */ /* number of seconds before growing cache again */
static int arc_grow_retry = 5; static int arc_grow_retry = 5;
@ -6285,43 +6278,43 @@ arc_state_init(void)
multilist_create(&arc_mru->arcs_list[ARC_BUFC_METADATA], multilist_create(&arc_mru->arcs_list[ARC_BUFC_METADATA],
sizeof (arc_buf_hdr_t), sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); arc_state_multilist_index_func);
multilist_create(&arc_mru->arcs_list[ARC_BUFC_DATA], multilist_create(&arc_mru->arcs_list[ARC_BUFC_DATA],
sizeof (arc_buf_hdr_t), sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); arc_state_multilist_index_func);
multilist_create(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA], multilist_create(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA],
sizeof (arc_buf_hdr_t), sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); arc_state_multilist_index_func);
multilist_create(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA], multilist_create(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA],
sizeof (arc_buf_hdr_t), sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); arc_state_multilist_index_func);
multilist_create(&arc_mfu->arcs_list[ARC_BUFC_METADATA], multilist_create(&arc_mfu->arcs_list[ARC_BUFC_METADATA],
sizeof (arc_buf_hdr_t), sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); arc_state_multilist_index_func);
multilist_create(&arc_mfu->arcs_list[ARC_BUFC_DATA], multilist_create(&arc_mfu->arcs_list[ARC_BUFC_DATA],
sizeof (arc_buf_hdr_t), sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); arc_state_multilist_index_func);
multilist_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA], multilist_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA],
sizeof (arc_buf_hdr_t), sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); arc_state_multilist_index_func);
multilist_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA], multilist_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA],
sizeof (arc_buf_hdr_t), sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); arc_state_multilist_index_func);
multilist_create(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA], multilist_create(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA],
sizeof (arc_buf_hdr_t), sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); arc_state_multilist_index_func);
multilist_create(&arc_l2c_only->arcs_list[ARC_BUFC_DATA], multilist_create(&arc_l2c_only->arcs_list[ARC_BUFC_DATA],
sizeof (arc_buf_hdr_t), sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); arc_state_multilist_index_func);
refcount_create(&arc_anon->arcs_esize[ARC_BUFC_METADATA]); refcount_create(&arc_anon->arcs_esize[ARC_BUFC_METADATA]);
refcount_create(&arc_anon->arcs_esize[ARC_BUFC_DATA]); refcount_create(&arc_anon->arcs_esize[ARC_BUFC_DATA]);
@ -6452,9 +6445,6 @@ arc_init(void)
/* Apply user specified tunings */ /* Apply user specified tunings */
arc_tuning_update(); arc_tuning_update();
if (zfs_arc_num_sublists_per_state < 1)
zfs_arc_num_sublists_per_state = MAX(boot_ncpus, 1);
/* if kmem_flags are set, lets try to use less memory */ /* if kmem_flags are set, lets try to use less memory */
if (kmem_debugging()) if (kmem_debugging())
arc_c = arc_c / 2; arc_c = arc_c / 2;
@ -7745,10 +7735,6 @@ MODULE_PARM_DESC(zfs_compressed_arc_enabled, "Disable compressed arc buffers");
module_param(zfs_arc_min_prefetch_lifespan, int, 0644); module_param(zfs_arc_min_prefetch_lifespan, int, 0644);
MODULE_PARM_DESC(zfs_arc_min_prefetch_lifespan, "Min life of prefetch block"); MODULE_PARM_DESC(zfs_arc_min_prefetch_lifespan, "Min life of prefetch block");
module_param(zfs_arc_num_sublists_per_state, int, 0644);
MODULE_PARM_DESC(zfs_arc_num_sublists_per_state,
"Number of sublists used in each of the ARC state lists");
module_param(l2arc_write_max, ulong, 0644); module_param(l2arc_write_max, ulong, 0644);
MODULE_PARM_DESC(l2arc_write_max, "Max write bytes per interval"); MODULE_PARM_DESC(l2arc_write_max, "Max write bytes per interval");

View File

@ -673,7 +673,6 @@ retry:
multilist_create(&dbuf_cache, sizeof (dmu_buf_impl_t), multilist_create(&dbuf_cache, sizeof (dmu_buf_impl_t),
offsetof(dmu_buf_impl_t, db_cache_link), offsetof(dmu_buf_impl_t, db_cache_link),
zfs_arc_num_sublists_per_state,
dbuf_cache_multilist_index_func); dbuf_cache_multilist_index_func);
refcount_create(&dbuf_cache_size); refcount_create(&dbuf_cache_size);

View File

@ -13,7 +13,7 @@
* CDDL HEADER END * CDDL HEADER END
*/ */
/* /*
* Copyright (c) 2013, 2014 by Delphix. All rights reserved. * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
*/ */
#include <sys/zfs_context.h> #include <sys/zfs_context.h>
@ -23,6 +23,12 @@
/* needed for spa_get_random() */ /* needed for spa_get_random() */
#include <sys/spa.h> #include <sys/spa.h>
/*
* This overrides the number of sublists in each multilist_t. When it is
* not set to a positive value, the default is used: the number of CPUs
* in the system or 4, whichever is greater (see multilist_create()).
*/
int zfs_multilist_num_sublists = 0;
/* /*
* Given the object contained on the list, return a pointer to the * Given the object contained on the list, return a pointer to the
* object's multilist_node_t structure it contains. * object's multilist_node_t structure it contains.
@ -62,9 +68,9 @@ multilist_d2l(multilist_t *ml, void *obj)
* requirement, but a general rule of thumb in order to garner the * requirement, but a general rule of thumb in order to garner the
* best multi-threaded performance out of the data structure. * best multi-threaded performance out of the data structure.
*/ */
void static void
multilist_create(multilist_t *ml, size_t size, size_t offset, unsigned int num, multilist_create_impl(multilist_t *ml, size_t size, size_t offset,
multilist_sublist_index_func_t *index_func) unsigned int num, multilist_sublist_index_func_t *index_func)
{ {
int i; int i;
@ -90,6 +96,26 @@ multilist_create(multilist_t *ml, size_t size, size_t offset, unsigned int num,
} }
} }
/*
* Initialize a new multilist, using the default number of sublists:
* the tunable zfs_multilist_num_sublists if it is set to a positive
* value, otherwise the number of CPUs or 4, whichever is greater.
*/
void
multilist_create(multilist_t *ml, size_t size, size_t offset,
multilist_sublist_index_func_t *index_func)
{
int num_sublists;
if (zfs_multilist_num_sublists > 0) {
num_sublists = zfs_multilist_num_sublists;
} else {
num_sublists = MAX(boot_ncpus, 4);
}
multilist_create_impl(ml, size, offset, num_sublists, index_func);
}
/* /*
* Destroy the given multilist object, and free up any memory it holds. * Destroy the given multilist object, and free up any memory it holds.
*/ */
@ -373,3 +399,14 @@ multilist_link_active(multilist_node_t *link)
{ {
return (list_link_active(link)); return (list_link_active(link));
} }
#if defined(_KERNEL) && defined(HAVE_SPL)
/* BEGIN CSTYLED */
module_param(zfs_multilist_num_sublists, int, 0644);
MODULE_PARM_DESC(zfs_multilist_num_sublists,
"Number of sublists used in each multilist");
/* END CSTYLED */
#endif