From c30e58c4628de46c36870cdedea9052711092a85 Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Wed, 15 Feb 2017 15:49:33 -0800 Subject: [PATCH] zfs_arc_num_sublists_per_state should be common to all multilists The global tunable zfs_arc_num_sublists_per_state is used by the ARC and the dbuf cache, and other users are planned. We should change this tunable to be common to all multilists. This tuning may be overridden on a per-multilist basis. Reviewed-by: Pavel Zakharov Reviewed-by: Dan Kimmel Reviewed-by: Tony Hutter Reviewed-by: Brian Behlendorf Signed-off-by: Matthew Ahrens Closes #5764 --- include/sys/arc.h | 2 -- include/sys/multilist.h | 4 +-- man/man5/zfs-module-parameters.5 | 7 ++--- module/zfs/arc.c | 36 ++++++++----------------- module/zfs/dbuf.c | 1 - module/zfs/multilist.c | 45 +++++++++++++++++++++++++++++--- 6 files changed, 58 insertions(+), 37 deletions(-) diff --git a/include/sys/arc.h b/include/sys/arc.h index e1422d2e10..07a72302df 100644 --- a/include/sys/arc.h +++ b/include/sys/arc.h @@ -70,8 +70,6 @@ extern int zfs_arc_average_blocksize; arc_done_func_t arc_bcopy_func; arc_done_func_t arc_getbuf_func; -extern int zfs_arc_num_sublists_per_state; - /* generic arc_prune_func_t wrapper for callbacks */ struct arc_prune { arc_prune_func_t *p_pfunc; diff --git a/include/sys/multilist.h b/include/sys/multilist.h index 98d707dd71..9b19d016d9 100644 --- a/include/sys/multilist.h +++ b/include/sys/multilist.h @@ -13,7 +13,7 @@ * CDDL HEADER END */ /* - * Copyright (c) 2013, 2014 by Delphix. All rights reserved. + * Copyright (c) 2013, 2017 by Delphix. All rights reserved. 
*/ #ifndef _SYS_MULTILIST_H @@ -72,7 +72,7 @@ struct multilist { }; void multilist_destroy(multilist_t *); -void multilist_create(multilist_t *, size_t, size_t, unsigned int, +void multilist_create(multilist_t *, size_t, size_t, multilist_sublist_index_func_t *); void multilist_insert(multilist_t *, void *); diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index b1c99cb17c..ca92af5511 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -622,15 +622,16 @@ Default value: \fB0\fR. .sp .ne 2 .na -\fBzfs_arc_num_sublists_per_state\fR (int) +\fBzfs_multilist_num_sublists\fR (int) .ad .RS 12n To allow more fine-grained locking, each ARC state contains a series of lists for both data and meta data objects. Locking is performed at the level of these "sub-lists". This parameters controls the number of -sub-lists per ARC state. +sub-lists per ARC state, and also applies to other uses of the +multilist data structure. .sp -Default value: \fB1\fR or the number of online CPUs, whichever is greater +Default value: \fB4\fR or the number of online CPUs, whichever is greater .RE .sp diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 1495b0891f..9fac5a6c42 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. - * Copyright (c) 2011, 2016 by Delphix. All rights reserved. + * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright (c) 2014 by Saso Kiselkov. All rights reserved. * Copyright 2015 Nexenta Systems, Inc. All rights reserved. */ @@ -307,13 +307,6 @@ static kcondvar_t arc_reclaim_waiters_cv; */ int zfs_arc_evict_batch_limit = 10; -/* - * The number of sublists used for each of the arc state lists. If this - * is not set to a suitable value by the user, it will be configured to - * the number of CPUs on the system in arc_init(). 
- */ -int zfs_arc_num_sublists_per_state = 0; - /* number of seconds before growing cache again */ static int arc_grow_retry = 5; @@ -6285,43 +6278,43 @@ arc_state_init(void) multilist_create(&arc_mru->arcs_list[ARC_BUFC_METADATA], sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), - zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); + arc_state_multilist_index_func); multilist_create(&arc_mru->arcs_list[ARC_BUFC_DATA], sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), - zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); + arc_state_multilist_index_func); multilist_create(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA], sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), - zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); + arc_state_multilist_index_func); multilist_create(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA], sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), - zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); + arc_state_multilist_index_func); multilist_create(&arc_mfu->arcs_list[ARC_BUFC_METADATA], sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), - zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); + arc_state_multilist_index_func); multilist_create(&arc_mfu->arcs_list[ARC_BUFC_DATA], sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), - zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); + arc_state_multilist_index_func); multilist_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA], sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), - zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); + arc_state_multilist_index_func); multilist_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA], sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), - zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); + 
arc_state_multilist_index_func); multilist_create(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA], sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), - zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); + arc_state_multilist_index_func); multilist_create(&arc_l2c_only->arcs_list[ARC_BUFC_DATA], sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), - zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); + arc_state_multilist_index_func); refcount_create(&arc_anon->arcs_esize[ARC_BUFC_METADATA]); refcount_create(&arc_anon->arcs_esize[ARC_BUFC_DATA]); @@ -6452,9 +6445,6 @@ arc_init(void) /* Apply user specified tunings */ arc_tuning_update(); - if (zfs_arc_num_sublists_per_state < 1) - zfs_arc_num_sublists_per_state = MAX(boot_ncpus, 1); - /* if kmem_flags are set, lets try to use less memory */ if (kmem_debugging()) arc_c = arc_c / 2; @@ -7745,10 +7735,6 @@ MODULE_PARM_DESC(zfs_compressed_arc_enabled, "Disable compressed arc buffers"); module_param(zfs_arc_min_prefetch_lifespan, int, 0644); MODULE_PARM_DESC(zfs_arc_min_prefetch_lifespan, "Min life of prefetch block"); -module_param(zfs_arc_num_sublists_per_state, int, 0644); -MODULE_PARM_DESC(zfs_arc_num_sublists_per_state, - "Number of sublists used in each of the ARC state lists"); - module_param(l2arc_write_max, ulong, 0644); MODULE_PARM_DESC(l2arc_write_max, "Max write bytes per interval"); diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 173e1dbb8e..096f74a000 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -673,7 +673,6 @@ retry: multilist_create(&dbuf_cache, sizeof (dmu_buf_impl_t), offsetof(dmu_buf_impl_t, db_cache_link), - zfs_arc_num_sublists_per_state, dbuf_cache_multilist_index_func); refcount_create(&dbuf_cache_size); diff --git a/module/zfs/multilist.c b/module/zfs/multilist.c index e02a4bae33..fa927b43ba 100644 --- a/module/zfs/multilist.c +++ b/module/zfs/multilist.c @@ -13,7 +13,7 @@ * CDDL HEADER END */ /* - * Copyright (c) 
2013, 2014 by Delphix. All rights reserved. + * Copyright (c) 2013, 2017 by Delphix. All rights reserved. */ #include @@ -23,6 +23,12 @@ /* needed for spa_get_random() */ #include +/* + * This overrides the number of sublists in each multilist_t, which defaults + * to the number of CPUs, or at least 4 (see multilist_create()). + */ +int zfs_multilist_num_sublists = 0; + /* * Given the object contained on the list, return a pointer to the * object's multilist_node_t structure it contains. @@ -62,9 +68,9 @@ multilist_d2l(multilist_t *ml, void *obj) * requirement, but a general rule of thumb in order to garner the * best multi-threaded performance out of the data structure. */ -void -multilist_create(multilist_t *ml, size_t size, size_t offset, unsigned int num, - multilist_sublist_index_func_t *index_func) +static void +multilist_create_impl(multilist_t *ml, size_t size, size_t offset, + unsigned int num, multilist_sublist_index_func_t *index_func) { int i; @@ -90,6 +96,26 @@ multilist_create(multilist_t *ml, size_t size, size_t offset, unsigned int num, } } +/* + * Initialize a new multilist, using the default number of sublists + * (the number of CPUs, or at least 4, or the tunable + * zfs_multilist_num_sublists). + */ +void +multilist_create(multilist_t *ml, size_t size, size_t offset, + multilist_sublist_index_func_t *index_func) +{ + int num_sublists; + + if (zfs_multilist_num_sublists > 0) { + num_sublists = zfs_multilist_num_sublists; + } else { + num_sublists = MAX(boot_ncpus, 4); + } + + multilist_create_impl(ml, size, offset, num_sublists, index_func); +} + /* * Destroy the given multilist object, and free up any memory it holds. 
*/ @@ -373,3 +399,14 @@ multilist_link_active(multilist_node_t *link) { return (list_link_active(link)); } + +#if defined(_KERNEL) && defined(HAVE_SPL) + +/* BEGIN CSTYLED */ + +module_param(zfs_multilist_num_sublists, int, 0644); +MODULE_PARM_DESC(zfs_multilist_num_sublists, + "Number of sublists used in each multilist"); + +/* END CSTYLED */ +#endif