From ffdf019cb317b16109ec74ae199d4253a6662f5a Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Thu, 10 Jun 2021 12:42:31 -0400 Subject: [PATCH] Re-embed multilist_t storage This commit partially reverts changes to multilists in PR 7968 (multi-threaded spa-sync()) and adds some cache line alignments to separate read-only multilists and heavily modified refcounts to different cache lines. Reviewed-by: Matthew Ahrens Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Sponsored-by: iXsystems, Inc. Closes #12158 --- include/sys/arc_impl.h | 12 +++--- include/sys/dmu_objset.h | 4 +- include/sys/metaslab_impl.h | 2 +- include/sys/multilist.h | 3 +- module/zfs/arc.c | 86 ++++++++++++++++++------------------- module/zfs/dbuf.c | 20 ++++----- module/zfs/dmu_objset.c | 30 ++++++------- module/zfs/dnode.c | 2 +- module/zfs/dsl_dataset.c | 3 +- module/zfs/dsl_pool.c | 3 +- module/zfs/metaslab.c | 24 +++++------ module/zfs/multilist.c | 14 +++--- 12 files changed, 99 insertions(+), 104 deletions(-) diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h index 94123fc10e..c01da46e01 100644 --- a/include/sys/arc_impl.h +++ b/include/sys/arc_impl.h @@ -74,20 +74,20 @@ typedef struct arc_state { /* * list of evictable buffers */ - multilist_t *arcs_list[ARC_BUFC_NUMTYPES]; + multilist_t arcs_list[ARC_BUFC_NUMTYPES]; + /* + * supports the "dbufs" kstat + */ + arc_state_type_t arcs_state; /* * total amount of evictable data in this state */ - zfs_refcount_t arcs_esize[ARC_BUFC_NUMTYPES]; + zfs_refcount_t arcs_esize[ARC_BUFC_NUMTYPES] ____cacheline_aligned; /* * total amount of data in this state; this includes: evictable, * non-evictable, ARC_BUFC_DATA, and ARC_BUFC_METADATA. 
*/ zfs_refcount_t arcs_size; - /* - * supports the "dbufs" kstat - */ - arc_state_type_t arcs_state; } arc_state_t; typedef struct arc_callback arc_callback_t; diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h index a8cb812714..e89ee64ea6 100644 --- a/include/sys/dmu_objset.h +++ b/include/sys/dmu_objset.h @@ -153,7 +153,7 @@ struct objset { /* no lock needed: */ struct dmu_tx *os_synctx; /* XXX sketchy */ zil_header_t os_zil_header; - multilist_t *os_synced_dnodes; + multilist_t os_synced_dnodes; uint64_t os_flags; uint64_t os_freed_dnodes; boolean_t os_rescan_dnodes; @@ -172,7 +172,7 @@ struct objset { /* Protected by os_lock */ kmutex_t os_lock; - multilist_t *os_dirty_dnodes[TXG_SIZE]; + multilist_t os_dirty_dnodes[TXG_SIZE]; list_t os_dnodes; list_t os_downgraded_dbufs; diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h index 3be0c466c4..9924c3ba0e 100644 --- a/include/sys/metaslab_impl.h +++ b/include/sys/metaslab_impl.h @@ -206,7 +206,7 @@ struct metaslab_class { * List of all loaded metaslabs in the class, sorted in order of most * recent use. 
*/ - multilist_t *mc_metaslab_txg_list; + multilist_t mc_metaslab_txg_list; metaslab_class_allocator_t mc_allocator[]; }; diff --git a/include/sys/multilist.h b/include/sys/multilist.h index 0c7b4075d9..26f37c37ab 100644 --- a/include/sys/multilist.h +++ b/include/sys/multilist.h @@ -71,8 +71,9 @@ struct multilist { multilist_sublist_index_func_t *ml_index_func; }; +void multilist_create(multilist_t *, size_t, size_t, + multilist_sublist_index_func_t *); void multilist_destroy(multilist_t *); -multilist_t *multilist_create(size_t, size_t, multilist_sublist_index_func_t *); void multilist_insert(multilist_t *, void *); void multilist_remove(multilist_t *, void *); diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 5526cae378..737904f348 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -2327,7 +2327,7 @@ add_reference(arc_buf_hdr_t *hdr, void *tag) (state != arc_anon)) { /* We don't use the L2-only state list. */ if (state != arc_l2c_only) { - multilist_remove(state->arcs_list[arc_buf_type(hdr)], + multilist_remove(&state->arcs_list[arc_buf_type(hdr)], hdr); arc_evictable_space_decrement(hdr, state); } @@ -2361,7 +2361,7 @@ remove_reference(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, void *tag) */ if (((cnt = zfs_refcount_remove(&hdr->b_l1hdr.b_refcnt, tag)) == 0) && (state != arc_anon)) { - multilist_insert(state->arcs_list[arc_buf_type(hdr)], hdr); + multilist_insert(&state->arcs_list[arc_buf_type(hdr)], hdr); ASSERT3U(hdr->b_l1hdr.b_bufcnt, >, 0); arc_evictable_space_increment(hdr, state); } @@ -2464,7 +2464,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, if (refcnt == 0) { if (old_state != arc_anon && old_state != arc_l2c_only) { ASSERT(HDR_HAS_L1HDR(hdr)); - multilist_remove(old_state->arcs_list[buftype], hdr); + multilist_remove(&old_state->arcs_list[buftype], hdr); if (GHOST_STATE(old_state)) { ASSERT0(bufcnt); @@ -2481,7 +2481,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, * beforehand. 
*/ ASSERT(HDR_HAS_L1HDR(hdr)); - multilist_insert(new_state->arcs_list[buftype], hdr); + multilist_insert(&new_state->arcs_list[buftype], hdr); if (GHOST_STATE(new_state)) { ASSERT0(bufcnt); @@ -2633,8 +2633,8 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, * L2 headers should never be on the L2 state list since they don't * have L1 headers allocated. */ - ASSERT(multilist_is_empty(arc_l2c_only->arcs_list[ARC_BUFC_DATA]) && - multilist_is_empty(arc_l2c_only->arcs_list[ARC_BUFC_METADATA])); + ASSERT(multilist_is_empty(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]) && + multilist_is_empty(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA])); } void @@ -4200,7 +4200,7 @@ arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes, arc_buf_contents_t type) { uint64_t total_evicted = 0; - multilist_t *ml = state->arcs_list[type]; + multilist_t *ml = &state->arcs_list[type]; int num_sublists; arc_buf_hdr_t **markers; @@ -4534,8 +4534,8 @@ arc_evict_meta(uint64_t meta_used) static arc_buf_contents_t arc_evict_type(arc_state_t *state) { - multilist_t *data_ml = state->arcs_list[ARC_BUFC_DATA]; - multilist_t *meta_ml = state->arcs_list[ARC_BUFC_METADATA]; + multilist_t *data_ml = &state->arcs_list[ARC_BUFC_DATA]; + multilist_t *meta_ml = &state->arcs_list[ARC_BUFC_METADATA]; int data_idx = multilist_get_random_index(data_ml); int meta_idx = multilist_get_random_index(meta_ml); multilist_sublist_t *data_mls; @@ -7455,44 +7455,44 @@ arc_state_init(void) arc_mfu_ghost = &ARC_mfu_ghost; arc_l2c_only = &ARC_l2c_only; - arc_mru->arcs_list[ARC_BUFC_METADATA] = - multilist_create(sizeof (arc_buf_hdr_t), + multilist_create(&arc_mru->arcs_list[ARC_BUFC_METADATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), arc_state_multilist_index_func); - arc_mru->arcs_list[ARC_BUFC_DATA] = - multilist_create(sizeof (arc_buf_hdr_t), + multilist_create(&arc_mru->arcs_list[ARC_BUFC_DATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), 
arc_state_multilist_index_func); - arc_mru_ghost->arcs_list[ARC_BUFC_METADATA] = - multilist_create(sizeof (arc_buf_hdr_t), + multilist_create(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), arc_state_multilist_index_func); - arc_mru_ghost->arcs_list[ARC_BUFC_DATA] = - multilist_create(sizeof (arc_buf_hdr_t), + multilist_create(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), arc_state_multilist_index_func); - arc_mfu->arcs_list[ARC_BUFC_METADATA] = - multilist_create(sizeof (arc_buf_hdr_t), + multilist_create(&arc_mfu->arcs_list[ARC_BUFC_METADATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), arc_state_multilist_index_func); - arc_mfu->arcs_list[ARC_BUFC_DATA] = - multilist_create(sizeof (arc_buf_hdr_t), + multilist_create(&arc_mfu->arcs_list[ARC_BUFC_DATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), arc_state_multilist_index_func); - arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA] = - multilist_create(sizeof (arc_buf_hdr_t), + multilist_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), arc_state_multilist_index_func); - arc_mfu_ghost->arcs_list[ARC_BUFC_DATA] = - multilist_create(sizeof (arc_buf_hdr_t), + multilist_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), arc_state_multilist_index_func); - arc_l2c_only->arcs_list[ARC_BUFC_METADATA] = - multilist_create(sizeof (arc_buf_hdr_t), + multilist_create(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), arc_state_multilist_index_func); - arc_l2c_only->arcs_list[ARC_BUFC_DATA] = - multilist_create(sizeof (arc_buf_hdr_t), + multilist_create(&arc_l2c_only->arcs_list[ARC_BUFC_DATA], + sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, 
b_l1hdr.b_arc_node), arc_state_multilist_index_func); @@ -7558,16 +7558,16 @@ arc_state_fini(void) zfs_refcount_destroy(&arc_mfu_ghost->arcs_size); zfs_refcount_destroy(&arc_l2c_only->arcs_size); - multilist_destroy(arc_mru->arcs_list[ARC_BUFC_METADATA]); - multilist_destroy(arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]); - multilist_destroy(arc_mfu->arcs_list[ARC_BUFC_METADATA]); - multilist_destroy(arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA]); - multilist_destroy(arc_mru->arcs_list[ARC_BUFC_DATA]); - multilist_destroy(arc_mru_ghost->arcs_list[ARC_BUFC_DATA]); - multilist_destroy(arc_mfu->arcs_list[ARC_BUFC_DATA]); - multilist_destroy(arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]); - multilist_destroy(arc_l2c_only->arcs_list[ARC_BUFC_METADATA]); - multilist_destroy(arc_l2c_only->arcs_list[ARC_BUFC_DATA]); + multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_METADATA]); + multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]); + multilist_destroy(&arc_mfu->arcs_list[ARC_BUFC_METADATA]); + multilist_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA]); + multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_DATA]); + multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA]); + multilist_destroy(&arc_mfu->arcs_list[ARC_BUFC_DATA]); + multilist_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]); + multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA]); + multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]); aggsum_fini(&arc_meta_used); aggsum_fini(&arc_size); @@ -8624,16 +8624,16 @@ l2arc_sublist_lock(int list_num) switch (list_num) { case 0: - ml = arc_mfu->arcs_list[ARC_BUFC_METADATA]; + ml = &arc_mfu->arcs_list[ARC_BUFC_METADATA]; break; case 1: - ml = arc_mru->arcs_list[ARC_BUFC_METADATA]; + ml = &arc_mru->arcs_list[ARC_BUFC_METADATA]; break; case 2: - ml = arc_mfu->arcs_list[ARC_BUFC_DATA]; + ml = &arc_mfu->arcs_list[ARC_BUFC_DATA]; break; case 3: - ml = arc_mru->arcs_list[ARC_BUFC_DATA]; + ml = &arc_mru->arcs_list[ARC_BUFC_DATA]; break; default: return 
(NULL); diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 764383b2d0..9d741545fa 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -203,8 +203,8 @@ static boolean_t dbuf_evict_thread_exit; * by those caches' matching enum values (from dbuf_cached_state_t). */ typedef struct dbuf_cache { - multilist_t *cache; - zfs_refcount_t size; + multilist_t cache; + zfs_refcount_t size ____cacheline_aligned; } dbuf_cache_t; dbuf_cache_t dbuf_caches[DB_CACHE_MAX]; @@ -667,9 +667,9 @@ dbuf_cache_above_lowater(void) static void dbuf_evict_one(void) { - int idx = multilist_get_random_index(dbuf_caches[DB_DBUF_CACHE].cache); + int idx = multilist_get_random_index(&dbuf_caches[DB_DBUF_CACHE].cache); multilist_sublist_t *mls = multilist_sublist_lock( - dbuf_caches[DB_DBUF_CACHE].cache, idx); + &dbuf_caches[DB_DBUF_CACHE].cache, idx); ASSERT(!MUTEX_HELD(&dbuf_evict_lock)); @@ -833,8 +833,8 @@ retry: dbu_evict_taskq = taskq_create("dbu_evict", 1, defclsyspri, 0, 0, 0); for (dbuf_cached_state_t dcs = 0; dcs < DB_CACHE_MAX; dcs++) { - dbuf_caches[dcs].cache = - multilist_create(sizeof (dmu_buf_impl_t), + multilist_create(&dbuf_caches[dcs].cache, + sizeof (dmu_buf_impl_t), offsetof(dmu_buf_impl_t, db_cache_link), dbuf_cache_multilist_index_func); zfs_refcount_create(&dbuf_caches[dcs].size); @@ -901,7 +901,7 @@ dbuf_fini(void) for (dbuf_cached_state_t dcs = 0; dcs < DB_CACHE_MAX; dcs++) { zfs_refcount_destroy(&dbuf_caches[dcs].size); - multilist_destroy(dbuf_caches[dcs].cache); + multilist_destroy(&dbuf_caches[dcs].cache); } if (dbuf_ksp != NULL) { @@ -2755,7 +2755,7 @@ dbuf_destroy(dmu_buf_impl_t *db) ASSERT(db->db_caching_status == DB_DBUF_CACHE || db->db_caching_status == DB_DBUF_METADATA_CACHE); - multilist_remove(dbuf_caches[db->db_caching_status].cache, db); + multilist_remove(&dbuf_caches[db->db_caching_status].cache, db); (void) zfs_refcount_remove_many( &dbuf_caches[db->db_caching_status].size, db->db.db_size, db); @@ -3465,7 +3465,7 @@ dbuf_hold_impl(dnode_t *dn, 
uint8_t level, uint64_t blkid, ASSERT(db->db_caching_status == DB_DBUF_CACHE || db->db_caching_status == DB_DBUF_METADATA_CACHE); - multilist_remove(dbuf_caches[db->db_caching_status].cache, db); + multilist_remove(&dbuf_caches[db->db_caching_status].cache, db); (void) zfs_refcount_remove_many( &dbuf_caches[db->db_caching_status].size, db->db.db_size, db); @@ -3707,7 +3707,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting) DB_DBUF_METADATA_CACHE : DB_DBUF_CACHE; db->db_caching_status = dcs; - multilist_insert(dbuf_caches[dcs].cache, db); + multilist_insert(&dbuf_caches[dcs].cache, db); size = zfs_refcount_add_many( &dbuf_caches[dcs].size, db->db.db_size, db); diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index bfb4adf262..8c244dc4c3 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -601,7 +601,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, os->os_zil = zil_alloc(os, &os->os_zil_header); for (i = 0; i < TXG_SIZE; i++) { - os->os_dirty_dnodes[i] = multilist_create(sizeof (dnode_t), + multilist_create(&os->os_dirty_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i]), dnode_multilist_index_func); } @@ -995,9 +995,8 @@ dmu_objset_evict_done(objset_t *os) mutex_destroy(&os->os_obj_lock); mutex_destroy(&os->os_user_ptr_lock); mutex_destroy(&os->os_upgrade_lock); - for (int i = 0; i < TXG_SIZE; i++) { - multilist_destroy(os->os_dirty_dnodes[i]); - } + for (int i = 0; i < TXG_SIZE; i++) + multilist_destroy(&os->os_dirty_dnodes[i]); spa_evicting_os_deregister(os->os_spa, os); kmem_free(os, sizeof (objset_t)); } @@ -1520,7 +1519,7 @@ dmu_objset_sync_dnodes(multilist_sublist_t *list, dmu_tx_t *tx) * of why this dnode hold is always needed (even when not * doing user accounting). 
*/ - multilist_t *newlist = dn->dn_objset->os_synced_dnodes; + multilist_t *newlist = &dn->dn_objset->os_synced_dnodes; (void) dnode_add_ref(dn, newlist); multilist_insert(newlist, dn); @@ -1689,17 +1688,16 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) * dn_dirty_link[] of this txg. But it may already * exist because we call dsl_dataset_sync() twice per txg. */ - if (os->os_synced_dnodes == NULL) { - os->os_synced_dnodes = - multilist_create(sizeof (dnode_t), + if (os->os_synced_dnodes.ml_sublists == NULL) { + multilist_create(&os->os_synced_dnodes, sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[txgoff]), dnode_multilist_index_func); } else { - ASSERT3U(os->os_synced_dnodes->ml_offset, ==, + ASSERT3U(os->os_synced_dnodes.ml_offset, ==, offsetof(dnode_t, dn_dirty_link[txgoff])); } - ml = os->os_dirty_dnodes[txgoff]; + ml = &os->os_dirty_dnodes[txgoff]; num_sublists = multilist_get_num_sublists(ml); for (int i = 0; i < num_sublists; i++) { if (multilist_sublist_is_empty_idx(ml, i)) @@ -1738,7 +1736,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) boolean_t dmu_objset_is_dirty(objset_t *os, uint64_t txg) { - return (!multilist_is_empty(os->os_dirty_dnodes[txg & TXG_MASK])); + return (!multilist_is_empty(&os->os_dirty_dnodes[txg & TXG_MASK])); } static file_info_cb_t *file_cbs[DMU_OST_NUMTYPES]; @@ -1949,7 +1947,7 @@ userquota_updates_task(void *arg) userquota_cache_t cache = { { 0 } }; multilist_sublist_t *list = - multilist_sublist_lock(os->os_synced_dnodes, uua->uua_sublist_idx); + multilist_sublist_lock(&os->os_synced_dnodes, uua->uua_sublist_idx); ASSERT(multilist_sublist_head(list) == NULL || dmu_objset_userused_enabled(os)); @@ -2006,7 +2004,7 @@ userquota_updates_task(void *arg) mutex_exit(&dn->dn_mtx); multilist_sublist_remove(list, dn); - dnode_rele(dn, os->os_synced_dnodes); + dnode_rele(dn, &os->os_synced_dnodes); } do_userquota_cacheflush(os, &cache, tx); multilist_sublist_unlock(list); @@ -2032,12 +2030,12 @@ 
dnode_rele_task(void *arg) objset_t *os = uua->uua_os; multilist_sublist_t *list = - multilist_sublist_lock(os->os_synced_dnodes, uua->uua_sublist_idx); + multilist_sublist_lock(&os->os_synced_dnodes, uua->uua_sublist_idx); dnode_t *dn; while ((dn = multilist_sublist_head(list)) != NULL) { multilist_sublist_remove(list, dn); - dnode_rele(dn, os->os_synced_dnodes); + dnode_rele(dn, &os->os_synced_dnodes); } multilist_sublist_unlock(list); kmem_free(uua, sizeof (*uua)); @@ -2093,7 +2091,7 @@ dmu_objset_sync_done(objset_t *os, dmu_tx_t *tx) { boolean_t need_userquota = dmu_objset_do_userquota_updates_prep(os, tx); - int num_sublists = multilist_get_num_sublists(os->os_synced_dnodes); + int num_sublists = multilist_get_num_sublists(&os->os_synced_dnodes); for (int i = 0; i < num_sublists; i++) { userquota_updates_arg_t *uua = kmem_alloc(sizeof (*uua), KM_SLEEP); diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index 0fc788e28f..8434e72aa4 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -1671,7 +1671,7 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx) */ dmu_objset_userquota_get_ids(dn, B_TRUE, tx); - multilist_t *dirtylist = os->os_dirty_dnodes[txg & TXG_MASK]; + multilist_t *dirtylist = &os->os_dirty_dnodes[txg & TXG_MASK]; multilist_sublist_t *mls = multilist_sublist_lock_obj(dirtylist, dn); /* diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index 6da5faf01e..9b9bb42287 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -2267,8 +2267,7 @@ dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx) dsl_bookmark_sync_done(ds, tx); - multilist_destroy(os->os_synced_dnodes); - os->os_synced_dnodes = NULL; + multilist_destroy(&os->os_synced_dnodes); if (os->os_encrypted) os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_FALSE; diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c index c770eafa75..e66c136a9e 100644 --- a/module/zfs/dsl_pool.c +++ b/module/zfs/dsl_pool.c @@ -568,8 +568,7 @@ dsl_pool_sync_mos(dsl_pool_t *dp, 
dmu_tx_t *tx) VERIFY0(zio_wait(zio)); dmu_objset_sync_done(dp->dp_meta_objset, tx); taskq_wait(dp->dp_sync_taskq); - multilist_destroy(dp->dp_meta_objset->os_synced_dnodes); - dp->dp_meta_objset->os_synced_dnodes = NULL; + multilist_destroy(&dp->dp_meta_objset->os_synced_dnodes); dprintf_bp(&dp->dp_meta_rootbp, "meta objset rootbp is %s", ""); spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp); diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index 3b2b79b2f4..e588765b33 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -416,7 +416,7 @@ metaslab_class_create(spa_t *spa, metaslab_ops_t *ops) mc->mc_spa = spa; mc->mc_ops = ops; mutex_init(&mc->mc_lock, NULL, MUTEX_DEFAULT, NULL); - mc->mc_metaslab_txg_list = multilist_create(sizeof (metaslab_t), + multilist_create(&mc->mc_metaslab_txg_list, sizeof (metaslab_t), offsetof(metaslab_t, ms_class_txg_node), metaslab_idx_func); for (int i = 0; i < spa->spa_alloc_count; i++) { metaslab_class_allocator_t *mca = &mc->mc_allocator[i]; @@ -443,7 +443,7 @@ metaslab_class_destroy(metaslab_class_t *mc) zfs_refcount_destroy(&mca->mca_alloc_slots); } mutex_destroy(&mc->mc_lock); - multilist_destroy(mc->mc_metaslab_txg_list); + multilist_destroy(&mc->mc_metaslab_txg_list); kmem_free(mc, offsetof(metaslab_class_t, mc_allocator[spa->spa_alloc_count])); } @@ -639,7 +639,7 @@ metaslab_class_expandable_space(metaslab_class_t *mc) void metaslab_class_evict_old(metaslab_class_t *mc, uint64_t txg) { - multilist_t *ml = mc->mc_metaslab_txg_list; + multilist_t *ml = &mc->mc_metaslab_txg_list; for (int i = 0; i < multilist_get_num_sublists(ml); i++) { multilist_sublist_t *mls = multilist_sublist_lock(ml, i); metaslab_t *msp = multilist_sublist_head(mls); @@ -1139,7 +1139,7 @@ metaslab_group_remove(metaslab_group_t *mg, metaslab_t *msp) metaslab_class_t *mc = msp->ms_group->mg_class; multilist_sublist_t *mls = - multilist_sublist_lock_obj(mc->mc_metaslab_txg_list, msp); + 
multilist_sublist_lock_obj(&mc->mc_metaslab_txg_list, msp); if (multilist_link_active(&msp->ms_class_txg_node)) multilist_sublist_remove(mls, msp); multilist_sublist_unlock(mls); @@ -2175,20 +2175,20 @@ metaslab_potentially_evict(metaslab_class_t *mc) uint64_t size = spl_kmem_cache_entry_size(zfs_btree_leaf_cache); int tries = 0; for (; allmem * zfs_metaslab_mem_limit / 100 < inuse * size && - tries < multilist_get_num_sublists(mc->mc_metaslab_txg_list) * 2; + tries < multilist_get_num_sublists(&mc->mc_metaslab_txg_list) * 2; tries++) { unsigned int idx = multilist_get_random_index( - mc->mc_metaslab_txg_list); + &mc->mc_metaslab_txg_list); multilist_sublist_t *mls = - multilist_sublist_lock(mc->mc_metaslab_txg_list, idx); + multilist_sublist_lock(&mc->mc_metaslab_txg_list, idx); metaslab_t *msp = multilist_sublist_head(mls); multilist_sublist_unlock(mls); while (msp != NULL && allmem * zfs_metaslab_mem_limit / 100 < inuse * size) { VERIFY3P(mls, ==, multilist_sublist_lock( - mc->mc_metaslab_txg_list, idx)); + &mc->mc_metaslab_txg_list, idx)); ASSERT3U(idx, ==, - metaslab_idx_func(mc->mc_metaslab_txg_list, msp)); + metaslab_idx_func(&mc->mc_metaslab_txg_list, msp)); if (!multilist_link_active(&msp->ms_class_txg_node)) { multilist_sublist_unlock(mls); @@ -2535,7 +2535,7 @@ metaslab_unload(metaslab_t *msp) if (msp->ms_group != NULL) { metaslab_class_t *mc = msp->ms_group->mg_class; multilist_sublist_t *mls = - multilist_sublist_lock_obj(mc->mc_metaslab_txg_list, msp); + multilist_sublist_lock_obj(&mc->mc_metaslab_txg_list, msp); if (multilist_link_active(&msp->ms_class_txg_node)) multilist_sublist_remove(mls, msp); multilist_sublist_unlock(mls); @@ -2600,7 +2600,7 @@ metaslab_set_selected_txg(metaslab_t *msp, uint64_t txg) ASSERT(MUTEX_HELD(&msp->ms_lock)); metaslab_class_t *mc = msp->ms_group->mg_class; multilist_sublist_t *mls = - multilist_sublist_lock_obj(mc->mc_metaslab_txg_list, msp); + multilist_sublist_lock_obj(&mc->mc_metaslab_txg_list, msp); if 
(multilist_link_active(&msp->ms_class_txg_node)) multilist_sublist_remove(mls, msp); msp->ms_selected_txg = txg; @@ -5682,7 +5682,7 @@ metaslab_claim_concrete(vdev_t *vd, uint64_t offset, uint64_t size, if (spa_writeable(spa)) { /* don't dirty if we're zdb(8) */ metaslab_class_t *mc = msp->ms_group->mg_class; multilist_sublist_t *mls = - multilist_sublist_lock_obj(mc->mc_metaslab_txg_list, msp); + multilist_sublist_lock_obj(&mc->mc_metaslab_txg_list, msp); if (!multilist_link_active(&msp->ms_class_txg_node)) { msp->ms_selected_txg = txg; multilist_sublist_insert_head(mls, msp); diff --git a/module/zfs/multilist.c b/module/zfs/multilist.c index 36c0d33bf1..eeac73bd7a 100644 --- a/module/zfs/multilist.c +++ b/module/zfs/multilist.c @@ -68,8 +68,8 @@ multilist_d2l(multilist_t *ml, void *obj) * requirement, but a general rule of thumb in order to garner the * best multi-threaded performance out of the data structure. */ -static multilist_t * -multilist_create_impl(size_t size, size_t offset, +static void +multilist_create_impl(multilist_t *ml, size_t size, size_t offset, unsigned int num, multilist_sublist_index_func_t *index_func) { ASSERT3U(size, >, 0); @@ -77,7 +77,6 @@ multilist_create_impl(size_t size, size_t offset, ASSERT3U(num, >, 0); ASSERT3P(index_func, !=, NULL); - multilist_t *ml = kmem_alloc(sizeof (*ml), KM_SLEEP); ml->ml_offset = offset; ml->ml_num_sublists = num; ml->ml_index_func = index_func; @@ -92,7 +91,6 @@ multilist_create_impl(size_t size, size_t offset, mutex_init(&mls->mls_lock, NULL, MUTEX_NOLOCKDEP, NULL); list_create(&mls->mls_list, size, offset); } - return (ml); } /* @@ -103,8 +101,8 @@ multilist_create_impl(size_t size, size_t offset, * reserve the RAM necessary to create the extra slots for additional CPUs up * front, and dynamically adding them is a complex task. 
*/ -multilist_t * -multilist_create(size_t size, size_t offset, +void +multilist_create(multilist_t *ml, size_t size, size_t offset, multilist_sublist_index_func_t *index_func) { int num_sublists; @@ -115,7 +113,7 @@ multilist_create(size_t size, size_t offset, num_sublists = MAX(boot_ncpus, 4); } - return (multilist_create_impl(size, offset, num_sublists, index_func)); + multilist_create_impl(ml, size, offset, num_sublists, index_func); } /* @@ -141,7 +139,7 @@ multilist_destroy(multilist_t *ml) ml->ml_num_sublists = 0; ml->ml_offset = 0; - kmem_free(ml, sizeof (multilist_t)); + ml->ml_sublists = NULL; } /*