diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index c436139a22..869b7a07ff 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -910,11 +910,8 @@ dump_metaslab(metaslab_t *msp)
 
 	if (dump_opt['m'] > 2 && !dump_opt['L']) {
 		mutex_enter(&msp->ms_lock);
-		metaslab_load_wait(msp);
-		if (!msp->ms_loaded) {
-			VERIFY0(metaslab_load(msp));
-			range_tree_stat_verify(msp->ms_allocatable);
-		}
+		VERIFY0(metaslab_load(msp));
+		range_tree_stat_verify(msp->ms_allocatable);
 		dump_metaslab_stats(msp);
 		metaslab_unload(msp);
 		mutex_exit(&msp->ms_lock);
diff --git a/include/sys/metaslab.h b/include/sys/metaslab.h
index fca233a389..f47bc19cfc 100644
--- a/include/sys/metaslab.h
+++ b/include/sys/metaslab.h
@@ -49,7 +49,6 @@ int metaslab_init(metaslab_group_t *, uint64_t, uint64_t, uint64_t,
     metaslab_t **);
 void metaslab_fini(metaslab_t *);
 
-void metaslab_load_wait(metaslab_t *);
 int metaslab_load(metaslab_t *);
 void metaslab_unload(metaslab_t *);
 
diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h
index 3e32eace6d..137a847692 100644
--- a/include/sys/metaslab_impl.h
+++ b/include/sys/metaslab_impl.h
@@ -369,8 +369,8 @@ struct metaslab {
 	uint64_t	ms_initializing; /* leaves initializing this ms */
 
 	/*
-	 * We must hold both ms_lock and ms_group->mg_lock in order to
-	 * modify ms_loaded.
+	 * We must always hold the ms_lock when modifying ms_loaded
+	 * and ms_loading.
 	 */
 	boolean_t	ms_loaded;
 	boolean_t	ms_loading;
diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c
index 71688b4206..20e7f0ed38 100644
--- a/module/zfs/metaslab.c
+++ b/module/zfs/metaslab.c
@@ -1402,7 +1402,7 @@ metaslab_ops_t *zfs_metaslab_ops = &metaslab_ndf_ops;
 /*
  * Wait for any in-progress metaslab loads to complete.
  */
-void
+static void
 metaslab_load_wait(metaslab_t *msp)
 {
 	ASSERT(MUTEX_HELD(&msp->ms_lock));
@@ -1413,20 +1413,17 @@ metaslab_load_wait(metaslab_t *msp)
 	}
 }
 
-int
-metaslab_load(metaslab_t *msp)
+static int
+metaslab_load_impl(metaslab_t *msp)
 {
 	int error = 0;
-	boolean_t success = B_FALSE;
 
 	ASSERT(MUTEX_HELD(&msp->ms_lock));
-	ASSERT(!msp->ms_loaded);
-	ASSERT(!msp->ms_loading);
+	ASSERT(msp->ms_loading);
 
-	msp->ms_loading = B_TRUE;
 	/*
 	 * Nobody else can manipulate a loading metaslab, so it's now safe
-	 * to drop the lock. This way we don't have to hold the lock while
+	 * to drop the lock.  This way we don't have to hold the lock while
 	 * reading the spacemap from disk.
 	 */
 	mutex_exit(&msp->ms_lock);
@@ -1443,29 +1440,49 @@
 		    msp->ms_start, msp->ms_size);
 	}
 
-	success = (error == 0);
-
 	mutex_enter(&msp->ms_lock);
-	msp->ms_loading = B_FALSE;
 
-	if (success) {
-		ASSERT3P(msp->ms_group, !=, NULL);
-		msp->ms_loaded = B_TRUE;
+	if (error != 0)
+		return (error);
 
-		/*
-		 * If the metaslab already has a spacemap, then we need to
-		 * remove all segments from the defer tree; otherwise, the
-		 * metaslab is completely empty and we can skip this.
-		 */
-		if (msp->ms_sm != NULL) {
-			for (int t = 0; t < TXG_DEFER_SIZE; t++) {
-				range_tree_walk(msp->ms_defer[t],
-				    range_tree_remove, msp->ms_allocatable);
-			}
+	ASSERT3P(msp->ms_group, !=, NULL);
+	msp->ms_loaded = B_TRUE;
+
+	/*
+	 * If the metaslab already has a spacemap, then we need to
+	 * remove all segments from the defer tree; otherwise, the
+	 * metaslab is completely empty and we can skip this.
+	 */
+	if (msp->ms_sm != NULL) {
+		for (int t = 0; t < TXG_DEFER_SIZE; t++) {
+			range_tree_walk(msp->ms_defer[t],
+			    range_tree_remove, msp->ms_allocatable);
 		}
-		msp->ms_max_size = metaslab_block_maxsize(msp);
 	}
+	msp->ms_max_size = metaslab_block_maxsize(msp);
+
+	return (0);
+}
+
+int
+metaslab_load(metaslab_t *msp)
+{
+	ASSERT(MUTEX_HELD(&msp->ms_lock));
+
+	/*
+	 * There may be another thread loading the same metaslab, if that's
+	 * the case just wait until the other thread is done and return.
+	 */
+	metaslab_load_wait(msp);
+	if (msp->ms_loaded)
+		return (0);
+	VERIFY(!msp->ms_loading);
+
+	msp->ms_loading = B_TRUE;
+	int error = metaslab_load_impl(msp);
+	msp->ms_loading = B_FALSE;
 	cv_broadcast(&msp->ms_load_cv);
+
 	return (error);
 }
 
@@ -2041,13 +2058,10 @@ metaslab_activate(metaslab_t *msp, int allocator, uint64_t activation_weight)
 	ASSERT(MUTEX_HELD(&msp->ms_lock));
 
 	if ((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) {
-		int error = 0;
-		metaslab_load_wait(msp);
-		if (!msp->ms_loaded) {
-			if ((error = metaslab_load(msp)) != 0) {
-				metaslab_group_sort(msp->ms_group, msp, 0);
-				return (error);
-			}
+		int error = metaslab_load(msp);
+		if (error != 0) {
+			metaslab_group_sort(msp->ms_group, msp, 0);
+			return (error);
 		}
 		if ((msp->ms_weight & METASLAB_ACTIVE_MASK) != 0) {
 			/*
@@ -2161,9 +2175,7 @@ metaslab_preload(void *arg)
 	ASSERT(!MUTEX_HELD(&msp->ms_group->mg_lock));
 
 	mutex_enter(&msp->ms_lock);
-	metaslab_load_wait(msp);
-	if (!msp->ms_loaded)
-		(void) metaslab_load(msp);
+	(void) metaslab_load(msp);
 	msp->ms_selected_txg = spa_syncing_txg(spa);
 	mutex_exit(&msp->ms_lock);
 	spl_fstrans_unmark(cookie);
diff --git a/module/zfs/vdev_initialize.c b/module/zfs/vdev_initialize.c
index 939a5e9389..e68f23e3f4 100644
--- a/module/zfs/vdev_initialize.c
+++ b/module/zfs/vdev_initialize.c
@@ -362,16 +362,6 @@ vdev_initialize_ranges(vdev_t *vd, abd_t *data)
 	return (0);
 }
 
-static void
-vdev_initialize_ms_load(metaslab_t *msp)
-{
-	ASSERT(MUTEX_HELD(&msp->ms_lock));
-
-	metaslab_load_wait(msp);
-	if (!msp->ms_loaded)
-		VERIFY0(metaslab_load(msp));
-}
-
 static void
 vdev_initialize_mg_wait(metaslab_group_t *mg)
 {
@@ -494,7 +484,7 @@ vdev_initialize_calculate_progress(vdev_t *vd)
 		 * metaslab. Load it and walk the free tree for more accurate
 		 * progress estimation.
 		 */
-		vdev_initialize_ms_load(msp);
+		VERIFY0(metaslab_load(msp));
 
 		for (range_seg_t *rs = avl_first(&msp->ms_allocatable->rt_root);
 		    rs; rs = AVL_NEXT(&msp->ms_allocatable->rt_root, rs)) {
@@ -630,7 +620,7 @@ vdev_initialize_thread(void *arg)
 		vdev_initialize_ms_mark(msp);
 
 		mutex_enter(&msp->ms_lock);
-		vdev_initialize_ms_load(msp);
+		VERIFY0(metaslab_load(msp));
 
 		range_tree_walk(msp->ms_allocatable, vdev_initialize_range_add,
 		    vd);
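
Net effect for callers: metaslab_load() now encapsulates the wait-for-concurrent-load and already-loaded checks that every call site previously open-coded, so the old three-step pattern (metaslab_load_wait(), test ms_loaded, then load) collapses into a single call made under ms_lock. Below is a minimal sketch of a hypothetical caller under the new interface; example_with_loaded_metaslab() is illustrative only and not part of this patch:

static int
example_with_loaded_metaslab(metaslab_t *msp)
{
	mutex_enter(&msp->ms_lock);

	/*
	 * metaslab_load() internally performs metaslab_load_wait(),
	 * returns 0 right away if the metaslab is already loaded, and
	 * otherwise sets ms_loading and calls metaslab_load_impl().
	 * Callers no longer inspect ms_loaded or ms_loading themselves.
	 */
	int error = metaslab_load(msp);
	if (error != 0) {
		mutex_exit(&msp->ms_lock);
		return (error);
	}

	/* ms_allocatable is valid here and stays so while ms_lock is held. */
	mutex_exit(&msp->ms_lock);
	return (0);
}

This is the same pattern the patch applies to dump_metaslab(), metaslab_activate(), metaslab_preload(), and the vdev_initialize callers; centralizing the logic removes the possibility of a call site forgetting the ms_loaded recheck after metaslab_load_wait().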