From 687e4d7f9cadc17a4b8ebb67bc156b7b0a699192 Mon Sep 17 00:00:00 2001 From: Don Brady Date: Tue, 5 Dec 2023 15:27:56 -0700 Subject: [PATCH] Extend import_progress kstat with a notes field Detail the import progress of log spacemaps as they can take a very long time. Also grab the spa_note() messages to, as they provide insight into what is happening Sponsored-By: OpenDrives Inc. Sponsored-By: Klara Inc. Reviewed-by: Alexander Motin Reviewed-by: Brian Behlendorf Signed-off-by: Don Brady Co-authored-by: Allan Jude Closes #15539 --- include/sys/spa.h | 4 + module/zfs/spa.c | 41 +++++- module/zfs/spa_log_spacemap.c | 12 +- module/zfs/spa_misc.c | 74 +++++++++- tests/runfiles/common.run | 3 +- .../zpool_import/zpool_import_status.ksh | 132 ++++++++++++++++++ 6 files changed, 256 insertions(+), 10 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_status.ksh diff --git a/include/sys/spa.h b/include/sys/spa.h index cef7933df4..08099cd4fa 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -971,6 +971,10 @@ extern int spa_import_progress_set_max_txg(uint64_t pool_guid, uint64_t max_txg); extern int spa_import_progress_set_state(uint64_t pool_guid, spa_load_state_t spa_load_state); +extern void spa_import_progress_set_notes(spa_t *spa, + const char *fmt, ...) __printflike(2, 3); +extern void spa_import_progress_set_notes_nolog(spa_t *spa, + const char *fmt, ...) __printflike(2, 3); /* Pool configuration locks */ extern int spa_config_tryenter(spa_t *spa, int locks, const void *tag, diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 5161326c03..2ca5e7bac1 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -3109,6 +3109,7 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type) spa->spa_load_state = state; (void) spa_import_progress_set_state(spa_guid(spa), spa_load_state(spa)); + spa_import_progress_set_notes(spa, "spa_load()"); gethrestime(&spa->spa_loaded_ts); error = spa_load_impl(spa, type, &ereport); @@ -3337,7 +3338,7 @@ spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config) uint64_t mmp_config = ub->ub_mmp_config; uint16_t mmp_seq = MMP_SEQ_VALID(ub) ? MMP_SEQ(ub) : 0; uint64_t import_delay; - hrtime_t import_expire; + hrtime_t import_expire, now; nvlist_t *mmp_label = NULL; vdev_t *rvd = spa->spa_root_vdev; kcondvar_t cv; @@ -3375,7 +3376,17 @@ spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config) import_expire = gethrtime() + import_delay; - while (gethrtime() < import_expire) { + spa_import_progress_set_notes(spa, "Checking MMP activity, waiting " + "%llu ms", (u_longlong_t)NSEC2MSEC(import_delay)); + + int interations = 0; + while ((now = gethrtime()) < import_expire) { + if (interations++ % 30 == 0) { + spa_import_progress_set_notes(spa, "Checking MMP " + "activity, %llu ms remaining", + (u_longlong_t)NSEC2MSEC(import_expire - now)); + } + (void) spa_import_progress_set_mmp_check(spa_guid(spa), NSEC2SEC(import_expire - gethrtime())); @@ -4995,6 +5006,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) /* * Retrieve the checkpoint txg if the pool has a checkpoint. */ + spa_import_progress_set_notes(spa, "Loading checkpoint txg"); error = spa_ld_read_checkpoint_txg(spa); if (error != 0) return (error); @@ -5007,6 +5019,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * initiated. Otherwise we could be reading from indirect vdevs before * we have loaded their mappings. */ + spa_import_progress_set_notes(spa, "Loading indirect vdev metadata"); error = spa_ld_open_indirect_vdev_metadata(spa); if (error != 0) return (error); @@ -5015,6 +5028,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * Retrieve the full list of active features from the MOS and check if * they are all supported. */ + spa_import_progress_set_notes(spa, "Checking feature flags"); error = spa_ld_check_features(spa, &missing_feat_write); if (error != 0) return (error); @@ -5023,6 +5037,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * Load several special directories from the MOS needed by the dsl_pool * layer. */ + spa_import_progress_set_notes(spa, "Loading special MOS directories"); error = spa_ld_load_special_directories(spa); if (error != 0) return (error); @@ -5030,6 +5045,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) /* * Retrieve pool properties from the MOS. */ + spa_import_progress_set_notes(spa, "Loading properties"); error = spa_ld_get_props(spa); if (error != 0) return (error); @@ -5038,6 +5054,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * Retrieve the list of auxiliary devices - cache devices and spares - * and open them. */ + spa_import_progress_set_notes(spa, "Loading AUX vdevs"); error = spa_ld_open_aux_vdevs(spa, type); if (error != 0) return (error); @@ -5046,14 +5063,17 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * Load the metadata for all vdevs. Also check if unopenable devices * should be autoreplaced. */ + spa_import_progress_set_notes(spa, "Loading vdev metadata"); error = spa_ld_load_vdev_metadata(spa); if (error != 0) return (error); + spa_import_progress_set_notes(spa, "Loading dedup tables"); error = spa_ld_load_dedup_tables(spa); if (error != 0) return (error); + spa_import_progress_set_notes(spa, "Loading BRT"); error = spa_ld_load_brt(spa); if (error != 0) return (error); @@ -5062,6 +5082,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * Verify the logs now to make sure we don't have any unexpected errors * when we claim log blocks later. */ + spa_import_progress_set_notes(spa, "Verifying Log Devices"); error = spa_ld_verify_logs(spa, type, ereport); if (error != 0) return (error); @@ -5083,6 +5104,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * state. When performing an extreme rewind, we verify the whole pool, * which can take a very long time. */ + spa_import_progress_set_notes(spa, "Verifying pool data"); error = spa_ld_verify_pool_data(spa); if (error != 0) return (error); @@ -5092,6 +5114,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * we write anything to the pool because we'd need to update the space * accounting using the deflated sizes. */ + spa_import_progress_set_notes(spa, "Calculating deflated space"); spa_update_dspace(spa); /* @@ -5099,6 +5122,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * pool. If we are importing the pool in read-write mode, a few * additional steps must be performed to finish the import. */ + spa_import_progress_set_notes(spa, "Starting import"); if (spa_writeable(spa) && (spa->spa_load_state == SPA_LOAD_RECOVER || spa->spa_load_max_txg == UINT64_MAX)) { uint64_t config_cache_txg = spa->spa_config_txg; @@ -5122,6 +5146,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) (u_longlong_t)spa->spa_uberblock.ub_checkpoint_txg); } + spa_import_progress_set_notes(spa, "Claiming ZIL blocks"); /* * Traverse the ZIL and claim all blocks. */ @@ -5141,6 +5166,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * will have been set for us by ZIL traversal operations * performed above. */ + spa_import_progress_set_notes(spa, "Syncing ZIL claims"); txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg); /* @@ -5148,6 +5174,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * next sync, we would update the config stored in vdev labels * and the cachefile (by default /etc/zfs/zpool.cache). */ + spa_import_progress_set_notes(spa, "Updating configs"); spa_ld_check_for_config_update(spa, config_cache_txg, update_config_cache); @@ -5156,6 +5183,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * Then check all DTLs to see if anything needs resilvering. * The resilver will be deferred if a rebuild was started. */ + spa_import_progress_set_notes(spa, "Starting resilvers"); if (vdev_rebuild_active(spa->spa_root_vdev)) { vdev_rebuild_restart(spa); } else if (!dsl_scan_resilvering(spa->spa_dsl_pool) && @@ -5169,6 +5197,8 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) */ spa_history_log_version(spa, "open", NULL); + spa_import_progress_set_notes(spa, + "Restarting device removals"); spa_restart_removal(spa); spa_spawn_aux_threads(spa); @@ -5181,19 +5211,26 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * auxiliary threads above (from which the livelist * deletion zthr is part of). */ + spa_import_progress_set_notes(spa, + "Cleaning up inconsistent objsets"); (void) dmu_objset_find(spa_name(spa), dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN); /* * Clean up any stale temporary dataset userrefs. */ + spa_import_progress_set_notes(spa, + "Cleaning up temporary userrefs"); dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool); spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); + spa_import_progress_set_notes(spa, "Restarting initialize"); vdev_initialize_restart(spa->spa_root_vdev); + spa_import_progress_set_notes(spa, "Restarting TRIM"); vdev_trim_restart(spa->spa_root_vdev); vdev_autotrim_restart(spa); spa_config_exit(spa, SCL_CONFIG, FTAG); + spa_import_progress_set_notes(spa, "Finished importing"); } spa_import_progress_remove(spa_guid(spa)); diff --git a/module/zfs/spa_log_spacemap.c b/module/zfs/spa_log_spacemap.c index cf05158b63..873089a53e 100644 --- a/module/zfs/spa_log_spacemap.c +++ b/module/zfs/spa_log_spacemap.c @@ -1153,6 +1153,7 @@ spa_ld_log_sm_data(spa_t *spa) uint_t pn = 0; uint64_t ps = 0; + uint64_t nsm = 0; psls = sls = avl_first(&spa->spa_sm_logs_by_txg); while (sls != NULL) { /* Prefetch log spacemaps up to 16 TXGs or MBs ahead. */ @@ -1185,6 +1186,10 @@ spa_ld_log_sm_data(spa_t *spa) summary_add_data(spa, sls->sls_txg, sls->sls_mscount, 0, sls->sls_nblocks); + spa_import_progress_set_notes_nolog(spa, + "Read %llu of %lu log space maps", (u_longlong_t)nsm, + avl_numnodes(&spa->spa_sm_logs_by_txg)); + struct spa_ld_log_sm_arg vla = { .slls_spa = spa, .slls_txg = sls->sls_txg @@ -1200,6 +1205,7 @@ spa_ld_log_sm_data(spa_t *spa) pn--; ps -= space_map_length(sls->sls_sm); + nsm++; space_map_close(sls->sls_sm); sls->sls_sm = NULL; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls); @@ -1210,11 +1216,11 @@ spa_ld_log_sm_data(spa_t *spa) hrtime_t read_logs_endtime = gethrtime(); spa_load_note(spa, - "read %llu log space maps (%llu total blocks - blksz = %llu bytes) " - "in %lld ms", (u_longlong_t)avl_numnodes(&spa->spa_sm_logs_by_txg), + "Read %lu log space maps (%llu total blocks - blksz = %llu bytes) " + "in %lld ms", avl_numnodes(&spa->spa_sm_logs_by_txg), (u_longlong_t)spa_log_sm_nblocks(spa), (u_longlong_t)zfs_log_sm_blksz, - (longlong_t)((read_logs_endtime - read_logs_starttime) / 1000000)); + (longlong_t)NSEC2MSEC(read_logs_endtime - read_logs_starttime)); out: if (error != 0) { diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 3990af98c7..1e5ab59eb4 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -426,6 +426,8 @@ spa_load_note(spa_t *spa, const char *fmt, ...) zfs_dbgmsg("spa_load(%s, config %s): %s", spa->spa_name, spa->spa_trust_config ? "trusted" : "untrusted", buf); + + spa_import_progress_set_notes_nolog(spa, "%s", buf); } /* @@ -2184,6 +2186,7 @@ typedef struct spa_import_progress { uint64_t pool_guid; /* unique id for updates */ char *pool_name; spa_load_state_t spa_load_state; + char *spa_load_notes; uint64_t mmp_sec_remaining; /* MMP activity check */ uint64_t spa_load_max_txg; /* rewind txg */ procfs_list_node_t smh_node; @@ -2194,9 +2197,9 @@ spa_history_list_t *spa_import_progress_list = NULL; static int spa_import_progress_show_header(struct seq_file *f) { - seq_printf(f, "%-20s %-14s %-14s %-12s %s\n", "pool_guid", + seq_printf(f, "%-20s %-14s %-14s %-12s %-16s %s\n", "pool_guid", "load_state", "multihost_secs", "max_txg", - "pool_name"); + "pool_name", "notes"); return (0); } @@ -2205,11 +2208,12 @@ spa_import_progress_show(struct seq_file *f, void *data) { spa_import_progress_t *sip = (spa_import_progress_t *)data; - seq_printf(f, "%-20llu %-14llu %-14llu %-12llu %s\n", + seq_printf(f, "%-20llu %-14llu %-14llu %-12llu %-16s %s\n", (u_longlong_t)sip->pool_guid, (u_longlong_t)sip->spa_load_state, (u_longlong_t)sip->mmp_sec_remaining, (u_longlong_t)sip->spa_load_max_txg, - (sip->pool_name ? sip->pool_name : "-")); + (sip->pool_name ? sip->pool_name : "-"), + (sip->spa_load_notes ? sip->spa_load_notes : "-")); return (0); } @@ -2223,6 +2227,8 @@ spa_import_progress_truncate(spa_history_list_t *shl, unsigned int size) sip = list_remove_head(&shl->procfs_list.pl_list); if (sip->pool_name) spa_strfree(sip->pool_name); + if (sip->spa_load_notes) + kmem_strfree(sip->spa_load_notes); kmem_free(sip, sizeof (spa_import_progress_t)); shl->size--; } @@ -2278,6 +2284,10 @@ spa_import_progress_set_state(uint64_t pool_guid, sip = list_prev(&shl->procfs_list.pl_list, sip)) { if (sip->pool_guid == pool_guid) { sip->spa_load_state = load_state; + if (sip->spa_load_notes != NULL) { + kmem_strfree(sip->spa_load_notes); + sip->spa_load_notes = NULL; + } error = 0; break; } @@ -2287,6 +2297,59 @@ spa_import_progress_set_state(uint64_t pool_guid, return (error); } +static void +spa_import_progress_set_notes_impl(spa_t *spa, boolean_t log_dbgmsg, + const char *fmt, va_list adx) +{ + spa_history_list_t *shl = spa_import_progress_list; + spa_import_progress_t *sip; + uint64_t pool_guid = spa_guid(spa); + + if (shl->size == 0) + return; + + char *notes = kmem_vasprintf(fmt, adx); + + mutex_enter(&shl->procfs_list.pl_lock); + for (sip = list_tail(&shl->procfs_list.pl_list); sip != NULL; + sip = list_prev(&shl->procfs_list.pl_list, sip)) { + if (sip->pool_guid == pool_guid) { + if (sip->spa_load_notes != NULL) { + kmem_strfree(sip->spa_load_notes); + sip->spa_load_notes = NULL; + } + sip->spa_load_notes = notes; + if (log_dbgmsg) + zfs_dbgmsg("'%s' %s", sip->pool_name, notes); + notes = NULL; + break; + } + } + mutex_exit(&shl->procfs_list.pl_lock); + if (notes != NULL) + kmem_strfree(notes); +} + +void +spa_import_progress_set_notes(spa_t *spa, const char *fmt, ...) +{ + va_list adx; + + va_start(adx, fmt); + spa_import_progress_set_notes_impl(spa, B_TRUE, fmt, adx); + va_end(adx); +} + +void +spa_import_progress_set_notes_nolog(spa_t *spa, const char *fmt, ...) +{ + va_list adx; + + va_start(adx, fmt); + spa_import_progress_set_notes_impl(spa, B_FALSE, fmt, adx); + va_end(adx); +} + int spa_import_progress_set_max_txg(uint64_t pool_guid, uint64_t load_max_txg) { @@ -2355,6 +2418,7 @@ spa_import_progress_add(spa_t *spa) poolname = spa_name(spa); sip->pool_name = spa_strdup(poolname); sip->spa_load_state = spa_load_state(spa); + sip->spa_load_notes = NULL; mutex_enter(&shl->procfs_list.pl_lock); procfs_list_add(&shl->procfs_list, sip); @@ -2374,6 +2438,8 @@ spa_import_progress_remove(uint64_t pool_guid) if (sip->pool_guid == pool_guid) { if (sip->pool_name) spa_strfree(sip->pool_name); + if (sip->spa_load_notes) + spa_strfree(sip->spa_load_notes); list_remove(&shl->procfs_list.pl_list, sip); shl->size--; kmem_free(sip, sizeof (spa_import_progress_t)); diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 20c9b823c6..dd44b63aaf 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -425,7 +425,8 @@ tests = ['zpool_import_001_pos', 'zpool_import_002_pos', 'import_devices_missing', 'import_log_missing', 'import_paths_changed', 'import_rewind_config_changed', - 'import_rewind_device_replaced'] + 'import_rewind_device_replaced', + 'zpool_import_status'] tags = ['functional', 'cli_root', 'zpool_import'] timeout = 1200 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_status.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_status.ksh new file mode 100755 index 0000000000..c96961bf64 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_status.ksh @@ -0,0 +1,132 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2023 Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg + +# +# DESCRIPTION: +# During a pool import, the 'import_progress' kstat contains details +# on the import progress. +# +# STRATEGY: +# 1. Create test pool with several devices +# 2. Generate some ZIL records and spacemap logs +# 3. Export the pool +# 4. Import the pool in the background and monitor the kstat content +# 5. Check the zfs debug messages for import progress +# + +verify_runnable "global" + +function cleanup +{ + log_must set_tunable64 KEEP_LOG_SPACEMAPS_AT_EXPORT 0 + log_must set_tunable64 METASLAB_DEBUG_LOAD 0 + + destroy_pool $TESTPOOL1 +} + +log_assert "During a pool import, the 'import_progress' kstat contains " \ + "notes on the progress" + +log_onexit cleanup + +log_must zpool create $TESTPOOL1 $VDEV0 $VDEV1 $VDEV2 +typeset guid=$(zpool get -H -o value guid $TESTPOOL1) + +log_must zfs create -o recordsize=8k $TESTPOOL1/fs +# +# This dd command works around an issue where ZIL records aren't created +# after freezing the pool unless a ZIL header already exists. Create a file +# synchronously to force ZFS to write one out. +# +log_must dd if=/dev/zero of=/$TESTPOOL1/fs/sync conv=fsync bs=1 count=1 + +# +# Overwrite some blocks to populate spacemap logs +# +log_must dd if=/dev/urandom of=/$TESTPOOL1/fs/00 bs=1M count=200 +sync_all_pools +log_must dd if=/dev/urandom of=/$TESTPOOL1/fs/00 bs=1M count=200 +sync_all_pools + +# +# Freeze the pool to retain intent log records +# +log_must zpool freeze $TESTPOOL1 + +# fill_fs [destdir] [dirnum] [filenum] [bytes] [num_writes] [data] +log_must fill_fs /$TESTPOOL1/fs 1 2000 100 1024 R + +log_must zpool list -v $TESTPOOL1 + +# +# Unmount filesystem and export the pool +# +# At this stage the zfs intent log contains +# a set of records to replay. +# +log_must zfs unmount /$TESTPOOL1/fs + +log_must set_tunable64 KEEP_LOG_SPACEMAPS_AT_EXPORT 1 +log_must zpool export $TESTPOOL1 + +log_must set_tunable64 METASLAB_DEBUG_LOAD 1 +log_note "Starting zpool import in background at" $(date +'%H:%M:%S') +zpool import -d $DEVICE_DIR -f $guid & +pid=$! + +# +# capture progress until import is finished +# +log_note waiting for pid $pid to exit +kstat import_progress +while [[ -d /proc/"$pid" ]]; do + line=$(kstat import_progress | grep -v pool_guid) + if [[ -n $line ]]; then + echo $line + fi + if [[ -f /$TESTPOOL1/fs/00 ]]; then + break; + fi + sleep 0.0001 +done +log_note "zpool import completed at" $(date +'%H:%M:%S') + +entries=$(kstat dbgmsg | grep "spa_import_progress_set_notes_impl(): 'testpool1'" | wc -l) +log_note "found $entries progress notes in dbgmsg" +log_must test $entries -gt 20 + +log_must zpool status $TESTPOOL1 + +log_pass "During a pool import, the 'import_progress' kstat contains " \ + "notes on the progress"