From e197bb24f1857c823b44c2175b2318c472d79731 Mon Sep 17 00:00:00 2001 From: Alan Somers Date: Tue, 8 Nov 2022 13:38:08 -0700 Subject: [PATCH] Optionally skip zil_close during zvol_create_minor_impl If there were no zil entries to replay, skip zil_close. zil_close waits for a transaction to sync. That can take several seconds, for example during pool import of a resilvering pool. Skipping zil_close can cut the time for "zpool import" from 2 hours to 45 seconds on a resilvering pool with a thousand zvols. Reviewed-by: Richard Yao Reviewed-by: Alexander Motin Reviewed-by: Ryan Moeller Sponsored-by: Axcient Closes #13999 Closes #14015 --- include/sys/zil.h | 4 ++-- module/os/freebsd/zfs/zvol_os.c | 8 +++++--- module/os/linux/zfs/zvol_os.c | 8 +++++--- module/zfs/zil.c | 15 ++++++++++----- 4 files changed, 22 insertions(+), 13 deletions(-) diff --git a/include/sys/zil.h b/include/sys/zil.h index 9591fb4f64..9ac4210433 100644 --- a/include/sys/zil.h +++ b/include/sys/zil.h @@ -539,10 +539,10 @@ extern zilog_t *zil_open(objset_t *os, zil_get_data_t *get_data, zil_sums_t *zil_sums); extern void zil_close(zilog_t *zilog); -extern void zil_replay(objset_t *os, void *arg, +extern boolean_t zil_replay(objset_t *os, void *arg, zil_replay_func_t *const replay_func[TX_MAX_TYPE]); extern boolean_t zil_replaying(zilog_t *zilog, dmu_tx_t *tx); -extern void zil_destroy(zilog_t *zilog, boolean_t keep_first); +extern boolean_t zil_destroy(zilog_t *zilog, boolean_t keep_first); extern void zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx); extern itx_t *zil_itx_create(uint64_t txtype, size_t lrsize); diff --git a/module/os/freebsd/zfs/zvol_os.c b/module/os/freebsd/zfs/zvol_os.c index 8d2a6d7762..631e020db9 100644 --- a/module/os/freebsd/zfs/zvol_os.c +++ b/module/os/freebsd/zfs/zvol_os.c @@ -1386,6 +1386,7 @@ zvol_os_create_minor(const char *name) uint64_t volsize; uint64_t volmode, hash; int error; + bool replayed_zil = B_FALSE; ZFS_LOG(1, "Creating ZVOL %s...", name); hash = zvol_name_hash(name); @@ -1490,11 +1491,12 @@ zvol_os_create_minor(const char *name) zv->zv_zilog = zil_open(os, zvol_get_data, &zv->zv_kstat.dk_zil_sums); if (spa_writeable(dmu_objset_spa(os))) { if (zil_replay_disable) - zil_destroy(zv->zv_zilog, B_FALSE); + replayed_zil = zil_destroy(zv->zv_zilog, B_FALSE); else - zil_replay(os, zv, zvol_replay_vector); + replayed_zil = zil_replay(os, zv, zvol_replay_vector); } - zil_close(zv->zv_zilog); + if (replayed_zil) + zil_close(zv->zv_zilog); zv->zv_zilog = NULL; /* TODO: prefetch for geom tasting */ diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c index d76bab3c01..01e6456207 100644 --- a/module/os/linux/zfs/zvol_os.c +++ b/module/os/linux/zfs/zvol_os.c @@ -1279,6 +1279,7 @@ zvol_os_create_minor(const char *name) int error = 0; int idx; uint64_t hash = zvol_name_hash(name); + bool replayed_zil = B_FALSE; if (zvol_inhibit_dev) return (0); @@ -1420,11 +1421,12 @@ zvol_os_create_minor(const char *name) zv->zv_zilog = zil_open(os, zvol_get_data, &zv->zv_kstat.dk_zil_sums); if (spa_writeable(dmu_objset_spa(os))) { if (zil_replay_disable) - zil_destroy(zv->zv_zilog, B_FALSE); + replayed_zil = zil_destroy(zv->zv_zilog, B_FALSE); else - zil_replay(os, zv, zvol_replay_vector); + replayed_zil = zil_replay(os, zv, zvol_replay_vector); } - zil_close(zv->zv_zilog); + if (replayed_zil) + zil_close(zv->zv_zilog); zv->zv_zilog = NULL; /* diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 59c7595f6d..02e6f4b83b 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -887,8 +887,9 @@ zil_create(zilog_t *zilog) * txg_wait_synced() here either when keep_first is set, because both * zil_create() and zil_destroy() will wait for any in-progress destroys * to complete. + * Return B_TRUE if there were any entries to replay. */ -void +boolean_t zil_destroy(zilog_t *zilog, boolean_t keep_first) { const zil_header_t *zh = zilog->zl_header; @@ -904,7 +905,7 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first) zilog->zl_old_header = *zh; /* debugging aid */ if (BP_IS_HOLE(&zh->zh_log)) - return; + return (B_FALSE); tx = dmu_tx_create(zilog->zl_os); VERIFY0(dmu_tx_assign(tx, TXG_WAIT)); @@ -937,6 +938,8 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first) mutex_exit(&zilog->zl_lock); dmu_tx_commit(tx); + + return (B_TRUE); } void @@ -3849,8 +3852,9 @@ zil_incr_blks(zilog_t *zilog, const blkptr_t *bp, void *arg, uint64_t claim_txg) /* * If this dataset has a non-empty intent log, replay it and destroy it. + * Return B_TRUE if there were any entries to replay. */ -void +boolean_t zil_replay(objset_t *os, void *arg, zil_replay_func_t *const replay_func[TX_MAX_TYPE]) { @@ -3859,8 +3863,7 @@ zil_replay(objset_t *os, void *arg, zil_replay_arg_t zr; if ((zh->zh_flags & ZIL_REPLAY_NEEDED) == 0) { - zil_destroy(zilog, B_TRUE); - return; + return (zil_destroy(zilog, B_TRUE)); } zr.zr_replay = replay_func; @@ -3883,6 +3886,8 @@ zil_replay(objset_t *os, void *arg, zil_destroy(zilog, B_FALSE); txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg); zilog->zl_replay = B_FALSE; + + return (B_TRUE); } boolean_t