From 589cea17a910f24c1382e44fc35bdc483f0fa8a1 Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Mon, 10 Apr 2023 20:02:51 +1000 Subject: [PATCH] dmu_tx_wait: handle pool suspension when failmode=continue Let txg_wait_synced_tx fail, so the caller can retry. Signed-off-by: Rob Norris (cherry picked from commit d560d64dbdf853d8fb9e18fc7570bd309091b2e4) --- module/zfs/dmu_tx.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c index 907c488977..80ba8d21f2 100644 --- a/module/zfs/dmu_tx.c +++ b/module/zfs/dmu_tx.c @@ -1192,8 +1192,19 @@ dmu_tx_wait_flags(dmu_tx_t *tx, txg_wait_flag_t flags) void dmu_tx_wait(dmu_tx_t *tx) { - - return (dmu_tx_wait_flags(tx, TXG_WAIT_F_NONE)); + /* + * If we're in a non-blocking failmode, we call dmu_tx_wait_flags() with + * NOSUSPEND to ensure that if we end up in txg_wait_synced_tx(), we + * don't get stuck there. + * + * If the pool does suspend and we're in failmode=continue, the caller + * will call dmu_tx_abort() and then try again. Eventually, it'll land + * back in dmu_tx_assign(NOWAIT), which will return EIO, and the caller + * will enter its error path. + */ + (void) dmu_tx_wait_flags(tx, + (spa_get_failmode(tx->tx_pool->dp_spa) == ZIO_FAILURE_MODE_CONTINUE) + ? TXG_WAIT_F_NOSUSPEND : 0); } static void