From 88bb9a3adda0d880f388dd934653a5e4365cf4bd Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Tue, 25 Jul 2023 19:59:07 +1000 Subject: [PATCH] zil_fail: set fail state as early as possible zil_failed() is called unlocked, and itxg_failed is only checked with itxg_lock held. This was making the ZIL appear to be not failed even as zil_fail() was in progress, scanning the itx lists. With zil_failed() returning false, zil_commit() would continue to zil_commit_impl() and also start processing itx lists, racing each other for locks. So instead, set the fail state as early as possible, before we start processing the itx lists. This won't stop new itxs arriving on the itxgs proper, but it will avoid additional commit itxs being created and will stop any attempts to collect and commit them. (cherry picked from commit 17579a79a2b481e746879d5a033626754931441e) --- module/zfs/zil.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/module/zfs/zil.c b/module/zfs/zil.c index f1f6013de2..52272187c7 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -1118,6 +1118,23 @@ zil_fail(zilog_t *zilog) */ uint64_t highest_txg = last_synced_txg; + /* + * At the earliest opportunity, set all the failure state. We need + * zil_failed() in particular to return true so that zil_commit() does + * not proceed to itx processing, since we're about to start that + * ourselves. + */ + zilog->zl_unfail_txg = highest_txg + 1; + + for (int i = 0; i < TXG_SIZE; i++) { + itxg_t *itxg = &zilog->zl_itxg[i]; + + mutex_enter(&itxg->itxg_lock); + ASSERT(!itxg->itxg_failed); + itxg->itxg_failed = B_TRUE; + mutex_exit(&itxg->itxg_lock); + } + ASSERT3U(fail_itxg->itxg_txg, ==, 0); ASSERT3P(fail_itxg->itxg_itxs, ==, NULL); @@ -1232,14 +1249,6 @@ zil_fail(zilog_t *zilog) itxg_t *itxg = &zilog->zl_itxg[i]; mutex_enter(&itxg->itxg_lock); - ASSERT(!itxg->itxg_failed); - - /* - * Flag itxgs as failed. Most itxg users (eg zil_itx_assign()) - * take itxg_lock but not zl_lock, to avoid contention. They - * need a cheap way to test for failure; this is it. - */ - itxg->itxg_failed = B_TRUE; if (itxg->itxg_txg == 0) { /* Previously cleaned itxg, nothing to do. */