zil_fail: set fail state as early as possible

zil_failed() is called unlocked, and itxg_failed is only checked with
itxg_lock held. This was making the ZIL appear to be not failed even as
zil_fail() was in progress, scanning the itx lists. With zil_failed()
returning false, zil_commit() would continue to zil_commit_impl() and
also start processing itx lists, racing each other for locks.

So instead, set the fail state as early as possible, before we start
processing the itx lists. This won't stop new itxs arriving on the itxgs
proper, but it will avoid additional commit itxs being created and will
stop any attempts to collect and commit them.

(cherry picked from commit 17579a79a2b481e746879d5a033626754931441e)
This commit is contained in:
Rob Norris 2023-07-25 19:59:07 +10:00 committed by Geoff Amey
parent 43f45f8df0
commit 88bb9a3add
1 changed files with 17 additions and 8 deletions

View File

@ -1118,6 +1118,23 @@ zil_fail(zilog_t *zilog)
*/ */
uint64_t highest_txg = last_synced_txg; uint64_t highest_txg = last_synced_txg;
/*
* At the earliest opportunity, set all the failure state. We need
* zil_failed() in particular to return true so that zil_commit() does
* not proceed to itx processing, since we're about to start that
* ourselves.
*/
zilog->zl_unfail_txg = highest_txg + 1;
for (int i = 0; i < TXG_SIZE; i++) {
itxg_t *itxg = &zilog->zl_itxg[i];
mutex_enter(&itxg->itxg_lock);
ASSERT(!itxg->itxg_failed);
itxg->itxg_failed = B_TRUE;
mutex_exit(&itxg->itxg_lock);
}
ASSERT3U(fail_itxg->itxg_txg, ==, 0); ASSERT3U(fail_itxg->itxg_txg, ==, 0);
ASSERT3P(fail_itxg->itxg_itxs, ==, NULL); ASSERT3P(fail_itxg->itxg_itxs, ==, NULL);
@ -1232,14 +1249,6 @@ zil_fail(zilog_t *zilog)
itxg_t *itxg = &zilog->zl_itxg[i]; itxg_t *itxg = &zilog->zl_itxg[i];
mutex_enter(&itxg->itxg_lock); mutex_enter(&itxg->itxg_lock);
ASSERT(!itxg->itxg_failed);
/*
* Flag itxgs as failed. Most itxg users (eg zil_itx_assign())
* take itxg_lock but not zl_lock, to avoid contention. They
* need a cheap way to test for failure; this is it.
*/
itxg->itxg_failed = B_TRUE;
if (itxg->itxg_txg == 0) { if (itxg->itxg_txg == 0) {
/* Previously cleaned itxg, nothing to do. */ /* Previously cleaned itxg, nothing to do. */