zil_fail: set fail state as early as possible
zil_failed() is called unlocked, and itxg_failed is only checked with itxg_lock held. This made the ZIL appear not to have failed even while zil_fail() was in progress, scanning the itx lists. With zil_failed() returning false, zil_commit() would continue to zil_commit_impl() and also start processing the itx lists, so the two would race each other for locks.

So instead, set the fail state as early as possible, before we start processing the itx lists. This won't stop new itxs arriving on the itxgs proper, but it will avoid additional commit itxs being created and will stop any attempts to collect and commit them.

(cherry picked from commit 17579a79a2b481e746879d5a033626754931441e)
This commit is contained in:
parent
43f45f8df0
commit
88bb9a3add
|
@ -1118,6 +1118,23 @@ zil_fail(zilog_t *zilog)
|
||||||
*/
|
*/
|
||||||
uint64_t highest_txg = last_synced_txg;
|
uint64_t highest_txg = last_synced_txg;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* At the earliest opportunity, set all the failure state. We need
|
||||||
|
* zil_failed() in particular to return true so that zil_commit() does
|
||||||
|
* not proceed to itx processing, since we're about to start that
|
||||||
|
* ourselves.
|
||||||
|
*/
|
||||||
|
zilog->zl_unfail_txg = highest_txg + 1;
|
||||||
|
|
||||||
|
for (int i = 0; i < TXG_SIZE; i++) {
|
||||||
|
itxg_t *itxg = &zilog->zl_itxg[i];
|
||||||
|
|
||||||
|
mutex_enter(&itxg->itxg_lock);
|
||||||
|
ASSERT(!itxg->itxg_failed);
|
||||||
|
itxg->itxg_failed = B_TRUE;
|
||||||
|
mutex_exit(&itxg->itxg_lock);
|
||||||
|
}
|
||||||
|
|
||||||
ASSERT3U(fail_itxg->itxg_txg, ==, 0);
|
ASSERT3U(fail_itxg->itxg_txg, ==, 0);
|
||||||
ASSERT3P(fail_itxg->itxg_itxs, ==, NULL);
|
ASSERT3P(fail_itxg->itxg_itxs, ==, NULL);
|
||||||
|
|
||||||
|
@ -1232,14 +1249,6 @@ zil_fail(zilog_t *zilog)
|
||||||
itxg_t *itxg = &zilog->zl_itxg[i];
|
itxg_t *itxg = &zilog->zl_itxg[i];
|
||||||
|
|
||||||
mutex_enter(&itxg->itxg_lock);
|
mutex_enter(&itxg->itxg_lock);
|
||||||
ASSERT(!itxg->itxg_failed);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Flag itxgs as failed. Most itxg users (eg zil_itx_assign())
|
|
||||||
* take itxg_lock but not zl_lock, to avoid contention. They
|
|
||||||
* need a cheap way to test for failure; this is it.
|
|
||||||
*/
|
|
||||||
itxg->itxg_failed = B_TRUE;
|
|
||||||
|
|
||||||
if (itxg->itxg_txg == 0) {
|
if (itxg->itxg_txg == 0) {
|
||||||
/* Previously cleaned itxg, nothing to do. */
|
/* Previously cleaned itxg, nothing to do. */
|
||||||
|
|
Loading…
Reference in New Issue