From 55b1842f92a24fe7192d129bca7b60882080d31a Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Fri, 9 Jun 2023 13:08:05 -0400 Subject: [PATCH] ZIL: Fix race introduced by f63811f0721. We are not allowed to access lwb after setting LWB_STATE_FLUSH_DONE state and dropping zl_lock, since it may be freed by zil_sync(). To free itxs and waiters after dropping the lock, we need to move lwb_itxs and lwb_waiters list elements to local storage. Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Closes #14957 Closes #14959 --- module/zfs/zil.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 8672a61387..8c1fe5f668 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -1393,9 +1393,14 @@ zil_lwb_flush_vdevs_done(zio_t *zio) zil_commit_waiter_t *zcw; itx_t *itx; uint64_t txg; + list_t itxs, waiters; spa_config_exit(zilog->zl_spa, SCL_STATE, lwb); + list_create(&itxs, sizeof (itx_t), offsetof(itx_t, itx_node)); + list_create(&waiters, sizeof (zil_commit_waiter_t), + offsetof(zil_commit_waiter_t, zcw_node)); + hrtime_t t = gethrtime() - lwb->lwb_issued_timestamp; mutex_enter(&zilog->zl_lock); @@ -1404,9 +1409,6 @@ zil_lwb_flush_vdevs_done(zio_t *zio) lwb->lwb_root_zio = NULL; - ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE); - lwb->lwb_state = LWB_STATE_FLUSH_DONE; - if (zilog->zl_last_lwb_opened == lwb) { /* * Remember the highest committed log sequence number @@ -1417,15 +1419,21 @@ zil_lwb_flush_vdevs_done(zio_t *zio) zilog->zl_commit_lr_seq = zilog->zl_lr_seq; } + list_move_tail(&itxs, &lwb->lwb_itxs); + list_move_tail(&waiters, &lwb->lwb_waiters); + + ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE); + lwb->lwb_state = LWB_STATE_FLUSH_DONE; + mutex_exit(&zilog->zl_lock); - while ((itx = list_remove_head(&lwb->lwb_itxs)) != NULL) + while ((itx = list_remove_head(&itxs)) != NULL) zil_itx_destroy(itx); + list_destroy(&itxs); - while ((zcw = 
list_remove_head(&lwb->lwb_waiters)) != NULL) { + while ((zcw = list_remove_head(&waiters)) != NULL) { mutex_enter(&zcw->zcw_lock); - ASSERT3P(zcw->zcw_lwb, ==, lwb); zcw->zcw_lwb = NULL; /* * We expect any ZIO errors from child ZIOs to have been @@ -1450,6 +1458,7 @@ zil_lwb_flush_vdevs_done(zio_t *zio) mutex_exit(&zcw->zcw_lock); } + list_destroy(&waiters); mutex_enter(&zilog->zl_lwb_io_lock); txg = lwb->lwb_issued_txg;