ZIL: Fix config lock deadlock.
When we have some LWBs closed and their ZIOs ready to be issued, we cannot afford to sleep on the config lock if somebody else tries to lock it as writer, or it will cause a deadlock. To solve this, move spa_config_enter() from zil_lwb_write_issue() to zil_lwb_write_close() under zl_issuer_lock to enforce lock ordering with other threads. Now, if we can't immediately take the config lock, we issue all previously closed LWBs so that they can drop their config locks after completion, and only then allow sleeping on our lock. Reviewed-by: Mark Maybee <mark.maybee@delphix.com> Reviewed-by: Prakash Surya <prakash.surya@delphix.com> Reviewed-by: George Wilson <george.wilson@delphix.com> Signed-off-by: Alexander Motin <mav@FreeBSD.org> Sponsored by: iXsystems, Inc. Closes #15078 Closes #15080
This commit is contained in:
parent
fb344f5aeb
commit
2cb992a99c
|
@ -151,6 +151,7 @@ static kmem_cache_t *zil_lwb_cache;
|
||||||
static kmem_cache_t *zil_zcw_cache;
|
static kmem_cache_t *zil_zcw_cache;
|
||||||
|
|
||||||
static void zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx);
|
static void zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx);
|
||||||
|
static void zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb);
|
||||||
static itx_t *zil_itx_clone(itx_t *oitx);
|
static itx_t *zil_itx_clone(itx_t *oitx);
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
@ -1768,7 +1769,7 @@ static uint_t zil_maxblocksize = SPA_OLD_MAXBLOCKSIZE;
|
||||||
* Has to be called under zl_issuer_lock to chain more lwbs.
|
* Has to be called under zl_issuer_lock to chain more lwbs.
|
||||||
*/
|
*/
|
||||||
static lwb_t *
|
static lwb_t *
|
||||||
zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb)
|
zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, list_t *ilwbs)
|
||||||
{
|
{
|
||||||
lwb_t *nlwb = NULL;
|
lwb_t *nlwb = NULL;
|
||||||
zil_chain_t *zilc;
|
zil_chain_t *zilc;
|
||||||
|
@ -1870,6 +1871,27 @@ zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb)
|
||||||
|
|
||||||
dmu_tx_commit(tx);
|
dmu_tx_commit(tx);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We need to acquire the config lock for the lwb to issue it later.
|
||||||
|
* However, if we have a queue of closed parent lwbs already
|
||||||
|
* holding the config lock (but not yet issued), we can't block here
|
||||||
|
* waiting on the lock or we will deadlock. In that case we must
|
||||||
|
* first issue the parent IOs before waiting on the lock.
|
||||||
|
*/
|
||||||
|
if (ilwbs && !list_is_empty(ilwbs)) {
|
||||||
|
if (!spa_config_tryenter(spa, SCL_STATE, lwb, RW_READER)) {
|
||||||
|
lwb_t *tlwb;
|
||||||
|
while ((tlwb = list_remove_head(ilwbs)) != NULL)
|
||||||
|
zil_lwb_write_issue(zilog, tlwb);
|
||||||
|
spa_config_enter(spa, SCL_STATE, lwb, RW_READER);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
spa_config_enter(spa, SCL_STATE, lwb, RW_READER);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ilwbs)
|
||||||
|
list_insert_tail(ilwbs, lwb);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If there was an allocation failure then nlwb will be null which
|
* If there was an allocation failure then nlwb will be null which
|
||||||
* forces a txg_wait_synced().
|
* forces a txg_wait_synced().
|
||||||
|
@ -1933,7 +1955,7 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
|
||||||
ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_alloc,
|
ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_alloc,
|
||||||
BP_GET_LSIZE(&lwb->lwb_blk));
|
BP_GET_LSIZE(&lwb->lwb_blk));
|
||||||
}
|
}
|
||||||
spa_config_enter(zilog->zl_spa, SCL_STATE, lwb, RW_READER);
|
ASSERT(spa_config_held(zilog->zl_spa, SCL_STATE, RW_READER));
|
||||||
zil_lwb_add_block(lwb, &lwb->lwb_blk);
|
zil_lwb_add_block(lwb, &lwb->lwb_blk);
|
||||||
lwb->lwb_issued_timestamp = gethrtime();
|
lwb->lwb_issued_timestamp = gethrtime();
|
||||||
zio_nowait(lwb->lwb_root_zio);
|
zio_nowait(lwb->lwb_root_zio);
|
||||||
|
@ -2037,8 +2059,7 @@ cont:
|
||||||
lwb_sp < zil_max_waste_space(zilog) &&
|
lwb_sp < zil_max_waste_space(zilog) &&
|
||||||
(dlen % max_log_data == 0 ||
|
(dlen % max_log_data == 0 ||
|
||||||
lwb_sp < reclen + dlen % max_log_data))) {
|
lwb_sp < reclen + dlen % max_log_data))) {
|
||||||
list_insert_tail(ilwbs, lwb);
|
lwb = zil_lwb_write_close(zilog, lwb, ilwbs);
|
||||||
lwb = zil_lwb_write_close(zilog, lwb);
|
|
||||||
if (lwb == NULL)
|
if (lwb == NULL)
|
||||||
return (NULL);
|
return (NULL);
|
||||||
zil_lwb_write_open(zilog, lwb);
|
zil_lwb_write_open(zilog, lwb);
|
||||||
|
@ -2937,8 +2958,7 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
|
||||||
zfs_commit_timeout_pct / 100;
|
zfs_commit_timeout_pct / 100;
|
||||||
if (sleep < zil_min_commit_timeout ||
|
if (sleep < zil_min_commit_timeout ||
|
||||||
lwb->lwb_sz - lwb->lwb_nused < lwb->lwb_sz / 8) {
|
lwb->lwb_sz - lwb->lwb_nused < lwb->lwb_sz / 8) {
|
||||||
list_insert_tail(ilwbs, lwb);
|
lwb = zil_lwb_write_close(zilog, lwb, ilwbs);
|
||||||
lwb = zil_lwb_write_close(zilog, lwb);
|
|
||||||
zilog->zl_cur_used = 0;
|
zilog->zl_cur_used = 0;
|
||||||
if (lwb == NULL) {
|
if (lwb == NULL) {
|
||||||
while ((lwb = list_remove_head(ilwbs))
|
while ((lwb = list_remove_head(ilwbs))
|
||||||
|
@ -3096,7 +3116,7 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
|
||||||
* since we've reached the commit waiter's timeout and it still
|
* since we've reached the commit waiter's timeout and it still
|
||||||
* hasn't been issued.
|
* hasn't been issued.
|
||||||
*/
|
*/
|
||||||
lwb_t *nlwb = zil_lwb_write_close(zilog, lwb);
|
lwb_t *nlwb = zil_lwb_write_close(zilog, lwb, NULL);
|
||||||
|
|
||||||
ASSERT3S(lwb->lwb_state, !=, LWB_STATE_OPENED);
|
ASSERT3S(lwb->lwb_state, !=, LWB_STATE_OPENED);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue