From e6b28efccc4853b4f2715c2d37ab05cead2e1c3e Mon Sep 17 00:00:00 2001 From: John Poduska Date: Thu, 12 Mar 2020 13:25:56 -0400 Subject: [PATCH] Prevent race condition in dnode_dest (#10101) dnode_special_close() waits for the refcount of dn_holds to go to zero without holding the dn_mtx. dnode_rele_and_unlock() does the final remove to dn_holds with dn_mtx being held: refs = zfs_refcount_remove(&dn->dn_holds, tag); mutex_exit(&dn->dn_mtx); So, there is a race condition after the remove until dn_mtx is dropped. During that time, dnode_destroy() can get called, which ends up in dnode_dest() calling mutex_destroy() and a panic since the lock is still held. This change adds a condvar to wait for the final dnode_rele_and_unlock() to release the dn_mtx before calling dnode_destroy(). Reviewed-by: Paul Dagnelie Reviewed-by: Brian Behlendorf Reviewed-by: Matthew Ahrens Signed-off-by: John Poduska Closes #7814 Closes #10101 --- include/sys/dmu_impl.h | 1 + include/sys/dnode.h | 1 + module/zfs/dnode.c | 19 +++++++++++++------ 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/include/sys/dmu_impl.h b/include/sys/dmu_impl.h index 8d0b960840..0c6273a3a7 100644 --- a/include/sys/dmu_impl.h +++ b/include/sys/dmu_impl.h @@ -164,6 +164,7 @@ extern "C" { * dn_dirty_txg * dd_assigned_tx * dn_notxholds + * dn_nodnholds * dn_dirtyctx * dn_dirtyctx_firstset * (dn_phys copy fields?) diff --git a/include/sys/dnode.h b/include/sys/dnode.h index 3ea7aeb7f0..14821bab27 100644 --- a/include/sys/dnode.h +++ b/include/sys/dnode.h @@ -332,6 +332,7 @@ struct dnode { uint64_t dn_assigned_txg; uint64_t dn_dirty_txg; /* txg dnode was last dirtied */ kcondvar_t dn_notxholds; + kcondvar_t dn_nodnholds; enum dnode_dirtycontext dn_dirtyctx; void *dn_dirtyctx_firstset; /* dbg: contents meaningless */ diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index 5a3de232cb..138f50630b 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -119,6 +119,7 @@ dnode_cons(void *arg, void *unused, int kmflag) mutex_init(&dn->dn_mtx, NULL, MUTEX_DEFAULT, NULL); mutex_init(&dn->dn_dbufs_mtx, NULL, MUTEX_DEFAULT, NULL); cv_init(&dn->dn_notxholds, NULL, CV_DEFAULT, NULL); + cv_init(&dn->dn_nodnholds, NULL, CV_DEFAULT, NULL); /* * Every dbuf has a reference, and dropping a tracked reference is @@ -183,6 +184,7 @@ dnode_dest(void *arg, void *unused) mutex_destroy(&dn->dn_mtx); mutex_destroy(&dn->dn_dbufs_mtx); cv_destroy(&dn->dn_notxholds); + cv_destroy(&dn->dn_nodnholds); zfs_refcount_destroy(&dn->dn_holds); zfs_refcount_destroy(&dn->dn_tx_holds); ASSERT(!list_link_active(&dn->dn_link)); @@ -1171,13 +1173,15 @@ dnode_special_close(dnode_handle_t *dnh) dnode_t *dn = dnh->dnh_dnode; /* - * Wait for final references to the dnode to clear. This can - * only happen if the arc is asynchronously evicting state that - * has a hold on this dnode while we are trying to evict this - * dnode. + * Ensure dnode_rele_and_unlock() has released dn_mtx, after final + * zfs_refcount_remove() */ - while (zfs_refcount_count(&dn->dn_holds) > 0) - delay(1); + mutex_enter(&dn->dn_mtx); + if (zfs_refcount_count(&dn->dn_holds) > 0) + cv_wait(&dn->dn_nodnholds, &dn->dn_mtx); + mutex_exit(&dn->dn_mtx); + ASSERT3U(zfs_refcount_count(&dn->dn_holds), ==, 0); + ASSERT(dn->dn_dbuf == NULL || dmu_buf_get_user(&dn->dn_dbuf->db) == NULL); zrl_add(&dnh->dnh_zrlock); @@ -1606,7 +1610,10 @@ dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting) dnode_handle_t *dnh = dn->dn_handle; refs = zfs_refcount_remove(&dn->dn_holds, tag); + if (refs == 0) + cv_broadcast(&dn->dn_nodnholds); mutex_exit(&dn->dn_mtx); + /* dnode could get destroyed at this point, so don't use it anymore */ /* * It's unsafe to release the last hold on a dnode by dnode_rele() or