From 2ade4a99f0b995df6fb45138d04a0209b959ef63 Mon Sep 17 00:00:00 2001
From: Matthew Ahrens
Date: Mon, 20 Mar 2017 15:38:11 -0700
Subject: [PATCH] OpenZFS 8126 - ztest assertion failed in dbuf_dirty due to dn_nlevels changing

The sync thread is concurrently modifying dn_phys->dn_nlevels while
dbuf_dirty() is trying to assert something about it, without holding the
necessary lock. We need to move this assertion further down in the
function, after we have acquired the dn_struct_rwlock.

Authored by: Matthew Ahrens
Reviewed by: Pavel Zakharov
Reviewed by: Serapheim Dimitropoulos
Approved by: Robert Mustacchi
Reviewed-by: George Melikov
Reviewed-by: Brian Behlendorf
Ported-by: Giuseppe Di Natale

OpenZFS-issue: https://www.illumos.org/issues/8126
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/0ef125d
Closes #6314
---
 module/zfs/dbuf.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index f4e24e2099..dc2c00495b 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -1625,11 +1625,6 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
 	    (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN));
 
 	ASSERT3U(dn->dn_nlevels, >, db->db_level);
-	ASSERT((dn->dn_phys->dn_nlevels == 0 && db->db_level == 0) ||
-	    dn->dn_phys->dn_nlevels > db->db_level ||
-	    dn->dn_next_nlevels[txgoff] > db->db_level ||
-	    dn->dn_next_nlevels[(tx->tx_txg-1) & TXG_MASK] > db->db_level ||
-	    dn->dn_next_nlevels[(tx->tx_txg-2) & TXG_MASK] > db->db_level);
 
 	/*
 	 * We should only be dirtying in syncing context if it's the
@@ -1747,6 +1742,16 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
 		drop_struct_lock = TRUE;
 	}
 
+	/*
+	 * We need to hold the dn_struct_rwlock to make this assertion,
+	 * because it protects dn_phys / dn_next_nlevels from changing.
+	 */
+	ASSERT((dn->dn_phys->dn_nlevels == 0 && db->db_level == 0) ||
+	    dn->dn_phys->dn_nlevels > db->db_level ||
+	    dn->dn_next_nlevels[txgoff] > db->db_level ||
+	    dn->dn_next_nlevels[(tx->tx_txg-1) & TXG_MASK] > db->db_level ||
+	    dn->dn_next_nlevels[(tx->tx_txg-2) & TXG_MASK] > db->db_level);
+
 	/*
 	 * If we are overwriting a dedup BP, then unless it is snapshotted,
 	 * when we get to syncing context we will need to decrement its