Illumos 5630 - stale bonus buffer in recycled dnode_t leads to data corruption

5630 stale bonus buffer in recycled dnode_t leads to data corruption
Author: Justin T. Gibbs <justing@spectralogic.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george@delphix.com>
Reviewed by: Will Andrews <will@freebsd.org>
Approved by: Robert Mustacchi <rm@joyent.com>

References:
  https://www.illumos.org/issues/5630
  https://github.com/illumos/illumos-gate/commit/cd485b4

Ported-by: Chris Dunlop <chris@onthe.net.au>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Richard Yao <ryao@gentoo.org>
Issue #3172
This commit is contained in:
Justin T. Gibbs 2015-03-12 11:10:35 +11:00 committed by Brian Behlendorf
parent 73ad4a9f3c
commit 4c7b7eedcd
4 changed files with 65 additions and 12 deletions

View File

@ -290,6 +290,7 @@ int dnode_hold_impl(struct objset *dd, uint64_t object, int flag,
void *ref, dnode_t **dnp); void *ref, dnode_t **dnp);
boolean_t dnode_add_ref(dnode_t *dn, void *ref); boolean_t dnode_add_ref(dnode_t *dn, void *ref);
void dnode_rele(dnode_t *dn, void *ref); void dnode_rele(dnode_t *dn, void *ref);
void dnode_rele_and_unlock(dnode_t *dn, void *tag);
void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx); void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx);
void dnode_sync(dnode_t *dn, dmu_tx_t *tx); void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
@ -311,6 +312,7 @@ void dnode_fini(void);
int dnode_next_offset(dnode_t *dn, int flags, uint64_t *off, int dnode_next_offset(dnode_t *dn, int flags, uint64_t *off,
int minlvl, uint64_t blkfill, uint64_t txg); int minlvl, uint64_t blkfill, uint64_t txg);
void dnode_evict_dbufs(dnode_t *dn); void dnode_evict_dbufs(dnode_t *dn);
void dnode_evict_bonus(dnode_t *dn);
#ifdef ZFS_DEBUG #ifdef ZFS_DEBUG

View File

@ -2200,21 +2200,60 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
if (holds == 0) { if (holds == 0) {
if (db->db_blkid == DMU_BONUS_BLKID) { if (db->db_blkid == DMU_BONUS_BLKID) {
dnode_t *dn;
/*
* If the dnode moves here, we cannot cross this
* barrier until the move completes.
*/
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
atomic_dec_32(&dn->dn_dbufs_count);
/*
* Decrementing the dbuf count means that the bonus
* buffer's dnode hold is no longer discounted in
* dnode_move(). The dnode cannot move until after
* the dnode_rele_and_unlock() below.
*/
DB_DNODE_EXIT(db);
/*
* Do not reference db after its lock is dropped.
* Another thread may evict it.
*/
mutex_exit(&db->db_mtx); mutex_exit(&db->db_mtx);
/* /*
* If the dnode moves here, we cannot cross this barrier * If the dnode has been freed, evict the bonus
* until the move completes. * buffer immediately. The data in the bonus
* buffer is no longer relevant and this prevents
* a stale bonus buffer from being associated
* with this dnode_t should the dnode_t be reused
* prior to being destroyed.
*/ */
DB_DNODE_ENTER(db); mutex_enter(&dn->dn_mtx);
atomic_dec_32(&DB_DNODE(db)->dn_dbufs_count); if (dn->dn_type == DMU_OT_NONE ||
DB_DNODE_EXIT(db); dn->dn_free_txg != 0) {
/* /*
* The bonus buffer's dnode hold is no longer discounted * Drop dn_mtx. It is a leaf lock and
* in dnode_move(). The dnode cannot move until after * cannot be held when dnode_evict_bonus()
* the dnode_rele(). * acquires other locks in order to
* perform the eviction.
*
* Freed dnodes cannot be reused until the
* last hold is released. Since this bonus
* buffer has a hold, the dnode will remain
* in the free state, even without dn_mtx
* held, until the dnode_rele_and_unlock()
* below.
*/ */
dnode_rele(DB_DNODE(db), db); mutex_exit(&dn->dn_mtx);
dnode_evict_bonus(dn);
mutex_enter(&dn->dn_mtx);
}
dnode_rele_and_unlock(dn, db);
} else if (db->db_buf == NULL) { } else if (db->db_buf == NULL) {
/* /*
* This is a special case: we never associated this * This is a special case: we never associated this

View File

@ -1157,13 +1157,19 @@ dnode_add_ref(dnode_t *dn, void *tag)
void void
dnode_rele(dnode_t *dn, void *tag) dnode_rele(dnode_t *dn, void *tag)
{
mutex_enter(&dn->dn_mtx);
dnode_rele_and_unlock(dn, tag);
}
void
dnode_rele_and_unlock(dnode_t *dn, void *tag)
{ {
uint64_t refs; uint64_t refs;
/* Get while the hold prevents the dnode from moving. */ /* Get while the hold prevents the dnode from moving. */
dmu_buf_impl_t *db = dn->dn_dbuf; dmu_buf_impl_t *db = dn->dn_dbuf;
dnode_handle_t *dnh = dn->dn_handle; dnode_handle_t *dnh = dn->dn_handle;
mutex_enter(&dn->dn_mtx);
refs = refcount_remove(&dn->dn_holds, tag); refs = refcount_remove(&dn->dn_holds, tag);
mutex_exit(&dn->dn_mtx); mutex_exit(&dn->dn_mtx);

View File

@ -454,6 +454,12 @@ dnode_evict_dbufs(dnode_t *dn)
if (pass >= 100) if (pass >= 100)
dprintf("Required %d passes to evict dbufs\n", pass); dprintf("Required %d passes to evict dbufs\n", pass);
dnode_evict_bonus(dn);
}
void
dnode_evict_bonus(dnode_t *dn)
{
rw_enter(&dn->dn_struct_rwlock, RW_WRITER); rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
if (dn->dn_bonus && refcount_is_zero(&dn->dn_bonus->db_holds)) { if (dn->dn_bonus && refcount_is_zero(&dn->dn_bonus->db_holds)) {
mutex_enter(&dn->dn_bonus->db_mtx); mutex_enter(&dn->dn_bonus->db_mtx);