Illumos 5630 - stale bonus buffer in recycled dnode_t leads to data corruption
5630 stale bonus buffer in recycled dnode_t leads to data corruption Author: Justin T. Gibbs <justing@spectralogic.com> Reviewed by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: George Wilson <george@delphix.com> Reviewed by: Will Andrews <will@freebsd.org> Approved by: Robert Mustacchi <rm@joyent.com> References: https://www.illumos.org/issues/5630 https://github.com/illumos/illumos-gate/commit/cd485b4 Ported-by: Chris Dunlop <chris@onthe.net.au> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Richard Yao <ryao@gentoo.org> Issue #3172
This commit is contained in:
parent
73ad4a9f3c
commit
4c7b7eedcd
|
@ -290,6 +290,7 @@ int dnode_hold_impl(struct objset *dd, uint64_t object, int flag,
|
||||||
void *ref, dnode_t **dnp);
|
void *ref, dnode_t **dnp);
|
||||||
boolean_t dnode_add_ref(dnode_t *dn, void *ref);
|
boolean_t dnode_add_ref(dnode_t *dn, void *ref);
|
||||||
void dnode_rele(dnode_t *dn, void *ref);
|
void dnode_rele(dnode_t *dn, void *ref);
|
||||||
|
void dnode_rele_and_unlock(dnode_t *dn, void *tag);
|
||||||
void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx);
|
void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx);
|
||||||
void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
|
void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
|
||||||
void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
|
void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
|
||||||
|
@ -311,6 +312,7 @@ void dnode_fini(void);
|
||||||
int dnode_next_offset(dnode_t *dn, int flags, uint64_t *off,
|
int dnode_next_offset(dnode_t *dn, int flags, uint64_t *off,
|
||||||
int minlvl, uint64_t blkfill, uint64_t txg);
|
int minlvl, uint64_t blkfill, uint64_t txg);
|
||||||
void dnode_evict_dbufs(dnode_t *dn);
|
void dnode_evict_dbufs(dnode_t *dn);
|
||||||
|
void dnode_evict_bonus(dnode_t *dn);
|
||||||
|
|
||||||
#ifdef ZFS_DEBUG
|
#ifdef ZFS_DEBUG
|
||||||
|
|
||||||
|
|
|
@ -2200,21 +2200,60 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
|
||||||
|
|
||||||
if (holds == 0) {
|
if (holds == 0) {
|
||||||
if (db->db_blkid == DMU_BONUS_BLKID) {
|
if (db->db_blkid == DMU_BONUS_BLKID) {
|
||||||
|
dnode_t *dn;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the dnode moves here, we cannot cross this
|
||||||
|
* barrier until the move completes.
|
||||||
|
*/
|
||||||
|
DB_DNODE_ENTER(db);
|
||||||
|
|
||||||
|
dn = DB_DNODE(db);
|
||||||
|
atomic_dec_32(&dn->dn_dbufs_count);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Decrementing the dbuf count means that the bonus
|
||||||
|
* buffer's dnode hold is no longer discounted in
|
||||||
|
* dnode_move(). The dnode cannot move until after
|
||||||
|
* the dnode_rele_and_unlock() below.
|
||||||
|
*/
|
||||||
|
DB_DNODE_EXIT(db);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Do not reference db after its lock is dropped.
|
||||||
|
* Another thread may evict it.
|
||||||
|
*/
|
||||||
mutex_exit(&db->db_mtx);
|
mutex_exit(&db->db_mtx);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the dnode moves here, we cannot cross this barrier
|
* If the dnode has been freed, evict the bonus
|
||||||
* until the move completes.
|
* buffer immediately. The data in the bonus
|
||||||
|
* buffer is no longer relevant and this prevents
|
||||||
|
* a stale bonus buffer from being associated
|
||||||
|
* with this dnode_t should the dnode_t be reused
|
||||||
|
* prior to being destroyed.
|
||||||
*/
|
*/
|
||||||
DB_DNODE_ENTER(db);
|
mutex_enter(&dn->dn_mtx);
|
||||||
atomic_dec_32(&DB_DNODE(db)->dn_dbufs_count);
|
if (dn->dn_type == DMU_OT_NONE ||
|
||||||
DB_DNODE_EXIT(db);
|
dn->dn_free_txg != 0) {
|
||||||
/*
|
/*
|
||||||
* The bonus buffer's dnode hold is no longer discounted
|
* Drop dn_mtx. It is a leaf lock and
|
||||||
* in dnode_move(). The dnode cannot move until after
|
* cannot be held when dnode_evict_bonus()
|
||||||
* the dnode_rele().
|
* acquires other locks in order to
|
||||||
*/
|
* perform the eviction.
|
||||||
dnode_rele(DB_DNODE(db), db);
|
*
|
||||||
|
* Freed dnodes cannot be reused until the
|
||||||
|
* last hold is released. Since this bonus
|
||||||
|
* buffer has a hold, the dnode will remain
|
||||||
|
* in the free state, even without dn_mtx
|
||||||
|
* held, until the dnode_rele_and_unlock()
|
||||||
|
* below.
|
||||||
|
*/
|
||||||
|
mutex_exit(&dn->dn_mtx);
|
||||||
|
dnode_evict_bonus(dn);
|
||||||
|
mutex_enter(&dn->dn_mtx);
|
||||||
|
}
|
||||||
|
dnode_rele_and_unlock(dn, db);
|
||||||
} else if (db->db_buf == NULL) {
|
} else if (db->db_buf == NULL) {
|
||||||
/*
|
/*
|
||||||
* This is a special case: we never associated this
|
* This is a special case: we never associated this
|
||||||
|
|
|
@ -1157,13 +1157,19 @@ dnode_add_ref(dnode_t *dn, void *tag)
|
||||||
|
|
||||||
void
|
void
|
||||||
dnode_rele(dnode_t *dn, void *tag)
|
dnode_rele(dnode_t *dn, void *tag)
|
||||||
|
{
|
||||||
|
mutex_enter(&dn->dn_mtx);
|
||||||
|
dnode_rele_and_unlock(dn, tag);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
dnode_rele_and_unlock(dnode_t *dn, void *tag)
|
||||||
{
|
{
|
||||||
uint64_t refs;
|
uint64_t refs;
|
||||||
/* Get while the hold prevents the dnode from moving. */
|
/* Get while the hold prevents the dnode from moving. */
|
||||||
dmu_buf_impl_t *db = dn->dn_dbuf;
|
dmu_buf_impl_t *db = dn->dn_dbuf;
|
||||||
dnode_handle_t *dnh = dn->dn_handle;
|
dnode_handle_t *dnh = dn->dn_handle;
|
||||||
|
|
||||||
mutex_enter(&dn->dn_mtx);
|
|
||||||
refs = refcount_remove(&dn->dn_holds, tag);
|
refs = refcount_remove(&dn->dn_holds, tag);
|
||||||
mutex_exit(&dn->dn_mtx);
|
mutex_exit(&dn->dn_mtx);
|
||||||
|
|
||||||
|
|
|
@ -454,6 +454,12 @@ dnode_evict_dbufs(dnode_t *dn)
|
||||||
if (pass >= 100)
|
if (pass >= 100)
|
||||||
dprintf("Required %d passes to evict dbufs\n", pass);
|
dprintf("Required %d passes to evict dbufs\n", pass);
|
||||||
|
|
||||||
|
dnode_evict_bonus(dn);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
dnode_evict_bonus(dnode_t *dn)
|
||||||
|
{
|
||||||
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
|
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
|
||||||
if (dn->dn_bonus && refcount_is_zero(&dn->dn_bonus->db_holds)) {
|
if (dn->dn_bonus && refcount_is_zero(&dn->dn_bonus->db_holds)) {
|
||||||
mutex_enter(&dn->dn_bonus->db_mtx);
|
mutex_enter(&dn->dn_bonus->db_mtx);
|
||||||
|
|
Loading…
Reference in New Issue