Illumos 6267 - dn_bonus evicted too early
6267 dn_bonus evicted too early Reviewed by: Richard Yao <ryao@gentoo.org> Reviewed by: Xin LI <delphij@freebsd.org> Reviewed by: Matthew Ahrens <mahrens@delphix.com> Approved by: Richard Lowe <richlowe@richlowe.net> References: https://www.illumos.org/issues/6267 https://github.com/illumos/illumos-gate/commit/d205810 Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Ported-by: Ned Bass <bass6@llnl.gov> Issue #3865 Issue #3443
This commit is contained in:
parent
cd887ab869
commit
f9f5394f74
|
@ -230,9 +230,25 @@ typedef struct dmu_buf_impl {
|
||||||
/* User callback information. */
|
/* User callback information. */
|
||||||
dmu_buf_user_t *db_user;
|
dmu_buf_user_t *db_user;
|
||||||
|
|
||||||
uint8_t db_immediate_evict;
|
/*
|
||||||
|
* Evict user data as soon as the dirty and reference
|
||||||
|
* counts are equal.
|
||||||
|
*/
|
||||||
|
uint8_t db_user_immediate_evict;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This block was freed while a read or write was
|
||||||
|
* active.
|
||||||
|
*/
|
||||||
uint8_t db_freed_in_flight;
|
uint8_t db_freed_in_flight;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* dnode_evict_dbufs() or dnode_evict_bonus() tried to
|
||||||
|
* evict this dbuf, but couldn't due to outstanding
|
||||||
|
* references. Evict once the refcount drops to 0.
|
||||||
|
*/
|
||||||
|
uint8_t db_pending_evict;
|
||||||
|
|
||||||
uint8_t db_dirtycnt;
|
uint8_t db_dirtycnt;
|
||||||
} dmu_buf_impl_t;
|
} dmu_buf_impl_t;
|
||||||
|
|
||||||
|
|
|
@ -93,7 +93,6 @@ struct objset {
|
||||||
uint8_t os_copies;
|
uint8_t os_copies;
|
||||||
enum zio_checksum os_dedup_checksum;
|
enum zio_checksum os_dedup_checksum;
|
||||||
boolean_t os_dedup_verify;
|
boolean_t os_dedup_verify;
|
||||||
boolean_t os_evicting;
|
|
||||||
zfs_logbias_op_t os_logbias;
|
zfs_logbias_op_t os_logbias;
|
||||||
zfs_cache_type_t os_primary_cache;
|
zfs_cache_type_t os_primary_cache;
|
||||||
zfs_cache_type_t os_secondary_cache;
|
zfs_cache_type_t os_secondary_cache;
|
||||||
|
|
|
@ -303,7 +303,7 @@ dbuf_verify_user(dmu_buf_impl_t *db, dbvu_verify_type_t verify_type)
|
||||||
*/
|
*/
|
||||||
ASSERT3U(holds, >=, db->db_dirtycnt);
|
ASSERT3U(holds, >=, db->db_dirtycnt);
|
||||||
} else {
|
} else {
|
||||||
if (db->db_immediate_evict == TRUE)
|
if (db->db_user_immediate_evict == TRUE)
|
||||||
ASSERT3U(holds, >=, db->db_dirtycnt);
|
ASSERT3U(holds, >=, db->db_dirtycnt);
|
||||||
else
|
else
|
||||||
ASSERT3U(holds, >, 0);
|
ASSERT3U(holds, >, 0);
|
||||||
|
@ -1880,8 +1880,9 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
|
||||||
db->db_blkptr = blkptr;
|
db->db_blkptr = blkptr;
|
||||||
|
|
||||||
db->db_user = NULL;
|
db->db_user = NULL;
|
||||||
db->db_immediate_evict = 0;
|
db->db_user_immediate_evict = FALSE;
|
||||||
db->db_freed_in_flight = 0;
|
db->db_freed_in_flight = FALSE;
|
||||||
|
db->db_pending_evict = FALSE;
|
||||||
|
|
||||||
if (blkid == DMU_BONUS_BLKID) {
|
if (blkid == DMU_BONUS_BLKID) {
|
||||||
ASSERT3P(parent, ==, dn->dn_dbuf);
|
ASSERT3P(parent, ==, dn->dn_dbuf);
|
||||||
|
@ -2318,12 +2319,13 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
|
||||||
arc_buf_freeze(db->db_buf);
|
arc_buf_freeze(db->db_buf);
|
||||||
|
|
||||||
if (holds == db->db_dirtycnt &&
|
if (holds == db->db_dirtycnt &&
|
||||||
db->db_level == 0 && db->db_immediate_evict)
|
db->db_level == 0 && db->db_user_immediate_evict)
|
||||||
dbuf_evict_user(db);
|
dbuf_evict_user(db);
|
||||||
|
|
||||||
if (holds == 0) {
|
if (holds == 0) {
|
||||||
if (db->db_blkid == DMU_BONUS_BLKID) {
|
if (db->db_blkid == DMU_BONUS_BLKID) {
|
||||||
dnode_t *dn;
|
dnode_t *dn;
|
||||||
|
boolean_t evict_dbuf = db->db_pending_evict;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the dnode moves here, we cannot cross this
|
* If the dnode moves here, we cannot cross this
|
||||||
|
@ -2338,7 +2340,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
|
||||||
* Decrementing the dbuf count means that the bonus
|
* Decrementing the dbuf count means that the bonus
|
||||||
* buffer's dnode hold is no longer discounted in
|
* buffer's dnode hold is no longer discounted in
|
||||||
* dnode_move(). The dnode cannot move until after
|
* dnode_move(). The dnode cannot move until after
|
||||||
* the dnode_rele_and_unlock() below.
|
* the dnode_rele() below.
|
||||||
*/
|
*/
|
||||||
DB_DNODE_EXIT(db);
|
DB_DNODE_EXIT(db);
|
||||||
|
|
||||||
|
@ -2348,35 +2350,10 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
|
||||||
*/
|
*/
|
||||||
mutex_exit(&db->db_mtx);
|
mutex_exit(&db->db_mtx);
|
||||||
|
|
||||||
/*
|
if (evict_dbuf)
|
||||||
* If the dnode has been freed, evict the bonus
|
|
||||||
* buffer immediately. The data in the bonus
|
|
||||||
* buffer is no longer relevant and this prevents
|
|
||||||
* a stale bonus buffer from being associated
|
|
||||||
* with this dnode_t should the dnode_t be reused
|
|
||||||
* prior to being destroyed.
|
|
||||||
*/
|
|
||||||
mutex_enter(&dn->dn_mtx);
|
|
||||||
if (dn->dn_type == DMU_OT_NONE ||
|
|
||||||
dn->dn_free_txg != 0) {
|
|
||||||
/*
|
|
||||||
* Drop dn_mtx. It is a leaf lock and
|
|
||||||
* cannot be held when dnode_evict_bonus()
|
|
||||||
* acquires other locks in order to
|
|
||||||
* perform the eviction.
|
|
||||||
*
|
|
||||||
* Freed dnodes cannot be reused until the
|
|
||||||
* last hold is released. Since this bonus
|
|
||||||
* buffer has a hold, the dnode will remain
|
|
||||||
* in the free state, even without dn_mtx
|
|
||||||
* held, until the dnode_rele_and_unlock()
|
|
||||||
* below.
|
|
||||||
*/
|
|
||||||
mutex_exit(&dn->dn_mtx);
|
|
||||||
dnode_evict_bonus(dn);
|
dnode_evict_bonus(dn);
|
||||||
mutex_enter(&dn->dn_mtx);
|
|
||||||
}
|
dnode_rele(dn, db);
|
||||||
dnode_rele_and_unlock(dn, db);
|
|
||||||
} else if (db->db_buf == NULL) {
|
} else if (db->db_buf == NULL) {
|
||||||
/*
|
/*
|
||||||
* This is a special case: we never associated this
|
* This is a special case: we never associated this
|
||||||
|
@ -2423,7 +2400,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
|
||||||
} else {
|
} else {
|
||||||
dbuf_clear(db);
|
dbuf_clear(db);
|
||||||
}
|
}
|
||||||
} else if (db->db_objset->os_evicting ||
|
} else if (db->db_pending_evict ||
|
||||||
arc_buf_eviction_needed(db->db_buf)) {
|
arc_buf_eviction_needed(db->db_buf)) {
|
||||||
dbuf_clear(db);
|
dbuf_clear(db);
|
||||||
} else {
|
} else {
|
||||||
|
@ -2471,7 +2448,7 @@ dmu_buf_set_user_ie(dmu_buf_t *db_fake, dmu_buf_user_t *user)
|
||||||
{
|
{
|
||||||
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
|
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
|
||||||
|
|
||||||
db->db_immediate_evict = TRUE;
|
db->db_user_immediate_evict = TRUE;
|
||||||
return (dmu_buf_set_user(db_fake, user));
|
return (dmu_buf_set_user(db_fake, user));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -726,7 +726,6 @@ dmu_objset_evict(objset_t *os)
|
||||||
if (os->os_sa)
|
if (os->os_sa)
|
||||||
sa_tear_down(os);
|
sa_tear_down(os);
|
||||||
|
|
||||||
os->os_evicting = B_TRUE;
|
|
||||||
dmu_objset_evict_dbufs(os);
|
dmu_objset_evict_dbufs(os);
|
||||||
|
|
||||||
mutex_enter(&os->os_lock);
|
mutex_enter(&os->os_lock);
|
||||||
|
|
|
@ -432,6 +432,7 @@ dnode_evict_dbufs(dnode_t *dn)
|
||||||
db_next = AVL_NEXT(&dn->dn_dbufs, db_marker);
|
db_next = AVL_NEXT(&dn->dn_dbufs, db_marker);
|
||||||
avl_remove(&dn->dn_dbufs, db_marker);
|
avl_remove(&dn->dn_dbufs, db_marker);
|
||||||
} else {
|
} else {
|
||||||
|
db->db_pending_evict = TRUE;
|
||||||
mutex_exit(&db->db_mtx);
|
mutex_exit(&db->db_mtx);
|
||||||
db_next = AVL_NEXT(&dn->dn_dbufs, db);
|
db_next = AVL_NEXT(&dn->dn_dbufs, db);
|
||||||
}
|
}
|
||||||
|
@ -447,10 +448,14 @@ void
|
||||||
dnode_evict_bonus(dnode_t *dn)
|
dnode_evict_bonus(dnode_t *dn)
|
||||||
{
|
{
|
||||||
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
|
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
|
||||||
if (dn->dn_bonus && refcount_is_zero(&dn->dn_bonus->db_holds)) {
|
if (dn->dn_bonus != NULL) {
|
||||||
mutex_enter(&dn->dn_bonus->db_mtx);
|
if (refcount_is_zero(&dn->dn_bonus->db_holds)) {
|
||||||
dbuf_evict(dn->dn_bonus);
|
mutex_enter(&dn->dn_bonus->db_mtx);
|
||||||
dn->dn_bonus = NULL;
|
dbuf_evict(dn->dn_bonus);
|
||||||
|
dn->dn_bonus = NULL;
|
||||||
|
} else {
|
||||||
|
dn->dn_bonus->db_pending_evict = TRUE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
rw_exit(&dn->dn_struct_rwlock);
|
rw_exit(&dn->dn_struct_rwlock);
|
||||||
}
|
}
|
||||||
|
@ -502,7 +507,6 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
|
||||||
|
|
||||||
dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]);
|
dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]);
|
||||||
dnode_evict_dbufs(dn);
|
dnode_evict_dbufs(dn);
|
||||||
ASSERT(avl_is_empty(&dn->dn_dbufs));
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* XXX - It would be nice to assert this, but we may still
|
* XXX - It would be nice to assert this, but we may still
|
||||||
|
|
Loading…
Reference in New Issue