The patch resolves the extra call to dnode_cons() in dnode_create().

The extra call to the constructor was there to reinitialize the non-
trivial primatives in the dnode (lists, mutexs, condvars, avl tree, etc).
This was safe, although not exactly clean, on Solaris because none of
the primitives allocate memory.  In the Linux port this is not true.
To keep stack usage to a minimum several of the primatives dynamically
allocate memory thus initializing them twice results in a memory leak.

This patch resolves this problem for Solaris and Linux by ensuring all
*_inits are called in the constructor, and all *_destroys are called
in the destructor.  Additionally we ensure that all dnode objects are
properly deconstructed before being freed to the slab, and when the
objects are allocated from the slab all required data members are
explicity initialized to correct values.
This commit is contained in:
Brian Behlendorf 2009-03-19 15:22:48 -07:00
parent 404fd8578f
commit 60d25f4b93
5 changed files with 107 additions and 33 deletions

15
.topmsg
View File

@ -1,7 +1,20 @@
From: Brian Behlendorf <behlendorf1@llnl.gov>
Subject: [PATCH] fix dnode constructor
Don't call the constructor twice
The patch removes the extra call to dnode_cons() in dnode_create(). The
extra call to the constructor was there to reinitialize the non-trivial
data structures in the dnode (lists, mutexs, condvars, avl tree, etc).
This was safe, although not exactly clean, on Solaris because none of
the primitives allocate memory. In the Linux port this is not true.
To keep stack usage to a minimum several of the primatives dynamically
allocate memory thus initializing them twice results in a memory leak.
This patch resolves this problem for Solaris and Linux by ensures all
*_init's are called in the constructor, and all *_destroy's are called
in the destructor. Additionally we ensure that all dnode objects are
properly deconstructed before being freed to the slab, and when the
objects are allocated from the slab all required data members are
explicity initialized to correct values.
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>

View File

@ -203,20 +203,53 @@ dmu_zfetch_dofetch(zfetch_t *zf, zstream_t *zs)
void
dmu_zfetch_init(zfetch_t *zf, dnode_t *dno)
{
if (zf == NULL) {
return;
}
zf->zf_dnode = dno;
zf->zf_stream_cnt = 0;
zf->zf_alloc_fail = 0;
}
/*
* Clean-up state associated with a zfetch structure. This frees allocated
* structure members, empties the zf_stream tree, and generally makes things
* nice. This doesn't free the zfetch_t itself, that's left to the caller.
*/
void
dmu_zfetch_rele(zfetch_t *zf)
{
zstream_t *zs;
zstream_t *zs_next;
for (zs = list_head(&zf->zf_stream); zs; zs = zs_next) {
zs_next = list_next(&zf->zf_stream, zs);
list_remove(&zf->zf_stream, zs);
mutex_destroy(&zs->zst_lock);
kmem_free(zs, sizeof (zstream_t));
}
}
/*
* Construct a zfetch structure.
*/
void
dmu_zfetch_cons(zfetch_t *zf)
{
list_create(&zf->zf_stream, sizeof (zstream_t),
offsetof(zstream_t, zst_node));
rw_init(&zf->zf_rwlock, NULL, RW_DEFAULT, NULL);
}
/*
* Destruct a zfetch structure.
*/
void
dmu_zfetch_dest(zfetch_t *zf)
{
list_destroy(&zf->zf_stream);
rw_destroy(&zf->zf_rwlock);
}
/*
* This function computes the actual size, in blocks, that can be prefetched,
* and fetches it.
@ -441,32 +474,6 @@ out:
return (rc);
}
/*
* Clean-up state associated with a zfetch structure. This frees allocated
* structure members, empties the zf_stream tree, and generally makes things
* nice. This doesn't free the zfetch_t itself, that's left to the caller.
*/
void
dmu_zfetch_rele(zfetch_t *zf)
{
zstream_t *zs;
zstream_t *zs_next;
ASSERT(!RW_LOCK_HELD(&zf->zf_rwlock));
for (zs = list_head(&zf->zf_stream); zs; zs = zs_next) {
zs_next = list_next(&zf->zf_stream, zs);
list_remove(&zf->zf_stream, zs);
mutex_destroy(&zs->zst_lock);
kmem_free(zs, sizeof (zstream_t));
}
list_destroy(&zf->zf_stream);
rw_destroy(&zf->zf_rwlock);
zf->zf_dnode = NULL;
}
/*
* Given a zfetch and zstream structure, insert the zstream structure into the
* AVL tree contained within the zfetch structure. Peform the appropriate

View File

@ -72,6 +72,7 @@ dnode_cons(void *arg, void *unused, int kmflag)
list_create(&dn->dn_dbufs, sizeof (dmu_buf_impl_t),
offsetof(dmu_buf_impl_t, db_link));
dmu_zfetch_cons(&dn->dn_zfetch);
return (0);
}
@ -96,6 +97,7 @@ dnode_dest(void *arg, void *unused)
}
list_destroy(&dn->dn_dbufs);
dmu_zfetch_dest(&dn->dn_zfetch);
}
void
@ -165,6 +167,27 @@ dnode_verify(dnode_t *dn)
if (drop_struct_lock)
rw_exit(&dn->dn_struct_rwlock);
}
void
dnode_verify_clean(dnode_t *dn)
{
int i;
ASSERT(!RW_LOCK_HELD(&dn->dn_struct_rwlock));
ASSERT(!MUTEX_HELD(&dn->dn_mtx));
ASSERT(!MUTEX_HELD(&dn->dn_dbufs_mtx));
ASSERT(refcount_is_zero(&dn->dn_holds));
ASSERT(refcount_is_zero(&dn->dn_tx_holds));
for (i = 0; i < TXG_SIZE; i++) {
ASSERT(NULL == list_head(&dn->dn_dirty_records[i]));
ASSERT(0 == avl_numnodes(&dn->dn_ranges[i]));
}
ASSERT(NULL == list_head(&dn->dn_dbufs));
ASSERT(NULL == list_head(&dn->dn_zfetch.zf_stream));
ASSERT(!RW_LOCK_HELD(&dn->dn_zfetch.zf_rwlock));
}
#endif
void
@ -276,14 +299,32 @@ dnode_create(objset_impl_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
uint64_t object)
{
dnode_t *dn = kmem_cache_alloc(dnode_cache, KM_SLEEP);
int i;
DNODE_VERIFY_CLEAN(dn);
dn->dn_objset = os;
dn->dn_object = object;
dn->dn_dbuf = db;
dn->dn_phys = dnp;
if (dnp->dn_datablkszsec)
list_link_init(&dn->dn_link);
for (i = 0; i < TXG_SIZE; i++) {
list_link_init(&dn->dn_dirty_link[i]);
dn->dn_next_nblkptr[i] = 0;
dn->dn_next_nlevels[i] = 0;
dn->dn_next_indblkshift[i] = 0;
dn->dn_next_bonuslen[i] = 0;
dn->dn_next_blksz[i] = 0;
}
if (dnp->dn_datablkszsec) {
dnode_setdblksz(dn, dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
} else {
dn->dn_datablksz = 0;
dn->dn_datablkszsec = 0;
dn->dn_datablkshift = 0;
}
dn->dn_indblkshift = dnp->dn_indblkshift;
dn->dn_nlevels = dnp->dn_nlevels;
dn->dn_type = dnp->dn_type;
@ -293,7 +334,13 @@ dnode_create(objset_impl_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
dn->dn_bonustype = dnp->dn_bonustype;
dn->dn_bonuslen = dnp->dn_bonuslen;
dn->dn_maxblkid = dnp->dn_maxblkid;
dn->dn_allocated_txg = 0;
dn->dn_free_txg = 0;
dn->dn_assigned_txg = 0;
dn->dn_dirtyctx = DN_UNDIRTIED;
dn->dn_dirtyctx_firstset = NULL;
dn->dn_bonus = NULL;
dn->dn_zio = NULL;
dmu_zfetch_init(&dn->dn_zfetch, dn);
ASSERT(dn->dn_phys->dn_type < DMU_OT_NUMTYPES);
@ -335,6 +382,7 @@ dnode_destroy(dnode_t *dn)
dbuf_evict(dn->dn_bonus);
dn->dn_bonus = NULL;
}
DNODE_VERIFY_CLEAN(dn);
kmem_cache_free(dnode_cache, dn);
arc_space_return(sizeof (dnode_t), ARC_SPACE_OTHER);
}

View File

@ -65,6 +65,9 @@ typedef struct zfetch {
void dmu_zfetch_init(zfetch_t *, struct dnode *);
void dmu_zfetch_rele(zfetch_t *);
void dmu_zfetch_cons(zfetch_t *);
void dmu_zfetch_dest(zfetch_t *);
void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, int);

View File

@ -222,6 +222,7 @@ void dnode_free(dnode_t *dn, dmu_tx_t *tx);
void dnode_byteswap(dnode_phys_t *dnp);
void dnode_buf_byteswap(void *buf, size_t size);
void dnode_verify(dnode_t *dn);
void dnode_verify_clean(dnode_t *dn);
int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx);
uint64_t dnode_current_max_length(dnode_t *dn);
void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx);
@ -259,12 +260,14 @@ void dnode_evict_dbufs(dnode_t *dn);
_NOTE(CONSTCOND) } while (0)
#define DNODE_VERIFY(dn) dnode_verify(dn)
#define DNODE_VERIFY_CLEAN(dn) dnode_verify_clean(dn)
#define FREE_VERIFY(db, start, end, tx) free_verify(db, start, end, tx)
#else
#define dprintf_dnode(db, fmt, ...)
#define DNODE_VERIFY(dn)
#define DNODE_VERIFY_CLEAN(dn)
#define FREE_VERIFY(db, start, end, tx)
#endif