Reduce stack usage for recursive dbuf_hold_impl()
This commit preserves the recursive function dbuf_hold_impl() but moves
the local variables and function arguments to the heap to minimize
the stack frame size. Enough space is initially allocated on the
heap for 20 levels of recursion. This technique was based on commit
34229a2f2a
which reduced stack usage of
traverse_visitbp().
dbuf_hold_impl() is mutually recursive with dbuf_findbp(),
but the latter function is also called from other functions.
Therefore dbuf_findbp() must contain logic to determine how to call
dbuf_hold_impl(). To this end, dbuf_findbp() now takes a
struct dbuf_hold_impl_data pointer as an argument. If that argument
is NULL it calls dbuf_hold_impl() as before, otherwise it calls
__dbuf_hold_impl() with a single dbuf_hold_impl_data pointer argument.
As the name implies, dbuf_hold_impl_data stores the arguments and local
variables for dbuf_hold_impl().
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
parent
c7786edbf0
commit
4d3fc57112
|
@ -36,6 +36,29 @@
|
||||||
#include <sys/sa.h>
|
#include <sys/sa.h>
|
||||||
#include <sys/sa_impl.h>
|
#include <sys/sa_impl.h>
|
||||||
|
|
||||||
|
struct dbuf_hold_impl_data {
|
||||||
|
/* Function arguments */
|
||||||
|
dnode_t *dh_dn;
|
||||||
|
uint8_t dh_level;
|
||||||
|
uint64_t dh_blkid;
|
||||||
|
int dh_fail_sparse;
|
||||||
|
void *dh_tag;
|
||||||
|
dmu_buf_impl_t **dh_dbp;
|
||||||
|
/* Local variables */
|
||||||
|
dmu_buf_impl_t *dh_db;
|
||||||
|
dmu_buf_impl_t *dh_parent;
|
||||||
|
blkptr_t *dh_bp;
|
||||||
|
int dh_err;
|
||||||
|
dbuf_dirty_record_t *dh_dr;
|
||||||
|
arc_buf_contents_t dh_type;
|
||||||
|
int dh_depth;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void __dbuf_hold_impl_init(struct dbuf_hold_impl_data *dh,
|
||||||
|
dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse,
|
||||||
|
void *tag, dmu_buf_impl_t **dbp, int depth);
|
||||||
|
static int __dbuf_hold_impl(struct dbuf_hold_impl_data *dh);
|
||||||
|
|
||||||
static void dbuf_destroy(dmu_buf_impl_t *db);
|
static void dbuf_destroy(dmu_buf_impl_t *db);
|
||||||
static int dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
static int dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
||||||
static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx);
|
static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx);
|
||||||
|
@ -1492,7 +1515,7 @@ dbuf_clear(dmu_buf_impl_t *db)
|
||||||
|
|
||||||
static int
|
static int
|
||||||
dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
|
dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
|
||||||
dmu_buf_impl_t **parentp, blkptr_t **bpp)
|
dmu_buf_impl_t **parentp, blkptr_t **bpp, struct dbuf_hold_impl_data *dh)
|
||||||
{
|
{
|
||||||
int nlevels, epbs;
|
int nlevels, epbs;
|
||||||
|
|
||||||
|
@ -1529,8 +1552,17 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
|
||||||
return (ENOENT);
|
return (ENOENT);
|
||||||
} else if (level < nlevels-1) {
|
} else if (level < nlevels-1) {
|
||||||
/* this block is referenced from an indirect block */
|
/* this block is referenced from an indirect block */
|
||||||
int err = dbuf_hold_impl(dn, level+1,
|
int err;
|
||||||
blkid >> epbs, fail_sparse, NULL, parentp);
|
if (dh == NULL) {
|
||||||
|
err = dbuf_hold_impl(dn, level+1, blkid >> epbs,
|
||||||
|
fail_sparse, NULL, parentp);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
__dbuf_hold_impl_init(dh + 1, dn, dh->dh_level + 1,
|
||||||
|
blkid >> epbs, fail_sparse, NULL,
|
||||||
|
parentp, dh->dh_depth + 1);
|
||||||
|
err = __dbuf_hold_impl(dh + 1);
|
||||||
|
}
|
||||||
if (err)
|
if (err)
|
||||||
return (err);
|
return (err);
|
||||||
err = dbuf_read(*parentp, NULL,
|
err = dbuf_read(*parentp, NULL,
|
||||||
|
@ -1723,7 +1755,7 @@ dbuf_prefetch(dnode_t *dn, uint64_t blkid)
|
||||||
db = NULL;
|
db = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dbuf_findbp(dn, 0, blkid, TRUE, &db, &bp) == 0) {
|
if (dbuf_findbp(dn, 0, blkid, TRUE, &db, &bp, NULL) == 0) {
|
||||||
if (bp && !BP_IS_HOLE(bp)) {
|
if (bp && !BP_IS_HOLE(bp)) {
|
||||||
int priority = dn->dn_type == DMU_OT_DDT_ZAP ?
|
int priority = dn->dn_type == DMU_OT_DDT_ZAP ?
|
||||||
ZIO_PRIORITY_DDT_PREFETCH : ZIO_PRIORITY_ASYNC_READ;
|
ZIO_PRIORITY_DDT_PREFETCH : ZIO_PRIORITY_ASYNC_READ;
|
||||||
|
@ -1750,98 +1782,142 @@ dbuf_prefetch(dnode_t *dn, uint64_t blkid)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define DBUF_HOLD_IMPL_MAX_DEPTH 20
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Returns with db_holds incremented, and db_mtx not held.
|
* Returns with db_holds incremented, and db_mtx not held.
|
||||||
* Note: dn_struct_rwlock must be held.
|
* Note: dn_struct_rwlock must be held.
|
||||||
*/
|
*/
|
||||||
int
|
static int
|
||||||
dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse,
|
__dbuf_hold_impl(struct dbuf_hold_impl_data *dh)
|
||||||
void *tag, dmu_buf_impl_t **dbp)
|
|
||||||
{
|
{
|
||||||
dmu_buf_impl_t *db, *parent = NULL;
|
ASSERT3S(dh->dh_depth, <, DBUF_HOLD_IMPL_MAX_DEPTH);
|
||||||
|
dh->dh_parent = NULL;
|
||||||
|
|
||||||
ASSERT(blkid != DMU_BONUS_BLKID);
|
ASSERT(dh->dh_blkid != DMU_BONUS_BLKID);
|
||||||
ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
|
ASSERT(RW_LOCK_HELD(&dh->dh_dn->dn_struct_rwlock));
|
||||||
ASSERT3U(dn->dn_nlevels, >, level);
|
ASSERT3U(dh->dh_dn->dn_nlevels, >, dh->dh_level);
|
||||||
|
|
||||||
*dbp = NULL;
|
*(dh->dh_dbp) = NULL;
|
||||||
top:
|
top:
|
||||||
/* dbuf_find() returns with db_mtx held */
|
/* dbuf_find() returns with db_mtx held */
|
||||||
db = dbuf_find(dn, level, blkid);
|
dh->dh_db = dbuf_find(dh->dh_dn, dh->dh_level, dh->dh_blkid);
|
||||||
|
|
||||||
if (db == NULL) {
|
if (dh->dh_db == NULL) {
|
||||||
blkptr_t *bp = NULL;
|
dh->dh_bp = NULL;
|
||||||
int err;
|
|
||||||
|
|
||||||
ASSERT3P(parent, ==, NULL);
|
ASSERT3P(dh->dh_parent, ==, NULL);
|
||||||
err = dbuf_findbp(dn, level, blkid, fail_sparse, &parent, &bp);
|
dh->dh_err = dbuf_findbp(dh->dh_dn, dh->dh_level, dh->dh_blkid,
|
||||||
if (fail_sparse) {
|
dh->dh_fail_sparse, &dh->dh_parent,
|
||||||
if (err == 0 && bp && BP_IS_HOLE(bp))
|
&dh->dh_bp, dh);
|
||||||
err = ENOENT;
|
if (dh->dh_fail_sparse) {
|
||||||
if (err) {
|
if (dh->dh_err == 0 && dh->dh_bp && BP_IS_HOLE(dh->dh_bp))
|
||||||
if (parent)
|
dh->dh_err = ENOENT;
|
||||||
dbuf_rele(parent, NULL);
|
if (dh->dh_err) {
|
||||||
return (err);
|
if (dh->dh_parent)
|
||||||
|
dbuf_rele(dh->dh_parent, NULL);
|
||||||
|
return (dh->dh_err);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (err && err != ENOENT)
|
if (dh->dh_err && dh->dh_err != ENOENT)
|
||||||
return (err);
|
return (dh->dh_err);
|
||||||
db = dbuf_create(dn, level, blkid, parent, bp);
|
dh->dh_db = dbuf_create(dh->dh_dn, dh->dh_level, dh->dh_blkid,
|
||||||
|
dh->dh_parent, dh->dh_bp);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (db->db_buf && refcount_is_zero(&db->db_holds)) {
|
if (dh->dh_db->db_buf && refcount_is_zero(&dh->dh_db->db_holds)) {
|
||||||
arc_buf_add_ref(db->db_buf, db);
|
arc_buf_add_ref(dh->dh_db->db_buf, dh->dh_db);
|
||||||
if (db->db_buf->b_data == NULL) {
|
if (dh->dh_db->db_buf->b_data == NULL) {
|
||||||
dbuf_clear(db);
|
dbuf_clear(dh->dh_db);
|
||||||
if (parent) {
|
if (dh->dh_parent) {
|
||||||
dbuf_rele(parent, NULL);
|
dbuf_rele(dh->dh_parent, NULL);
|
||||||
parent = NULL;
|
dh->dh_parent = NULL;
|
||||||
}
|
}
|
||||||
goto top;
|
goto top;
|
||||||
}
|
}
|
||||||
ASSERT3P(db->db.db_data, ==, db->db_buf->b_data);
|
ASSERT3P(dh->dh_db->db.db_data, ==, dh->dh_db->db_buf->b_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
ASSERT(db->db_buf == NULL || arc_referenced(db->db_buf));
|
ASSERT(dh->dh_db->db_buf == NULL || arc_referenced(dh->dh_db->db_buf));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If this buffer is currently syncing out, and we are are
|
* If this buffer is currently syncing out, and we are are
|
||||||
* still referencing it from db_data, we need to make a copy
|
* still referencing it from db_data, we need to make a copy
|
||||||
* of it in case we decide we want to dirty it again in this txg.
|
* of it in case we decide we want to dirty it again in this txg.
|
||||||
*/
|
*/
|
||||||
if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID &&
|
if (dh->dh_db->db_level == 0 &&
|
||||||
dn->dn_object != DMU_META_DNODE_OBJECT &&
|
dh->dh_db->db_blkid != DMU_BONUS_BLKID &&
|
||||||
db->db_state == DB_CACHED && db->db_data_pending) {
|
dh->dh_dn->dn_object != DMU_META_DNODE_OBJECT &&
|
||||||
dbuf_dirty_record_t *dr = db->db_data_pending;
|
dh->dh_db->db_state == DB_CACHED && dh->dh_db->db_data_pending) {
|
||||||
|
dh->dh_dr = dh->dh_db->db_data_pending;
|
||||||
|
|
||||||
if (dr->dt.dl.dr_data == db->db_buf) {
|
if (dh->dh_dr->dt.dl.dr_data == dh->dh_db->db_buf) {
|
||||||
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
|
dh->dh_type = DBUF_GET_BUFC_TYPE(dh->dh_db);
|
||||||
|
|
||||||
dbuf_set_data(db,
|
dbuf_set_data(dh->dh_db,
|
||||||
arc_buf_alloc(db->db_dnode->dn_objset->os_spa,
|
arc_buf_alloc(dh->dh_db->db_dnode->dn_objset->os_spa,
|
||||||
db->db.db_size, db, type));
|
dh->dh_db->db.db_size, dh->dh_db, dh->dh_type));
|
||||||
bcopy(dr->dt.dl.dr_data->b_data, db->db.db_data,
|
bcopy(dh->dh_dr->dt.dl.dr_data->b_data, dh->dh_db->db.db_data,
|
||||||
db->db.db_size);
|
dh->dh_db->db.db_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
(void) refcount_add(&db->db_holds, tag);
|
(void) refcount_add(&dh->dh_db->db_holds, dh->dh_tag);
|
||||||
dbuf_update_data(db);
|
dbuf_update_data(dh->dh_db);
|
||||||
DBUF_VERIFY(db);
|
DBUF_VERIFY(dh->dh_db);
|
||||||
mutex_exit(&db->db_mtx);
|
mutex_exit(&dh->dh_db->db_mtx);
|
||||||
|
|
||||||
/* NOTE: we can't rele the parent until after we drop the db_mtx */
|
/* NOTE: we can't rele the parent until after we drop the db_mtx */
|
||||||
if (parent)
|
if (dh->dh_parent)
|
||||||
dbuf_rele(parent, NULL);
|
dbuf_rele(dh->dh_parent, NULL);
|
||||||
|
|
||||||
ASSERT3P(db->db_dnode, ==, dn);
|
ASSERT3P(dh->dh_db->db_dnode, ==, dh->dh_dn);
|
||||||
ASSERT3U(db->db_blkid, ==, blkid);
|
ASSERT3U(dh->dh_db->db_blkid, ==, dh->dh_blkid);
|
||||||
ASSERT3U(db->db_level, ==, level);
|
ASSERT3U(dh->dh_db->db_level, ==, dh->dh_level);
|
||||||
*dbp = db;
|
*(dh->dh_dbp) = dh->dh_db;
|
||||||
|
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The following code preserves the recursive function dbuf_hold_impl()
|
||||||
|
* but moves the local variables AND function arguments to the heap to
|
||||||
|
* minimize the stack frame size. Enough space is initially allocated
|
||||||
|
* on the stack for 20 levels of recursion.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse,
|
||||||
|
void *tag, dmu_buf_impl_t **dbp)
|
||||||
|
{
|
||||||
|
struct dbuf_hold_impl_data *dh;
|
||||||
|
int error;
|
||||||
|
|
||||||
|
dh = kmem_zalloc(sizeof(struct dbuf_hold_impl_data) *
|
||||||
|
DBUF_HOLD_IMPL_MAX_DEPTH, KM_SLEEP);
|
||||||
|
__dbuf_hold_impl_init(dh, dn, level, blkid, fail_sparse, tag, dbp, 0);
|
||||||
|
|
||||||
|
error = __dbuf_hold_impl(dh);
|
||||||
|
|
||||||
|
kmem_free(dh, sizeof(struct dbuf_hold_impl_data) *
|
||||||
|
DBUF_HOLD_IMPL_MAX_DEPTH);
|
||||||
|
|
||||||
|
return (error);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
__dbuf_hold_impl_init(struct dbuf_hold_impl_data *dh,
|
||||||
|
dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse,
|
||||||
|
void *tag, dmu_buf_impl_t **dbp, int depth)
|
||||||
|
{
|
||||||
|
dh->dh_dn = dn;
|
||||||
|
dh->dh_level = level;
|
||||||
|
dh->dh_blkid = blkid;
|
||||||
|
dh->dh_fail_sparse = fail_sparse;
|
||||||
|
dh->dh_tag = tag;
|
||||||
|
dh->dh_dbp = dbp;
|
||||||
|
dh->dh_depth = depth;
|
||||||
|
}
|
||||||
|
|
||||||
dmu_buf_impl_t *
|
dmu_buf_impl_t *
|
||||||
dbuf_hold(dnode_t *dn, uint64_t blkid, void *tag)
|
dbuf_hold(dnode_t *dn, uint64_t blkid, void *tag)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue