From 48669030fe42e5bdcab8e129c6521cf8423bd57f Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 2 Aug 2010 13:56:27 -0700 Subject: [PATCH 01/11] Reduce stack in vdev_cache_read Moving the vdev_cache_entry_t struct ve_search from the stack to the heap saves ~100 bytes. --- .topdeps | 1 + .topmsg | 7 +++++++ module/zfs/vdev_cache.c | 8 +++++--- 3 files changed, 13 insertions(+), 3 deletions(-) create mode 100644 .topdeps create mode 100644 .topmsg diff --git a/.topdeps b/.topdeps new file mode 100644 index 0000000000..1f7391f92b --- /dev/null +++ b/.topdeps @@ -0,0 +1 @@ +master diff --git a/.topmsg b/.topmsg new file mode 100644 index 0000000000..b848e77746 --- /dev/null +++ b/.topmsg @@ -0,0 +1,7 @@ +From: Brian Behlendorf +Subject: [PATCH] fix stack vdev_cache_read + +Moving the vdev_cache_entry_t struct ve_search from the stack to +the heap saves ~100 bytes. + +Signed-off-by: Brian Behlendorf diff --git a/module/zfs/vdev_cache.c b/module/zfs/vdev_cache.c index 688d541344..b8d19a146f 100644 --- a/module/zfs/vdev_cache.c +++ b/module/zfs/vdev_cache.c @@ -244,7 +244,7 @@ int vdev_cache_read(zio_t *zio) { vdev_cache_t *vc = &zio->io_vd->vdev_cache; - vdev_cache_entry_t *ve, ve_search; + vdev_cache_entry_t *ve, *ve_search; uint64_t cache_offset = P2ALIGN(zio->io_offset, VCBS); uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS); zio_t *fio; @@ -267,8 +267,10 @@ vdev_cache_read(zio_t *zio) mutex_enter(&vc->vc_lock); - ve_search.ve_offset = cache_offset; - ve = avl_find(&vc->vc_offset_tree, &ve_search, NULL); + ve_search = kmem_alloc(sizeof(vdev_cache_entry_t), KM_SLEEP); + ve_search->ve_offset = cache_offset; + ve = avl_find(&vc->vc_offset_tree, ve_search, NULL); + kmem_free(ve_search, sizeof(vdev_cache_entry_t)); if (ve != NULL) { if (ve->ve_missed_update) { From 530190960add8f4026c1f4c1d5b814fcb46c5414 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 2 Aug 2010 13:57:36 -0700 Subject: [PATCH 02/11] New TopGit dependency: fix-stack-vdev_cache_read --- .topdeps | 1 + 1 file changed, 1 insertion(+) diff --git a/.topdeps b/.topdeps index dd74283adf..4205d11a81 100644 --- a/.topdeps +++ b/.topdeps @@ -24,3 +24,4 @@ fix-stack-traverse_impl fix-stack-traverse_visitbp fix-stack-ztest fix-stack-inline +fix-stack-vdev_cache_read From df8e5bf1ca5b980388bdb8dc4b38d02db3f6e863 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 2 Aug 2010 13:58:47 -0700 Subject: [PATCH 03/11] Initial commit for fix-stack-zio_done topic branch --- .topdeps | 1 + .topmsg | 9 +++++++++ 2 files changed, 10 insertions(+) create mode 100644 .topdeps create mode 100644 .topmsg diff --git a/.topdeps b/.topdeps new file mode 100644 index 0000000000..1f7391f92b --- /dev/null +++ b/.topdeps @@ -0,0 +1 @@ +master diff --git a/.topmsg b/.topmsg new file mode 100644 index 0000000000..e6d9a28098 --- /dev/null +++ b/.topmsg @@ -0,0 +1,9 @@ +From: Brian Behlendorf +Subject: [PATCH] fix stack zio_done + +Eliminated local variables pointing to members of the zio struct. +Just refer to the struct members directly. This saved about 32 bytes per +call, but this function can be called recurisvely up to 19 levels deep, +so we potentially save up to 608 bytes. + +Signed-off-by: Brian Behlendorf From d5658668e043e7dd849904b95cb182cf6a381b94 Mon Sep 17 00:00:00 2001 From: Ned Bass Date: Mon, 2 Aug 2010 14:05:49 -0700 Subject: [PATCH 04/11] Reduce stack usage of zio_done Eliminated local variables pointing to members of the zio struct. Just refer to the struct members directly. This saved about 32 bytes per call, but this function can be called recurisvely up to 19 levels deep, so we potentially save up to 608 bytes. Signed-off-by: Brian Behlendorf --- module/zfs/zio.c | 63 +++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 88d80af4e9..3a65c480a8 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -2629,11 +2629,6 @@ zio_ready(zio_t *zio) static int zio_done(zio_t *zio) { - spa_t *spa = zio->io_spa; - zio_t *lio = zio->io_logical; - blkptr_t *bp = zio->io_bp; - vdev_t *vd = zio->io_vd; - uint64_t psize = zio->io_size; zio_t *pio, *pio_next; /* @@ -2650,18 +2645,18 @@ zio_done(zio_t *zio) for (int w = 0; w < ZIO_WAIT_TYPES; w++) ASSERT(zio->io_children[c][w] == 0); - if (bp != NULL) { - ASSERT(bp->blk_pad[0] == 0); - ASSERT(bp->blk_pad[1] == 0); - ASSERT(bcmp(bp, &zio->io_bp_copy, sizeof (blkptr_t)) == 0 || - (bp == zio_unique_parent(zio)->io_bp)); - if (zio->io_type == ZIO_TYPE_WRITE && !BP_IS_HOLE(bp) && + if (zio->io_bp != NULL) { + ASSERT(zio->io_bp->blk_pad[0] == 0); + ASSERT(zio->io_bp->blk_pad[1] == 0); + ASSERT(bcmp(zio->io_bp, &zio->io_bp_copy, sizeof (blkptr_t)) == 0 || + (zio->io_bp == zio_unique_parent(zio)->io_bp)); + if (zio->io_type == ZIO_TYPE_WRITE && !BP_IS_HOLE(zio->io_bp) && zio->io_bp_override == NULL && !(zio->io_flags & ZIO_FLAG_IO_REPAIR)) { - ASSERT(!BP_SHOULD_BYTESWAP(bp)); - ASSERT3U(zio->io_prop.zp_copies, <=, BP_GET_NDVAS(bp)); - ASSERT(BP_COUNT_GANG(bp) == 0 || - (BP_COUNT_GANG(bp) == BP_GET_NDVAS(bp))); + ASSERT(!BP_SHOULD_BYTESWAP(zio->io_bp)); + ASSERT3U(zio->io_prop.zp_copies, <=, BP_GET_NDVAS(zio->io_bp)); + ASSERT(BP_COUNT_GANG(zio->io_bp) == 0 || + (BP_COUNT_GANG(zio->io_bp) == BP_GET_NDVAS(zio->io_bp))); } } @@ -2680,13 +2675,13 @@ zio_done(zio_t *zio) while (zio->io_cksum_report != NULL) { zio_cksum_report_t *zcr = zio->io_cksum_report; uint64_t align = zcr->zcr_align; - uint64_t asize = P2ROUNDUP(psize, align); + uint64_t asize = P2ROUNDUP(zio->io_size, align); char *abuf = zio->io_data; - if (asize != psize) { + if (asize != zio->io_size) { abuf = zio_buf_alloc(asize); - bcopy(zio->io_data, abuf, psize); - bzero(abuf + psize, asize - psize); + bcopy(zio->io_data, abuf, zio->io_size); + bzero(abuf + zio->io_size, asize - zio->io_size); } zio->io_cksum_report = zcr->zcr_next; @@ -2694,14 +2689,14 @@ zio_done(zio_t *zio) zcr->zcr_finish(zcr, abuf); zfs_ereport_free_checksum(zcr); - if (asize != psize) + if (asize != zio->io_size) zio_buf_free(abuf, asize); } } zio_pop_transforms(zio); /* note: may set zio->io_error */ - vdev_stat_update(zio, psize); + vdev_stat_update(zio, zio->io_size); if (zio->io_error) { /* @@ -2710,28 +2705,30 @@ zio_done(zio_t *zio) * at the block level. We ignore these errors if the * device is currently unavailable. */ - if (zio->io_error != ECKSUM && vd != NULL && !vdev_is_dead(vd)) - zfs_ereport_post(FM_EREPORT_ZFS_IO, spa, vd, zio, 0, 0); + if (zio->io_error != ECKSUM && zio->io_vd != NULL && + !vdev_is_dead(zio->io_vd)) + zfs_ereport_post(FM_EREPORT_ZFS_IO, zio->io_spa, + zio->io_vd, zio, 0, 0); if ((zio->io_error == EIO || !(zio->io_flags & (ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_PROPAGATE))) && - zio == lio) { + zio == zio->io_logical) { /* * For logical I/O requests, tell the SPA to log the * error and generate a logical data ereport. */ - spa_log_error(spa, zio); - zfs_ereport_post(FM_EREPORT_ZFS_DATA, spa, NULL, zio, + spa_log_error(zio->io_spa, zio); + zfs_ereport_post(FM_EREPORT_ZFS_DATA, zio->io_spa, NULL, zio, 0, 0); } } - if (zio->io_error && zio == lio) { + if (zio->io_error && zio == zio->io_logical) { /* * Determine whether zio should be reexecuted. This will * propagate all the way to the root via zio_notify_parent(). */ - ASSERT(vd == NULL && bp != NULL); + ASSERT(zio->io_vd == NULL && zio->io_bp != NULL); ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); if (IO_IS_ALLOCATING(zio) && @@ -2745,8 +2742,8 @@ zio_done(zio_t *zio) if ((zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_FREE) && zio->io_error == ENXIO && - spa_load_state(spa) == SPA_LOAD_NONE && - spa_get_failmode(spa) != ZIO_FAILURE_MODE_CONTINUE) + spa_load_state(zio->io_spa) == SPA_LOAD_NONE && + spa_get_failmode(zio->io_spa) != ZIO_FAILURE_MODE_CONTINUE) zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND; if (!(zio->io_flags & ZIO_FLAG_CANFAIL) && !zio->io_reexecute) @@ -2772,7 +2769,7 @@ zio_done(zio_t *zio) if ((zio->io_error || zio->io_reexecute) && IO_IS_ALLOCATING(zio) && zio->io_gang_leader == zio && !(zio->io_flags & ZIO_FLAG_IO_REWRITE)) - zio_dva_unallocate(zio, zio->io_gang_tree, bp); + zio_dva_unallocate(zio, zio->io_gang_tree, zio->io_bp); zio_gang_tree_free(&zio->io_gang_tree); @@ -2837,14 +2834,14 @@ zio_done(zio_t *zio) * We'd fail again if we reexecuted now, so suspend * until conditions improve (e.g. device comes online). */ - zio_suspend(spa, zio); + zio_suspend(zio->io_spa, zio); } else { /* * Reexecution is potentially a huge amount of work. * Hand it off to the otherwise-unused claim taskq. */ (void) taskq_dispatch( - spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE], + zio->io_spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE], (task_func_t *)zio_reexecute, zio, TQ_SLEEP); } return (ZIO_PIPELINE_STOP); From da40cc145434c7626b2bfe14a192b12978b2f9c9 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 2 Aug 2010 14:06:29 -0700 Subject: [PATCH 05/11] New TopGit dependency: fix-stack-zio_done --- .topdeps | 1 + 1 file changed, 1 insertion(+) diff --git a/.topdeps b/.topdeps index 4205d11a81..bab699e790 100644 --- a/.topdeps +++ b/.topdeps @@ -25,3 +25,4 @@ fix-stack-traverse_visitbp fix-stack-ztest fix-stack-inline fix-stack-vdev_cache_read +fix-stack-zio_done From 911d2e0df426dc7c1ffffb4e19b20709110160c2 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 2 Aug 2010 14:08:36 -0700 Subject: [PATCH 06/11] Initial commit for fix-stack-dsl_scan_visitbp topic branch --- .topdeps | 1 + .topmsg | 7 +++++++ 2 files changed, 8 insertions(+) create mode 100644 .topdeps create mode 100644 .topmsg diff --git a/.topdeps b/.topdeps new file mode 100644 index 0000000000..1f7391f92b --- /dev/null +++ b/.topdeps @@ -0,0 +1 @@ +master diff --git a/.topmsg b/.topmsg new file mode 100644 index 0000000000..9f7c787aab --- /dev/null +++ b/.topmsg @@ -0,0 +1,7 @@ +From: Brian Behlendorf +Subject: [PATCH] fix stack dsl_scan_visitbp + +To reduce stack overhead this topic branch moves the 128 byte +blkptr_t data strucutre in dsl_scan_visitbp() to the heap. + +Signed-off-by: Brian Behlendorf From 410d8c4e1531fca810da0080ce5b19278f26a582 Mon Sep 17 00:00:00 2001 From: Ned Bass Date: Thu, 29 Jul 2010 18:30:39 -0700 Subject: [PATCH 07/11] Reduce dsl_scan_visitbp stack usage by moving blkptr_t to heap Github issue 22 reported a stack overrun when the zfs module was loaded, possibly related to the presence of existing zpools created under zfs-fuse. The stack trace showed 9 levels of recursion between dsl_scan_visitbp() and dsl_scan_recurse(). To reduce stack overhead in that code path, this commit moves the 128 byte blkptr_t data strucutre in dsl_scan_visitbp() to the heap. Signed-off-by: Brian Behlendorf --- module/zfs/dsl_scan.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 23c37c7ccf..14e61a3632 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -789,18 +789,21 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_t *zb, { dsl_pool_t *dp = scn->scn_dp; arc_buf_t *buf = NULL; - blkptr_t bp_toread = *bp; + blkptr_t *bp_toread; + + bp_toread = kmem_alloc(sizeof (blkptr_t), KM_SLEEP); + *bp_toread = *bp; /* ASSERT(pbuf == NULL || arc_released(pbuf)); */ if (dsl_scan_check_pause(scn, zb)) - return; + goto out; if (dsl_scan_check_resume(scn, dnp, zb)) - return; + goto out; if (bp->blk_birth == 0) - return; + goto out; scn->scn_visited_this_txg++; @@ -811,7 +814,7 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_t *zb, pbuf, bp); if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) - return; + goto out; if (BP_GET_TYPE(bp) != DMU_OT_USERGROUP_USED) { /* @@ -826,12 +829,12 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_t *zb, * it (original untranslated -> translations from * deleted snap -> now). */ - bp_toread = *bp; + *bp_toread = *bp; } - if (dsl_scan_recurse(scn, ds, ostype, dnp, &bp_toread, zb, tx, + if (dsl_scan_recurse(scn, ds, ostype, dnp, bp_toread, zb, tx, &buf) != 0) - return; + goto out; /* * If dsl_scan_ddt() has aready visited this block, it will have @@ -841,7 +844,7 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_t *zb, if (ddt_class_contains(dp->dp_spa, scn->scn_phys.scn_ddt_class_max, bp)) { ASSERT(buf == NULL); - return; + goto out; } /* @@ -856,6 +859,8 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_t *zb, } if (buf) (void) arc_buf_remove_ref(buf, &buf); +out: + kmem_free(bp_toread, sizeof(blkptr_t)); } static void From 0c6f448b57ac92d77c2e2c0c83180b2e992a3b55 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 2 Aug 2010 14:09:37 -0700 Subject: [PATCH 08/11] New TopGit dependency: fix-stack-dsl_scan_visitbp --- .topdeps | 1 + 1 file changed, 1 insertion(+) diff --git a/.topdeps b/.topdeps index bab699e790..3647ea46a0 100644 --- a/.topdeps +++ b/.topdeps @@ -26,3 +26,4 @@ fix-stack-ztest fix-stack-inline fix-stack-vdev_cache_read fix-stack-zio_done +fix-stack-dsl_scan_visitbp From c7786edbf01642bca1b97283d766309598eced24 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 2 Aug 2010 14:10:28 -0700 Subject: [PATCH 09/11] Initial commit for fix-stack-dbuf_hold_impl topic branch --- .topdeps | 1 + .topmsg | 11 +++++++++++ 2 files changed, 12 insertions(+) create mode 100644 .topdeps create mode 100644 .topmsg diff --git a/.topdeps b/.topdeps new file mode 100644 index 0000000000..1f7391f92b --- /dev/null +++ b/.topdeps @@ -0,0 +1 @@ +master diff --git a/.topmsg b/.topmsg new file mode 100644 index 0000000000..b663e0025a --- /dev/null +++ b/.topmsg @@ -0,0 +1,11 @@ +From: Brian Behlendorf +Subject: [PATCH] fix stack dbuf_hold_impl + +This commit preserves the recursive function dbuf_hold_impl() but moves +the local variables and function arguments to the heap to minimize +the stack frame size. Enough space is initially allocated on the +stack for 20 levels of recursion. This technique was based on commit +34229a2f2ac07363f64ddd63e014964fff2f0671 which reduced stack usage of +traverse_visitbp(). + +Signed-off-by: Brian Behlendorf From 4d3fc5711202a7d13538b662190807617ae2df65 Mon Sep 17 00:00:00 2001 From: Ned Bass Date: Fri, 30 Jul 2010 16:06:41 -0700 Subject: [PATCH 10/11] Reduce stack usage for recursive dbuf_hold_impl() This commit preserves the recursive function dbuf_hold_impl() but moves the local variables and function arguments to the heap to minimize the stack frame size. Enough space is initially allocated on the stack for 20 levels of recursion. This technique was based on commit 34229a2f2ac07363f64ddd63e014964fff2f0671 which reduced stack usage of traverse_visitbp(). dbuf_hold_impl() is mutually recursive with dbuf_findbp(), but the latter function is also called from other functions. Therefore dbuf_findbp() must contain logic to determine how to call dbuf_hold_impl(). To this end, dbuf_hold_impl() now takes a struct dbuf_hold_impl_data pointer as an argument. If that argument is NULL it calls dbuf_hold_impl() as before, otherwise it calls __debuf_hold_impl() with a single dbuf_hold_impl_data pointer argument. As the name implies, dbuf_hold_impl_data stores the arguments and local variables for dbuf_hold_impl(). Signed-off-by: Brian Behlendorf --- module/zfs/dbuf.c | 192 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 134 insertions(+), 58 deletions(-) diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 42ae439972..b023fdd766 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -36,6 +36,29 @@ #include #include +struct dbuf_hold_impl_data { + /* Function arguments */ + dnode_t *dh_dn; + uint8_t dh_level; + uint64_t dh_blkid; + int dh_fail_sparse; + void *dh_tag; + dmu_buf_impl_t **dh_dbp; + /* Local variables */ + dmu_buf_impl_t *dh_db; + dmu_buf_impl_t *dh_parent; + blkptr_t *dh_bp; + int dh_err; + dbuf_dirty_record_t *dh_dr; + arc_buf_contents_t dh_type; + int dh_depth; +}; + +static void __dbuf_hold_impl_init(struct dbuf_hold_impl_data *dh, + dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse, + void *tag, dmu_buf_impl_t **dbp, int depth); +static int __dbuf_hold_impl(struct dbuf_hold_impl_data *dh); + static void dbuf_destroy(dmu_buf_impl_t *db); static int dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx); static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx); @@ -1492,7 +1515,7 @@ dbuf_clear(dmu_buf_impl_t *db) static int dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse, - dmu_buf_impl_t **parentp, blkptr_t **bpp) + dmu_buf_impl_t **parentp, blkptr_t **bpp, struct dbuf_hold_impl_data *dh) { int nlevels, epbs; @@ -1529,8 +1552,17 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse, return (ENOENT); } else if (level < nlevels-1) { /* this block is referenced from an indirect block */ - int err = dbuf_hold_impl(dn, level+1, - blkid >> epbs, fail_sparse, NULL, parentp); + int err; + if (dh == NULL) { + err = dbuf_hold_impl(dn, level+1, blkid >> epbs, + fail_sparse, NULL, parentp); + } + else { + __dbuf_hold_impl_init(dh + 1, dn, dh->dh_level + 1, + blkid >> epbs, fail_sparse, NULL, + parentp, dh->dh_depth + 1); + err = __dbuf_hold_impl(dh + 1); + } if (err) return (err); err = dbuf_read(*parentp, NULL, @@ -1723,7 +1755,7 @@ dbuf_prefetch(dnode_t *dn, uint64_t blkid) db = NULL; } - if (dbuf_findbp(dn, 0, blkid, TRUE, &db, &bp) == 0) { + if (dbuf_findbp(dn, 0, blkid, TRUE, &db, &bp, NULL) == 0) { if (bp && !BP_IS_HOLE(bp)) { int priority = dn->dn_type == DMU_OT_DDT_ZAP ? ZIO_PRIORITY_DDT_PREFETCH : ZIO_PRIORITY_ASYNC_READ; @@ -1750,98 +1782,142 @@ dbuf_prefetch(dnode_t *dn, uint64_t blkid) } } +#define DBUF_HOLD_IMPL_MAX_DEPTH 20 + /* * Returns with db_holds incremented, and db_mtx not held. * Note: dn_struct_rwlock must be held. */ -int -dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse, - void *tag, dmu_buf_impl_t **dbp) +static int +__dbuf_hold_impl(struct dbuf_hold_impl_data *dh) { - dmu_buf_impl_t *db, *parent = NULL; + ASSERT3S(dh->dh_depth, <, DBUF_HOLD_IMPL_MAX_DEPTH); + dh->dh_parent = NULL; - ASSERT(blkid != DMU_BONUS_BLKID); - ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); - ASSERT3U(dn->dn_nlevels, >, level); + ASSERT(dh->dh_blkid != DMU_BONUS_BLKID); + ASSERT(RW_LOCK_HELD(&dh->dh_dn->dn_struct_rwlock)); + ASSERT3U(dh->dh_dn->dn_nlevels, >, dh->dh_level); - *dbp = NULL; + *(dh->dh_dbp) = NULL; top: /* dbuf_find() returns with db_mtx held */ - db = dbuf_find(dn, level, blkid); + dh->dh_db = dbuf_find(dh->dh_dn, dh->dh_level, dh->dh_blkid); - if (db == NULL) { - blkptr_t *bp = NULL; - int err; + if (dh->dh_db == NULL) { + dh->dh_bp = NULL; - ASSERT3P(parent, ==, NULL); - err = dbuf_findbp(dn, level, blkid, fail_sparse, &parent, &bp); - if (fail_sparse) { - if (err == 0 && bp && BP_IS_HOLE(bp)) - err = ENOENT; - if (err) { - if (parent) - dbuf_rele(parent, NULL); - return (err); + ASSERT3P(dh->dh_parent, ==, NULL); + dh->dh_err = dbuf_findbp(dh->dh_dn, dh->dh_level, dh->dh_blkid, + dh->dh_fail_sparse, &dh->dh_parent, + &dh->dh_bp, dh); + if (dh->dh_fail_sparse) { + if (dh->dh_err == 0 && dh->dh_bp && BP_IS_HOLE(dh->dh_bp)) + dh->dh_err = ENOENT; + if (dh->dh_err) { + if (dh->dh_parent) + dbuf_rele(dh->dh_parent, NULL); + return (dh->dh_err); } } - if (err && err != ENOENT) - return (err); - db = dbuf_create(dn, level, blkid, parent, bp); + if (dh->dh_err && dh->dh_err != ENOENT) + return (dh->dh_err); + dh->dh_db = dbuf_create(dh->dh_dn, dh->dh_level, dh->dh_blkid, + dh->dh_parent, dh->dh_bp); } - if (db->db_buf && refcount_is_zero(&db->db_holds)) { - arc_buf_add_ref(db->db_buf, db); - if (db->db_buf->b_data == NULL) { - dbuf_clear(db); - if (parent) { - dbuf_rele(parent, NULL); - parent = NULL; + if (dh->dh_db->db_buf && refcount_is_zero(&dh->dh_db->db_holds)) { + arc_buf_add_ref(dh->dh_db->db_buf, dh->dh_db); + if (dh->dh_db->db_buf->b_data == NULL) { + dbuf_clear(dh->dh_db); + if (dh->dh_parent) { + dbuf_rele(dh->dh_parent, NULL); + dh->dh_parent = NULL; } goto top; } - ASSERT3P(db->db.db_data, ==, db->db_buf->b_data); + ASSERT3P(dh->dh_db->db.db_data, ==, dh->dh_db->db_buf->b_data); } - ASSERT(db->db_buf == NULL || arc_referenced(db->db_buf)); + ASSERT(dh->dh_db->db_buf == NULL || arc_referenced(dh->dh_db->db_buf)); /* * If this buffer is currently syncing out, and we are are * still referencing it from db_data, we need to make a copy * of it in case we decide we want to dirty it again in this txg. */ - if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID && - dn->dn_object != DMU_META_DNODE_OBJECT && - db->db_state == DB_CACHED && db->db_data_pending) { - dbuf_dirty_record_t *dr = db->db_data_pending; + if (dh->dh_db->db_level == 0 && + dh->dh_db->db_blkid != DMU_BONUS_BLKID && + dh->dh_dn->dn_object != DMU_META_DNODE_OBJECT && + dh->dh_db->db_state == DB_CACHED && dh->dh_db->db_data_pending) { + dh->dh_dr = dh->dh_db->db_data_pending; - if (dr->dt.dl.dr_data == db->db_buf) { - arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); + if (dh->dh_dr->dt.dl.dr_data == dh->dh_db->db_buf) { + dh->dh_type = DBUF_GET_BUFC_TYPE(dh->dh_db); - dbuf_set_data(db, - arc_buf_alloc(db->db_dnode->dn_objset->os_spa, - db->db.db_size, db, type)); - bcopy(dr->dt.dl.dr_data->b_data, db->db.db_data, - db->db.db_size); + dbuf_set_data(dh->dh_db, + arc_buf_alloc(dh->dh_db->db_dnode->dn_objset->os_spa, + dh->dh_db->db.db_size, dh->dh_db, dh->dh_type)); + bcopy(dh->dh_dr->dt.dl.dr_data->b_data, dh->dh_db->db.db_data, + dh->dh_db->db.db_size); } } - (void) refcount_add(&db->db_holds, tag); - dbuf_update_data(db); - DBUF_VERIFY(db); - mutex_exit(&db->db_mtx); + (void) refcount_add(&dh->dh_db->db_holds, dh->dh_tag); + dbuf_update_data(dh->dh_db); + DBUF_VERIFY(dh->dh_db); + mutex_exit(&dh->dh_db->db_mtx); /* NOTE: we can't rele the parent until after we drop the db_mtx */ - if (parent) - dbuf_rele(parent, NULL); + if (dh->dh_parent) + dbuf_rele(dh->dh_parent, NULL); - ASSERT3P(db->db_dnode, ==, dn); - ASSERT3U(db->db_blkid, ==, blkid); - ASSERT3U(db->db_level, ==, level); - *dbp = db; + ASSERT3P(dh->dh_db->db_dnode, ==, dh->dh_dn); + ASSERT3U(dh->dh_db->db_blkid, ==, dh->dh_blkid); + ASSERT3U(dh->dh_db->db_level, ==, dh->dh_level); + *(dh->dh_dbp) = dh->dh_db; return (0); } +/* + * The following code preserves the recursive function dbuf_hold_impl() + * but moves the local variables AND function arguments to the heap to + * minimize the stack frame size. Enough space is initially allocated + * on the stack for 20 levels of recursion. + */ +int +dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse, + void *tag, dmu_buf_impl_t **dbp) +{ + struct dbuf_hold_impl_data *dh; + int error; + + dh = kmem_zalloc(sizeof(struct dbuf_hold_impl_data) * + DBUF_HOLD_IMPL_MAX_DEPTH, KM_SLEEP); + __dbuf_hold_impl_init(dh, dn, level, blkid, fail_sparse, tag, dbp, 0); + + error = __dbuf_hold_impl(dh); + + kmem_free(dh, sizeof(struct dbuf_hold_impl_data) * + DBUF_HOLD_IMPL_MAX_DEPTH); + + return (error); +} + +static void +__dbuf_hold_impl_init(struct dbuf_hold_impl_data *dh, + dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse, + void *tag, dmu_buf_impl_t **dbp, int depth) +{ + dh->dh_dn = dn; + dh->dh_level = level; + dh->dh_blkid = blkid; + dh->dh_fail_sparse = fail_sparse; + dh->dh_tag = tag; + dh->dh_dbp = dbp; + dh->dh_depth = depth; +} + dmu_buf_impl_t * dbuf_hold(dnode_t *dn, uint64_t blkid, void *tag) { From f271befe26fb3c2137eb57fd060c9140a7440fda Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 2 Aug 2010 14:12:52 -0700 Subject: [PATCH 11/11] New TopGit dependency: fix-stack-dbuf_hold_impl --- .topdeps | 1 + 1 file changed, 1 insertion(+) diff --git a/.topdeps b/.topdeps index 3647ea46a0..762336325e 100644 --- a/.topdeps +++ b/.topdeps @@ -27,3 +27,4 @@ fix-stack-inline fix-stack-vdev_cache_read fix-stack-zio_done fix-stack-dsl_scan_visitbp +fix-stack-dbuf_hold_impl