Add explicit prefetches to bpobj_iterate().
To simplify error handling, bpobj_iterate_blkptrs() iterates through the list of block pointers backwards. Unfortunately, the speculative prefetcher is currently unable to detect such patterns, which makes each block read there synchronous and very slow on HDD pools. According to my tests, the added explicit prefetch reduces the time needed to asynchronously delete 8 snapshots of 4 million blocks each from 20 seconds to less than one, which should free the sync thread for other useful work, such as async writes, scrub, etc.

While there, plug one memory leak in case of a bpobj_open() error and harmonize some variable names.

Reviewed-by: Allan Jude <allan@klarasystems.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Sponsored by: iXsystems, Inc.
Closes #15071
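In essence, the patch computes a byte range just below the current read position, issues one asynchronous dmu_prefetch() for it before any synchronous reads happen, and then slides that window further down each time the loop crosses into a new buffer. The following is a condensed sketch of that pattern, not the literal patch: the helper name is hypothetical, while pb/ps/pe and the clamping against dmu_prefetch_max mirror the diff below.

/*
 * Condensed sketch (hypothetical helper) of the explicit-prefetch pattern
 * added below for a backwards scan over an array of blkptr_t entries.
 */
static void
prefetch_window_sketch(objset_t *os, uint64_t object, int64_t i,
    int64_t start, uint32_t epb)
{
	/* End of the window: start of the block holding entry i. */
	uint64_t pe = P2ALIGN_TYPED(i, epb, uint64_t) * sizeof (blkptr_t);
	/* Lowest offset the iteration will ever need. */
	uint64_t ps = start * sizeof (blkptr_t);
	/* Beginning of the window, clamped to at most dmu_prefetch_max. */
	uint64_t pb = MAX((pe > dmu_prefetch_max) ?
	    pe - dmu_prefetch_max : 0, ps);

	if (pe > pb) {
		/* Kick off reads ahead of (below) the current position. */
		dmu_prefetch(os, object, 0, pb, pe - pb,
		    ZIO_PRIORITY_ASYNC_READ);
	}
	/*
	 * As the loop walks down and takes hold of a new dbuf, pe becomes
	 * the old pb, pb is recomputed from dbuf->db_offset, and another
	 * dmu_prefetch() is issued for the new window.
	 */
}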
commit 28430b51e3
parent 6fd87e1d8d
@@ -60,7 +60,7 @@ typedef struct bpobj {
 	kmutex_t	bpo_lock;
 	objset_t	*bpo_os;
 	uint64_t	bpo_object;
-	int		bpo_epb;
+	uint32_t	bpo_epb;
 	uint8_t		bpo_havecomp;
 	uint8_t		bpo_havesubobj;
 	uint8_t		bpo_havefreed;
@@ -284,7 +284,17 @@ bpobj_iterate_blkptrs(bpobj_info_t *bpi, bpobj_itor_t func, void *arg,
 	dmu_buf_t *dbuf = NULL;
 	bpobj_t *bpo = bpi->bpi_bpo;
 
-	for (int64_t i = bpo->bpo_phys->bpo_num_blkptrs - 1; i >= start; i--) {
+	int64_t i = bpo->bpo_phys->bpo_num_blkptrs - 1;
+	uint64_t pe = P2ALIGN_TYPED(i, bpo->bpo_epb, uint64_t) *
+	    sizeof (blkptr_t);
+	uint64_t ps = start * sizeof (blkptr_t);
+	uint64_t pb = MAX((pe > dmu_prefetch_max) ? pe - dmu_prefetch_max : 0,
+	    ps);
+	if (pe > pb) {
+		dmu_prefetch(bpo->bpo_os, bpo->bpo_object, 0, pb, pe - pb,
+		    ZIO_PRIORITY_ASYNC_READ);
+	}
+	for (; i >= start; i--) {
 		uint64_t offset = i * sizeof (blkptr_t);
 		uint64_t blkoff = P2PHASE(i, bpo->bpo_epb);
 
@@ -292,9 +302,16 @@ bpobj_iterate_blkptrs(bpobj_info_t *bpi, bpobj_itor_t func, void *arg,
 			if (dbuf)
 				dmu_buf_rele(dbuf, FTAG);
 			err = dmu_buf_hold(bpo->bpo_os, bpo->bpo_object,
-			    offset, FTAG, &dbuf, 0);
+			    offset, FTAG, &dbuf, DMU_READ_NO_PREFETCH);
 			if (err)
 				break;
+			pe = pb;
+			pb = MAX((dbuf->db_offset > dmu_prefetch_max) ?
+			    dbuf->db_offset - dmu_prefetch_max : 0, ps);
+			if (pe > pb) {
+				dmu_prefetch(bpo->bpo_os, bpo->bpo_object, 0,
+				    pb, pe - pb, ZIO_PRIORITY_ASYNC_READ);
+			}
 		}
 
 		ASSERT3U(offset, >=, dbuf->db_offset);
@@ -466,22 +483,30 @@ bpobj_iterate_impl(bpobj_t *initial_bpo, bpobj_itor_t func, void *arg,
 			int64_t i = bpi->bpi_unprocessed_subobjs - 1;
 			uint64_t offset = i * sizeof (uint64_t);
 
-			uint64_t obj_from_sublist;
+			uint64_t subobj;
 			err = dmu_read(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
-			    offset, sizeof (uint64_t), &obj_from_sublist,
-			    DMU_READ_PREFETCH);
+			    offset, sizeof (uint64_t), &subobj,
+			    DMU_READ_NO_PREFETCH);
 			if (err)
 				break;
-			bpobj_t *sublist = kmem_alloc(sizeof (bpobj_t),
-			    KM_SLEEP);
 
-			err = bpobj_open(sublist, bpo->bpo_os,
-			    obj_from_sublist);
-			if (err)
+			bpobj_t *subbpo = kmem_alloc(sizeof (bpobj_t),
+			    KM_SLEEP);
+			err = bpobj_open(subbpo, bpo->bpo_os, subobj);
+			if (err) {
+				kmem_free(subbpo, sizeof (bpobj_t));
 				break;
+			}
+
+			if (subbpo->bpo_havesubobj &&
+			    subbpo->bpo_phys->bpo_subobjs != 0) {
+				dmu_prefetch(subbpo->bpo_os,
+				    subbpo->bpo_phys->bpo_subobjs, 0, 0, 0,
+				    ZIO_PRIORITY_ASYNC_READ);
+			}
 
-			list_insert_head(&stack, bpi_alloc(sublist, bpi, i));
-			mutex_enter(&sublist->bpo_lock);
+			list_insert_head(&stack, bpi_alloc(subbpo, bpi, i));
+			mutex_enter(&subbpo->bpo_lock);
 			bpi->bpi_unprocessed_subobjs--;
 		}
 	}
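For context on the memory-leak fix mentioned in the commit message: once the bpobj_t has been kmem_alloc()ed, a failing bpobj_open() must be matched by a kmem_free() before breaking out of the loop. A minimal sketch of the corrected shape, condensed from the hunk above (the wrapper function is hypothetical, not part of the patch):

/* Hypothetical condensation of the fixed sub-bpobj error path above. */
static int
open_subbpo_sketch(objset_t *os, uint64_t subobj)
{
	bpobj_t *subbpo = kmem_alloc(sizeof (bpobj_t), KM_SLEEP);
	int err = bpobj_open(subbpo, os, subobj);
	if (err != 0) {
		/* Without this free, the allocation leaked on error. */
		kmem_free(subbpo, sizeof (bpobj_t));
		return (err);
	}
	/* On success the caller uses subbpo and later closes it. */
	return (0);
}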