From 88904bb3e3f4a385108343aee1ac7ee0d83e25dc Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Sat, 13 Sep 2014 16:02:18 +0200 Subject: [PATCH] Illumos 5162 - zfs recv should use loaned arc buffer to avoid copy 5162 zfs recv should use loaned arc buffer to avoid copy Reviewed by: Christopher Siden Reviewed by: George Wilson Reviewed by: Bayard Bell Reviewed by: Richard Elling Approved by: Garrett D'Amore References: https://www.illumos.org/issues/5162 https://github.com/illumos/illumos-gate/commit/8a90470 Porting notes: Fix spelling error 's/arena/area/' in dmu.c. In restore_write() declare bonus and abuf at the top of the function. Ported by: Turbo Fredriksson Signed-off-by: Brian Behlendorf Closes #2696 --- module/zfs/dmu.c | 9 ++++++++- module/zfs/dmu_send.c | 47 +++++++++++++++++++++++++++---------------- 2 files changed, 38 insertions(+), 18 deletions(-) diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 81432b9bad..98defa3bbf 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -1387,7 +1387,14 @@ dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf, rw_exit(&dn->dn_struct_rwlock); DB_DNODE_EXIT(dbuf); - if (offset == db->db.db_offset && blksz == db->db.db_size) { + /* + * We can only assign if the offset is aligned, the arc buf is the + * same size as the dbuf, and the dbuf is not metadata. It + * can't be metadata because the loaned arc buf comes from the + * user-data kmem area. + */ + if (offset == db->db.db_offset && blksz == db->db.db_size && + DBUF_GET_BUFC_TYPE(db) == ARC_BUFC_DATA) { dbuf_assign_arcbuf(db, buf, tx); dbuf_rele(db, FTAG); } else { diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index feb089fd59..1f61368c5d 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -1206,11 +1206,13 @@ free_guid_map_onexit(void *arg) } static void * -restore_read(struct restorearg *ra, int len) +restore_read(struct restorearg *ra, int len, char *buf) { - void *rv; int done = 0; + if (buf == NULL) + buf = ra->buf; + /* some things will require 8-byte alignment, so everything must */ ASSERT0(len % 8); @@ -1218,7 +1220,7 @@ restore_read(struct restorearg *ra, int len) ssize_t resid; ra->err = vn_rdwr(UIO_READ, ra->vp, - (caddr_t)ra->buf + done, len - done, + buf + done, len - done, ra->voff, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid); @@ -1231,12 +1233,11 @@ restore_read(struct restorearg *ra, int len) } ASSERT3U(done, ==, len); - rv = ra->buf; if (ra->byteswap) - fletcher_4_incremental_byteswap(rv, len, &ra->cksum); + fletcher_4_incremental_byteswap(buf, len, &ra->cksum); else - fletcher_4_incremental_native(rv, len, &ra->cksum); - return (rv); + fletcher_4_incremental_native(buf, len, &ra->cksum); + return (buf); } noinline static void @@ -1367,7 +1368,7 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) object = err == 0 ? drro->drr_object : DMU_NEW_OBJECT; if (drro->drr_bonuslen) { - data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8)); + data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8), NULL); if (ra->err != 0) return (ra->err); } @@ -1473,6 +1474,8 @@ restore_write(struct restorearg *ra, objset_t *os, struct drr_write *drrw) { dmu_tx_t *tx; + dmu_buf_t *bonus; + arc_buf_t *abuf; void *data; int err; @@ -1480,19 +1483,29 @@ restore_write(struct restorearg *ra, objset_t *os, !DMU_OT_IS_VALID(drrw->drr_type)) return (SET_ERROR(EINVAL)); - data = restore_read(ra, drrw->drr_length); - if (data == NULL) - return (ra->err); - if (dmu_object_info(os, drrw->drr_object, NULL) != 0) return (SET_ERROR(EINVAL)); + if (dmu_bonus_hold(os, drrw->drr_object, FTAG, &bonus) != 0) + return (SET_ERROR(EINVAL)); + + abuf = dmu_request_arcbuf(bonus, drrw->drr_length); + + data = restore_read(ra, drrw->drr_length, abuf->b_data); + if (data == NULL) { + dmu_return_arcbuf(abuf); + dmu_buf_rele(bonus, FTAG); + return (ra->err); + } + tx = dmu_tx_create(os); dmu_tx_hold_write(tx, drrw->drr_object, drrw->drr_offset, drrw->drr_length); err = dmu_tx_assign(tx, TXG_WAIT); if (err != 0) { + dmu_return_arcbuf(abuf); + dmu_buf_rele(bonus, FTAG); dmu_tx_abort(tx); return (err); } @@ -1501,9 +1514,9 @@ restore_write(struct restorearg *ra, objset_t *os, DMU_OT_BYTESWAP(drrw->drr_type); dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length); } - dmu_write(os, drrw->drr_object, - drrw->drr_offset, drrw->drr_length, data, tx); + dmu_assign_arcbuf(bonus, drrw->drr_offset, abuf, tx); dmu_tx_commit(tx); + dmu_buf_rele(bonus, FTAG); return (0); } @@ -1585,7 +1598,7 @@ restore_write_embedded(struct restorearg *ra, objset_t *os, if (drrwnp->drr_compression >= ZIO_COMPRESS_FUNCTIONS) return (EINVAL); - data = restore_read(ra, P2ROUNDUP(drrwnp->drr_psize, 8)); + data = restore_read(ra, P2ROUNDUP(drrwnp->drr_psize, 8), NULL); if (data == NULL) return (ra->err); @@ -1620,7 +1633,7 @@ restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs) drrs->drr_length > SPA_MAXBLOCKSIZE) return (SET_ERROR(EINVAL)); - data = restore_read(ra, drrs->drr_length); + data = restore_read(ra, drrs->drr_length, NULL); if (data == NULL) return (ra->err); @@ -1761,7 +1774,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, */ pcksum = ra.cksum; while (ra.err == 0 && - NULL != (drr = restore_read(&ra, sizeof (*drr)))) { + NULL != (drr = restore_read(&ra, sizeof (*drr), NULL))) { if (issig(JUSTLOOKING) && issig(FORREAL)) { ra.err = SET_ERROR(EINTR); goto out;