Illumos 5162 - zfs recv should use loaned arc buffer to avoid copy

5162 zfs recv should use loaned arc buffer to avoid copy
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Bayard Bell <Bayard.Bell@nexenta.com>
Reviewed by: Richard Elling <richard.elling@gmail.com>
Approved by: Garrett D'Amore <garrett@damore.org>

References:
  https://www.illumos.org/issues/5162
  https://github.com/illumos/illumos-gate/commit/8a90470

Porting notes:
  Fix spelling error in a dmu.c comment (s/arena/area/).
  In restore_write(), declare bonus and abuf at the top of the function.

Ported by: Turbo Fredriksson <turbo@bayour.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #2696
This commit is contained in:
Matthew Ahrens 2014-09-13 16:02:18 +02:00 committed by Brian Behlendorf
parent 4b20a6f509
commit 88904bb3e3
2 changed files with 38 additions and 18 deletions

View File

@ -1387,7 +1387,14 @@ dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
rw_exit(&dn->dn_struct_rwlock); rw_exit(&dn->dn_struct_rwlock);
DB_DNODE_EXIT(dbuf); DB_DNODE_EXIT(dbuf);
if (offset == db->db.db_offset && blksz == db->db.db_size) { /*
* We can only assign if the offset is aligned, the arc buf is the
* same size as the dbuf, and the dbuf is not metadata. It
* can't be metadata because the loaned arc buf comes from the
* user-data kmem area.
*/
if (offset == db->db.db_offset && blksz == db->db.db_size &&
DBUF_GET_BUFC_TYPE(db) == ARC_BUFC_DATA) {
dbuf_assign_arcbuf(db, buf, tx); dbuf_assign_arcbuf(db, buf, tx);
dbuf_rele(db, FTAG); dbuf_rele(db, FTAG);
} else { } else {

View File

@ -1206,11 +1206,13 @@ free_guid_map_onexit(void *arg)
} }
static void * static void *
restore_read(struct restorearg *ra, int len) restore_read(struct restorearg *ra, int len, char *buf)
{ {
void *rv;
int done = 0; int done = 0;
if (buf == NULL)
buf = ra->buf;
/* some things will require 8-byte alignment, so everything must */ /* some things will require 8-byte alignment, so everything must */
ASSERT0(len % 8); ASSERT0(len % 8);
@ -1218,7 +1220,7 @@ restore_read(struct restorearg *ra, int len)
ssize_t resid; ssize_t resid;
ra->err = vn_rdwr(UIO_READ, ra->vp, ra->err = vn_rdwr(UIO_READ, ra->vp,
(caddr_t)ra->buf + done, len - done, buf + done, len - done,
ra->voff, UIO_SYSSPACE, FAPPEND, ra->voff, UIO_SYSSPACE, FAPPEND,
RLIM64_INFINITY, CRED(), &resid); RLIM64_INFINITY, CRED(), &resid);
@ -1231,12 +1233,11 @@ restore_read(struct restorearg *ra, int len)
} }
ASSERT3U(done, ==, len); ASSERT3U(done, ==, len);
rv = ra->buf;
if (ra->byteswap) if (ra->byteswap)
fletcher_4_incremental_byteswap(rv, len, &ra->cksum); fletcher_4_incremental_byteswap(buf, len, &ra->cksum);
else else
fletcher_4_incremental_native(rv, len, &ra->cksum); fletcher_4_incremental_native(buf, len, &ra->cksum);
return (rv); return (buf);
} }
noinline static void noinline static void
@ -1367,7 +1368,7 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
object = err == 0 ? drro->drr_object : DMU_NEW_OBJECT; object = err == 0 ? drro->drr_object : DMU_NEW_OBJECT;
if (drro->drr_bonuslen) { if (drro->drr_bonuslen) {
data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8)); data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8), NULL);
if (ra->err != 0) if (ra->err != 0)
return (ra->err); return (ra->err);
} }
@ -1473,6 +1474,8 @@ restore_write(struct restorearg *ra, objset_t *os,
struct drr_write *drrw) struct drr_write *drrw)
{ {
dmu_tx_t *tx; dmu_tx_t *tx;
dmu_buf_t *bonus;
arc_buf_t *abuf;
void *data; void *data;
int err; int err;
@ -1480,19 +1483,29 @@ restore_write(struct restorearg *ra, objset_t *os,
!DMU_OT_IS_VALID(drrw->drr_type)) !DMU_OT_IS_VALID(drrw->drr_type))
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
data = restore_read(ra, drrw->drr_length);
if (data == NULL)
return (ra->err);
if (dmu_object_info(os, drrw->drr_object, NULL) != 0) if (dmu_object_info(os, drrw->drr_object, NULL) != 0)
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
if (dmu_bonus_hold(os, drrw->drr_object, FTAG, &bonus) != 0)
return (SET_ERROR(EINVAL));
abuf = dmu_request_arcbuf(bonus, drrw->drr_length);
data = restore_read(ra, drrw->drr_length, abuf->b_data);
if (data == NULL) {
dmu_return_arcbuf(abuf);
dmu_buf_rele(bonus, FTAG);
return (ra->err);
}
tx = dmu_tx_create(os); tx = dmu_tx_create(os);
dmu_tx_hold_write(tx, drrw->drr_object, dmu_tx_hold_write(tx, drrw->drr_object,
drrw->drr_offset, drrw->drr_length); drrw->drr_offset, drrw->drr_length);
err = dmu_tx_assign(tx, TXG_WAIT); err = dmu_tx_assign(tx, TXG_WAIT);
if (err != 0) { if (err != 0) {
dmu_return_arcbuf(abuf);
dmu_buf_rele(bonus, FTAG);
dmu_tx_abort(tx); dmu_tx_abort(tx);
return (err); return (err);
} }
@ -1501,9 +1514,9 @@ restore_write(struct restorearg *ra, objset_t *os,
DMU_OT_BYTESWAP(drrw->drr_type); DMU_OT_BYTESWAP(drrw->drr_type);
dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length); dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length);
} }
dmu_write(os, drrw->drr_object, dmu_assign_arcbuf(bonus, drrw->drr_offset, abuf, tx);
drrw->drr_offset, drrw->drr_length, data, tx);
dmu_tx_commit(tx); dmu_tx_commit(tx);
dmu_buf_rele(bonus, FTAG);
return (0); return (0);
} }
@ -1585,7 +1598,7 @@ restore_write_embedded(struct restorearg *ra, objset_t *os,
if (drrwnp->drr_compression >= ZIO_COMPRESS_FUNCTIONS) if (drrwnp->drr_compression >= ZIO_COMPRESS_FUNCTIONS)
return (EINVAL); return (EINVAL);
data = restore_read(ra, P2ROUNDUP(drrwnp->drr_psize, 8)); data = restore_read(ra, P2ROUNDUP(drrwnp->drr_psize, 8), NULL);
if (data == NULL) if (data == NULL)
return (ra->err); return (ra->err);
@ -1620,7 +1633,7 @@ restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs)
drrs->drr_length > SPA_MAXBLOCKSIZE) drrs->drr_length > SPA_MAXBLOCKSIZE)
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
data = restore_read(ra, drrs->drr_length); data = restore_read(ra, drrs->drr_length, NULL);
if (data == NULL) if (data == NULL)
return (ra->err); return (ra->err);
@ -1761,7 +1774,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
*/ */
pcksum = ra.cksum; pcksum = ra.cksum;
while (ra.err == 0 && while (ra.err == 0 &&
NULL != (drr = restore_read(&ra, sizeof (*drr)))) { NULL != (drr = restore_read(&ra, sizeof (*drr), NULL))) {
if (issig(JUSTLOOKING) && issig(FORREAL)) { if (issig(JUSTLOOKING) && issig(FORREAL)) {
ra.err = SET_ERROR(EINTR); ra.err = SET_ERROR(EINTR);
goto out; goto out;