Illumos 5765 - add support for estimating send stream size with lzc_send_space when source is a bookmark

5765 add support for estimating send stream size with lzc_send_space when source is a bookmark
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Steven Hartland <killing@multiplay.co.uk>
Reviewed by: Bayard Bell <buffer.g.overflow@gmail.com>
Approved by: Albert Lee <trisk@nexenta.com>

References:
  https://www.illumos.org/issues/5765
  https://github.com/illumos/illumos-gate/commit/643da460

Porting notes:
* Unused variable 'recordsize' in dmu_send_estimate() dropped

Ported-by: DHE <git@dehacked.net>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #3397
This commit is contained in:
Max Grossman 2015-04-08 11:37:13 -07:00 committed by Brian Behlendorf
parent 19b3b1d2a2
commit 5dc8b7365f
5 changed files with 150 additions and 43 deletions

View File

@ -21,7 +21,7 @@
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved. * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/ */
@ -42,6 +42,8 @@ int dmu_send(const char *tosnap, const char *fromsnap,
int outfd, struct vnode *vp, offset_t *off); int outfd, struct vnode *vp, offset_t *off);
int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds, int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds,
uint64_t *sizep); uint64_t *sizep);
int dmu_send_estimate_from_txg(struct dsl_dataset *ds, uint64_t fromtxg,
uint64_t *sizep);
int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
boolean_t embedok, boolean_t large_block_ok, boolean_t embedok, boolean_t large_block_ok,
int outfd, struct vnode *vp, offset_t *off); int outfd, struct vnode *vp, offset_t *off);

View File

@ -201,6 +201,9 @@ dsl_dataset_phys(dsl_dataset_t *ds)
*/ */
#define MAX_TAG_PREFIX_LEN 17 #define MAX_TAG_PREFIX_LEN 17
/*
* Evaluates to nonzero when the dataset's on-disk ds_num_children field is
* nonzero, which this macro treats as the mark of a snapshot.
*/
#define dsl_dataset_is_snapshot(ds) \
(dsl_dataset_phys(ds)->ds_num_children != 0)
#define DS_UNIQUE_IS_ACCURATE(ds) \ #define DS_UNIQUE_IS_ACCURATE(ds) \
((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0) ((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)

View File

@ -20,7 +20,7 @@
*/ */
/* /*
* Copyright (c) 2013 by Delphix. All rights reserved. * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved.
*/ */
@ -485,18 +485,30 @@ lzc_send(const char *snapname, const char *from, int fd,
} }
/* /*
* If fromsnap is NULL, a full (non-incremental) stream will be estimated. * "from" can be NULL, a snapshot, or a bookmark.
*
* If from is NULL, a full (non-incremental) stream will be estimated. This
* is calculated very efficiently.
*
* If from is a snapshot, lzc_send_space uses the deadlists attached to
* each snapshot to efficiently estimate the stream size.
*
* If from is a bookmark, the indirect blocks in the destination snapshot
* are traversed, looking for blocks with a birth time since the creation TXG of
* the snapshot this bookmark was created from. This will result in
* significantly more I/O and be less efficient than a send space estimation on
* an equivalent snapshot.
*/ */
int int
lzc_send_space(const char *snapname, const char *fromsnap, uint64_t *spacep) lzc_send_space(const char *snapname, const char *from, uint64_t *spacep)
{ {
nvlist_t *args; nvlist_t *args;
nvlist_t *result; nvlist_t *result;
int err; int err;
args = fnvlist_alloc(); args = fnvlist_alloc();
if (fromsnap != NULL) if (from != NULL)
fnvlist_add_string(args, "fromsnap", fromsnap); fnvlist_add_string(args, "from", from);
err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result); err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
nvlist_free(args); nvlist_free(args);
if (err == 0) if (err == 0)

View File

@ -836,11 +836,45 @@ dmu_send(const char *tosnap, const char *fromsnap,
return (err); return (err);
} }
/*
* Turn a raw byte total into a send stream size estimate.
*
* ds is the dataset whose "recordsize" property is consulted, size is the
* byte total to adjust, and the adjusted value is stored in *sizep.
*
* Returns 0 on success. If the "recordsize" lookup fails, its error is
* returned and *sizep is left unwritten.
*/
static int
dmu_adjust_send_estimate_for_indirects(dsl_dataset_t *ds, uint64_t size,
uint64_t *sizep)
{
int err;
/*
* Assume that space (both on-disk and in-stream) is dominated by
* data. We will adjust for indirect blocks and the copies property,
* but ignore per-object space used (e.g., dnodes and DRR_OBJECT records).
*/
/*
* Subtract out approximate space used by indirect blocks.
* Assume most space is used by data blocks (non-indirect, non-dnode).
* Assume all blocks are recordsize. Assume ditto blocks and
* internal fragmentation counter out compression.
*
* Therefore, space used by indirect blocks is sizeof(blkptr_t) per
* block, which we observe in practice.
*/
uint64_t recordsize;
err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize);
if (err != 0)
return (err);
/* size / recordsize approximates the number of data blocks. */
size -= size / recordsize * sizeof (blkptr_t);
/*
* Add in the space for the record associated with each block. The
* block count here is derived from the already-reduced size.
*/
size += size / recordsize * sizeof (dmu_replay_record_t);
*sizep = size;
return (0);
}
int int
dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep) dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep)
{ {
int err; int err;
uint64_t size, recordsize; uint64_t size;
ASSERTV(dsl_pool_t *dp = ds->ds_dir->dd_pool); ASSERTV(dsl_pool_t *dp = ds->ds_dir->dd_pool);
ASSERT(dsl_pool_config_held(dp)); ASSERT(dsl_pool_config_held(dp));
@ -867,34 +901,62 @@ dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep)
return (err); return (err);
} }
/* err = dmu_adjust_send_estimate_for_indirects(ds, size, sizep);
* Assume that space (both on-disk and in-stream) is dominated by
* data. We will adjust for indirect blocks and the copies property,
* but ignore per-object space used (eg, dnodes and DRR_OBJECT records).
*/
/*
* Subtract out approximate space used by indirect blocks.
* Assume most space is used by data blocks (non-indirect, non-dnode).
* Assume all blocks are recordsize. Assume ditto blocks and
* internal fragmentation counter out compression.
*
* Therefore, space used by indirect blocks is sizeof(blkptr_t) per
* block, which we observe in practice.
*/
err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize);
if (err != 0)
return (err); return (err);
size -= size / recordsize * sizeof (blkptr_t); }
/* Add in the space for the record associated with each block. */
size += size / recordsize * sizeof (dmu_replay_record_t);
*sizep = size;
/*
* Simple callback used to traverse the blocks of a snapshot and sum their
* uncompressed size.
*/
/* ARGSUSED */
static int
dmu_calculate_send_traversal(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
{
uint64_t *spaceptr = arg;
if (bp != NULL && !BP_IS_HOLE(bp)) {
*spaceptr += BP_GET_UCSIZE(bp);
}
return (0); return (0);
} }
/*
* Given a destination snapshot and a TXG, calculate the approximate size of a
* send stream sent from that TXG. from_txg may be zero, indicating that the
* whole snapshot will be sent.
*
* The caller is expected to hold the pool configuration (asserted below).
*
* Returns 0 on success with the estimate stored in *sizep. Returns EINVAL
* if ds is not a snapshot, EXDEV if from_txg is not earlier than the
* creation TXG of ds, or any error reported by the traversal or by the
* final size adjustment.
*/
int
dmu_send_estimate_from_txg(dsl_dataset_t *ds, uint64_t from_txg,
uint64_t *sizep)
{
dsl_pool_t *dp = ds->ds_dir->dd_pool;
int err;
/* running total, accumulated by the traversal callback */
uint64_t size = 0;
ASSERT(dsl_pool_config_held(dp));
/* tosnap must be a snapshot */
if (!dsl_dataset_is_snapshot(ds))
return (SET_ERROR(EINVAL));
/* verify that from_txg is before the provided snapshot was taken */
if (from_txg >= dsl_dataset_phys(ds)->ds_creation_txg) {
return (SET_ERROR(EXDEV));
}
/*
* traverse the blocks of the snapshot with birth times after
* from_txg, summing their uncompressed size
*/
err = traverse_dataset(ds, from_txg, TRAVERSE_POST,
dmu_calculate_send_traversal, &size);
if (err)
return (err);
/* convert the raw byte total into a stream size estimate */
err = dmu_adjust_send_estimate_for_indirects(ds, size, sizep);
return (err);
}
typedef struct dmu_recv_begin_arg { typedef struct dmu_recv_begin_arg {
const char *drba_origin; const char *drba_origin;
dmu_recv_cookie_t *drba_cookie; dmu_recv_cookie_t *drba_cookie;

View File

@ -5245,7 +5245,8 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
* of bytes that will be written to the fd supplied to zfs_ioc_send_new(). * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
* *
* innvl: { * innvl: {
* (optional) "fromsnap" -> full snap name to send an incremental from * (optional) "from" -> full snap or bookmark name to send an incremental
* from
* } * }
* *
* outnvl: { * outnvl: {
@ -5256,7 +5257,6 @@ static int
zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
{ {
dsl_pool_t *dp; dsl_pool_t *dp;
dsl_dataset_t *fromsnap = NULL;
dsl_dataset_t *tosnap; dsl_dataset_t *tosnap;
int error; int error;
char *fromname; char *fromname;
@ -5272,27 +5272,55 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
return (error); return (error);
} }
error = nvlist_lookup_string(innvl, "fromsnap", &fromname); error = nvlist_lookup_string(innvl, "from", &fromname);
if (error == 0) { if (error == 0) {
if (strchr(fromname, '@') != NULL) {
/*
* If from is a snapshot, hold it and use the more
* efficient dmu_send_estimate to estimate send space
* size using deadlists.
*/
dsl_dataset_t *fromsnap;
error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap); error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
if (error != 0) { if (error != 0)
dsl_dataset_rele(tosnap, FTAG); goto out;
dsl_pool_rele(dp, FTAG); error = dmu_send_estimate(tosnap, fromsnap, &space);
return (error); dsl_dataset_rele(fromsnap, FTAG);
} else if (strchr(fromname, '#') != NULL) {
/*
* If from is a bookmark, fetch the creation TXG of the
* snapshot it was created from and use that to find
* blocks that were born after it.
*/
zfs_bookmark_phys_t frombm;
error = dsl_bookmark_lookup(dp, fromname, tosnap,
&frombm);
if (error != 0)
goto out;
error = dmu_send_estimate_from_txg(tosnap,
frombm.zbm_creation_txg, &space);
} else {
/*
* from is not properly formatted as a snapshot or
* bookmark
*/
error = SET_ERROR(EINVAL);
goto out;
} }
} else {
// If estimating the size of a full send, use dmu_send_estimate
error = dmu_send_estimate(tosnap, NULL, &space);
} }
error = dmu_send_estimate(tosnap, fromsnap, &space);
fnvlist_add_uint64(outnvl, "space", space); fnvlist_add_uint64(outnvl, "space", space);
if (fromsnap != NULL) out:
dsl_dataset_rele(fromsnap, FTAG);
dsl_dataset_rele(tosnap, FTAG); dsl_dataset_rele(tosnap, FTAG);
dsl_pool_rele(dp, FTAG); dsl_pool_rele(dp, FTAG);
return (error); return (error);
} }
static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST]; static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
static void static void