Illumos 5765 - add support for estimating send stream size with lzc_send_space when source is a bookmark
5765 add support for estimating send stream size with lzc_send_space when source is a bookmark

Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Steven Hartland <killing@multiplay.co.uk>
Reviewed by: Bayard Bell <buffer.g.overflow@gmail.com>
Approved by: Albert Lee <trisk@nexenta.com>

References:
https://www.illumos.org/issues/5765
https://github.com/illumos/illumos-gate/commit/643da460

Porting notes:
* Unused variable 'recordsize' in dmu_send_estimate() dropped

Ported-by: DHE <git@dehacked.net>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #3397
This commit is contained in:
parent
19b3b1d2a2
commit
5dc8b7365f
|
@ -21,7 +21,7 @@
|
|||
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
|
||||
*/
|
||||
|
@ -42,6 +42,8 @@ int dmu_send(const char *tosnap, const char *fromsnap,
|
|||
int outfd, struct vnode *vp, offset_t *off);
|
||||
int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds,
|
||||
uint64_t *sizep);
|
||||
int dmu_send_estimate_from_txg(struct dsl_dataset *ds, uint64_t fromtxg,
|
||||
uint64_t *sizep);
|
||||
int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
|
||||
boolean_t embedok, boolean_t large_block_ok,
|
||||
int outfd, struct vnode *vp, offset_t *off);
|
||||
|
|
|
@ -201,6 +201,9 @@ dsl_dataset_phys(dsl_dataset_t *ds)
|
|||
*/
|
||||
#define MAX_TAG_PREFIX_LEN 17
|
||||
|
||||
#define dsl_dataset_is_snapshot(ds) \
|
||||
(dsl_dataset_phys(ds)->ds_num_children != 0)
|
||||
|
||||
#define DS_UNIQUE_IS_ACCURATE(ds) \
|
||||
((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013 Steven Hartland. All rights reserved.
|
||||
*/
|
||||
|
||||
|
@ -485,18 +485,30 @@ lzc_send(const char *snapname, const char *from, int fd,
|
|||
}
|
||||
|
||||
/*
|
||||
* If fromsnap is NULL, a full (non-incremental) stream will be estimated.
|
||||
* "from" can be NULL, a snapshot, or a bookmark.
|
||||
*
|
||||
* If from is NULL, a full (non-incremental) stream will be estimated. This
|
||||
* is calculated very efficiently.
|
||||
*
|
||||
* If from is a snapshot, lzc_send_space uses the deadlists attached to
|
||||
* each snapshot to efficiently estimate the stream size.
|
||||
*
|
||||
* If from is a bookmark, the indirect blocks in the destination snapshot
|
||||
* are traversed, looking for blocks with a birth time since the creation TXG of
|
||||
* the snapshot this bookmark was created from. This will result in
|
||||
* significantly more I/O and be less efficient than a send space estimation on
|
||||
* an equivalent snapshot.
|
||||
*/
|
||||
int
|
||||
lzc_send_space(const char *snapname, const char *fromsnap, uint64_t *spacep)
|
||||
lzc_send_space(const char *snapname, const char *from, uint64_t *spacep)
|
||||
{
|
||||
nvlist_t *args;
|
||||
nvlist_t *result;
|
||||
int err;
|
||||
|
||||
args = fnvlist_alloc();
|
||||
if (fromsnap != NULL)
|
||||
fnvlist_add_string(args, "fromsnap", fromsnap);
|
||||
if (from != NULL)
|
||||
fnvlist_add_string(args, "from", from);
|
||||
err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
|
||||
nvlist_free(args);
|
||||
if (err == 0)
|
||||
|
|
|
@ -836,11 +836,45 @@ dmu_send(const char *tosnap, const char *fromsnap,
|
|||
return (err);
|
||||
}
|
||||
|
||||
static int
|
||||
dmu_adjust_send_estimate_for_indirects(dsl_dataset_t *ds, uint64_t size,
|
||||
uint64_t *sizep)
|
||||
{
|
||||
int err;
|
||||
/*
|
||||
* Assume that space (both on-disk and in-stream) is dominated by
|
||||
* data. We will adjust for indirect blocks and the copies property,
|
||||
* but ignore per-object space used (eg, dnodes and DRR_OBJECT records).
|
||||
*/
|
||||
|
||||
/*
|
||||
* Subtract out approximate space used by indirect blocks.
|
||||
* Assume most space is used by data blocks (non-indirect, non-dnode).
|
||||
* Assume all blocks are recordsize. Assume ditto blocks and
|
||||
* internal fragmentation counter out compression.
|
||||
*
|
||||
* Therefore, space used by indirect blocks is sizeof(blkptr_t) per
|
||||
* block, which we observe in practice.
|
||||
*/
|
||||
uint64_t recordsize;
|
||||
err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
size -= size / recordsize * sizeof (blkptr_t);
|
||||
|
||||
/* Add in the space for the record associated with each block. */
|
||||
size += size / recordsize * sizeof (dmu_replay_record_t);
|
||||
|
||||
*sizep = size;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep)
|
||||
{
|
||||
int err;
|
||||
uint64_t size, recordsize;
|
||||
uint64_t size;
|
||||
ASSERTV(dsl_pool_t *dp = ds->ds_dir->dd_pool);
|
||||
|
||||
ASSERT(dsl_pool_config_held(dp));
|
||||
|
@ -867,34 +901,62 @@ dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep)
|
|||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
* Assume that space (both on-disk and in-stream) is dominated by
|
||||
* data. We will adjust for indirect blocks and the copies property,
|
||||
* but ignore per-object space used (eg, dnodes and DRR_OBJECT records).
|
||||
*/
|
||||
|
||||
/*
|
||||
* Subtract out approximate space used by indirect blocks.
|
||||
* Assume most space is used by data blocks (non-indirect, non-dnode).
|
||||
* Assume all blocks are recordsize. Assume ditto blocks and
|
||||
* internal fragmentation counter out compression.
|
||||
*
|
||||
* Therefore, space used by indirect blocks is sizeof(blkptr_t) per
|
||||
* block, which we observe in practice.
|
||||
*/
|
||||
err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
size -= size / recordsize * sizeof (blkptr_t);
|
||||
|
||||
/* Add in the space for the record associated with each block. */
|
||||
size += size / recordsize * sizeof (dmu_replay_record_t);
|
||||
|
||||
*sizep = size;
|
||||
err = dmu_adjust_send_estimate_for_indirects(ds, size, sizep);
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
* Simple callback used to traverse the blocks of a snapshot and sum their
|
||||
* uncompressed size
|
||||
*/
|
||||
/* ARGSUSED */
|
||||
static int
|
||||
dmu_calculate_send_traversal(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||
const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
|
||||
{
|
||||
uint64_t *spaceptr = arg;
|
||||
if (bp != NULL && !BP_IS_HOLE(bp)) {
|
||||
*spaceptr += BP_GET_UCSIZE(bp);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Given a desination snapshot and a TXG, calculate the approximate size of a
|
||||
* send stream sent from that TXG. from_txg may be zero, indicating that the
|
||||
* whole snapshot will be sent.
|
||||
*/
|
||||
int
|
||||
dmu_send_estimate_from_txg(dsl_dataset_t *ds, uint64_t from_txg,
|
||||
uint64_t *sizep)
|
||||
{
|
||||
dsl_pool_t *dp = ds->ds_dir->dd_pool;
|
||||
int err;
|
||||
uint64_t size = 0;
|
||||
|
||||
ASSERT(dsl_pool_config_held(dp));
|
||||
|
||||
/* tosnap must be a snapshot */
|
||||
if (!dsl_dataset_is_snapshot(ds))
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
/* verify that from_txg is before the provided snapshot was taken */
|
||||
if (from_txg >= dsl_dataset_phys(ds)->ds_creation_txg) {
|
||||
return (SET_ERROR(EXDEV));
|
||||
}
|
||||
/*
|
||||
* traverse the blocks of the snapshot with birth times after
|
||||
* from_txg, summing their uncompressed size
|
||||
*/
|
||||
err = traverse_dataset(ds, from_txg, TRAVERSE_POST,
|
||||
dmu_calculate_send_traversal, &size);
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
err = dmu_adjust_send_estimate_for_indirects(ds, size, sizep);
|
||||
return (err);
|
||||
}
|
||||
|
||||
typedef struct dmu_recv_begin_arg {
|
||||
const char *drba_origin;
|
||||
dmu_recv_cookie_t *drba_cookie;
|
||||
|
|
|
@ -5245,7 +5245,8 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
|
|||
* of bytes that will be written to the fd supplied to zfs_ioc_send_new().
|
||||
*
|
||||
* innvl: {
|
||||
* (optional) "fromsnap" -> full snap name to send an incremental from
|
||||
* (optional) "from" -> full snap or bookmark name to send an incremental
|
||||
* from
|
||||
* }
|
||||
*
|
||||
* outnvl: {
|
||||
|
@ -5256,7 +5257,6 @@ static int
|
|||
zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
{
|
||||
dsl_pool_t *dp;
|
||||
dsl_dataset_t *fromsnap = NULL;
|
||||
dsl_dataset_t *tosnap;
|
||||
int error;
|
||||
char *fromname;
|
||||
|
@ -5272,27 +5272,55 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
|
|||
return (error);
|
||||
}
|
||||
|
||||
error = nvlist_lookup_string(innvl, "fromsnap", &fromname);
|
||||
error = nvlist_lookup_string(innvl, "from", &fromname);
|
||||
if (error == 0) {
|
||||
error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
|
||||
if (error != 0) {
|
||||
dsl_dataset_rele(tosnap, FTAG);
|
||||
dsl_pool_rele(dp, FTAG);
|
||||
return (error);
|
||||
if (strchr(fromname, '@') != NULL) {
|
||||
/*
|
||||
* If from is a snapshot, hold it and use the more
|
||||
* efficient dmu_send_estimate to estimate send space
|
||||
* size using deadlists.
|
||||
*/
|
||||
dsl_dataset_t *fromsnap;
|
||||
error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
|
||||
if (error != 0)
|
||||
goto out;
|
||||
error = dmu_send_estimate(tosnap, fromsnap, &space);
|
||||
dsl_dataset_rele(fromsnap, FTAG);
|
||||
} else if (strchr(fromname, '#') != NULL) {
|
||||
/*
|
||||
* If from is a bookmark, fetch the creation TXG of the
|
||||
* snapshot it was created from and use that to find
|
||||
* blocks that were born after it.
|
||||
*/
|
||||
zfs_bookmark_phys_t frombm;
|
||||
|
||||
error = dsl_bookmark_lookup(dp, fromname, tosnap,
|
||||
&frombm);
|
||||
if (error != 0)
|
||||
goto out;
|
||||
error = dmu_send_estimate_from_txg(tosnap,
|
||||
frombm.zbm_creation_txg, &space);
|
||||
} else {
|
||||
/*
|
||||
* from is not properly formatted as a snapshot or
|
||||
* bookmark
|
||||
*/
|
||||
error = SET_ERROR(EINVAL);
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
// If estimating the size of a full send, use dmu_send_estimate
|
||||
error = dmu_send_estimate(tosnap, NULL, &space);
|
||||
}
|
||||
|
||||
error = dmu_send_estimate(tosnap, fromsnap, &space);
|
||||
fnvlist_add_uint64(outnvl, "space", space);
|
||||
|
||||
if (fromsnap != NULL)
|
||||
dsl_dataset_rele(fromsnap, FTAG);
|
||||
out:
|
||||
dsl_dataset_rele(tosnap, FTAG);
|
||||
dsl_pool_rele(dp, FTAG);
|
||||
return (error);
|
||||
}
|
||||
|
||||
|
||||
static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
|
||||
|
||||
static void
|
||||
|
|
Loading…
Reference in New Issue