Illumos 5765 - add support for estimating send stream size with lzc_send_space when source is a bookmark
5765 add support for estimating send stream size with lzc_send_space when source is a bookmark
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Steven Hartland <killing@multiplay.co.uk>
Reviewed by: Bayard Bell <buffer.g.overflow@gmail.com>
Approved by: Albert Lee <trisk@nexenta.com>

References:
  https://www.illumos.org/issues/5765
  https://github.com/illumos/illumos-gate/commit/643da460

Porting notes:
* Unused variable 'recordsize' in dmu_send_estimate() dropped

Ported-by: DHE <git@dehacked.net>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #3397
This commit is contained in:
parent
19b3b1d2a2
commit
5dc8b7365f
|
@ -21,7 +21,7 @@
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
|
||||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
|
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
|
||||||
*/
|
*/
|
||||||
|
@ -42,6 +42,8 @@ int dmu_send(const char *tosnap, const char *fromsnap,
|
||||||
int outfd, struct vnode *vp, offset_t *off);
|
int outfd, struct vnode *vp, offset_t *off);
|
||||||
int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds,
|
int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds,
|
||||||
uint64_t *sizep);
|
uint64_t *sizep);
|
||||||
|
int dmu_send_estimate_from_txg(struct dsl_dataset *ds, uint64_t fromtxg,
|
||||||
|
uint64_t *sizep);
|
||||||
int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
|
int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
|
||||||
boolean_t embedok, boolean_t large_block_ok,
|
boolean_t embedok, boolean_t large_block_ok,
|
||||||
int outfd, struct vnode *vp, offset_t *off);
|
int outfd, struct vnode *vp, offset_t *off);
|
||||||
|
|
|
@ -201,6 +201,9 @@ dsl_dataset_phys(dsl_dataset_t *ds)
|
||||||
*/
|
*/
|
||||||
#define MAX_TAG_PREFIX_LEN 17
|
#define MAX_TAG_PREFIX_LEN 17
|
||||||
|
|
||||||
|
#define dsl_dataset_is_snapshot(ds) \
|
||||||
|
(dsl_dataset_phys(ds)->ds_num_children != 0)
|
||||||
|
|
||||||
#define DS_UNIQUE_IS_ACCURATE(ds) \
|
#define DS_UNIQUE_IS_ACCURATE(ds) \
|
||||||
((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)
|
((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
|
||||||
* Copyright (c) 2013 Steven Hartland. All rights reserved.
|
* Copyright (c) 2013 Steven Hartland. All rights reserved.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
@ -485,18 +485,30 @@ lzc_send(const char *snapname, const char *from, int fd,
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If fromsnap is NULL, a full (non-incremental) stream will be estimated.
|
* "from" can be NULL, a snapshot, or a bookmark.
|
||||||
|
*
|
||||||
|
* If from is NULL, a full (non-incremental) stream will be estimated. This
|
||||||
|
* is calculated very efficiently.
|
||||||
|
*
|
||||||
|
* If from is a snapshot, lzc_send_space uses the deadlists attached to
|
||||||
|
* each snapshot to efficiently estimate the stream size.
|
||||||
|
*
|
||||||
|
* If from is a bookmark, the indirect blocks in the destination snapshot
|
||||||
|
* are traversed, looking for blocks with a birth time since the creation TXG of
|
||||||
|
* the snapshot this bookmark was created from. This will result in
|
||||||
|
* significantly more I/O and be less efficient than a send space estimation on
|
||||||
|
* an equivalent snapshot.
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
lzc_send_space(const char *snapname, const char *fromsnap, uint64_t *spacep)
|
lzc_send_space(const char *snapname, const char *from, uint64_t *spacep)
|
||||||
{
|
{
|
||||||
nvlist_t *args;
|
nvlist_t *args;
|
||||||
nvlist_t *result;
|
nvlist_t *result;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
args = fnvlist_alloc();
|
args = fnvlist_alloc();
|
||||||
if (fromsnap != NULL)
|
if (from != NULL)
|
||||||
fnvlist_add_string(args, "fromsnap", fromsnap);
|
fnvlist_add_string(args, "from", from);
|
||||||
err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
|
err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
|
||||||
nvlist_free(args);
|
nvlist_free(args);
|
||||||
if (err == 0)
|
if (err == 0)
|
||||||
|
|
|
@ -836,11 +836,45 @@ dmu_send(const char *tosnap, const char *fromsnap,
|
||||||
return (err);
|
return (err);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
dmu_adjust_send_estimate_for_indirects(dsl_dataset_t *ds, uint64_t size,
|
||||||
|
uint64_t *sizep)
|
||||||
|
{
|
||||||
|
int err;
|
||||||
|
/*
|
||||||
|
* Assume that space (both on-disk and in-stream) is dominated by
|
||||||
|
* data. We will adjust for indirect blocks and the copies property,
|
||||||
|
* but ignore per-object space used (eg, dnodes and DRR_OBJECT records).
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Subtract out approximate space used by indirect blocks.
|
||||||
|
* Assume most space is used by data blocks (non-indirect, non-dnode).
|
||||||
|
* Assume all blocks are recordsize. Assume ditto blocks and
|
||||||
|
* internal fragmentation counter out compression.
|
||||||
|
*
|
||||||
|
* Therefore, space used by indirect blocks is sizeof(blkptr_t) per
|
||||||
|
* block, which we observe in practice.
|
||||||
|
*/
|
||||||
|
uint64_t recordsize;
|
||||||
|
err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize);
|
||||||
|
if (err != 0)
|
||||||
|
return (err);
|
||||||
|
size -= size / recordsize * sizeof (blkptr_t);
|
||||||
|
|
||||||
|
/* Add in the space for the record associated with each block. */
|
||||||
|
size += size / recordsize * sizeof (dmu_replay_record_t);
|
||||||
|
|
||||||
|
*sizep = size;
|
||||||
|
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep)
|
dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep)
|
||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
uint64_t size, recordsize;
|
uint64_t size;
|
||||||
ASSERTV(dsl_pool_t *dp = ds->ds_dir->dd_pool);
|
ASSERTV(dsl_pool_t *dp = ds->ds_dir->dd_pool);
|
||||||
|
|
||||||
ASSERT(dsl_pool_config_held(dp));
|
ASSERT(dsl_pool_config_held(dp));
|
||||||
|
@ -867,34 +901,62 @@ dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep)
|
||||||
return (err);
|
return (err);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
err = dmu_adjust_send_estimate_for_indirects(ds, size, sizep);
|
||||||
* Assume that space (both on-disk and in-stream) is dominated by
|
|
||||||
* data. We will adjust for indirect blocks and the copies property,
|
|
||||||
* but ignore per-object space used (eg, dnodes and DRR_OBJECT records).
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Subtract out approximate space used by indirect blocks.
|
|
||||||
* Assume most space is used by data blocks (non-indirect, non-dnode).
|
|
||||||
* Assume all blocks are recordsize. Assume ditto blocks and
|
|
||||||
* internal fragmentation counter out compression.
|
|
||||||
*
|
|
||||||
* Therefore, space used by indirect blocks is sizeof(blkptr_t) per
|
|
||||||
* block, which we observe in practice.
|
|
||||||
*/
|
|
||||||
err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize);
|
|
||||||
if (err != 0)
|
|
||||||
return (err);
|
return (err);
|
||||||
size -= size / recordsize * sizeof (blkptr_t);
|
}
|
||||||
|
|
||||||
/* Add in the space for the record associated with each block. */
|
|
||||||
size += size / recordsize * sizeof (dmu_replay_record_t);
|
|
||||||
|
|
||||||
*sizep = size;
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Simple callback used to traverse the blocks of a snapshot and sum their
|
||||||
|
* uncompressed size.
|
||||||
|
*/
|
||||||
|
/* ARGSUSED */
|
||||||
|
static int
|
||||||
|
dmu_calculate_send_traversal(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||||
|
const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
|
||||||
|
{
|
||||||
|
uint64_t *spaceptr = arg;
|
||||||
|
if (bp != NULL && !BP_IS_HOLE(bp)) {
|
||||||
|
*spaceptr += BP_GET_UCSIZE(bp);
|
||||||
|
}
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Given a destination snapshot and a TXG, calculate the approximate size of a
|
||||||
|
* send stream sent from that TXG. from_txg may be zero, indicating that the
|
||||||
|
* whole snapshot will be sent.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
dmu_send_estimate_from_txg(dsl_dataset_t *ds, uint64_t from_txg,
|
||||||
|
uint64_t *sizep)
|
||||||
|
{
|
||||||
|
dsl_pool_t *dp = ds->ds_dir->dd_pool;
|
||||||
|
int err;
|
||||||
|
uint64_t size = 0;
|
||||||
|
|
||||||
|
ASSERT(dsl_pool_config_held(dp));
|
||||||
|
|
||||||
|
/* tosnap must be a snapshot */
|
||||||
|
if (!dsl_dataset_is_snapshot(ds))
|
||||||
|
return (SET_ERROR(EINVAL));
|
||||||
|
|
||||||
|
/* verify that from_txg is before the provided snapshot was taken */
|
||||||
|
if (from_txg >= dsl_dataset_phys(ds)->ds_creation_txg) {
|
||||||
|
return (SET_ERROR(EXDEV));
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* traverse the blocks of the snapshot with birth times after
|
||||||
|
* from_txg, summing their uncompressed size
|
||||||
|
*/
|
||||||
|
err = traverse_dataset(ds, from_txg, TRAVERSE_POST,
|
||||||
|
dmu_calculate_send_traversal, &size);
|
||||||
|
if (err)
|
||||||
|
return (err);
|
||||||
|
|
||||||
|
err = dmu_adjust_send_estimate_for_indirects(ds, size, sizep);
|
||||||
|
return (err);
|
||||||
|
}
|
||||||
|
|
||||||
typedef struct dmu_recv_begin_arg {
|
typedef struct dmu_recv_begin_arg {
|
||||||
const char *drba_origin;
|
const char *drba_origin;
|
||||||
dmu_recv_cookie_t *drba_cookie;
|
dmu_recv_cookie_t *drba_cookie;
|
||||||
|
|
|
@ -5245,7 +5245,8 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
|
||||||
* of bytes that will be written to the fd supplied to zfs_ioc_send_new().
|
* of bytes that will be written to the fd supplied to zfs_ioc_send_new().
|
||||||
*
|
*
|
||||||
* innvl: {
|
* innvl: {
|
||||||
* (optional) "fromsnap" -> full snap name to send an incremental from
|
* (optional) "from" -> full snap or bookmark name to send an incremental
|
||||||
|
* from
|
||||||
* }
|
* }
|
||||||
*
|
*
|
||||||
* outnvl: {
|
* outnvl: {
|
||||||
|
@ -5256,7 +5257,6 @@ static int
|
||||||
zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
|
zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
|
||||||
{
|
{
|
||||||
dsl_pool_t *dp;
|
dsl_pool_t *dp;
|
||||||
dsl_dataset_t *fromsnap = NULL;
|
|
||||||
dsl_dataset_t *tosnap;
|
dsl_dataset_t *tosnap;
|
||||||
int error;
|
int error;
|
||||||
char *fromname;
|
char *fromname;
|
||||||
|
@ -5272,27 +5272,55 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
|
|
||||||
error = nvlist_lookup_string(innvl, "fromsnap", &fromname);
|
error = nvlist_lookup_string(innvl, "from", &fromname);
|
||||||
if (error == 0) {
|
if (error == 0) {
|
||||||
|
if (strchr(fromname, '@') != NULL) {
|
||||||
|
/*
|
||||||
|
* If from is a snapshot, hold it and use the more
|
||||||
|
* efficient dmu_send_estimate to estimate send space
|
||||||
|
* size using deadlists.
|
||||||
|
*/
|
||||||
|
dsl_dataset_t *fromsnap;
|
||||||
error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
|
error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
|
||||||
if (error != 0) {
|
if (error != 0)
|
||||||
dsl_dataset_rele(tosnap, FTAG);
|
goto out;
|
||||||
dsl_pool_rele(dp, FTAG);
|
error = dmu_send_estimate(tosnap, fromsnap, &space);
|
||||||
return (error);
|
dsl_dataset_rele(fromsnap, FTAG);
|
||||||
|
} else if (strchr(fromname, '#') != NULL) {
|
||||||
|
/*
|
||||||
|
* If from is a bookmark, fetch the creation TXG of the
|
||||||
|
* snapshot it was created from and use that to find
|
||||||
|
* blocks that were born after it.
|
||||||
|
*/
|
||||||
|
zfs_bookmark_phys_t frombm;
|
||||||
|
|
||||||
|
error = dsl_bookmark_lookup(dp, fromname, tosnap,
|
||||||
|
&frombm);
|
||||||
|
if (error != 0)
|
||||||
|
goto out;
|
||||||
|
error = dmu_send_estimate_from_txg(tosnap,
|
||||||
|
frombm.zbm_creation_txg, &space);
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* from is not properly formatted as a snapshot or
|
||||||
|
* bookmark
|
||||||
|
*/
|
||||||
|
error = SET_ERROR(EINVAL);
|
||||||
|
goto out;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
// If estimating the size of a full send, use dmu_send_estimate
|
||||||
|
error = dmu_send_estimate(tosnap, NULL, &space);
|
||||||
}
|
}
|
||||||
|
|
||||||
error = dmu_send_estimate(tosnap, fromsnap, &space);
|
|
||||||
fnvlist_add_uint64(outnvl, "space", space);
|
fnvlist_add_uint64(outnvl, "space", space);
|
||||||
|
|
||||||
if (fromsnap != NULL)
|
out:
|
||||||
dsl_dataset_rele(fromsnap, FTAG);
|
|
||||||
dsl_dataset_rele(tosnap, FTAG);
|
dsl_dataset_rele(tosnap, FTAG);
|
||||||
dsl_pool_rele(dp, FTAG);
|
dsl_pool_rele(dp, FTAG);
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
|
static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
Loading…
Reference in New Issue