3740 Poor ZFS send / receive performance due to snapshot
     hold / release processing
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Approved by: Christopher Siden <christopher.siden@delphix.com>

References:
  https://www.illumos.org/issues/3740
  illumos/illumos-gate@a7a845e4bf

Ported-by: Richard Yao <ryao@gentoo.org>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #1775

Porting notes:

1. 13fe019870 introduced a merge conflict
   in dsl_dataset_user_release_tmp where some variables were moved
   outside of the preprocessor directive.

2. dea9dfefdd747534b3846845629d2200f0616dad made the previous merge
   conflict worse by switching KM_SLEEP to KM_PUSHPAGE. This is notable
   because this commit refactors the code, adding a new KM_SLEEP
   allocation. It is not clear to me whether this should be converted
   to KM_PUSHPAGE.

3. We had a merge conflict in libzfs_sendrecv.c because of copyright
   notices.

4. Several small C99 compatibility fixed were made.
This commit is contained in:
Steven Hartland 2013-05-25 02:06:23 +00:00 committed by Brian Behlendorf
parent 7bc7f25040
commit 95fd54a1c5
13 changed files with 672 additions and 505 deletions

View File

@ -24,6 +24,7 @@
* Copyright 2012 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
#include <assert.h>
@ -5193,8 +5194,7 @@ zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding)
continue;
}
if (holding) {
if (zfs_hold(zhp, delim+1, tag, recursive,
B_FALSE, -1) != 0)
if (zfs_hold(zhp, delim+1, tag, recursive, -1) != 0)
++errors;
} else {
if (zfs_release(zhp, delim+1, tag, recursive) != 0)

View File

@ -21,6 +21,7 @@
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
/*
@ -152,7 +153,7 @@ import_pool(const char *target, boolean_t readonly)
g_importargs.poolname = g_pool;
pools = zpool_search_import(g_zfs, &g_importargs);
if (pools == NULL || nvlist_next_nvpair(pools, NULL) == NULL) {
if (nvlist_empty(pools)) {
if (!g_importargs.can_be_active) {
g_importargs.can_be_active = B_TRUE;
if (zpool_search_import(g_zfs, &g_importargs) != NULL ||

View File

@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
/*
@ -4830,7 +4831,7 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)
error = user_release_one(fullname, tag);
if (error)
fatal(0, "user_release_one(%s)", fullname, tag);
fatal(0, "user_release_one(%s, %s) = %d", fullname, tag, error);
VERIFY3U(dmu_objset_hold(fullname, FTAG, &origin), ==, ENOENT);

View File

@ -24,6 +24,7 @@
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
#ifndef _LIBZFS_H
@ -614,7 +615,8 @@ extern int zfs_send(zfs_handle_t *, const char *, const char *,
extern int zfs_promote(zfs_handle_t *);
extern int zfs_hold(zfs_handle_t *, const char *, const char *,
boolean_t, boolean_t, int);
boolean_t, int);
extern int zfs_hold_nvl(zfs_handle_t *, int, nvlist_t *);
extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t);
extern int zfs_get_holds(zfs_handle_t *, nvlist_t **);
extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *);

View File

@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
#ifndef _SYS_DSL_DATASET_H
@ -187,8 +188,6 @@ int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj,
void dsl_dataset_disown(dsl_dataset_t *ds, void *tag);
void dsl_dataset_name(dsl_dataset_t *ds, char *name);
boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag);
void dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
minor_t minor);
uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *);
uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,

View File

@ -23,6 +23,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
#ifndef _SYS_DSL_USERHOLD_H
@ -43,8 +44,7 @@ int dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor,
nvlist_t *errlist);
int dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist);
int dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl);
void dsl_dataset_user_release_tmp(struct dsl_pool *dp, uint64_t dsobj,
const char *htag);
void dsl_dataset_user_release_tmp(struct dsl_pool *dp, nvlist_t *holds);
int dsl_dataset_user_hold_check_one(struct dsl_dataset *ds, const char *htag,
boolean_t temphold, struct dmu_tx *tx);
void dsl_dataset_user_hold_sync_one(struct dsl_dataset *ds, const char *htag,

View File

@ -26,6 +26,7 @@
* Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
* Copyright 2012 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013 Martin Matuska. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
#include <ctype.h>
@ -3153,18 +3154,14 @@ static int
zfs_check_snap_cb(zfs_handle_t *zhp, void *arg)
{
struct destroydata *dd = arg;
zfs_handle_t *szhp;
char name[ZFS_MAXNAMELEN];
int rv = 0;
(void) snprintf(name, sizeof (name),
"%s@%s", zhp->zfs_name, dd->snapname);
szhp = make_dataset_handle(zhp->zfs_hdl, name);
if (szhp) {
if (lzc_exists(name))
verify(nvlist_add_boolean(dd->nvl, name) == 0);
zfs_close(szhp);
}
if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
(void) zvol_remove_link(zhp->zfs_hdl, name);
@ -3193,7 +3190,7 @@ zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname, boolean_t defer)
verify(nvlist_alloc(&dd.nvl, NV_UNIQUE_NAME, 0) == 0);
(void) zfs_check_snap_cb(zfs_handle_dup(zhp), &dd);
if (nvlist_next_nvpair(dd.nvl, NULL) == NULL) {
if (nvlist_empty(dd.nvl)) {
ret = zfs_standard_error_fmt(zhp->zfs_hdl, ENOENT,
dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"),
zhp->zfs_name, snapname);
@ -3219,7 +3216,7 @@ zfs_destroy_snaps_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, boolean_t defer)
if (ret == 0)
return (0);
if (nvlist_next_nvpair(errlist, NULL) == NULL) {
if (nvlist_empty(errlist)) {
char errbuf[1024];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot destroy snapshots"));
@ -4421,18 +4418,14 @@ static int
zfs_hold_one(zfs_handle_t *zhp, void *arg)
{
struct holdarg *ha = arg;
zfs_handle_t *szhp;
char name[ZFS_MAXNAMELEN];
int rv = 0;
(void) snprintf(name, sizeof (name),
"%s@%s", zhp->zfs_name, ha->snapname);
szhp = make_dataset_handle(zhp->zfs_hdl, name);
if (szhp) {
if (lzc_exists(name))
fnvlist_add_string(ha->nvl, name, ha->tag);
zfs_close(szhp);
}
if (ha->recursive)
rv = zfs_iter_filesystems(zhp, zfs_hold_one, ha);
@ -4442,14 +4435,10 @@ zfs_hold_one(zfs_handle_t *zhp, void *arg)
int
zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
boolean_t recursive, boolean_t enoent_ok, int cleanup_fd)
boolean_t recursive, int cleanup_fd)
{
int ret;
struct holdarg ha;
nvlist_t *errors;
libzfs_handle_t *hdl = zhp->zfs_hdl;
char errbuf[1024];
nvpair_t *elem;
ha.nvl = fnvlist_alloc();
ha.snapname = snapname;
@ -4457,26 +4446,44 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
ha.recursive = recursive;
(void) zfs_hold_one(zfs_handle_dup(zhp), &ha);
if (nvlist_next_nvpair(ha.nvl, NULL) == NULL) {
if (nvlist_empty(ha.nvl)) {
char errbuf[1024];
fnvlist_free(ha.nvl);
ret = ENOENT;
if (!enoent_ok) {
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN,
"cannot hold snapshot '%s@%s'"),
zhp->zfs_name, snapname);
(void) zfs_standard_error(hdl, ret, errbuf);
}
(void) zfs_standard_error(zhp->zfs_hdl, ret, errbuf);
return (ret);
}
ret = lzc_hold(ha.nvl, cleanup_fd, &errors);
ret = zfs_hold_nvl(zhp, cleanup_fd, ha.nvl);
fnvlist_free(ha.nvl);
if (ret == 0)
return (0);
return (ret);
}
if (nvlist_next_nvpair(errors, NULL) == NULL) {
int
zfs_hold_nvl(zfs_handle_t *zhp, int cleanup_fd, nvlist_t *holds)
{
int ret;
nvlist_t *errors;
libzfs_handle_t *hdl = zhp->zfs_hdl;
char errbuf[1024];
nvpair_t *elem;
errors = NULL;
ret = lzc_hold(holds, cleanup_fd, &errors);
if (ret == 0) {
/* There may be errors even in the success case. */
fnvlist_free(errors);
return (0);
}
if (nvlist_empty(errors)) {
/* no hold-specific errors */
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot hold"));
@ -4516,10 +4523,6 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
case EEXIST:
(void) zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf);
break;
case ENOENT:
if (enoent_ok)
return (ENOENT);
/* FALLTHROUGH */
default:
(void) zfs_standard_error(hdl,
fnvpair_value_int32(elem), errbuf);
@ -4530,30 +4533,21 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
return (ret);
}
struct releasearg {
nvlist_t *nvl;
const char *snapname;
const char *tag;
boolean_t recursive;
};
static int
zfs_release_one(zfs_handle_t *zhp, void *arg)
{
struct holdarg *ha = arg;
zfs_handle_t *szhp;
char name[ZFS_MAXNAMELEN];
int rv = 0;
(void) snprintf(name, sizeof (name),
"%s@%s", zhp->zfs_name, ha->snapname);
szhp = make_dataset_handle(zhp->zfs_hdl, name);
if (szhp) {
if (lzc_exists(name)) {
nvlist_t *holds = fnvlist_alloc();
fnvlist_add_boolean(holds, ha->tag);
fnvlist_add_nvlist(ha->nvl, name, holds);
zfs_close(szhp);
fnvlist_free(holds);
}
if (ha->recursive)
@ -4568,7 +4562,7 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag,
{
int ret;
struct holdarg ha;
nvlist_t *errors;
nvlist_t *errors = NULL;
nvpair_t *elem;
libzfs_handle_t *hdl = zhp->zfs_hdl;
char errbuf[1024];
@ -4579,7 +4573,7 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag,
ha.recursive = recursive;
(void) zfs_release_one(zfs_handle_dup(zhp), &ha);
if (nvlist_next_nvpair(ha.nvl, NULL) == NULL) {
if (nvlist_empty(ha.nvl)) {
fnvlist_free(ha.nvl);
ret = ENOENT;
(void) snprintf(errbuf, sizeof (errbuf),
@ -4593,10 +4587,13 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag,
ret = lzc_release(ha.nvl, &errors);
fnvlist_free(ha.nvl);
if (ret == 0)
if (ret == 0) {
/* There may be errors even in the success case. */
fnvlist_free(errors);
return (0);
}
if (nvlist_next_nvpair(errors, NULL) == NULL) {
if (nvlist_empty(errors)) {
/* no hold-specific errors */
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot release"));

View File

@ -22,9 +22,10 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
* All rights reserved
* Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
#include <assert.h>
@ -799,6 +800,7 @@ typedef struct send_dump_data {
int outfd;
boolean_t err;
nvlist_t *fss;
nvlist_t *snapholds;
avl_tree_t *fsavl;
snapfilter_cb_t *filter_cb;
void *filter_cb_arg;
@ -948,41 +950,19 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
return (0);
}
static int
hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
static void
gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
{
zfs_handle_t *pzhp;
int error = 0;
char *thissnap;
assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
if (sdd->dryrun)
return (0);
/*
* zfs_send() only opens a cleanup_fd for sends that need it,
* zfs_send() only sets snapholds for sends that need them,
* e.g. replication and doall.
*/
if (sdd->cleanup_fd == -1)
return (0);
if (sdd->snapholds == NULL)
return;
thissnap = strchr(zhp->zfs_name, '@') + 1;
*(thissnap - 1) = '\0';
pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET);
*(thissnap - 1) = '@';
/*
* It's OK if the parent no longer exists. The send code will
* handle that error.
*/
if (pzhp) {
error = zfs_hold(pzhp, thissnap, sdd->holdtag,
B_FALSE, B_TRUE, sdd->cleanup_fd);
zfs_close(pzhp);
}
return (error);
fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
}
static void *
@ -1038,28 +1018,23 @@ dump_snapshot(zfs_handle_t *zhp, void *arg)
send_dump_data_t *sdd = arg;
progress_arg_t pa = { 0 };
pthread_t tid;
char *thissnap;
int err;
boolean_t isfromsnap, istosnap, fromorigin;
boolean_t exclude = B_FALSE;
err = 0;
thissnap = strchr(zhp->zfs_name, '@') + 1;
isfromsnap = (sdd->fromsnap != NULL &&
strcmp(sdd->fromsnap, thissnap) == 0);
if (!sdd->seenfrom && isfromsnap) {
err = hold_for_send(zhp, sdd);
if (err == 0) {
gather_holds(zhp, sdd);
sdd->seenfrom = B_TRUE;
(void) strcpy(sdd->prevsnap, thissnap);
sdd->prevsnap_obj = zfs_prop_get_int(zhp,
ZFS_PROP_OBJSETID);
} else if (err == ENOENT) {
err = 0;
}
sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
zfs_close(zhp);
return (err);
return (0);
}
if (sdd->seento || !sdd->seenfrom) {
@ -1110,14 +1085,7 @@ dump_snapshot(zfs_handle_t *zhp, void *arg)
return (0);
}
err = hold_for_send(zhp, sdd);
if (err) {
if (err == ENOENT)
err = 0;
zfs_close(zhp);
return (err);
}
gather_holds(zhp, sdd);
fromorigin = sdd->prevsnap[0] == '\0' &&
(sdd->fromorigin || sdd->replicate);
@ -1385,7 +1353,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
avl_tree_t *fsavl = NULL;
static uint64_t holdseq;
int spa_version;
pthread_t tid;
pthread_t tid = 0;
int pipefd[2];
dedup_arg_t dda = { 0 };
int featureflags = 0;
@ -1458,12 +1426,9 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
*debugnvp = hdrnv;
else
nvlist_free(hdrnv);
if (err) {
fsavl_destroy(fsavl);
nvlist_free(fss);
if (err)
goto stderr_out;
}
}
if (!flags->dryrun) {
/* write first begin record */
@ -1486,8 +1451,6 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
}
free(packbuf);
if (err == -1) {
fsavl_destroy(fsavl);
nvlist_free(fss);
err = errno;
goto stderr_out;
}
@ -1498,8 +1461,6 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
drr.drr_u.drr_end.drr_checksum = zc;
err = write(outfd, &drr, sizeof (drr));
if (err == -1) {
fsavl_destroy(fsavl);
nvlist_free(fss);
err = errno;
goto stderr_out;
}
@ -1511,7 +1472,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
/* dump each stream */
sdd.fromsnap = fromsnap;
sdd.tosnap = tosnap;
if (flags->dedup)
if (tid != 0)
sdd.outfd = pipefd[0];
else
sdd.outfd = outfd;
@ -1548,19 +1509,24 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
err = errno;
goto stderr_out;
}
sdd.snapholds = fnvlist_alloc();
} else {
sdd.cleanup_fd = -1;
sdd.snapholds = NULL;
}
if (flags->verbose) {
if (flags->verbose || sdd.snapholds != NULL) {
/*
* Do a verbose no-op dry run to get all the verbose output
* before generating any data. Then do a non-verbose real
* run to generate the streams.
* or to gather snapshot hold's before generating any data,
* then do a non-verbose real run to generate the streams.
*/
sdd.dryrun = B_TRUE;
err = dump_filesystems(zhp, &sdd);
sdd.dryrun = flags->dryrun;
sdd.verbose = B_FALSE;
if (err != 0)
goto stderr_out;
if (flags->verbose) {
if (flags->parsable) {
(void) fprintf(stderr, "size\t%llu\n",
(longlong_t)sdd.size);
@ -1571,13 +1537,43 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
"total estimated size is %s\n"), buf);
}
}
/* Ensure no snaps found is treated as an error. */
if (!sdd.seento) {
err = ENOENT;
goto err_out;
}
/* Skip the second run if dryrun was requested. */
if (flags->dryrun)
goto err_out;
if (sdd.snapholds != NULL) {
err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
if (err != 0)
goto stderr_out;
fnvlist_free(sdd.snapholds);
sdd.snapholds = NULL;
}
sdd.dryrun = B_FALSE;
sdd.verbose = B_FALSE;
}
err = dump_filesystems(zhp, &sdd);
fsavl_destroy(fsavl);
nvlist_free(fss);
if (flags->dedup) {
(void) close(pipefd[0]);
/* Ensure no snaps found is treated as an error. */
if (err == 0 && !sdd.seento)
err = ENOENT;
if (tid != 0) {
if (err != 0)
(void) pthread_cancel(tid);
(void) pthread_join(tid, NULL);
(void) close(pipefd[0]);
}
if (sdd.cleanup_fd != -1) {
@ -1605,9 +1601,13 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
stderr_out:
err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
err_out:
fsavl_destroy(fsavl);
nvlist_free(fss);
fnvlist_free(sdd.snapholds);
if (sdd.cleanup_fd != -1)
VERIFY(0 == close(sdd.cleanup_fd));
if (flags->dedup) {
if (tid != 0) {
(void) pthread_cancel(tid);
(void) pthread_join(tid, NULL);
(void) close(pipefd[0]);

View File

@ -21,6 +21,7 @@
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
/*
@ -254,8 +255,11 @@ lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
* marked for deferred destruction, and will be destroyed when the last hold
* or clone is removed/destroyed.
*
* The return value will be ENOENT if none of the snapshots existed.
*
* The return value will be 0 if all snapshots were destroyed (or marked for
* later destruction if 'defer' is set) or didn't exist to begin with.
* later destruction if 'defer' is set) or didn't exist to begin with and
* at least one snapshot was destroyed.
*
* Otherwise the return value will be the errno of a (unspecified) snapshot
* that failed, no snapshots will be destroyed, and the errlist will have an
@ -286,7 +290,6 @@ lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
nvlist_free(args);
return (error);
}
int
@ -346,11 +349,22 @@ lzc_exists(const char *dataset)
* uncleanly, the holds will be released when the pool is next opened
* or imported.
*
* The return value will be 0 if all holds were created. Otherwise the return
* value will be the errno of a (unspecified) hold that failed, no holds will
* be created, and the errlist will have an entry for each hold that
* failed (name = snapshot). The value in the errlist will be the error
* code (int32).
* Holds for snapshots which don't exist will be skipped and have an entry
* added to errlist, but will not cause an overall failure, except in the
* case that all holds where skipped.
*
* The return value will be ENOENT if none of the snapshots for the requested
* holds existed.
*
* The return value will be 0 if the nvl holds was empty or all holds, for
* snapshots that existed, were succesfully created and at least one hold
* was created.
*
* Otherwise the return value will be the errno of a (unspecified) hold that
* failed and no holds will be created.
*
* In all cases the errlist will have an entry for each hold that failed
* (name = snapshot), with its value being the error code (int32).
*/
int
lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
@ -387,11 +401,20 @@ lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
* The snapshots must all be in the same pool.
* The value is a nvlist whose keys are the holds to remove.
*
* The return value will be 0 if all holds were removed.
* Otherwise the return value will be the errno of a (unspecified) release
* that failed, no holds will be released, and the errlist will have an
* entry for each snapshot that has failed releases (name = snapshot).
* The value in the errlist will be the error code (int32) of a failed release.
* Holds which failed to release because they didn't exist will have an entry
* added to errlist, but will not cause an overall failure, except in the
* case that all releases where skipped.
*
* The return value will be ENOENT if none of the specified holds existed.
*
* The return value will be 0 if the nvl holds was empty or all holds that
* existed, were successfully removed and at least one hold was removed.
*
* Otherwise the return value will be the errno of a (unspecified) hold that
* failed to release and no holds will be released.
*
* In all cases the errlist will have an entry for each hold that failed to
* to release.
*/
int
lzc_release(nvlist_t *holds, nvlist_t **errlist)

View File

@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
#include <sys/zfs_context.h>
@ -127,6 +128,10 @@ dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx)
pair = nvlist_next_nvpair(dsda->dsda_errlist, NULL);
if (pair != NULL)
return (fnvpair_value_int32(pair));
if (nvlist_empty(dsda->dsda_successful_snaps))
return (SET_ERROR(ENOENT));
return (0);
}

View File

@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
#include <sys/dsl_pool.h>
@ -840,23 +841,34 @@ dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp)
zap_cursor_t zc;
objset_t *mos = dp->dp_meta_objset;
uint64_t zapobj = dp->dp_tmp_userrefs_obj;
nvlist_t *holds;
if (zapobj == 0)
return;
ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
holds = fnvlist_alloc();
for (zap_cursor_init(&zc, mos, zapobj);
zap_cursor_retrieve(&zc, &za) == 0;
zap_cursor_advance(&zc)) {
char *htag;
uint64_t dsobj;
nvlist_t *tags;
htag = strchr(za.za_name, '-');
*htag = '\0';
++htag;
dsobj = strtonum(za.za_name, NULL);
dsl_dataset_user_release_tmp(dp, dsobj, htag);
if (nvlist_lookup_nvlist(holds, za.za_name, &tags) != 0) {
tags = fnvlist_alloc();
fnvlist_add_boolean(tags, htag);
fnvlist_add_nvlist(holds, za.za_name, tags);
fnvlist_free(tags);
} else {
fnvlist_add_boolean(tags, htag);
}
}
dsl_dataset_user_release_tmp(dp, holds);
fnvlist_free(holds);
zap_cursor_fini(&zc);
}

View File

@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
#include <sys/zfs_context.h>
@ -37,6 +38,7 @@
typedef struct dsl_dataset_user_hold_arg {
nvlist_t *dduha_holds;
nvlist_t *dduha_chkholds;
nvlist_t *dduha_errlist;
minor_t dduha_minor;
} dsl_dataset_user_hold_arg_t;
@ -53,17 +55,18 @@ dsl_dataset_user_hold_check_one(dsl_dataset_t *ds, const char *htag,
objset_t *mos = dp->dp_meta_objset;
int error = 0;
ASSERT(dsl_pool_config_held(dp));
if (strlen(htag) > MAXNAMELEN)
return (E2BIG);
return (SET_ERROR(E2BIG));
/* Tempholds have a more restricted length */
if (temphold && strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
return (E2BIG);
return (SET_ERROR(E2BIG));
/* tags must be unique (if ds already exists) */
if (ds != NULL) {
mutex_enter(&ds->ds_lock);
if (ds->ds_phys->ds_userrefs_obj != 0) {
if (ds != NULL && ds->ds_phys->ds_userrefs_obj != 0) {
uint64_t value;
error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
htag, 8, 1, &value);
if (error == 0)
@ -71,8 +74,6 @@ dsl_dataset_user_hold_check_one(dsl_dataset_t *ds, const char *htag,
else if (error == ENOENT)
error = 0;
}
mutex_exit(&ds->ds_lock);
}
return (error);
}
@ -83,51 +84,67 @@ dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx)
dsl_dataset_user_hold_arg_t *dduha = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
nvpair_t *pair;
int rv = 0;
if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS)
return (SET_ERROR(ENOTSUP));
for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
int error = 0;
if (!dmu_tx_is_syncing(tx))
return (0);
for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL);
pair != NULL; pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
dsl_dataset_t *ds;
char *htag;
int error = 0;
char *htag, *name;
/* must be a snapshot */
if (strchr(nvpair_name(pair), '@') == NULL)
name = nvpair_name(pair);
if (strchr(name, '@') == NULL)
error = SET_ERROR(EINVAL);
if (error == 0)
error = nvpair_value_string(pair, &htag);
if (error == 0) {
error = dsl_dataset_hold(dp,
nvpair_name(pair), FTAG, &ds);
}
if (error == 0)
error = dsl_dataset_hold(dp, name, FTAG, &ds);
if (error == 0) {
error = dsl_dataset_user_hold_check_one(ds, htag,
dduha->dduha_minor != 0, tx);
dsl_dataset_rele(ds, FTAG);
}
if (error != 0) {
rv = error;
fnvlist_add_int32(dduha->dduha_errlist,
nvpair_name(pair), error);
if (error == 0) {
fnvlist_add_string(dduha->dduha_chkholds, name, htag);
} else {
/*
* We register ENOENT errors so they can be correctly
* reported if needed, such as when all holds fail.
*/
fnvlist_add_int32(dduha->dduha_errlist, name, error);
if (error != ENOENT)
return (error);
}
}
return (rv);
}
void
dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
minor_t minor, uint64_t now, dmu_tx_t *tx)
/* Return ENOENT if no holds would be created. */
if (nvlist_empty(dduha->dduha_chkholds))
return (SET_ERROR(ENOENT));
return (0);
}
static void
dsl_dataset_user_hold_sync_one_impl(nvlist_t *tmpholds, dsl_dataset_t *ds,
const char *htag, minor_t minor, uint64_t now, dmu_tx_t *tx)
{
dsl_pool_t *dp = ds->ds_dir->dd_pool;
objset_t *mos = dp->dp_meta_objset;
uint64_t zapobj;
mutex_enter(&ds->ds_lock);
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
if (ds->ds_phys->ds_userrefs_obj == 0) {
/*
* This is the first user hold for this dataset. Create
@ -140,14 +157,26 @@ dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
zapobj = ds->ds_phys->ds_userrefs_obj;
}
ds->ds_userrefs++;
mutex_exit(&ds->ds_lock);
VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx));
if (minor != 0) {
char name[MAXNAMELEN];
nvlist_t *tags;
VERIFY0(dsl_pool_user_hold(dp, ds->ds_object,
htag, now, tx));
dsl_register_onexit_hold_cleanup(ds, htag, minor);
(void) snprintf(name, sizeof (name), "%llx",
(u_longlong_t)ds->ds_object);
if (nvlist_lookup_nvlist(tmpholds, name, &tags) != 0) {
tags = fnvlist_alloc();
fnvlist_add_boolean(tags, htag);
fnvlist_add_nvlist(tmpholds, name, tags);
fnvlist_free(tags);
} else {
fnvlist_add_boolean(tags, htag);
}
}
spa_history_log_internal_ds(ds, "hold", tx,
@ -155,306 +184,10 @@ dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
htag, minor != 0, ds->ds_userrefs);
}
static void
dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
{
dsl_dataset_user_hold_arg_t *dduha = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
nvpair_t *pair;
uint64_t now = gethrestime_sec();
for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
dsl_dataset_t *ds;
VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
dsl_dataset_user_hold_sync_one(ds, fnvpair_value_string(pair),
dduha->dduha_minor, now, tx);
dsl_dataset_rele(ds, FTAG);
}
}
/*
* holds is nvl of snapname -> holdname
* errlist will be filled in with snapname -> error
* if cleanup_minor is not 0, the holds will be temporary, cleaned up
* when the process exits.
*
* if any fails, all will fail.
*/
int
dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
{
dsl_dataset_user_hold_arg_t dduha;
nvpair_t *pair;
pair = nvlist_next_nvpair(holds, NULL);
if (pair == NULL)
return (0);
dduha.dduha_holds = holds;
dduha.dduha_errlist = errlist;
dduha.dduha_minor = cleanup_minor;
return (dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds)));
}
typedef struct dsl_dataset_user_release_arg {
nvlist_t *ddura_holds;
nvlist_t *ddura_todelete;
nvlist_t *ddura_errlist;
} dsl_dataset_user_release_arg_t;
static int
dsl_dataset_user_release_check_one(dsl_dataset_t *ds,
nvlist_t *holds, boolean_t *todelete)
{
uint64_t zapobj;
nvpair_t *pair;
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
int error;
int numholds = 0;
*todelete = B_FALSE;
if (!dsl_dataset_is_snapshot(ds))
return (SET_ERROR(EINVAL));
zapobj = ds->ds_phys->ds_userrefs_obj;
if (zapobj == 0)
return (SET_ERROR(ESRCH));
for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
pair = nvlist_next_nvpair(holds, pair)) {
/* Make sure the hold exists */
uint64_t tmp;
error = zap_lookup(mos, zapobj, nvpair_name(pair), 8, 1, &tmp);
if (error == ENOENT)
error = SET_ERROR(ESRCH);
if (error != 0)
return (error);
numholds++;
}
if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 &&
ds->ds_userrefs == numholds) {
/* we need to destroy the snapshot as well */
if (dsl_dataset_long_held(ds))
return (SET_ERROR(EBUSY));
*todelete = B_TRUE;
}
return (0);
}
static int
dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx)
{
dsl_dataset_user_release_arg_t *ddura = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
nvpair_t *pair;
int rv = 0;
if (!dmu_tx_is_syncing(tx))
return (0);
for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
const char *name = nvpair_name(pair);
int error;
dsl_dataset_t *ds;
nvlist_t *holds;
error = nvpair_value_nvlist(pair, &holds);
if (error != 0)
return (SET_ERROR(EINVAL));
error = dsl_dataset_hold(dp, name, FTAG, &ds);
if (error == 0) {
boolean_t deleteme;
error = dsl_dataset_user_release_check_one(ds,
holds, &deleteme);
if (error == 0 && deleteme) {
fnvlist_add_boolean(ddura->ddura_todelete,
name);
}
dsl_dataset_rele(ds, FTAG);
}
if (error != 0) {
if (ddura->ddura_errlist != NULL) {
fnvlist_add_int32(ddura->ddura_errlist,
name, error);
}
rv = error;
}
}
return (rv);
}
static void
dsl_dataset_user_release_sync_one(dsl_dataset_t *ds, nvlist_t *holds,
dmu_tx_t *tx)
{
dsl_pool_t *dp = ds->ds_dir->dd_pool;
objset_t *mos = dp->dp_meta_objset;
uint64_t zapobj;
int error;
nvpair_t *pair;
for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
pair = nvlist_next_nvpair(holds, pair)) {
ds->ds_userrefs--;
error = dsl_pool_user_release(dp, ds->ds_object,
nvpair_name(pair), tx);
VERIFY(error == 0 || error == ENOENT);
zapobj = ds->ds_phys->ds_userrefs_obj;
VERIFY0(zap_remove(mos, zapobj, nvpair_name(pair), tx));
spa_history_log_internal_ds(ds, "release", tx,
"tag=%s refs=%lld", nvpair_name(pair),
(longlong_t)ds->ds_userrefs);
}
}
static void
dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx)
{
dsl_dataset_user_release_arg_t *ddura = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
nvpair_t *pair;
for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
dsl_dataset_t *ds;
VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
dsl_dataset_user_release_sync_one(ds,
fnvpair_value_nvlist(pair), tx);
if (nvlist_exists(ddura->ddura_todelete,
nvpair_name(pair))) {
ASSERT(ds->ds_userrefs == 0 &&
ds->ds_phys->ds_num_children == 1 &&
DS_IS_DEFER_DESTROY(ds));
dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
}
dsl_dataset_rele(ds, FTAG);
}
}
/*
* holds is nvl of snapname -> { holdname, ... }
* errlist will be filled in with snapname -> error
*
* if any fails, all will fail.
*/
int
dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
{
dsl_dataset_user_release_arg_t ddura;
nvpair_t *pair;
int error;
pair = nvlist_next_nvpair(holds, NULL);
if (pair == NULL)
return (0);
ddura.ddura_holds = holds;
ddura.ddura_errlist = errlist;
ddura.ddura_todelete = fnvlist_alloc();
error = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_release_check,
dsl_dataset_user_release_sync, &ddura, fnvlist_num_pairs(holds));
fnvlist_free(ddura.ddura_todelete);
return (error);
}
typedef struct dsl_dataset_user_release_tmp_arg {
uint64_t ddurta_dsobj;
nvlist_t *ddurta_holds;
boolean_t ddurta_deleteme;
} dsl_dataset_user_release_tmp_arg_t;
static int
dsl_dataset_user_release_tmp_check(void *arg, dmu_tx_t *tx)
{
dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
int error;
if (!dmu_tx_is_syncing(tx))
return (0);
error = dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds);
if (error)
return (error);
error = dsl_dataset_user_release_check_one(ds,
ddurta->ddurta_holds, &ddurta->ddurta_deleteme);
dsl_dataset_rele(ds, FTAG);
return (error);
}
static void
dsl_dataset_user_release_tmp_sync(void *arg, dmu_tx_t *tx)
{
dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
VERIFY0(dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds));
dsl_dataset_user_release_sync_one(ds, ddurta->ddurta_holds, tx);
if (ddurta->ddurta_deleteme) {
ASSERT(ds->ds_userrefs == 0 &&
ds->ds_phys->ds_num_children == 1 &&
DS_IS_DEFER_DESTROY(ds));
dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
}
dsl_dataset_rele(ds, FTAG);
}
/*
* Called at spa_load time to release a stale temporary user hold.
* Also called by the onexit code.
*/
void
dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, const char *htag)
{
dsl_dataset_user_release_tmp_arg_t ddurta;
#ifdef _KERNEL
dsl_dataset_t *ds;
int error;
/* Make sure it is not mounted. */
dsl_pool_config_enter(dp, FTAG);
error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
if (error == 0) {
char name[MAXNAMELEN];
dsl_dataset_name(ds, name);
dsl_dataset_rele(ds, FTAG);
dsl_pool_config_exit(dp, FTAG);
(void) zfs_unmount_snap(name);
} else {
dsl_pool_config_exit(dp, FTAG);
}
#endif
ddurta.ddurta_dsobj = dsobj;
ddurta.ddurta_holds = fnvlist_alloc();
fnvlist_add_boolean(ddurta.ddurta_holds, htag);
(void) dsl_sync_task(spa_name(dp->dp_spa),
dsl_dataset_user_release_tmp_check,
dsl_dataset_user_release_tmp_sync, &ddurta, 1);
fnvlist_free(ddurta.ddurta_holds);
}
typedef struct zfs_hold_cleanup_arg {
char zhca_spaname[MAXNAMELEN];
uint64_t zhca_spa_load_guid;
uint64_t zhca_dsobj;
char zhca_htag[MAXNAMELEN];
nvlist_t *zhca_holds;
} zfs_hold_cleanup_arg_t;
static void
@ -466,40 +199,447 @@ dsl_dataset_user_release_onexit(void *arg)
error = spa_open(ca->zhca_spaname, &spa, FTAG);
if (error != 0) {
zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
zfs_dbgmsg("couldn't release holds on pool=%s "
"because pool is no longer loaded",
ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
ca->zhca_spaname);
return;
}
if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
zfs_dbgmsg("couldn't release holds on pool=%s "
"because pool is no longer loaded (guid doesn't match)",
ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
ca->zhca_spaname);
spa_close(spa, FTAG);
return;
}
dsl_dataset_user_release_tmp(spa_get_dsl(spa),
ca->zhca_dsobj, ca->zhca_htag);
(void) dsl_dataset_user_release_tmp(spa_get_dsl(spa), ca->zhca_holds);
fnvlist_free(ca->zhca_holds);
kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
spa_close(spa, FTAG);
}
void
dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
minor_t minor)
static void
dsl_onexit_hold_cleanup(spa_t *spa, nvlist_t *holds, minor_t minor)
{
zfs_hold_cleanup_arg_t *ca = kmem_alloc(sizeof (*ca), KM_PUSHPAGE);
spa_t *spa = dsl_dataset_get_spa(ds);
zfs_hold_cleanup_arg_t *ca;
if (minor == 0 || nvlist_empty(holds)) {
fnvlist_free(holds);
return;
}
ASSERT(spa != NULL);
ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
(void) strlcpy(ca->zhca_spaname, spa_name(spa),
sizeof (ca->zhca_spaname));
ca->zhca_spa_load_guid = spa_load_guid(spa);
ca->zhca_dsobj = ds->ds_object;
(void) strlcpy(ca->zhca_htag, htag, sizeof (ca->zhca_htag));
ca->zhca_holds = holds;
VERIFY0(zfs_onexit_add_cb(minor,
dsl_dataset_user_release_onexit, ca, NULL));
}
void
dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
minor_t minor, uint64_t now, dmu_tx_t *tx)
{
nvlist_t *tmpholds;
if (minor != 0)
tmpholds = fnvlist_alloc();
else
tmpholds = NULL;
dsl_dataset_user_hold_sync_one_impl(tmpholds, ds, htag, minor, now, tx);
dsl_onexit_hold_cleanup(dsl_dataset_get_spa(ds), tmpholds, minor);
}
static void
dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
{
dsl_dataset_user_hold_arg_t *dduha = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
nvlist_t *tmpholds;
nvpair_t *pair;
uint64_t now = gethrestime_sec();
if (dduha->dduha_minor != 0)
tmpholds = fnvlist_alloc();
else
tmpholds = NULL;
for (pair = nvlist_next_nvpair(dduha->dduha_chkholds, NULL);
pair != NULL;
pair = nvlist_next_nvpair(dduha->dduha_chkholds, pair)) {
dsl_dataset_t *ds;
VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
dsl_dataset_user_hold_sync_one_impl(tmpholds, ds,
fnvpair_value_string(pair), dduha->dduha_minor, now, tx);
dsl_dataset_rele(ds, FTAG);
}
dsl_onexit_hold_cleanup(dp->dp_spa, tmpholds, dduha->dduha_minor);
}
/*
* The full semantics of this function are described in the comment above
* lzc_hold().
*
* To summarize:
* holds is nvl of snapname -> holdname
* errlist will be filled in with snapname -> error
*
* The snaphosts must all be in the same pool.
*
* Holds for snapshots that don't exist will be skipped.
*
* If none of the snapshots for requested holds exist then ENOENT will be
* returned.
*
* If cleanup_minor is not 0, the holds will be temporary, which will be cleaned
* up when the process exits.
*
* On success all the holds, for snapshots that existed, will be created and 0
* will be returned.
*
* On failure no holds will be created, the errlist will be filled in,
* and an errno will returned.
*
* In all cases the errlist will contain entries for holds where the snapshot
* didn't exist.
*/
int
dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
{
dsl_dataset_user_hold_arg_t dduha;
nvpair_t *pair;
int ret;
pair = nvlist_next_nvpair(holds, NULL);
if (pair == NULL)
return (0);
dduha.dduha_holds = holds;
dduha.dduha_chkholds = fnvlist_alloc();
dduha.dduha_errlist = errlist;
dduha.dduha_minor = cleanup_minor;
ret = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds));
fnvlist_free(dduha.dduha_chkholds);
return (ret);
}
typedef int (dsl_holdfunc_t)(dsl_pool_t *dp, const char *name, void *tag,
dsl_dataset_t **dsp);
typedef struct dsl_dataset_user_release_arg {
dsl_holdfunc_t *ddura_holdfunc;
nvlist_t *ddura_holds;
nvlist_t *ddura_todelete;
nvlist_t *ddura_errlist;
nvlist_t *ddura_chkholds;
} dsl_dataset_user_release_arg_t;
/* Place a dataset hold on the snapshot identified by passed dsobj string */
static int
dsl_dataset_hold_obj_string(dsl_pool_t *dp, const char *dsobj, void *tag,
dsl_dataset_t **dsp)
{
return (dsl_dataset_hold_obj(dp, strtonum(dsobj, NULL), tag, dsp));
}
static int
dsl_dataset_user_release_check_one(dsl_dataset_user_release_arg_t *ddura,
dsl_dataset_t *ds, nvlist_t *holds, const char *snapname)
{
uint64_t zapobj;
nvlist_t *holds_found;
nvpair_t *pair;
objset_t *mos;
int numholds;
if (!dsl_dataset_is_snapshot(ds))
return (SET_ERROR(EINVAL));
if (nvlist_empty(holds))
return (0);
numholds = 0;
mos = ds->ds_dir->dd_pool->dp_meta_objset;
zapobj = ds->ds_phys->ds_userrefs_obj;
holds_found = fnvlist_alloc();
for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
pair = nvlist_next_nvpair(holds, pair)) {
uint64_t tmp;
int error;
const char *holdname = nvpair_name(pair);
if (zapobj != 0)
error = zap_lookup(mos, zapobj, holdname, 8, 1, &tmp);
else
error = SET_ERROR(ENOENT);
/*
* Non-existent holds are put on the errlist, but don't
* cause an overall failure.
*/
if (error == ENOENT) {
if (ddura->ddura_errlist != NULL) {
char *errtag = kmem_asprintf("%s#%s",
snapname, holdname);
fnvlist_add_int32(ddura->ddura_errlist, errtag,
ENOENT);
strfree(errtag);
}
continue;
}
if (error != 0) {
fnvlist_free(holds_found);
return (error);
}
fnvlist_add_boolean(holds_found, holdname);
numholds++;
}
if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 &&
ds->ds_userrefs == numholds) {
/* we need to destroy the snapshot as well */
if (dsl_dataset_long_held(ds)) {
fnvlist_free(holds_found);
return (SET_ERROR(EBUSY));
}
fnvlist_add_boolean(ddura->ddura_todelete, snapname);
}
if (numholds != 0) {
fnvlist_add_nvlist(ddura->ddura_chkholds, snapname,
holds_found);
}
fnvlist_free(holds_found);
return (0);
}
static int
dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx)
{
dsl_dataset_user_release_arg_t *ddura;
dsl_holdfunc_t *holdfunc;
dsl_pool_t *dp;
nvpair_t *pair;
if (!dmu_tx_is_syncing(tx))
return (0);
dp = dmu_tx_pool(tx);
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
ddura = arg;
holdfunc = ddura->ddura_holdfunc;
for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL);
pair != NULL; pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
int error;
dsl_dataset_t *ds;
nvlist_t *holds;
const char *snapname = nvpair_name(pair);
error = nvpair_value_nvlist(pair, &holds);
if (error != 0)
error = (SET_ERROR(EINVAL));
else
error = holdfunc(dp, snapname, FTAG, &ds);
if (error == 0) {
error = dsl_dataset_user_release_check_one(ddura, ds,
holds, snapname);
dsl_dataset_rele(ds, FTAG);
}
if (error != 0) {
if (ddura->ddura_errlist != NULL) {
fnvlist_add_int32(ddura->ddura_errlist,
snapname, error);
}
/*
* Non-existent snapshots are put on the errlist,
* but don't cause an overall failure.
*/
if (error != ENOENT)
return (error);
}
}
/* Return ENOENT if none of the holds existed. */
if (nvlist_empty(ddura->ddura_chkholds))
return (SET_ERROR(ENOENT));
return (0);
}
static void
dsl_dataset_user_release_sync_one(dsl_dataset_t *ds, nvlist_t *holds,
dmu_tx_t *tx)
{
dsl_pool_t *dp = ds->ds_dir->dd_pool;
objset_t *mos = dp->dp_meta_objset;
nvpair_t *pair;
for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
pair = nvlist_next_nvpair(holds, pair)) {
int error;
const char *holdname = nvpair_name(pair);
/* Remove temporary hold if one exists. */
error = dsl_pool_user_release(dp, ds->ds_object, holdname, tx);
VERIFY(error == 0 || error == ENOENT);
VERIFY0(zap_remove(mos, ds->ds_phys->ds_userrefs_obj, holdname,
tx));
ds->ds_userrefs--;
spa_history_log_internal_ds(ds, "release", tx,
"tag=%s refs=%lld", holdname, (longlong_t)ds->ds_userrefs);
}
}
static void
dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx)
{
dsl_dataset_user_release_arg_t *ddura = arg;
dsl_holdfunc_t *holdfunc = ddura->ddura_holdfunc;
dsl_pool_t *dp = dmu_tx_pool(tx);
nvpair_t *pair;
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
for (pair = nvlist_next_nvpair(ddura->ddura_chkholds, NULL);
pair != NULL; pair = nvlist_next_nvpair(ddura->ddura_chkholds,
pair)) {
dsl_dataset_t *ds;
const char *name = nvpair_name(pair);
VERIFY0(holdfunc(dp, name, FTAG, &ds));
dsl_dataset_user_release_sync_one(ds,
fnvpair_value_nvlist(pair), tx);
if (nvlist_exists(ddura->ddura_todelete, name)) {
ASSERT(ds->ds_userrefs == 0 &&
ds->ds_phys->ds_num_children == 1 &&
DS_IS_DEFER_DESTROY(ds));
dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
}
dsl_dataset_rele(ds, FTAG);
}
}
/*
* The full semantics of this function are described in the comment above
* lzc_release().
*
* To summarize:
* Releases holds specified in the nvl holds.
*
* holds is nvl of snapname -> { holdname, ... }
* errlist will be filled in with snapname -> error
*
* If tmpdp is not NULL the names for holds should be the dsobj's of snapshots,
* otherwise they should be the names of shapshots.
*
* As a release may cause snapshots to be destroyed this trys to ensure they
* aren't mounted.
*
* The release of non-existent holds are skipped.
*
* At least one hold must have been released for the this function to succeed
* and return 0.
*/
static int
dsl_dataset_user_release_impl(nvlist_t *holds, nvlist_t *errlist,
dsl_pool_t *tmpdp)
{
dsl_dataset_user_release_arg_t ddura;
nvpair_t *pair;
char *pool;
int error;
pair = nvlist_next_nvpair(holds, NULL);
if (pair == NULL)
return (0);
/*
* The release may cause snapshots to be destroyed; make sure they
* are not mounted.
*/
if (tmpdp != NULL) {
/* Temporary holds are specified by dsobj string. */
ddura.ddura_holdfunc = dsl_dataset_hold_obj_string;
pool = spa_name(tmpdp->dp_spa);
#ifdef _KERNEL
dsl_pool_config_enter(tmpdp, FTAG);
for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
pair = nvlist_next_nvpair(holds, pair)) {
dsl_dataset_t *ds;
error = dsl_dataset_hold_obj_string(tmpdp,
nvpair_name(pair), FTAG, &ds);
if (error == 0) {
char name[MAXNAMELEN];
dsl_dataset_name(ds, name);
dsl_dataset_rele(ds, FTAG);
(void) zfs_unmount_snap(name);
}
}
dsl_pool_config_exit(tmpdp, FTAG);
#endif
} else {
/* Non-temporary holds are specified by name. */
ddura.ddura_holdfunc = dsl_dataset_hold;
pool = nvpair_name(pair);
#ifdef _KERNEL
for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
pair = nvlist_next_nvpair(holds, pair)) {
(void) zfs_unmount_snap(nvpair_name(pair));
}
#endif
}
ddura.ddura_holds = holds;
ddura.ddura_errlist = errlist;
ddura.ddura_todelete = fnvlist_alloc();
ddura.ddura_chkholds = fnvlist_alloc();
error = dsl_sync_task(pool, dsl_dataset_user_release_check,
dsl_dataset_user_release_sync, &ddura,
fnvlist_num_pairs(holds));
fnvlist_free(ddura.ddura_todelete);
fnvlist_free(ddura.ddura_chkholds);
return (error);
}
/*
* holds is nvl of snapname -> { holdname, ... }
* errlist will be filled in with snapname -> error
*/
int
dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
{
return (dsl_dataset_user_release_impl(holds, errlist, NULL));
}
/*
* holds is nvl of snapdsobj -> { holdname, ... }
*/
void
dsl_dataset_user_release_tmp(struct dsl_pool *dp, nvlist_t *holds)
{
ASSERT(dp != NULL);
(void) dsl_dataset_user_release_impl(holds, NULL, dp);
}
int
dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl)
{

View File

@ -28,6 +28,7 @@
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 201i3 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
/*
@ -4867,20 +4868,6 @@ zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
static int
zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
{
nvpair_t *pair;
int err;
/*
* The release may cause the snapshot to be destroyed; make sure it
* is not mounted.
*/
for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
pair = nvlist_next_nvpair(holds, pair)) {
err = zfs_unmount_snap(nvpair_name(pair));
if (err != 0)
return (err);
}
return (dsl_dataset_user_release(holds, errlist));
}