From 95fd54a1c5b93bb2aa3e7dffc28c784b1e21a8bb Mon Sep 17 00:00:00 2001 From: Steven Hartland Date: Sat, 25 May 2013 02:06:23 +0000 Subject: [PATCH] Illumos #3740 3740 Poor ZFS send / receive performance due to snapshot hold / release processing Reviewed by: Matthew Ahrens Approved by: Christopher Siden References: https://www.illumos.org/issues/3740 illumos/illumos-gate@a7a845e4bf22fd1b2a284729ccd95c7370a0438c Ported-by: Richard Yao Signed-off-by: Brian Behlendorf Issue #1775 Porting notes: 1. 13fe019870c8779bf2f5b3ff731b512cf89133ef introduced a merge conflict in dsl_dataset_user_release_tmp where some variables were moved outside of the preprocessor directive. 2. dea9dfefdd747534b3846845629d2200f0616dad made the previous merge conflict worse by switching KM_SLEEP to KM_PUSHPAGE. This is notable because this commit refactors the code, adding a new KM_SLEEP allocation. It is not clear to me whether this should be converted to KM_PUSHPAGE. 3. We had a merge conflict in libzfs_sendrecv.c because of copyright notices. 4. Several small C99 compatibility fixed were made. --- cmd/zfs/zfs_main.c | 4 +- cmd/zhack/zhack.c | 3 +- cmd/ztest/ztest.c | 3 +- include/libzfs.h | 4 +- include/sys/dsl_dataset.h | 3 +- include/sys/dsl_userhold.h | 4 +- lib/libzfs/libzfs_dataset.c | 93 ++-- lib/libzfs/libzfs_sendrecv.c | 148 +++--- lib/libzfs_core/libzfs_core.c | 47 +- module/zfs/dsl_destroy.c | 5 + module/zfs/dsl_pool.c | 18 +- module/zfs/dsl_userhold.c | 830 ++++++++++++++++++++-------------- module/zfs/zfs_ioctl.c | 15 +- 13 files changed, 672 insertions(+), 505 deletions(-) diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 8e13dd715b..08d16aa1f2 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -24,6 +24,7 @@ * Copyright 2012 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #include @@ -5193,8 +5194,7 @@ zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding) continue; } if (holding) { - if (zfs_hold(zhp, delim+1, tag, recursive, - B_FALSE, -1) != 0) + if (zfs_hold(zhp, delim+1, tag, recursive, -1) != 0) ++errors; } else { if (zfs_release(zhp, delim+1, tag, recursive) != 0) diff --git a/cmd/zhack/zhack.c b/cmd/zhack/zhack.c index 99d2671945..64ab8edbbc 100644 --- a/cmd/zhack/zhack.c +++ b/cmd/zhack/zhack.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ /* @@ -152,7 +153,7 @@ import_pool(const char *target, boolean_t readonly) g_importargs.poolname = g_pool; pools = zpool_search_import(g_zfs, &g_importargs); - if (pools == NULL || nvlist_next_nvpair(pools, NULL) == NULL) { + if (nvlist_empty(pools)) { if (!g_importargs.can_be_active) { g_importargs.can_be_active = B_TRUE; if (zpool_search_import(g_zfs, &g_importargs) != NULL || diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index e192ab17a1..48da5b2061 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -22,6 +22,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ /* @@ -4830,7 +4831,7 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) error = user_release_one(fullname, tag); if (error) - fatal(0, "user_release_one(%s)", fullname, tag); + fatal(0, "user_release_one(%s, %s) = %d", fullname, tag, error); VERIFY3U(dmu_objset_hold(fullname, FTAG, &origin), ==, ENOENT); diff --git a/include/libzfs.h b/include/libzfs.h index c212a0c05a..5fabcb1b1f 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -24,6 +24,7 @@ * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #ifndef _LIBZFS_H @@ -614,7 +615,8 @@ extern int zfs_send(zfs_handle_t *, const char *, const char *, extern int zfs_promote(zfs_handle_t *); extern int zfs_hold(zfs_handle_t *, const char *, const char *, - boolean_t, boolean_t, int); + boolean_t, int); +extern int zfs_hold_nvl(zfs_handle_t *, int, nvlist_t *); extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t); extern int zfs_get_holds(zfs_handle_t *, nvlist_t **); extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *); diff --git a/include/sys/dsl_dataset.h b/include/sys/dsl_dataset.h index 494f11b902..725f0d2339 100644 --- a/include/sys/dsl_dataset.h +++ b/include/sys/dsl_dataset.h @@ -22,6 +22,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #ifndef _SYS_DSL_DATASET_H @@ -187,8 +188,6 @@ int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj, void dsl_dataset_disown(dsl_dataset_t *ds, void *tag); void dsl_dataset_name(dsl_dataset_t *ds, char *name); boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag); -void dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag, - minor_t minor); uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname, dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *); uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, diff --git a/include/sys/dsl_userhold.h b/include/sys/dsl_userhold.h index 56c6c8f47a..071aeb86d1 100644 --- a/include/sys/dsl_userhold.h +++ b/include/sys/dsl_userhold.h @@ -23,6 +23,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #ifndef _SYS_DSL_USERHOLD_H @@ -43,8 +44,7 @@ int dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist); int dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist); int dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl); -void dsl_dataset_user_release_tmp(struct dsl_pool *dp, uint64_t dsobj, - const char *htag); +void dsl_dataset_user_release_tmp(struct dsl_pool *dp, nvlist_t *holds); int dsl_dataset_user_hold_check_one(struct dsl_dataset *ds, const char *htag, boolean_t temphold, struct dmu_tx *tx); void dsl_dataset_user_hold_sync_one(struct dsl_dataset *ds, const char *htag, diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index ede0d91f8c..4b4f8d8c92 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -26,6 +26,7 @@ * Copyright (c) 2012 Pawel Jakub Dawidek . * Copyright 2012 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2013 Martin Matuska. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #include @@ -3153,18 +3154,14 @@ static int zfs_check_snap_cb(zfs_handle_t *zhp, void *arg) { struct destroydata *dd = arg; - zfs_handle_t *szhp; char name[ZFS_MAXNAMELEN]; int rv = 0; (void) snprintf(name, sizeof (name), "%s@%s", zhp->zfs_name, dd->snapname); - szhp = make_dataset_handle(zhp->zfs_hdl, name); - if (szhp) { + if (lzc_exists(name)) verify(nvlist_add_boolean(dd->nvl, name) == 0); - zfs_close(szhp); - } if (zhp->zfs_type == ZFS_TYPE_VOLUME) { (void) zvol_remove_link(zhp->zfs_hdl, name); @@ -3193,7 +3190,7 @@ zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname, boolean_t defer) verify(nvlist_alloc(&dd.nvl, NV_UNIQUE_NAME, 0) == 0); (void) zfs_check_snap_cb(zfs_handle_dup(zhp), &dd); - if (nvlist_next_nvpair(dd.nvl, NULL) == NULL) { + if (nvlist_empty(dd.nvl)) { ret = zfs_standard_error_fmt(zhp->zfs_hdl, ENOENT, dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"), zhp->zfs_name, snapname); @@ -3219,7 +3216,7 @@ zfs_destroy_snaps_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, boolean_t defer) if (ret == 0) return (0); - if (nvlist_next_nvpair(errlist, NULL) == NULL) { + if (nvlist_empty(errlist)) { char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot destroy snapshots")); @@ -4421,18 +4418,14 @@ static int zfs_hold_one(zfs_handle_t *zhp, void *arg) { struct holdarg *ha = arg; - zfs_handle_t *szhp; char name[ZFS_MAXNAMELEN]; int rv = 0; (void) snprintf(name, sizeof (name), "%s@%s", zhp->zfs_name, ha->snapname); - szhp = make_dataset_handle(zhp->zfs_hdl, name); - if (szhp) { + if (lzc_exists(name)) fnvlist_add_string(ha->nvl, name, ha->tag); - zfs_close(szhp); - } if (ha->recursive) rv = zfs_iter_filesystems(zhp, zfs_hold_one, ha); @@ -4442,14 +4435,10 @@ zfs_hold_one(zfs_handle_t *zhp, void *arg) int zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, - boolean_t recursive, boolean_t enoent_ok, int cleanup_fd) + boolean_t recursive, int cleanup_fd) { int ret; struct holdarg ha; - nvlist_t *errors; - libzfs_handle_t *hdl = zhp->zfs_hdl; - char errbuf[1024]; - nvpair_t *elem; ha.nvl = fnvlist_alloc(); ha.snapname = snapname; @@ -4457,26 +4446,44 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, ha.recursive = recursive; (void) zfs_hold_one(zfs_handle_dup(zhp), &ha); - if (nvlist_next_nvpair(ha.nvl, NULL) == NULL) { + if (nvlist_empty(ha.nvl)) { + char errbuf[1024]; + fnvlist_free(ha.nvl); ret = ENOENT; - if (!enoent_ok) { - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, - "cannot hold snapshot '%s@%s'"), - zhp->zfs_name, snapname); - (void) zfs_standard_error(hdl, ret, errbuf); - } + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot hold snapshot '%s@%s'"), + zhp->zfs_name, snapname); + (void) zfs_standard_error(zhp->zfs_hdl, ret, errbuf); return (ret); } - ret = lzc_hold(ha.nvl, cleanup_fd, &errors); + ret = zfs_hold_nvl(zhp, cleanup_fd, ha.nvl); fnvlist_free(ha.nvl); - if (ret == 0) - return (0); + return (ret); +} - if (nvlist_next_nvpair(errors, NULL) == NULL) { +int +zfs_hold_nvl(zfs_handle_t *zhp, int cleanup_fd, nvlist_t *holds) +{ + int ret; + nvlist_t *errors; + libzfs_handle_t *hdl = zhp->zfs_hdl; + char errbuf[1024]; + nvpair_t *elem; + + errors = NULL; + ret = lzc_hold(holds, cleanup_fd, &errors); + + if (ret == 0) { + /* There may be errors even in the success case. */ + fnvlist_free(errors); + return (0); + } + + if (nvlist_empty(errors)) { /* no hold-specific errors */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot hold")); @@ -4516,10 +4523,6 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, case EEXIST: (void) zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf); break; - case ENOENT: - if (enoent_ok) - return (ENOENT); - /* FALLTHROUGH */ default: (void) zfs_standard_error(hdl, fnvpair_value_int32(elem), errbuf); @@ -4530,30 +4533,21 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, return (ret); } -struct releasearg { - nvlist_t *nvl; - const char *snapname; - const char *tag; - boolean_t recursive; -}; - static int zfs_release_one(zfs_handle_t *zhp, void *arg) { struct holdarg *ha = arg; - zfs_handle_t *szhp; char name[ZFS_MAXNAMELEN]; int rv = 0; (void) snprintf(name, sizeof (name), "%s@%s", zhp->zfs_name, ha->snapname); - szhp = make_dataset_handle(zhp->zfs_hdl, name); - if (szhp) { + if (lzc_exists(name)) { nvlist_t *holds = fnvlist_alloc(); fnvlist_add_boolean(holds, ha->tag); fnvlist_add_nvlist(ha->nvl, name, holds); - zfs_close(szhp); + fnvlist_free(holds); } if (ha->recursive) @@ -4568,7 +4562,7 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag, { int ret; struct holdarg ha; - nvlist_t *errors; + nvlist_t *errors = NULL; nvpair_t *elem; libzfs_handle_t *hdl = zhp->zfs_hdl; char errbuf[1024]; @@ -4579,7 +4573,7 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag, ha.recursive = recursive; (void) zfs_release_one(zfs_handle_dup(zhp), &ha); - if (nvlist_next_nvpair(ha.nvl, NULL) == NULL) { + if (nvlist_empty(ha.nvl)) { fnvlist_free(ha.nvl); ret = ENOENT; (void) snprintf(errbuf, sizeof (errbuf), @@ -4593,10 +4587,13 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag, ret = lzc_release(ha.nvl, &errors); fnvlist_free(ha.nvl); - if (ret == 0) + if (ret == 0) { + /* There may be errors even in the success case. */ + fnvlist_free(errors); return (0); + } - if (nvlist_next_nvpair(errors, NULL) == NULL) { + if (nvlist_empty(errors)) { /* no hold-specific errors */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot release")); diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index 28751b215d..6f0d27f5b2 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -22,9 +22,10 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. - * Copyright (c) 2012 Pawel Jakub Dawidek . * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2012 Pawel Jakub Dawidek . * All rights reserved + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #include @@ -799,6 +800,7 @@ typedef struct send_dump_data { int outfd; boolean_t err; nvlist_t *fss; + nvlist_t *snapholds; avl_tree_t *fsavl; snapfilter_cb_t *filter_cb; void *filter_cb_arg; @@ -948,41 +950,19 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj, return (0); } -static int -hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd) +static void +gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd) { - zfs_handle_t *pzhp; - int error = 0; - char *thissnap; - assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); - if (sdd->dryrun) - return (0); - /* - * zfs_send() only opens a cleanup_fd for sends that need it, + * zfs_send() only sets snapholds for sends that need them, * e.g. replication and doall. */ - if (sdd->cleanup_fd == -1) - return (0); + if (sdd->snapholds == NULL) + return; - thissnap = strchr(zhp->zfs_name, '@') + 1; - *(thissnap - 1) = '\0'; - pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET); - *(thissnap - 1) = '@'; - - /* - * It's OK if the parent no longer exists. The send code will - * handle that error. - */ - if (pzhp) { - error = zfs_hold(pzhp, thissnap, sdd->holdtag, - B_FALSE, B_TRUE, sdd->cleanup_fd); - zfs_close(pzhp); - } - - return (error); + fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag); } static void * @@ -1038,28 +1018,23 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) send_dump_data_t *sdd = arg; progress_arg_t pa = { 0 }; pthread_t tid; - char *thissnap; int err; boolean_t isfromsnap, istosnap, fromorigin; boolean_t exclude = B_FALSE; + err = 0; thissnap = strchr(zhp->zfs_name, '@') + 1; isfromsnap = (sdd->fromsnap != NULL && strcmp(sdd->fromsnap, thissnap) == 0); if (!sdd->seenfrom && isfromsnap) { - err = hold_for_send(zhp, sdd); - if (err == 0) { - sdd->seenfrom = B_TRUE; - (void) strcpy(sdd->prevsnap, thissnap); - sdd->prevsnap_obj = zfs_prop_get_int(zhp, - ZFS_PROP_OBJSETID); - } else if (err == ENOENT) { - err = 0; - } + gather_holds(zhp, sdd); + sdd->seenfrom = B_TRUE; + (void) strcpy(sdd->prevsnap, thissnap); + sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); zfs_close(zhp); - return (err); + return (0); } if (sdd->seento || !sdd->seenfrom) { @@ -1110,14 +1085,7 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) return (0); } - err = hold_for_send(zhp, sdd); - if (err) { - if (err == ENOENT) - err = 0; - zfs_close(zhp); - return (err); - } - + gather_holds(zhp, sdd); fromorigin = sdd->prevsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate); @@ -1385,7 +1353,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, avl_tree_t *fsavl = NULL; static uint64_t holdseq; int spa_version; - pthread_t tid; + pthread_t tid = 0; int pipefd[2]; dedup_arg_t dda = { 0 }; int featureflags = 0; @@ -1458,11 +1426,8 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, *debugnvp = hdrnv; else nvlist_free(hdrnv); - if (err) { - fsavl_destroy(fsavl); - nvlist_free(fss); + if (err) goto stderr_out; - } } if (!flags->dryrun) { @@ -1486,8 +1451,6 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, } free(packbuf); if (err == -1) { - fsavl_destroy(fsavl); - nvlist_free(fss); err = errno; goto stderr_out; } @@ -1498,8 +1461,6 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, drr.drr_u.drr_end.drr_checksum = zc; err = write(outfd, &drr, sizeof (drr)); if (err == -1) { - fsavl_destroy(fsavl); - nvlist_free(fss); err = errno; goto stderr_out; } @@ -1511,7 +1472,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, /* dump each stream */ sdd.fromsnap = fromsnap; sdd.tosnap = tosnap; - if (flags->dedup) + if (tid != 0) sdd.outfd = pipefd[0]; else sdd.outfd = outfd; @@ -1548,36 +1509,71 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, err = errno; goto stderr_out; } + sdd.snapholds = fnvlist_alloc(); } else { sdd.cleanup_fd = -1; + sdd.snapholds = NULL; } - if (flags->verbose) { + if (flags->verbose || sdd.snapholds != NULL) { /* * Do a verbose no-op dry run to get all the verbose output - * before generating any data. Then do a non-verbose real - * run to generate the streams. + * or to gather snapshot hold's before generating any data, + * then do a non-verbose real run to generate the streams. */ sdd.dryrun = B_TRUE; err = dump_filesystems(zhp, &sdd); - sdd.dryrun = flags->dryrun; - sdd.verbose = B_FALSE; - if (flags->parsable) { - (void) fprintf(stderr, "size\t%llu\n", - (longlong_t)sdd.size); - } else { - char buf[16]; - zfs_nicenum(sdd.size, buf, sizeof (buf)); - (void) fprintf(stderr, dgettext(TEXT_DOMAIN, - "total estimated size is %s\n"), buf); + + if (err != 0) + goto stderr_out; + + if (flags->verbose) { + if (flags->parsable) { + (void) fprintf(stderr, "size\t%llu\n", + (longlong_t)sdd.size); + } else { + char buf[16]; + zfs_nicenum(sdd.size, buf, sizeof (buf)); + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "total estimated size is %s\n"), buf); + } } + + /* Ensure no snaps found is treated as an error. */ + if (!sdd.seento) { + err = ENOENT; + goto err_out; + } + + /* Skip the second run if dryrun was requested. */ + if (flags->dryrun) + goto err_out; + + if (sdd.snapholds != NULL) { + err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds); + if (err != 0) + goto stderr_out; + + fnvlist_free(sdd.snapholds); + sdd.snapholds = NULL; + } + + sdd.dryrun = B_FALSE; + sdd.verbose = B_FALSE; } + err = dump_filesystems(zhp, &sdd); fsavl_destroy(fsavl); nvlist_free(fss); - if (flags->dedup) { - (void) close(pipefd[0]); + /* Ensure no snaps found is treated as an error. */ + if (err == 0 && !sdd.seento) + err = ENOENT; + + if (tid != 0) { + if (err != 0) + (void) pthread_cancel(tid); (void) pthread_join(tid, NULL); + (void) close(pipefd[0]); } if (sdd.cleanup_fd != -1) { @@ -1605,9 +1601,13 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, stderr_out: err = zfs_standard_error(zhp->zfs_hdl, err, errbuf); err_out: + fsavl_destroy(fsavl); + nvlist_free(fss); + fnvlist_free(sdd.snapholds); + if (sdd.cleanup_fd != -1) VERIFY(0 == close(sdd.cleanup_fd)); - if (flags->dedup) { + if (tid != 0) { (void) pthread_cancel(tid); (void) pthread_join(tid, NULL); (void) close(pipefd[0]); diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c index 44a2070d60..d3918a4edd 100644 --- a/lib/libzfs_core/libzfs_core.c +++ b/lib/libzfs_core/libzfs_core.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ /* @@ -254,8 +255,11 @@ lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist) * marked for deferred destruction, and will be destroyed when the last hold * or clone is removed/destroyed. * + * The return value will be ENOENT if none of the snapshots existed. + * * The return value will be 0 if all snapshots were destroyed (or marked for - * later destruction if 'defer' is set) or didn't exist to begin with. + * later destruction if 'defer' is set) or didn't exist to begin with and + * at least one snapshot was destroyed. * * Otherwise the return value will be the errno of a (unspecified) snapshot * that failed, no snapshots will be destroyed, and the errlist will have an @@ -286,7 +290,6 @@ lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist) nvlist_free(args); return (error); - } int @@ -346,11 +349,22 @@ lzc_exists(const char *dataset) * uncleanly, the holds will be released when the pool is next opened * or imported. * - * The return value will be 0 if all holds were created. Otherwise the return - * value will be the errno of a (unspecified) hold that failed, no holds will - * be created, and the errlist will have an entry for each hold that - * failed (name = snapshot). The value in the errlist will be the error - * code (int32). + * Holds for snapshots which don't exist will be skipped and have an entry + * added to errlist, but will not cause an overall failure, except in the + * case that all holds where skipped. + * + * The return value will be ENOENT if none of the snapshots for the requested + * holds existed. + * + * The return value will be 0 if the nvl holds was empty or all holds, for + * snapshots that existed, were succesfully created and at least one hold + * was created. + * + * Otherwise the return value will be the errno of a (unspecified) hold that + * failed and no holds will be created. + * + * In all cases the errlist will have an entry for each hold that failed + * (name = snapshot), with its value being the error code (int32). */ int lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist) @@ -387,11 +401,20 @@ lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist) * The snapshots must all be in the same pool. * The value is a nvlist whose keys are the holds to remove. * - * The return value will be 0 if all holds were removed. - * Otherwise the return value will be the errno of a (unspecified) release - * that failed, no holds will be released, and the errlist will have an - * entry for each snapshot that has failed releases (name = snapshot). - * The value in the errlist will be the error code (int32) of a failed release. + * Holds which failed to release because they didn't exist will have an entry + * added to errlist, but will not cause an overall failure, except in the + * case that all releases where skipped. + * + * The return value will be ENOENT if none of the specified holds existed. + * + * The return value will be 0 if the nvl holds was empty or all holds that + * existed, were successfully removed and at least one hold was removed. + * + * Otherwise the return value will be the errno of a (unspecified) hold that + * failed to release and no holds will be released. + * + * In all cases the errlist will have an entry for each hold that failed to + * to release. */ int lzc_release(nvlist_t *holds, nvlist_t **errlist) diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c index eee0df1060..b1fd270dac 100644 --- a/module/zfs/dsl_destroy.c +++ b/module/zfs/dsl_destroy.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #include @@ -127,6 +128,10 @@ dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx) pair = nvlist_next_nvpair(dsda->dsda_errlist, NULL); if (pair != NULL) return (fnvpair_value_int32(pair)); + + if (nvlist_empty(dsda->dsda_successful_snaps)) + return (SET_ERROR(ENOENT)); + return (0); } diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c index 950738e985..72e819f6b9 100644 --- a/module/zfs/dsl_pool.c +++ b/module/zfs/dsl_pool.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #include @@ -840,23 +841,34 @@ dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp) zap_cursor_t zc; objset_t *mos = dp->dp_meta_objset; uint64_t zapobj = dp->dp_tmp_userrefs_obj; + nvlist_t *holds; if (zapobj == 0) return; ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); + holds = fnvlist_alloc(); + for (zap_cursor_init(&zc, mos, zapobj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { char *htag; - uint64_t dsobj; + nvlist_t *tags; htag = strchr(za.za_name, '-'); *htag = '\0'; ++htag; - dsobj = strtonum(za.za_name, NULL); - dsl_dataset_user_release_tmp(dp, dsobj, htag); + if (nvlist_lookup_nvlist(holds, za.za_name, &tags) != 0) { + tags = fnvlist_alloc(); + fnvlist_add_boolean(tags, htag); + fnvlist_add_nvlist(holds, za.za_name, tags); + fnvlist_free(tags); + } else { + fnvlist_add_boolean(tags, htag); + } } + dsl_dataset_user_release_tmp(dp, holds); + fnvlist_free(holds); zap_cursor_fini(&zc); } diff --git a/module/zfs/dsl_userhold.c b/module/zfs/dsl_userhold.c index cda4081f33..f33292787d 100644 --- a/module/zfs/dsl_userhold.c +++ b/module/zfs/dsl_userhold.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #include @@ -37,6 +38,7 @@ typedef struct dsl_dataset_user_hold_arg { nvlist_t *dduha_holds; + nvlist_t *dduha_chkholds; nvlist_t *dduha_errlist; minor_t dduha_minor; } dsl_dataset_user_hold_arg_t; @@ -53,25 +55,24 @@ dsl_dataset_user_hold_check_one(dsl_dataset_t *ds, const char *htag, objset_t *mos = dp->dp_meta_objset; int error = 0; + ASSERT(dsl_pool_config_held(dp)); + if (strlen(htag) > MAXNAMELEN) - return (E2BIG); + return (SET_ERROR(E2BIG)); /* Tempholds have a more restricted length */ if (temphold && strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN) - return (E2BIG); + return (SET_ERROR(E2BIG)); /* tags must be unique (if ds already exists) */ - if (ds != NULL) { - mutex_enter(&ds->ds_lock); - if (ds->ds_phys->ds_userrefs_obj != 0) { - uint64_t value; - error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, - htag, 8, 1, &value); - if (error == 0) - error = SET_ERROR(EEXIST); - else if (error == ENOENT) - error = 0; - } - mutex_exit(&ds->ds_lock); + if (ds != NULL && ds->ds_phys->ds_userrefs_obj != 0) { + uint64_t value; + + error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, + htag, 8, 1, &value); + if (error == 0) + error = SET_ERROR(EEXIST); + else if (error == ENOENT) + error = 0; } return (error); @@ -83,51 +84,67 @@ dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx) dsl_dataset_user_hold_arg_t *dduha = arg; dsl_pool_t *dp = dmu_tx_pool(tx); nvpair_t *pair; - int rv = 0; if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS) return (SET_ERROR(ENOTSUP)); - for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL; - pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) { - int error = 0; + if (!dmu_tx_is_syncing(tx)) + return (0); + + for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); + pair != NULL; pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) { dsl_dataset_t *ds; - char *htag; + int error = 0; + char *htag, *name; /* must be a snapshot */ - if (strchr(nvpair_name(pair), '@') == NULL) + name = nvpair_name(pair); + if (strchr(name, '@') == NULL) error = SET_ERROR(EINVAL); if (error == 0) error = nvpair_value_string(pair, &htag); - if (error == 0) { - error = dsl_dataset_hold(dp, - nvpair_name(pair), FTAG, &ds); - } + + if (error == 0) + error = dsl_dataset_hold(dp, name, FTAG, &ds); + if (error == 0) { error = dsl_dataset_user_hold_check_one(ds, htag, dduha->dduha_minor != 0, tx); dsl_dataset_rele(ds, FTAG); } - if (error != 0) { - rv = error; - fnvlist_add_int32(dduha->dduha_errlist, - nvpair_name(pair), error); + if (error == 0) { + fnvlist_add_string(dduha->dduha_chkholds, name, htag); + } else { + /* + * We register ENOENT errors so they can be correctly + * reported if needed, such as when all holds fail. + */ + fnvlist_add_int32(dduha->dduha_errlist, name, error); + if (error != ENOENT) + return (error); } } - return (rv); + + /* Return ENOENT if no holds would be created. */ + if (nvlist_empty(dduha->dduha_chkholds)) + return (SET_ERROR(ENOENT)); + + return (0); } -void -dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag, - minor_t minor, uint64_t now, dmu_tx_t *tx) + +static void +dsl_dataset_user_hold_sync_one_impl(nvlist_t *tmpholds, dsl_dataset_t *ds, + const char *htag, minor_t minor, uint64_t now, dmu_tx_t *tx) { dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; uint64_t zapobj; - mutex_enter(&ds->ds_lock); + ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); + if (ds->ds_phys->ds_userrefs_obj == 0) { /* * This is the first user hold for this dataset. Create @@ -140,14 +157,26 @@ dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag, zapobj = ds->ds_phys->ds_userrefs_obj; } ds->ds_userrefs++; - mutex_exit(&ds->ds_lock); VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx)); if (minor != 0) { + char name[MAXNAMELEN]; + nvlist_t *tags; + VERIFY0(dsl_pool_user_hold(dp, ds->ds_object, htag, now, tx)); - dsl_register_onexit_hold_cleanup(ds, htag, minor); + (void) snprintf(name, sizeof (name), "%llx", + (u_longlong_t)ds->ds_object); + + if (nvlist_lookup_nvlist(tmpholds, name, &tags) != 0) { + tags = fnvlist_alloc(); + fnvlist_add_boolean(tags, htag); + fnvlist_add_nvlist(tmpholds, name, tags); + fnvlist_free(tags); + } else { + fnvlist_add_boolean(tags, htag); + } } spa_history_log_internal_ds(ds, "hold", tx, @@ -155,306 +184,10 @@ dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag, htag, minor != 0, ds->ds_userrefs); } -static void -dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx) -{ - dsl_dataset_user_hold_arg_t *dduha = arg; - dsl_pool_t *dp = dmu_tx_pool(tx); - nvpair_t *pair; - uint64_t now = gethrestime_sec(); - - for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL; - pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) { - dsl_dataset_t *ds; - VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds)); - dsl_dataset_user_hold_sync_one(ds, fnvpair_value_string(pair), - dduha->dduha_minor, now, tx); - dsl_dataset_rele(ds, FTAG); - } -} - -/* - * holds is nvl of snapname -> holdname - * errlist will be filled in with snapname -> error - * if cleanup_minor is not 0, the holds will be temporary, cleaned up - * when the process exits. - * - * if any fails, all will fail. - */ -int -dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist) -{ - dsl_dataset_user_hold_arg_t dduha; - nvpair_t *pair; - - pair = nvlist_next_nvpair(holds, NULL); - if (pair == NULL) - return (0); - - dduha.dduha_holds = holds; - dduha.dduha_errlist = errlist; - dduha.dduha_minor = cleanup_minor; - - return (dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check, - dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds))); -} - -typedef struct dsl_dataset_user_release_arg { - nvlist_t *ddura_holds; - nvlist_t *ddura_todelete; - nvlist_t *ddura_errlist; -} dsl_dataset_user_release_arg_t; - -static int -dsl_dataset_user_release_check_one(dsl_dataset_t *ds, - nvlist_t *holds, boolean_t *todelete) -{ - uint64_t zapobj; - nvpair_t *pair; - objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; - int error; - int numholds = 0; - - *todelete = B_FALSE; - - if (!dsl_dataset_is_snapshot(ds)) - return (SET_ERROR(EINVAL)); - - zapobj = ds->ds_phys->ds_userrefs_obj; - if (zapobj == 0) - return (SET_ERROR(ESRCH)); - - for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; - pair = nvlist_next_nvpair(holds, pair)) { - /* Make sure the hold exists */ - uint64_t tmp; - error = zap_lookup(mos, zapobj, nvpair_name(pair), 8, 1, &tmp); - if (error == ENOENT) - error = SET_ERROR(ESRCH); - if (error != 0) - return (error); - numholds++; - } - - if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 && - ds->ds_userrefs == numholds) { - /* we need to destroy the snapshot as well */ - - if (dsl_dataset_long_held(ds)) - return (SET_ERROR(EBUSY)); - *todelete = B_TRUE; - } - return (0); -} - -static int -dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx) -{ - dsl_dataset_user_release_arg_t *ddura = arg; - dsl_pool_t *dp = dmu_tx_pool(tx); - nvpair_t *pair; - int rv = 0; - - if (!dmu_tx_is_syncing(tx)) - return (0); - - for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL; - pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) { - const char *name = nvpair_name(pair); - int error; - dsl_dataset_t *ds; - nvlist_t *holds; - - error = nvpair_value_nvlist(pair, &holds); - if (error != 0) - return (SET_ERROR(EINVAL)); - - error = dsl_dataset_hold(dp, name, FTAG, &ds); - if (error == 0) { - boolean_t deleteme; - error = dsl_dataset_user_release_check_one(ds, - holds, &deleteme); - if (error == 0 && deleteme) { - fnvlist_add_boolean(ddura->ddura_todelete, - name); - } - dsl_dataset_rele(ds, FTAG); - } - if (error != 0) { - if (ddura->ddura_errlist != NULL) { - fnvlist_add_int32(ddura->ddura_errlist, - name, error); - } - rv = error; - } - } - return (rv); -} - -static void -dsl_dataset_user_release_sync_one(dsl_dataset_t *ds, nvlist_t *holds, - dmu_tx_t *tx) -{ - dsl_pool_t *dp = ds->ds_dir->dd_pool; - objset_t *mos = dp->dp_meta_objset; - uint64_t zapobj; - int error; - nvpair_t *pair; - - for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; - pair = nvlist_next_nvpair(holds, pair)) { - ds->ds_userrefs--; - error = dsl_pool_user_release(dp, ds->ds_object, - nvpair_name(pair), tx); - VERIFY(error == 0 || error == ENOENT); - zapobj = ds->ds_phys->ds_userrefs_obj; - VERIFY0(zap_remove(mos, zapobj, nvpair_name(pair), tx)); - - spa_history_log_internal_ds(ds, "release", tx, - "tag=%s refs=%lld", nvpair_name(pair), - (longlong_t)ds->ds_userrefs); - } -} - -static void -dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx) -{ - dsl_dataset_user_release_arg_t *ddura = arg; - dsl_pool_t *dp = dmu_tx_pool(tx); - nvpair_t *pair; - - for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL; - pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) { - dsl_dataset_t *ds; - - VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds)); - dsl_dataset_user_release_sync_one(ds, - fnvpair_value_nvlist(pair), tx); - if (nvlist_exists(ddura->ddura_todelete, - nvpair_name(pair))) { - ASSERT(ds->ds_userrefs == 0 && - ds->ds_phys->ds_num_children == 1 && - DS_IS_DEFER_DESTROY(ds)); - dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx); - } - dsl_dataset_rele(ds, FTAG); - } -} - -/* - * holds is nvl of snapname -> { holdname, ... } - * errlist will be filled in with snapname -> error - * - * if any fails, all will fail. - */ -int -dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist) -{ - dsl_dataset_user_release_arg_t ddura; - nvpair_t *pair; - int error; - - pair = nvlist_next_nvpair(holds, NULL); - if (pair == NULL) - return (0); - - ddura.ddura_holds = holds; - ddura.ddura_errlist = errlist; - ddura.ddura_todelete = fnvlist_alloc(); - - error = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_release_check, - dsl_dataset_user_release_sync, &ddura, fnvlist_num_pairs(holds)); - fnvlist_free(ddura.ddura_todelete); - return (error); -} - -typedef struct dsl_dataset_user_release_tmp_arg { - uint64_t ddurta_dsobj; - nvlist_t *ddurta_holds; - boolean_t ddurta_deleteme; -} dsl_dataset_user_release_tmp_arg_t; - -static int -dsl_dataset_user_release_tmp_check(void *arg, dmu_tx_t *tx) -{ - dsl_dataset_user_release_tmp_arg_t *ddurta = arg; - dsl_pool_t *dp = dmu_tx_pool(tx); - dsl_dataset_t *ds; - int error; - - if (!dmu_tx_is_syncing(tx)) - return (0); - - error = dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds); - if (error) - return (error); - - error = dsl_dataset_user_release_check_one(ds, - ddurta->ddurta_holds, &ddurta->ddurta_deleteme); - dsl_dataset_rele(ds, FTAG); - return (error); -} - -static void -dsl_dataset_user_release_tmp_sync(void *arg, dmu_tx_t *tx) -{ - dsl_dataset_user_release_tmp_arg_t *ddurta = arg; - dsl_pool_t *dp = dmu_tx_pool(tx); - dsl_dataset_t *ds; - - VERIFY0(dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds)); - dsl_dataset_user_release_sync_one(ds, ddurta->ddurta_holds, tx); - if (ddurta->ddurta_deleteme) { - ASSERT(ds->ds_userrefs == 0 && - ds->ds_phys->ds_num_children == 1 && - DS_IS_DEFER_DESTROY(ds)); - dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx); - } - dsl_dataset_rele(ds, FTAG); -} - -/* - * Called at spa_load time to release a stale temporary user hold. - * Also called by the onexit code. - */ -void -dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, const char *htag) -{ - dsl_dataset_user_release_tmp_arg_t ddurta; - -#ifdef _KERNEL - dsl_dataset_t *ds; - int error; - - /* Make sure it is not mounted. */ - dsl_pool_config_enter(dp, FTAG); - error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); - if (error == 0) { - char name[MAXNAMELEN]; - dsl_dataset_name(ds, name); - dsl_dataset_rele(ds, FTAG); - dsl_pool_config_exit(dp, FTAG); - (void) zfs_unmount_snap(name); - } else { - dsl_pool_config_exit(dp, FTAG); - } -#endif - - ddurta.ddurta_dsobj = dsobj; - ddurta.ddurta_holds = fnvlist_alloc(); - fnvlist_add_boolean(ddurta.ddurta_holds, htag); - - (void) dsl_sync_task(spa_name(dp->dp_spa), - dsl_dataset_user_release_tmp_check, - dsl_dataset_user_release_tmp_sync, &ddurta, 1); - fnvlist_free(ddurta.ddurta_holds); -} - typedef struct zfs_hold_cleanup_arg { char zhca_spaname[MAXNAMELEN]; uint64_t zhca_spa_load_guid; - uint64_t zhca_dsobj; - char zhca_htag[MAXNAMELEN]; + nvlist_t *zhca_holds; } zfs_hold_cleanup_arg_t; static void @@ -466,40 +199,447 @@ dsl_dataset_user_release_onexit(void *arg) error = spa_open(ca->zhca_spaname, &spa, FTAG); if (error != 0) { - zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s " + zfs_dbgmsg("couldn't release holds on pool=%s " "because pool is no longer loaded", - ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag); + ca->zhca_spaname); return; } if (spa_load_guid(spa) != ca->zhca_spa_load_guid) { - zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s " + zfs_dbgmsg("couldn't release holds on pool=%s " "because pool is no longer loaded (guid doesn't match)", - ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag); + ca->zhca_spaname); spa_close(spa, FTAG); return; } - dsl_dataset_user_release_tmp(spa_get_dsl(spa), - ca->zhca_dsobj, ca->zhca_htag); + (void) dsl_dataset_user_release_tmp(spa_get_dsl(spa), ca->zhca_holds); + fnvlist_free(ca->zhca_holds); kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t)); spa_close(spa, FTAG); } -void -dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag, - minor_t minor) +static void +dsl_onexit_hold_cleanup(spa_t *spa, nvlist_t *holds, minor_t minor) { - zfs_hold_cleanup_arg_t *ca = kmem_alloc(sizeof (*ca), KM_PUSHPAGE); - spa_t *spa = dsl_dataset_get_spa(ds); + zfs_hold_cleanup_arg_t *ca; + + if (minor == 0 || nvlist_empty(holds)) { + fnvlist_free(holds); + return; + } + + ASSERT(spa != NULL); + ca = kmem_alloc(sizeof (*ca), KM_SLEEP); + (void) strlcpy(ca->zhca_spaname, spa_name(spa), sizeof (ca->zhca_spaname)); ca->zhca_spa_load_guid = spa_load_guid(spa); - ca->zhca_dsobj = ds->ds_object; - (void) strlcpy(ca->zhca_htag, htag, sizeof (ca->zhca_htag)); + ca->zhca_holds = holds; VERIFY0(zfs_onexit_add_cb(minor, dsl_dataset_user_release_onexit, ca, NULL)); } +void +dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag, + minor_t minor, uint64_t now, dmu_tx_t *tx) +{ + nvlist_t *tmpholds; + + if (minor != 0) + tmpholds = fnvlist_alloc(); + else + tmpholds = NULL; + dsl_dataset_user_hold_sync_one_impl(tmpholds, ds, htag, minor, now, tx); + dsl_onexit_hold_cleanup(dsl_dataset_get_spa(ds), tmpholds, minor); +} + +static void +dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx) +{ + dsl_dataset_user_hold_arg_t *dduha = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + nvlist_t *tmpholds; + nvpair_t *pair; + uint64_t now = gethrestime_sec(); + + if (dduha->dduha_minor != 0) + tmpholds = fnvlist_alloc(); + else + tmpholds = NULL; + for (pair = nvlist_next_nvpair(dduha->dduha_chkholds, NULL); + pair != NULL; + pair = nvlist_next_nvpair(dduha->dduha_chkholds, pair)) { + dsl_dataset_t *ds; + + VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds)); + dsl_dataset_user_hold_sync_one_impl(tmpholds, ds, + fnvpair_value_string(pair), dduha->dduha_minor, now, tx); + dsl_dataset_rele(ds, FTAG); + } + dsl_onexit_hold_cleanup(dp->dp_spa, tmpholds, dduha->dduha_minor); +} + +/* + * The full semantics of this function are described in the comment above + * lzc_hold(). + * + * To summarize: + * holds is nvl of snapname -> holdname + * errlist will be filled in with snapname -> error + * + * The snaphosts must all be in the same pool. + * + * Holds for snapshots that don't exist will be skipped. + * + * If none of the snapshots for requested holds exist then ENOENT will be + * returned. + * + * If cleanup_minor is not 0, the holds will be temporary, which will be cleaned + * up when the process exits. + * + * On success all the holds, for snapshots that existed, will be created and 0 + * will be returned. + * + * On failure no holds will be created, the errlist will be filled in, + * and an errno will returned. + * + * In all cases the errlist will contain entries for holds where the snapshot + * didn't exist. + */ +int +dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist) +{ + dsl_dataset_user_hold_arg_t dduha; + nvpair_t *pair; + int ret; + + pair = nvlist_next_nvpair(holds, NULL); + if (pair == NULL) + return (0); + + dduha.dduha_holds = holds; + dduha.dduha_chkholds = fnvlist_alloc(); + dduha.dduha_errlist = errlist; + dduha.dduha_minor = cleanup_minor; + + ret = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check, + dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds)); + fnvlist_free(dduha.dduha_chkholds); + + return (ret); +} + +typedef int (dsl_holdfunc_t)(dsl_pool_t *dp, const char *name, void *tag, + dsl_dataset_t **dsp); + +typedef struct dsl_dataset_user_release_arg { + dsl_holdfunc_t *ddura_holdfunc; + nvlist_t *ddura_holds; + nvlist_t *ddura_todelete; + nvlist_t *ddura_errlist; + nvlist_t *ddura_chkholds; +} dsl_dataset_user_release_arg_t; + +/* Place a dataset hold on the snapshot identified by passed dsobj string */ +static int +dsl_dataset_hold_obj_string(dsl_pool_t *dp, const char *dsobj, void *tag, + dsl_dataset_t **dsp) +{ + return (dsl_dataset_hold_obj(dp, strtonum(dsobj, NULL), tag, dsp)); +} + +static int +dsl_dataset_user_release_check_one(dsl_dataset_user_release_arg_t *ddura, + dsl_dataset_t *ds, nvlist_t *holds, const char *snapname) +{ + uint64_t zapobj; + nvlist_t *holds_found; + nvpair_t *pair; + objset_t *mos; + int numholds; + + if (!dsl_dataset_is_snapshot(ds)) + return (SET_ERROR(EINVAL)); + + if (nvlist_empty(holds)) + return (0); + + numholds = 0; + mos = ds->ds_dir->dd_pool->dp_meta_objset; + zapobj = ds->ds_phys->ds_userrefs_obj; + holds_found = fnvlist_alloc(); + + for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; + pair = nvlist_next_nvpair(holds, pair)) { + uint64_t tmp; + int error; + const char *holdname = nvpair_name(pair); + + if (zapobj != 0) + error = zap_lookup(mos, zapobj, holdname, 8, 1, &tmp); + else + error = SET_ERROR(ENOENT); + + /* + * Non-existent holds are put on the errlist, but don't + * cause an overall failure. + */ + if (error == ENOENT) { + if (ddura->ddura_errlist != NULL) { + char *errtag = kmem_asprintf("%s#%s", + snapname, holdname); + fnvlist_add_int32(ddura->ddura_errlist, errtag, + ENOENT); + strfree(errtag); + } + continue; + } + + if (error != 0) { + fnvlist_free(holds_found); + return (error); + } + + fnvlist_add_boolean(holds_found, holdname); + numholds++; + } + + if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 && + ds->ds_userrefs == numholds) { + /* we need to destroy the snapshot as well */ + if (dsl_dataset_long_held(ds)) { + fnvlist_free(holds_found); + return (SET_ERROR(EBUSY)); + } + fnvlist_add_boolean(ddura->ddura_todelete, snapname); + } + + if (numholds != 0) { + fnvlist_add_nvlist(ddura->ddura_chkholds, snapname, + holds_found); + } + fnvlist_free(holds_found); + + return (0); +} + +static int +dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx) +{ + dsl_dataset_user_release_arg_t *ddura; + dsl_holdfunc_t *holdfunc; + dsl_pool_t *dp; + nvpair_t *pair; + + if (!dmu_tx_is_syncing(tx)) + return (0); + + dp = dmu_tx_pool(tx); + + ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); + + ddura = arg; + holdfunc = ddura->ddura_holdfunc; + + for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); + pair != NULL; pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) { + int error; + dsl_dataset_t *ds; + nvlist_t *holds; + const char *snapname = nvpair_name(pair); + + error = nvpair_value_nvlist(pair, &holds); + if (error != 0) + error = (SET_ERROR(EINVAL)); + else + error = holdfunc(dp, snapname, FTAG, &ds); + if (error == 0) { + error = dsl_dataset_user_release_check_one(ddura, ds, + holds, snapname); + dsl_dataset_rele(ds, FTAG); + } + if (error != 0) { + if (ddura->ddura_errlist != NULL) { + fnvlist_add_int32(ddura->ddura_errlist, + snapname, error); + } + /* + * Non-existent snapshots are put on the errlist, + * but don't cause an overall failure. + */ + if (error != ENOENT) + return (error); + } + } + + /* Return ENOENT if none of the holds existed. */ + if (nvlist_empty(ddura->ddura_chkholds)) + return (SET_ERROR(ENOENT)); + + return (0); +} + +static void +dsl_dataset_user_release_sync_one(dsl_dataset_t *ds, nvlist_t *holds, + dmu_tx_t *tx) +{ + dsl_pool_t *dp = ds->ds_dir->dd_pool; + objset_t *mos = dp->dp_meta_objset; + nvpair_t *pair; + + for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; + pair = nvlist_next_nvpair(holds, pair)) { + int error; + const char *holdname = nvpair_name(pair); + + /* Remove temporary hold if one exists. */ + error = dsl_pool_user_release(dp, ds->ds_object, holdname, tx); + VERIFY(error == 0 || error == ENOENT); + + VERIFY0(zap_remove(mos, ds->ds_phys->ds_userrefs_obj, holdname, + tx)); + ds->ds_userrefs--; + + spa_history_log_internal_ds(ds, "release", tx, + "tag=%s refs=%lld", holdname, (longlong_t)ds->ds_userrefs); + } +} + +static void +dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx) +{ + dsl_dataset_user_release_arg_t *ddura = arg; + dsl_holdfunc_t *holdfunc = ddura->ddura_holdfunc; + dsl_pool_t *dp = dmu_tx_pool(tx); + nvpair_t *pair; + + ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); + + for (pair = nvlist_next_nvpair(ddura->ddura_chkholds, NULL); + pair != NULL; pair = nvlist_next_nvpair(ddura->ddura_chkholds, + pair)) { + dsl_dataset_t *ds; + const char *name = nvpair_name(pair); + + VERIFY0(holdfunc(dp, name, FTAG, &ds)); + + dsl_dataset_user_release_sync_one(ds, + fnvpair_value_nvlist(pair), tx); + if (nvlist_exists(ddura->ddura_todelete, name)) { + ASSERT(ds->ds_userrefs == 0 && + ds->ds_phys->ds_num_children == 1 && + DS_IS_DEFER_DESTROY(ds)); + dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx); + } + dsl_dataset_rele(ds, FTAG); + } +} + +/* + * The full semantics of this function are described in the comment above + * lzc_release(). + * + * To summarize: + * Releases holds specified in the nvl holds. + * + * holds is nvl of snapname -> { holdname, ... } + * errlist will be filled in with snapname -> error + * + * If tmpdp is not NULL the names for holds should be the dsobj's of snapshots, + * otherwise they should be the names of shapshots. + * + * As a release may cause snapshots to be destroyed this trys to ensure they + * aren't mounted. + * + * The release of non-existent holds are skipped. + * + * At least one hold must have been released for the this function to succeed + * and return 0. + */ +static int +dsl_dataset_user_release_impl(nvlist_t *holds, nvlist_t *errlist, + dsl_pool_t *tmpdp) +{ + dsl_dataset_user_release_arg_t ddura; + nvpair_t *pair; + char *pool; + int error; + + pair = nvlist_next_nvpair(holds, NULL); + if (pair == NULL) + return (0); + + /* + * The release may cause snapshots to be destroyed; make sure they + * are not mounted. + */ + if (tmpdp != NULL) { + /* Temporary holds are specified by dsobj string. */ + ddura.ddura_holdfunc = dsl_dataset_hold_obj_string; + pool = spa_name(tmpdp->dp_spa); +#ifdef _KERNEL + dsl_pool_config_enter(tmpdp, FTAG); + for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; + pair = nvlist_next_nvpair(holds, pair)) { + dsl_dataset_t *ds; + + error = dsl_dataset_hold_obj_string(tmpdp, + nvpair_name(pair), FTAG, &ds); + if (error == 0) { + char name[MAXNAMELEN]; + dsl_dataset_name(ds, name); + dsl_dataset_rele(ds, FTAG); + (void) zfs_unmount_snap(name); + } + } + dsl_pool_config_exit(tmpdp, FTAG); +#endif + } else { + /* Non-temporary holds are specified by name. */ + ddura.ddura_holdfunc = dsl_dataset_hold; + pool = nvpair_name(pair); +#ifdef _KERNEL + for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; + pair = nvlist_next_nvpair(holds, pair)) { + (void) zfs_unmount_snap(nvpair_name(pair)); + } +#endif + } + + ddura.ddura_holds = holds; + ddura.ddura_errlist = errlist; + ddura.ddura_todelete = fnvlist_alloc(); + ddura.ddura_chkholds = fnvlist_alloc(); + + error = dsl_sync_task(pool, dsl_dataset_user_release_check, + dsl_dataset_user_release_sync, &ddura, + fnvlist_num_pairs(holds)); + fnvlist_free(ddura.ddura_todelete); + fnvlist_free(ddura.ddura_chkholds); + + return (error); +} + +/* + * holds is nvl of snapname -> { holdname, ... } + * errlist will be filled in with snapname -> error + */ +int +dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist) +{ + return (dsl_dataset_user_release_impl(holds, errlist, NULL)); +} + +/* + * holds is nvl of snapdsobj -> { holdname, ... } + */ +void +dsl_dataset_user_release_tmp(struct dsl_pool *dp, nvlist_t *holds) +{ + ASSERT(dp != NULL); + (void) dsl_dataset_user_release_impl(holds, NULL, dp); +} + int dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl) { diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index f476fc183d..d6736c29aa 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -28,6 +28,7 @@ * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 201i3 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ /* @@ -4867,20 +4868,6 @@ zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl) static int zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist) { - nvpair_t *pair; - int err; - - /* - * The release may cause the snapshot to be destroyed; make sure it - * is not mounted. - */ - for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; - pair = nvlist_next_nvpair(holds, pair)) { - err = zfs_unmount_snap(nvpair_name(pair)); - if (err != 0) - return (err); - } - return (dsl_dataset_user_release(holds, errlist)); }