Add 'zfs wait' command

Add a mechanism to wait for delete queue to drain.

When doing redacted send/recv, many workflows involve deleting files 
that contain sensitive data. Because of the way zfs handles file 
deletions, snapshots taken quickly after a rm operation can sometimes 
still contain the file in question, especially if the file is very 
large. This can result in issues for redacted send/recv users who 
expect the deleted files to be redacted in the send streams, and not 
appear in their clones.

This change duplicates much of the zpool wait related logic into a 
zfs wait command, which can be used to wait until the internal
deleteq has been drained.  Additional wait activities may be added 
in the future. 

Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: John Gallagher <john.gallagher@delphix.com>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #9707
This commit is contained in:
Paul Dagnelie 2020-04-01 10:02:06 -07:00 committed by GitHub
parent c9e3efdb3a
commit 5a42ef04fd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
25 changed files with 679 additions and 11 deletions

View File

@ -122,6 +122,7 @@ static int zfs_do_change_key(int argc, char **argv);
static int zfs_do_project(int argc, char **argv); static int zfs_do_project(int argc, char **argv);
static int zfs_do_version(int argc, char **argv); static int zfs_do_version(int argc, char **argv);
static int zfs_do_redact(int argc, char **argv); static int zfs_do_redact(int argc, char **argv);
static int zfs_do_wait(int argc, char **argv);
#ifdef __FreeBSD__ #ifdef __FreeBSD__
static int zfs_do_jail(int argc, char **argv); static int zfs_do_jail(int argc, char **argv);
@ -183,7 +184,8 @@ typedef enum {
HELP_VERSION, HELP_VERSION,
HELP_REDACT, HELP_REDACT,
HELP_JAIL, HELP_JAIL,
HELP_UNJAIL HELP_UNJAIL,
HELP_WAIT,
} zfs_help_t; } zfs_help_t;
typedef struct zfs_command { typedef struct zfs_command {
@ -248,6 +250,7 @@ static zfs_command_t command_table[] = {
{ "unload-key", zfs_do_unload_key, HELP_UNLOAD_KEY }, { "unload-key", zfs_do_unload_key, HELP_UNLOAD_KEY },
{ "change-key", zfs_do_change_key, HELP_CHANGE_KEY }, { "change-key", zfs_do_change_key, HELP_CHANGE_KEY },
{ "redact", zfs_do_redact, HELP_REDACT }, { "redact", zfs_do_redact, HELP_REDACT },
{ "wait", zfs_do_wait, HELP_WAIT },
#ifdef __FreeBSD__ #ifdef __FreeBSD__
{ "jail", zfs_do_jail, HELP_JAIL }, { "jail", zfs_do_jail, HELP_JAIL },
@ -410,6 +413,8 @@ get_usage(zfs_help_t idx)
return (gettext("\tjail <jailid|jailname> <filesystem>\n")); return (gettext("\tjail <jailid|jailname> <filesystem>\n"));
case HELP_UNJAIL: case HELP_UNJAIL:
return (gettext("\tunjail <jailid|jailname> <filesystem>\n")); return (gettext("\tunjail <jailid|jailname> <filesystem>\n"));
case HELP_WAIT:
return (gettext("\twait [-t <activity>] <filesystem>\n"));
} }
abort(); abort();
@ -8317,6 +8322,90 @@ zfs_do_project(int argc, char **argv)
return (ret); return (ret);
} }
/*
 * zfs wait [-t <activity>[,<activity>]...] <filesystem>
 *
 * Block until the selected kinds of background activity have ceased on the
 * named filesystem.  Every activity is selected unless -t is given, in which
 * case only the activities listed with -t are waited for.
 *
 * Returns 1 if the filesystem cannot be opened.  Otherwise returns the status
 * of the last zfs_wait_status() call, which is 0 when waiting finished, when
 * the filesystem was reported missing, or when no activity was selected.
 */
static int
zfs_do_wait(int argc, char **argv)
{
	boolean_t enabled[ZFS_WAIT_NUM_ACTIVITIES];
	/*
	 * Must start out as 0: if -t selects no activity at all, the polling
	 * loop below tests 'error' without ever having assigned it.
	 */
	int error = 0;
	/*
	 * getopt() returns an int.  Storing the result in a plain char makes
	 * the comparison with -1 always true where char is unsigned.
	 */
	int c;

	/* By default, wait for all types of activity. */
	for (int i = 0; i < ZFS_WAIT_NUM_ACTIVITIES; i++)
		enabled[i] = B_TRUE;

	while ((c = getopt(argc, argv, "t:")) != -1) {
		switch (c) {
		case 't':
		{
			/* Index of each name must match zfs_wait_activity_t. */
			static char *col_subopts[] = { "deleteq", NULL };
			char *value;

			/* Reset activities array */
			bzero(&enabled, sizeof (enabled));
			while (*optarg != '\0') {
				int activity = getsubopt(&optarg, col_subopts,
				    &value);

				if (activity < 0) {
					(void) fprintf(stderr,
					    gettext("invalid activity '%s'\n"),
					    value);
					usage(B_FALSE);
				}

				enabled[activity] = B_TRUE;
			}
			break;
		}
		case '?':
			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
			    optopt);
			usage(B_FALSE);
		}
	}

	argv += optind;
	argc -= optind;
	if (argc < 1) {
		(void) fprintf(stderr, gettext("missing 'filesystem' "
		    "argument\n"));
		usage(B_FALSE);
	}
	if (argc > 1) {
		(void) fprintf(stderr, gettext("too many arguments\n"));
		usage(B_FALSE);
	}

	zfs_handle_t *zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM);
	if (zhp == NULL)
		return (1);

	/*
	 * Keep making passes over the enabled activities until one full pass
	 * completes without having had to wait for anything, or until an
	 * error occurs or the filesystem is reported missing.
	 */
	for (;;) {
		boolean_t missing = B_FALSE;
		boolean_t any_waited = B_FALSE;

		for (int i = 0; i < ZFS_WAIT_NUM_ACTIVITIES; i++) {
			boolean_t waited;

			if (!enabled[i])
				continue;

			error = zfs_wait_status(zhp, i, &missing, &waited);
			if (error != 0 || missing)
				break;

			any_waited = (any_waited || waited);
		}

		if (error != 0 || missing || !any_waited)
			break;
	}

	zfs_close(zhp);

	return (error);
}
/* /*
* Display version message * Display version message
*/ */

View File

@ -264,6 +264,7 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/cli_root/zfs_unmount/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_unmount/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_unshare/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_unshare/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_wait/Makefile
tests/zfs-tests/tests/functional/cli_root/zpool/Makefile tests/zfs-tests/tests/functional/cli_root/zpool/Makefile
tests/zfs-tests/tests/functional/cli_root/zpool_add/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_add/Makefile
tests/zfs-tests/tests/functional/cli_root/zpool_attach/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_attach/Makefile

View File

@ -507,6 +507,9 @@ extern nvlist_t *zfs_get_user_props(zfs_handle_t *);
extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *); extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *);
extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *); extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *);
extern int zfs_wait_status(zfs_handle_t *, zfs_wait_activity_t,
boolean_t *, boolean_t *);
/* /*
* zfs encryption management * zfs encryption management
*/ */

View File

@ -133,6 +133,7 @@ int lzc_pool_checkpoint_discard(const char *);
int lzc_wait(const char *, zpool_wait_activity_t, boolean_t *); int lzc_wait(const char *, zpool_wait_activity_t, boolean_t *);
int lzc_wait_tag(const char *, zpool_wait_activity_t, uint64_t, boolean_t *); int lzc_wait_tag(const char *, zpool_wait_activity_t, uint64_t, boolean_t *);
int lzc_wait_fs(const char *, zfs_wait_activity_t, boolean_t *);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -121,6 +121,11 @@ struct dsl_dir {
bplist_t dd_pending_frees; bplist_t dd_pending_frees;
bplist_t dd_pending_allocs; bplist_t dd_pending_allocs;
kmutex_t dd_activity_lock;
kcondvar_t dd_activity_cv;
boolean_t dd_activity_cancelled;
uint64_t dd_activity_waiters;
/* protected by dd_lock; keep at end of struct for better locality */ /* protected by dd_lock; keep at end of struct for better locality */
char dd_myname[ZFS_MAX_DATASET_NAME_LEN]; char dd_myname[ZFS_MAX_DATASET_NAME_LEN];
}; };
@ -192,6 +197,9 @@ boolean_t dsl_dir_is_zapified(dsl_dir_t *dd);
void dsl_dir_livelist_open(dsl_dir_t *dd, uint64_t obj); void dsl_dir_livelist_open(dsl_dir_t *dd, uint64_t obj);
void dsl_dir_livelist_close(dsl_dir_t *dd); void dsl_dir_livelist_close(dsl_dir_t *dd);
void dsl_dir_remove_livelist(dsl_dir_t *dd, dmu_tx_t *tx, boolean_t total); void dsl_dir_remove_livelist(dsl_dir_t *dd, dmu_tx_t *tx, boolean_t total);
int dsl_dir_wait(dsl_dir_t *dd, dsl_dataset_t *ds, zfs_wait_activity_t activity,
boolean_t *waited);
void dsl_dir_cancel_waiters(dsl_dir_t *dd);
/* internal reserved dir name */ /* internal reserved dir name */
#define MOS_DIR_NAME "$MOS" #define MOS_DIR_NAME "$MOS"

View File

@ -1282,6 +1282,7 @@ typedef enum zfs_ioc {
ZFS_IOC_REDACT, /* 0x5a51 */ ZFS_IOC_REDACT, /* 0x5a51 */
ZFS_IOC_GET_BOOKMARK_PROPS, /* 0x5a52 */ ZFS_IOC_GET_BOOKMARK_PROPS, /* 0x5a52 */
ZFS_IOC_WAIT, /* 0x5a53 */ ZFS_IOC_WAIT, /* 0x5a53 */
ZFS_IOC_WAIT_FS, /* 0x5a54 */
/* /*
* Per-platform (Optional) - 6/128 numbers reserved. * Per-platform (Optional) - 6/128 numbers reserved.
@ -1358,6 +1359,11 @@ typedef enum {
ZPOOL_WAIT_NUM_ACTIVITIES ZPOOL_WAIT_NUM_ACTIVITIES
} zpool_wait_activity_t; } zpool_wait_activity_t;
/*
 * Kinds of per-filesystem background activity that can be waited for.
 * The value is passed as the "wait_activity" argument of ZFS_IOC_WAIT_FS.
 */
typedef enum {
/* the filesystem's internal delete queue (waits for it to empty) */
ZFS_WAIT_DELETEQ,
/* number of activities above; not itself a valid activity */
ZFS_WAIT_NUM_ACTIVITIES
} zfs_wait_activity_t;
/* /*
* Bookmark name values. * Bookmark name values.
*/ */
@ -1415,6 +1421,12 @@ typedef enum {
#define ZPOOL_WAIT_TAG "wait_tag" #define ZPOOL_WAIT_TAG "wait_tag"
#define ZPOOL_WAIT_WAITED "wait_waited" #define ZPOOL_WAIT_WAITED "wait_waited"
/*
* The following are names used when invoking ZFS_IOC_WAIT_FS.
*/
#define ZFS_WAIT_ACTIVITY "wait_activity"
#define ZFS_WAIT_WAITED "wait_waited"
/* /*
* Flags for ZFS_IOC_VDEV_SET_STATE * Flags for ZFS_IOC_VDEV_SET_STATE
*/ */

View File

@ -5599,3 +5599,31 @@ zvol_volsize_to_reservation(zpool_handle_t *zph, uint64_t volsize,
volsize += numdb; volsize += numdb;
return (volsize); return (volsize);
} }
/*
 * Wait for 'activity' on the filesystem referred to by 'zhp'.  Whether any
 * waiting was actually done is reported through 'waited'.
 *
 * A filesystem that no longer exists (ENOENT from the wait call) is not
 * treated as a failure: '*missing' is set to B_TRUE, nothing is printed and 0
 * is returned.  Callers that poll in a loop over a long period, as the
 * 'zfs wait' command does, should expect the filesystem to go away underneath
 * them, so that case must not produce an error message.
 *
 * Any other failure is reported through zfs_standard_error_fmt() and its
 * error number is returned.
 */
int
zfs_wait_status(zfs_handle_t *zhp, zfs_wait_activity_t activity,
    boolean_t *missing, boolean_t *waited)
{
	int err = lzc_wait_fs(zhp->zfs_name, activity, waited);

	if (err == ENOENT) {
		*missing = B_TRUE;
		return (0);
	}
	*missing = B_FALSE;

	if (err != 0) {
		(void) zfs_standard_error_fmt(zhp->zfs_hdl, err,
		    dgettext(TEXT_DOMAIN, "error waiting in fs '%s'"),
		    zhp->zfs_name);
	}

	return (err);
}

View File

@ -1621,3 +1621,23 @@ lzc_wait_tag(const char *pool, zpool_wait_activity_t activity, uint64_t tag,
{ {
return (wait_common(pool, activity, B_TRUE, tag, waited)); return (wait_common(pool, activity, B_TRUE, tag, waited));
} }
/*
 * Issue ZFS_IOC_WAIT_FS for filesystem 'fs', waiting for 'activity'.
 *
 * Returns the result of the ioctl.  On success, and only if 'waited' is not
 * NULL, '*waited' receives the ZFS_WAIT_WAITED value from the reply; on
 * failure '*waited' is left untouched.
 */
int
lzc_wait_fs(const char *fs, zfs_wait_activity_t activity, boolean_t *waited)
{
	nvlist_t *innvl = fnvlist_alloc();
	nvlist_t *outnvl = NULL;
	int err;

	fnvlist_add_int32(innvl, ZFS_WAIT_ACTIVITY, activity);
	err = lzc_ioctl(ZFS_IOC_WAIT_FS, fs, innvl, &outnvl);

	if (err == 0 && waited != NULL) {
		*waited = fnvlist_lookup_boolean_value(outnvl,
		    ZFS_WAIT_WAITED);
	}

	fnvlist_free(innvl);
	fnvlist_free(outnvl);

	return (err);
}

View File

@ -41,6 +41,7 @@ dist_man_MANS = \
zfs-unmount.8 \ zfs-unmount.8 \
zfs-upgrade.8 \ zfs-upgrade.8 \
zfs-userspace.8 \ zfs-userspace.8 \
zfs-wait.8 \
zgenhostid.8 \ zgenhostid.8 \
zinject.8 \ zinject.8 \
zpool.8 \ zpool.8 \

71
man/man8/zfs-wait.8 Normal file
View File

@ -0,0 +1,71 @@
.\"
.\" CDDL HEADER START
.\"
.\" The contents of this file are subject to the terms of the
.\" Common Development and Distribution License (the "License").
.\" You may not use this file except in compliance with the License.
.\"
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
.\" or http://www.opensolaris.org/os/licensing.
.\" See the License for the specific language governing permissions
.\" and limitations under the License.
.\"
.\" When distributing Covered Code, include this CDDL HEADER in each
.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
.\"
.\" CDDL HEADER END
.\"
.\"
.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
.\" Copyright (c) 2017 Datto Inc.
.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
.Dd August 9, 2019
.Dt ZFS-WAIT 8
.Os Linux
.Sh NAME
.Nm zfs Ns Pf - Cm wait
.Nd Wait for background activity to stop in a ZFS filesystem
.Sh SYNOPSIS
.Nm
.Cm wait
.Op Fl t Ar activity Ns Oo , Ns Ar activity Ns Oc Ns ...
.Ar fs
.Sh DESCRIPTION
.Bl -tag -width Ds
.It Xo
.Nm
.Cm wait
.Op Fl t Ar activity Ns Oo , Ns Ar activity Ns Oc Ns ...
.Ar fs
.Xc
Waits until all background activity of the given types has ceased in the given
filesystem.
The activity could cease because it has completed or because the filesystem has
been destroyed or unmounted.
If no activities are specified, the command waits until background activity of
every type listed below has ceased.
If there is no activity of the given types in progress, the command returns
immediately.
.Pp
These are the possible values for
.Ar activity ,
along with what each one waits for:
.Bd -literal
deleteq The filesystem's internal delete queue to empty
.Ed
.Pp
Note that the internal delete queue does not finish draining until
all large files have had time to be fully destroyed and all open file
handles to unlinked files are closed.
.El
.Sh SEE ALSO
.Xr lsof 8

View File

@ -281,6 +281,11 @@ Attaches a filesystem to a jail.
.It Xr zfs-unjail 8 .It Xr zfs-unjail 8
Detaches a filesystem from a jail. Detaches a filesystem from a jail.
.El .El
.Ss Waiting
.Bl -tag -width ""
.It Xr zfs-wait 8
Wait for background activity in a filesystem to complete.
.El
.Sh EXIT STATUS .Sh EXIT STATUS
The The
.Nm .Nm

View File

@ -52,6 +52,8 @@
#include <sys/zfs_fuid.h> #include <sys/zfs_fuid.h>
#include <sys/sa.h> #include <sys/sa.h>
#include <sys/zfs_sa.h> #include <sys/zfs_sa.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dir.h>
/* /*
* zfs_match_find() is used by zfs_dirent_lock() to perform zap lookups * zfs_match_find() is used by zfs_dirent_lock() to perform zap lookups
@ -739,6 +741,8 @@ zfs_rmnode(znode_t *zp)
zfs_unlinked_add(xzp, tx); zfs_unlinked_add(xzp, tx);
} }
mutex_enter(&os->os_dsl_dataset->ds_dir->dd_activity_lock);
/* /*
* Remove this znode from the unlinked set. If a has rollback has * Remove this znode from the unlinked set. If a has rollback has
* occurred while a file is open and unlinked. Then when the file * occurred while a file is open and unlinked. Then when the file
@ -749,6 +753,13 @@ zfs_rmnode(znode_t *zp)
zp->z_id, tx); zp->z_id, tx);
VERIFY(error == 0 || error == ENOENT); VERIFY(error == 0 || error == ENOENT);
uint64_t count;
if (zap_count(os, zfsvfs->z_unlinkedobj, &count) == 0 && count == 0) {
cv_broadcast(&os->os_dsl_dataset->ds_dir->dd_activity_cv);
}
mutex_exit(&os->os_dsl_dataset->ds_dir->dd_activity_lock);
dataset_kstats_update_nunlinked_kstat(&zfsvfs->z_kstat, 1); dataset_kstats_update_nunlinked_kstat(&zfsvfs->z_kstat, 1);
zfs_znode_delete(zp, tx); zfs_znode_delete(zp, tx);

View File

@ -55,6 +55,7 @@
#include <sys/zfs_quota.h> #include <sys/zfs_quota.h>
#include <sys/sunddi.h> #include <sys/sunddi.h>
#include <sys/dmu_objset.h> #include <sys/dmu_objset.h>
#include <sys/dsl_dir.h>
#include <sys/spa_boot.h> #include <sys/spa_boot.h>
#include <sys/objlist.h> #include <sys/objlist.h>
#include <sys/zpl.h> #include <sys/zpl.h>
@ -872,6 +873,8 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
"num_entries in unlinked set: %llu", "num_entries in unlinked set: %llu",
zs.zs_num_entries); zs.zs_num_entries);
zfs_unlinked_drain(zfsvfs); zfs_unlinked_drain(zfsvfs);
dsl_dir_t *dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
dd->dd_activity_cancelled = B_FALSE;
} }
/* /*
@ -1423,6 +1426,8 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
} }
dmu_objset_evict_dbufs(zfsvfs->z_os); dmu_objset_evict_dbufs(zfsvfs->z_os);
dsl_dir_t *dd = os->os_dsl_dataset->ds_dir;
dsl_dir_cancel_waiters(dd);
return (0); return (0);
} }
@ -1813,6 +1818,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
if (err != 0) if (err != 0)
goto bail; goto bail;
ds->ds_dir->dd_activity_cancelled = B_FALSE;
VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
zfs_set_fuid_feature(zfsvfs); zfs_set_fuid_feature(zfsvfs);

View File

@ -3077,20 +3077,26 @@ dsl_dataset_rename_snapshot(const char *fsname,
static int static int
dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx) dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx)
{ {
boolean_t held; boolean_t held = B_FALSE;
if (!dmu_tx_is_syncing(tx)) if (!dmu_tx_is_syncing(tx))
return (0); return (0);
if (owner != NULL) { dsl_dir_t *dd = ds->ds_dir;
VERIFY3P(ds->ds_owner, ==, owner); mutex_enter(&dd->dd_activity_lock);
dsl_dataset_long_rele(ds, owner); uint64_t holds = zfs_refcount_count(&ds->ds_longholds) -
} (owner != NULL ? 1 : 0);
/*
held = dsl_dataset_long_held(ds); * The value of dd_activity_waiters can change as soon as we drop the
* lock, but we're fine with that; new waiters coming in or old
if (owner != NULL) * waiters leaving doesn't cause problems, since we're going to cancel
dsl_dataset_long_hold(ds, owner); * waiters later anyway. The goal of this check is to verify that no
* non-waiters have long-holds, and all new long-holds will be
* prevented because we're holding the pool config as writer.
*/
if (holds != dd->dd_activity_waiters)
held = B_TRUE;
mutex_exit(&dd->dd_activity_lock);
if (held) if (held)
return (SET_ERROR(EBUSY)); return (SET_ERROR(EBUSY));
@ -4036,6 +4042,8 @@ dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
DMU_MAX_ACCESS * spa_asize_inflation); DMU_MAX_ACCESS * spa_asize_inflation);
ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev); ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev);
dsl_dir_cancel_waiters(origin_head->ds_dir);
/* /*
* Swap per-dataset feature flags. * Swap per-dataset feature flags.
*/ */

View File

@ -766,6 +766,8 @@ dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds)
if (zfs_refcount_count(&ds->ds_longholds) != expected_holds) if (zfs_refcount_count(&ds->ds_longholds) != expected_holds)
return (SET_ERROR(EBUSY)); return (SET_ERROR(EBUSY));
ASSERT0(ds->ds_dir->dd_activity_waiters);
mos = ds->ds_dir->dd_pool->dp_meta_objset; mos = ds->ds_dir->dd_pool->dp_meta_objset;
/* /*
@ -1002,6 +1004,8 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
/* We need to log before removing it from the namespace. */ /* We need to log before removing it from the namespace. */
spa_history_log_internal_ds(ds, "destroy", tx, " "); spa_history_log_internal_ds(ds, "destroy", tx, " ");
dsl_dir_cancel_waiters(ds->ds_dir);
rmorigin = (dsl_dir_is_clone(ds->ds_dir) && rmorigin = (dsl_dir_is_clone(ds->ds_dir) &&
DS_IS_DEFER_DESTROY(ds->ds_prev) && DS_IS_DEFER_DESTROY(ds->ds_prev) &&
dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 && dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 &&

View File

@ -51,6 +51,9 @@
#include <sys/zthr.h> #include <sys/zthr.h>
#include "zfs_namecheck.h" #include "zfs_namecheck.h"
#include "zfs_prop.h" #include "zfs_prop.h"
#ifdef _KERNEL
#include <sys/zfs_vfsops.h>
#endif
/* /*
* Filesystem and Snapshot Limits * Filesystem and Snapshot Limits
@ -160,6 +163,8 @@ dsl_dir_evict_async(void *dbu)
dsl_dir_livelist_close(dd); dsl_dir_livelist_close(dd);
dsl_prop_fini(dd); dsl_prop_fini(dd);
cv_destroy(&dd->dd_activity_cv);
mutex_destroy(&dd->dd_activity_lock);
mutex_destroy(&dd->dd_lock); mutex_destroy(&dd->dd_lock);
kmem_free(dd, sizeof (dsl_dir_t)); kmem_free(dd, sizeof (dsl_dir_t));
} }
@ -207,6 +212,8 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
} }
mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&dd->dd_activity_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&dd->dd_activity_cv, NULL, CV_DEFAULT, NULL);
dsl_prop_init(dd); dsl_prop_init(dd);
dsl_dir_snap_cmtime_update(dd); dsl_dir_snap_cmtime_update(dd);
@ -280,6 +287,8 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
if (dsl_deadlist_is_open(&dd->dd_livelist)) if (dsl_deadlist_is_open(&dd->dd_livelist))
dsl_dir_livelist_close(dd); dsl_dir_livelist_close(dd);
dsl_prop_fini(dd); dsl_prop_fini(dd);
cv_destroy(&dd->dd_activity_cv);
mutex_destroy(&dd->dd_activity_lock);
mutex_destroy(&dd->dd_lock); mutex_destroy(&dd->dd_lock);
kmem_free(dd, sizeof (dsl_dir_t)); kmem_free(dd, sizeof (dsl_dir_t));
dd = winner; dd = winner;
@ -310,6 +319,8 @@ errout:
if (dsl_deadlist_is_open(&dd->dd_livelist)) if (dsl_deadlist_is_open(&dd->dd_livelist))
dsl_dir_livelist_close(dd); dsl_dir_livelist_close(dd);
dsl_prop_fini(dd); dsl_prop_fini(dd);
cv_destroy(&dd->dd_activity_cv);
mutex_destroy(&dd->dd_activity_lock);
mutex_destroy(&dd->dd_lock); mutex_destroy(&dd->dd_lock);
kmem_free(dd, sizeof (dsl_dir_t)); kmem_free(dd, sizeof (dsl_dir_t));
dmu_buf_rele(dbuf, tag); dmu_buf_rele(dbuf, tag);
@ -2282,6 +2293,108 @@ dsl_dir_remove_livelist(dsl_dir_t *dd, dmu_tx_t *tx, boolean_t total)
} }
} }
/*
 * Determine whether 'activity' is currently in progress for dataset 'ds',
 * which belongs to dsl_dir 'dd'.  The caller must hold dd->dd_activity_lock.
 *
 * On success returns 0 and stores the answer in '*in_progress'.  On failure
 * returns the error of the lookup that failed and leaves '*in_progress'
 * untouched.  The hold on 'ds' belongs to the caller and is never dropped
 * here.
 */
static int
dsl_dir_activity_in_progress(dsl_dir_t *dd, dsl_dataset_t *ds,
    zfs_wait_activity_t activity, boolean_t *in_progress)
{
	int error = 0;

	ASSERT(MUTEX_HELD(&dd->dd_activity_lock));

	switch (activity) {
	case ZFS_WAIT_DELETEQ: {
#ifdef _KERNEL
		objset_t *os;
		error = dmu_objset_from_ds(ds, &os);
		if (error != 0)
			break;

		mutex_enter(&os->os_user_ptr_lock);
		void *user = dmu_objset_get_user(os);
		mutex_exit(&os->os_user_ptr_lock);
		/*
		 * Report "idle" if this is not a ZPL objset, if it has no
		 * user pointer registered, or if it is flagged as unmounted.
		 */
		if (dmu_objset_type(os) != DMU_OST_ZFS ||
		    user == NULL || zfs_get_vfs_flag_unmounted(os)) {
			*in_progress = B_FALSE;
			return (0);
		}

		uint64_t readonly = B_FALSE;
		error = zfs_get_temporary_prop(ds, ZFS_PROP_READONLY, &readonly,
		    NULL);

		if (error != 0)
			break;

		/* A read-only fs or non-writeable pool is reported as idle. */
		if (readonly || !spa_writeable(dd->dd_pool->dp_spa)) {
			*in_progress = B_FALSE;
			return (0);
		}

		uint64_t count, unlinked_obj;
		error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
		    &unlinked_obj);
		/*
		 * Only propagate the error.  'ds' was held by our caller, who
		 * also releases it; dropping it here would release a hold
		 * this function does not own.
		 */
		if (error != 0)
			break;

		/* The activity is in progress while the unlinked set has entries. */
		error = zap_count(os, unlinked_obj, &count);

		if (error == 0)
			*in_progress = (count != 0);
		break;
#else
		/*
		 * The delete queue is ZPL specific, and libzpool doesn't have
		 * it. It doesn't make sense to wait for it.
		 */
		*in_progress = B_FALSE;
		break;
#endif
	}
	default:
		panic("unrecognized value for activity %d", activity);
	}

	return (error);
}
/*
 * Sleep on dd->dd_activity_cv until 'activity' is no longer in progress for
 * 'ds'.  dd->dd_activity_lock is handed to cv_wait_sig(), so the caller is
 * expected to hold it on entry; it is still held on return.
 *
 * '*waited' is set to B_TRUE if the thread had to sleep at least once.  It is
 * never cleared here, so the caller must initialize it.
 *
 * Returns 0 once the activity has ceased, the error from the in-progress
 * check if that fails, or EINTR if the sleep was interrupted by a signal or
 * the waiters were cancelled (dd_activity_cancelled).
 */
int
dsl_dir_wait(dsl_dir_t *dd, dsl_dataset_t *ds, zfs_wait_activity_t activity,
    boolean_t *waited)
{
	dsl_pool_t *dp = dd->dd_pool;

	for (;;) {
		boolean_t busy;
		int err;

		/* The pool config is held only around the status check. */
		dsl_pool_config_enter(dp, FTAG);
		err = dsl_dir_activity_in_progress(dd, ds, activity, &busy);
		dsl_pool_config_exit(dp, FTAG);

		if (err != 0)
			return (err);
		if (!busy)
			return (0);

		*waited = B_TRUE;

		if (cv_wait_sig(&dd->dd_activity_cv,
		    &dd->dd_activity_lock) == 0)
			return (SET_ERROR(EINTR));
		if (dd->dd_activity_cancelled)
			return (SET_ERROR(EINTR));
	}
}
/*
 * Cancel every thread waiting for activity on 'dd': mark the dsl_dir as
 * cancelled, wake all sleepers on dd_activity_cv, and then block until
 * dd_activity_waiters has dropped to zero.
 */
void
dsl_dir_cancel_waiters(dsl_dir_t *dd)
{
	kmutex_t *lock = &dd->dd_activity_lock;
	kcondvar_t *cv = &dd->dd_activity_cv;

	mutex_enter(lock);

	dd->dd_activity_cancelled = B_TRUE;
	cv_broadcast(cv);

	/* Departing waiters signal the same cv once the count reaches zero. */
	while (dd->dd_activity_waiters != 0)
		cv_wait(cv, lock);

	mutex_exit(lock);
}
#if defined(_KERNEL) #if defined(_KERNEL)
EXPORT_SYMBOL(dsl_dir_set_quota); EXPORT_SYMBOL(dsl_dir_set_quota);
EXPORT_SYMBOL(dsl_dir_set_reservation); EXPORT_SYMBOL(dsl_dir_set_reservation);

View File

@ -4072,6 +4072,83 @@ zfs_ioc_wait(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
return (error); return (error);
} }
/*
 * This ioctl waits for activity of a particular type to complete on a
 * dataset. If there is no activity of that type in progress, it returns
 * immediately, and the returned value "wait_waited" is false. If there is
 * activity in progress, the ioctl blocks until all activity of that type is
 * complete, and then returns with "wait_waited" set to true.
 *
 * If a thread waiting in the ioctl receives a signal, or the wait is
 * cancelled (see dsl_dir_cancel_waiters()), the call will return immediately,
 * and the return value will be EINTR.
 *
 * innvl: {
 *     "wait_activity" -> int32_t
 * }
 *
 * outnvl: "wait_waited" -> boolean_t
 */
/* Input keys accepted by ZFS_IOC_WAIT_FS: a single int32 activity. */
static const zfs_ioc_key_t zfs_keys_fs_wait[] = {
{ZFS_WAIT_ACTIVITY, DATA_TYPE_INT32, 0},
};
/*
 * Handler for ZFS_IOC_WAIT_FS.  Validates the requested activity, registers
 * the calling thread as a waiter on the dataset's dsl_dir, and waits via
 * dsl_dir_wait().  On success the ZFS_WAIT_WAITED boolean is added to
 * 'outnvl'.
 *
 * Returns EINVAL if the activity is absent or out of range, the error from
 * holding the pool or dataset, or the result of dsl_dir_wait().
 */
static int
zfs_ioc_wait_fs(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
{
	int32_t activity;
	boolean_t waited = B_FALSE;
	dsl_pool_t *dp;
	dsl_dataset_t *ds;
	dsl_dir_t *dd;
	int error;

	if (nvlist_lookup_int32(innvl, ZFS_WAIT_ACTIVITY, &activity) != 0 ||
	    activity < 0 || activity >= ZFS_WAIT_NUM_ACTIVITIES)
		return (SET_ERROR(EINVAL));

	error = dsl_pool_hold(name, FTAG, &dp);
	if (error != 0)
		return (error);

	error = dsl_dataset_hold(dp, name, FTAG, &ds);
	if (error != 0) {
		dsl_pool_rele(dp, FTAG);
		return (error);
	}

	dd = ds->ds_dir;
	mutex_enter(&dd->dd_activity_lock);
	dd->dd_activity_waiters++;

	/*
	 * Take a long-hold so that the dsl_dataset_t and dsl_dir_t cannot be
	 * evicted while we wait.  Holding the pool would normally guarantee
	 * that, but the pool hold has to be dropped before waiting because it
	 * would otherwise keep TXGs from syncing out.  The other effects of a
	 * long-hold (e.g. preventing deletion) are not needed here, since
	 * waiters are cancelled before a deletion proceeds.  A dedicated
	 * mechanism for keeping the dataset around could be built instead,
	 * but this is simpler.
	 */
	dsl_dataset_long_hold(ds, FTAG);
	dsl_pool_rele(dp, FTAG);

	error = dsl_dir_wait(dd, ds, activity, &waited);

	dsl_dataset_long_rele(ds, FTAG);
	/* The last waiter to leave wakes a thread blocked on the cv. */
	if (--dd->dd_activity_waiters == 0)
		cv_signal(&dd->dd_activity_cv);
	mutex_exit(&dd->dd_activity_lock);

	dsl_dataset_rele(ds, FTAG);

	if (error != 0)
		return (error);

	fnvlist_add_boolean_value(outnvl, ZFS_WAIT_WAITED, waited);
	return (0);
}
/* /*
* fsname is name of dataset to rollback (to most recent snapshot) * fsname is name of dataset to rollback (to most recent snapshot)
* *
@ -6915,6 +6992,11 @@ zfs_ioctl_init(void)
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
zfs_keys_pool_wait, ARRAY_SIZE(zfs_keys_pool_wait)); zfs_keys_pool_wait, ARRAY_SIZE(zfs_keys_pool_wait));
zfs_ioctl_register("wait_fs", ZFS_IOC_WAIT_FS,
zfs_ioc_wait_fs, zfs_secpolicy_none, DATASET_NAME,
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
zfs_keys_fs_wait, ARRAY_SIZE(zfs_keys_fs_wait));
/* IOCTLS that use the legacy function signature */ /* IOCTLS that use the legacy function signature */
zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze, zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,

View File

@ -288,6 +288,10 @@ tests = ['zfs_upgrade_001_pos', 'zfs_upgrade_002_pos', 'zfs_upgrade_003_pos',
'zfs_upgrade_007_neg'] 'zfs_upgrade_007_neg']
tags = ['functional', 'cli_root', 'zfs_upgrade'] tags = ['functional', 'cli_root', 'zfs_upgrade']
[tests/functional/cli_root/zfs_wait]
tests = ['zfs_wait_deleteq']
tags = ['functional', 'cli_root', 'zfs_wait']
[tests/functional/cli_root/zpool] [tests/functional/cli_root/zpool]
tests = ['zpool_001_neg', 'zpool_002_pos', 'zpool_003_pos', 'zpool_colors'] tests = ['zpool_001_neg', 'zpool_002_pos', 'zpool_003_pos', 'zpool_colors']
tags = ['functional', 'cli_root', 'zpool'] tags = ['functional', 'cli_root', 'zpool']

View File

@ -739,6 +739,18 @@ test_wait(const char *pool)
nvlist_free(optional); nvlist_free(optional);
} }
/*
 * Input-validation test for ZFS_IOC_WAIT_FS against 'dataset'.  The request
 * carries only the "wait_activity" key, with the value 2, and EINVAL is passed
 * to IOC_INPUT_TEST as the expected result.
 * NOTE(review): 2 appears to be chosen because it is not a valid
 * zfs_wait_activity_t value -- confirm if more activities are added.
 */
static void
test_wait_fs(const char *dataset)
{
	nvlist_t *args = fnvlist_alloc();

	fnvlist_add_int32(args, "wait_activity", 2);

	IOC_INPUT_TEST(ZFS_IOC_WAIT_FS, dataset, args, NULL, EINVAL);

	nvlist_free(args);
}
static void static void
zfs_ioc_input_tests(const char *pool) zfs_ioc_input_tests(const char *pool)
{ {
@ -826,6 +838,7 @@ zfs_ioc_input_tests(const char *pool)
test_vdev_trim(pool); test_vdev_trim(pool);
test_wait(pool); test_wait(pool);
test_wait_fs(dataset);
/* /*
* cleanup * cleanup
@ -980,6 +993,7 @@ validate_ioc_values(void)
CHECK(ZFS_IOC_BASE + 81 == ZFS_IOC_REDACT); CHECK(ZFS_IOC_BASE + 81 == ZFS_IOC_REDACT);
CHECK(ZFS_IOC_BASE + 82 == ZFS_IOC_GET_BOOKMARK_PROPS); CHECK(ZFS_IOC_BASE + 82 == ZFS_IOC_GET_BOOKMARK_PROPS);
CHECK(ZFS_IOC_BASE + 83 == ZFS_IOC_WAIT); CHECK(ZFS_IOC_BASE + 83 == ZFS_IOC_WAIT);
CHECK(ZFS_IOC_BASE + 84 == ZFS_IOC_WAIT_FS);
CHECK(ZFS_IOC_PLATFORM_BASE + 1 == ZFS_IOC_EVENTS_NEXT); CHECK(ZFS_IOC_PLATFORM_BASE + 1 == ZFS_IOC_EVENTS_NEXT);
CHECK(ZFS_IOC_PLATFORM_BASE + 2 == ZFS_IOC_EVENTS_CLEAR); CHECK(ZFS_IOC_PLATFORM_BASE + 2 == ZFS_IOC_EVENTS_CLEAR);
CHECK(ZFS_IOC_PLATFORM_BASE + 3 == ZFS_IOC_EVENTS_SEEK); CHECK(ZFS_IOC_PLATFORM_BASE + 3 == ZFS_IOC_EVENTS_SEEK);

View File

@ -32,6 +32,7 @@ SUBDIRS = \
zfs_unmount \ zfs_unmount \
zfs_unshare \ zfs_unshare \
zfs_upgrade \ zfs_upgrade \
zfs_wait \
zpool \ zpool \
zpool_add \ zpool_add \
zpool_attach \ zpool_attach \

View File

@ -0,0 +1,8 @@
pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zfs_wait
dist_pkgdata_SCRIPTS = \
setup.ksh \
cleanup.ksh \
zfs_wait_deleteq.ksh
dist_pkgdata_DATA = \
zfs_wait.kshlib

View File

@ -0,0 +1,20 @@
#!/bin/ksh -p
#
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
# Load the shared test library, then run its default_cleanup routine for the
# zfs_wait test group.
. $STF_SUITE/include/libtest.shlib
default_cleanup

View File

@ -0,0 +1,21 @@
#!/bin/ksh -p
#
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
# Load the shared test library.
. $STF_SUITE/include/libtest.shlib
# Use only the first entry of the space-separated $DISKS list.
DISK=${DISKS%% *}
# Run the library's default_setup routine with that single disk.
default_setup $DISK

View File

@ -0,0 +1,80 @@
#!/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2018, 2019 by Delphix. All rights reserved.
#
# First three entries of the list produced by find_disks for $DISKS, exposed
# as read-only variables.
# NOTE(review): nothing visible in this library or in zfs_wait_deleteq uses
# DISK1..DISK3 -- confirm whether they are still needed.
typeset -a disk_array=($(find_disks $DISKS))
typeset -r DISK1=${disk_array[0]}
typeset -r DISK2=${disk_array[1]}
typeset -r DISK3=${disk_array[2]}
#
# When the condition it is waiting for becomes true, 'zfs wait' should return
# promptly. We want to enforce this, but any check will be racy because it will
# take some small but indeterminate amount of time for the waiting thread to be
# woken up and for the process to exit.
#
# To deal with this, we provide a grace period after the condition becomes true
# during which 'zfs wait' can exit. If it hasn't exited by the time the grace
# period expires we assume something is wrong and fail the test. While there is
# no value that can really be correct, the idea is we choose something large
# enough that it shouldn't cause issues in practice.
#
# The value is passed to 'sleep', so it is in seconds.
#
typeset -r WAIT_EXIT_GRACE=2.0
# Succeed (exit status 0) if 'ps -p' finds a process with the given pid.
# Standard output from ps is discarded.
function proc_exists # pid
{
ps -p $1 >/dev/null
}
# Fail the test unless a process with the given pid is still running.
# Used to check that a background 'zfs wait' has not returned prematurely.
function proc_must_exist # pid
{
	proc_exists $1 || log_fail "zfs wait process exited too soon"
}
# Fail the test if a process with the given pid is still running.
# Used to check that a background 'zfs wait' has returned by now.
function proc_must_not_exist # pid
{
	proc_exists $1 && log_fail "zfs wait process took too long to exit"
}
# Print the current time of day as HH:MM:SS (used to timestamp log notes).
function get_time
{
date +'%H:%M:%S'
}
# Send SIGTERM to the given pid, but only if a pid was supplied and a process
# with that pid currently exists. The kill itself is run under log_must.
function kill_if_running
{
typeset pid=$1
[[ $pid ]] && proc_exists $pid && log_must kill -s TERM $pid
}
# Log a command and then start it running in the background.
# All arguments form the command line; the caller can read the pid of the
# background job from $! immediately afterwards.
function log_bkgrnd
{
log_note "$(get_time) Starting cmd in background '$@'"
"$@" &
}
# Check that a background process has completed and exited with a status of 0.
# Sleeps for $WAIT_EXIT_GRACE first, then fails the test if the process is
# still running or if 'wait' reports a non-zero exit status for it.
function bkgrnd_proc_succeeded
{
typeset pid=$1
log_must sleep $WAIT_EXIT_GRACE
proc_must_not_exist $pid
wait $pid || log_fail "process exited with status $?"
log_note "$(get_time) wait completed successfully"
}

View File

@ -0,0 +1,57 @@
#!/bin/ksh -p
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zfs_wait/zfs_wait.kshlib
#
# DESCRIPTION:
# 'zfs wait' works when waiting for the delete queue to drain.
#
# STRATEGY:
# 1. Create a file
# 2. Open a file descriptor pointing to that file.
# 3. Delete the file.
# 4. Start a background process waiting for the delete queue to empty.
# 5. Verify that the command doesn't return immediately.
# 6. Close the open file descriptor.
# 7. Verify that the command returns soon after the descriptor is closed.
#
# Exit handler for this test: terminate the background 'zfs wait' if it is
# still running, and close file descriptor 3 in case the test stopped before
# closing it.
function cleanup
{
kill_if_running $pid
exec 3<&-
}
typeset -r TESTFILE="/$TESTPOOL/testfile"
typeset pid

log_onexit cleanup

# Create a file, keep it open on descriptor 3 and unlink it, so that its entry
# stays on the delete queue for as long as the descriptor remains open.
log_must touch $TESTFILE
exec 3<> $TESTFILE
log_must rm $TESTFILE

# 'zfs wait' must block while the unlinked file is still open.
log_bkgrnd zfs wait -t deleteq $TESTPOOL
pid=$!
proc_must_exist $pid

# Closing the descriptor lets the delete queue drain; 'zfs wait' must then
# exit successfully within the grace period.
exec 3<&-
log_must sleep 0.5
bkgrnd_proc_succeeded $pid

log_pass "'zfs wait -t deleteq' works."