OpenZFS 8115 - parallel zfs mount

Porting Notes:
* Use thread pools (tpool) API instead of introducing taskq interfaces
  to libzfs.
* Use pthread_mutex_t for locks as mutex_t isn't available.
* Ignore alternative libshare initialization since OpenZFS-7955 is
  not present on zfsonlinux.

Authored by: Sebastien Roy <seb@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Prashanth Sreenivasa <pks@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Approved by: Matt Ahrens <mahrens@delphix.com>
Ported-by: Don Brady <don.brady@delphix.com>

OpenZFS-issue: https://www.illumos.org/issues/8115
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/a3f0e2b569
Closes #8092
This commit is contained in:
Sebastien Roy 2018-11-05 08:40:05 -07:00 committed by Brian Behlendorf
parent af2e8411da
commit a10d50f999
10 changed files with 718 additions and 141 deletions

View File

@ -6059,7 +6059,12 @@ zfs_do_holds(int argc, char **argv)
#define CHECK_SPINNER 30 #define CHECK_SPINNER 30
#define SPINNER_TIME 3 /* seconds */ #define SPINNER_TIME 3 /* seconds */
#define MOUNT_TIME 5 /* seconds */ #define MOUNT_TIME 1 /* seconds */
/*
 * State handed to get_one_dataset() through the opaque data pointer of
 * zfs_iter_root().
 */
typedef struct get_all_state {
boolean_t ga_verbose; /* enables the progress bookkeeping in get_one_dataset() */
get_all_cb_t *ga_cbp; /* handle array that get_one_dataset() appends to */
} get_all_state_t;
static int static int
get_one_dataset(zfs_handle_t *zhp, void *data) get_one_dataset(zfs_handle_t *zhp, void *data)
@ -6068,10 +6073,10 @@ get_one_dataset(zfs_handle_t *zhp, void *data)
static int spinval = 0; static int spinval = 0;
static int spincheck = 0; static int spincheck = 0;
static time_t last_spin_time = (time_t)0; static time_t last_spin_time = (time_t)0;
get_all_cb_t *cbp = data; get_all_state_t *state = data;
zfs_type_t type = zfs_get_type(zhp); zfs_type_t type = zfs_get_type(zhp);
if (cbp->cb_verbose) { if (state->ga_verbose) {
if (--spincheck < 0) { if (--spincheck < 0) {
time_t now = time(NULL); time_t now = time(NULL);
if (last_spin_time + SPINNER_TIME < now) { if (last_spin_time + SPINNER_TIME < now) {
@ -6097,25 +6102,23 @@ get_one_dataset(zfs_handle_t *zhp, void *data)
zfs_close(zhp); zfs_close(zhp);
return (0); return (0);
} }
libzfs_add_handle(cbp, zhp); libzfs_add_handle(state->ga_cbp, zhp);
assert(cbp->cb_used <= cbp->cb_alloc); assert(state->ga_cbp->cb_used <= state->ga_cbp->cb_alloc);
return (0); return (0);
} }
static void static void
get_all_datasets(zfs_handle_t ***dslist, size_t *count, boolean_t verbose) get_all_datasets(get_all_cb_t *cbp, boolean_t verbose)
{ {
get_all_cb_t cb = { 0 }; get_all_state_t state = {
cb.cb_verbose = verbose; .ga_verbose = verbose,
cb.cb_getone = get_one_dataset; .ga_cbp = cbp
};
if (verbose) if (verbose)
set_progress_header(gettext("Reading ZFS config")); set_progress_header(gettext("Reading ZFS config"));
(void) zfs_iter_root(g_zfs, get_one_dataset, &cb); (void) zfs_iter_root(g_zfs, get_one_dataset, &state);
*dslist = cb.cb_handles;
*count = cb.cb_used;
if (verbose) if (verbose)
finish_progress(gettext("done.")); finish_progress(gettext("done."));
@ -6126,8 +6129,19 @@ get_all_datasets(zfs_handle_t ***dslist, size_t *count, boolean_t verbose)
* similar, we have a common function with an extra parameter to determine which * similar, we have a common function with an extra parameter to determine which
* mode we are using. * mode we are using.
*/ */
#define OP_SHARE 0x1 typedef enum { OP_SHARE, OP_MOUNT } share_mount_op_t;
#define OP_MOUNT 0x2
/*
 * State shared by all share_mount_one_cb() invocations made on behalf of the
 * do_all branch of share_mount(). The first five fields are filled in before
 * the callbacks run; the fields after sm_lock are updated by the callbacks
 * while holding sm_lock.
 */
typedef struct share_mount_state {
share_mount_op_t sm_op; /* OP_SHARE or OP_MOUNT */
boolean_t sm_verbose; /* report progress after each filesystem */
int sm_flags; /* handed to share_mount_one() unchanged */
char *sm_options; /* handed to share_mount_one() unchanged */
char *sm_proto; /* only valid for OP_SHARE */
pthread_mutex_t sm_lock; /* protects the remaining fields */
uint_t sm_total; /* number of filesystems to process */
uint_t sm_done; /* number of filesystems processed */
int sm_status; /* most recent non-zero share_mount_one() result, else 0 */
} share_mount_state_t;
/* /*
* Share or mount a dataset. * Share or mount a dataset.
@ -6385,6 +6399,29 @@ report_mount_progress(int current, int total)
update_progress(info); update_progress(info);
} }
/*
 * Callback handed to zfs_foreach_mountpoint(): shares or mounts a single
 * filesystem via share_mount_one(), then records the outcome and advances
 * the progress counter in the shared share_mount_state_t.
 *
 * The shared counters are only touched while sm_lock is held, because this
 * callback may be invoked from several threads at once.
 *
 * Returns the value produced by share_mount_one().
 */
static int
share_mount_one_cb(zfs_handle_t *zhp, void *arg)
{
	share_mount_state_t *state = arg;
	const int rc = share_mount_one(zhp, state->sm_op, state->sm_flags,
	    state->sm_proto, B_FALSE, state->sm_options);

	pthread_mutex_lock(&state->sm_lock);

	state->sm_done += 1;
	if (rc != 0) {
		state->sm_status = rc;
	}
	if (state->sm_verbose) {
		report_mount_progress(state->sm_done, state->sm_total);
	}

	pthread_mutex_unlock(&state->sm_lock);

	return (rc);
}
static void static void
append_options(char *mntopts, char *newopts) append_options(char *mntopts, char *newopts)
{ {
@ -6459,8 +6496,6 @@ share_mount(int op, int argc, char **argv)
/* check number of arguments */ /* check number of arguments */
if (do_all) { if (do_all) {
zfs_handle_t **dslist = NULL;
size_t i, count = 0;
char *protocol = NULL; char *protocol = NULL;
if (op == OP_SHARE && argc > 0) { if (op == OP_SHARE && argc > 0) {
@ -6481,27 +6516,35 @@ share_mount(int op, int argc, char **argv)
} }
start_progress_timer(); start_progress_timer();
get_all_datasets(&dslist, &count, verbose); get_all_cb_t cb = { 0 };
get_all_datasets(&cb, verbose);
if (count == 0) { if (cb.cb_used == 0) {
if (options != NULL) if (options != NULL)
free(options); free(options);
return (0); return (0);
} }
qsort(dslist, count, sizeof (void *), libzfs_dataset_cmp); share_mount_state_t share_mount_state = { 0 };
share_mount_state.sm_op = op;
share_mount_state.sm_verbose = verbose;
share_mount_state.sm_flags = flags;
share_mount_state.sm_options = options;
share_mount_state.sm_proto = protocol;
share_mount_state.sm_total = cb.cb_used;
pthread_mutex_init(&share_mount_state.sm_lock, NULL);
for (i = 0; i < count; i++) { /*
if (verbose) * libshare isn't mt-safe, so only do the operation in parallel
report_mount_progress(i, count); * if we're mounting.
*/
zfs_foreach_mountpoint(g_zfs, cb.cb_handles, cb.cb_used,
share_mount_one_cb, &share_mount_state, op == OP_MOUNT);
ret = share_mount_state.sm_status;
if (share_mount_one(dslist[i], op, flags, protocol, for (int i = 0; i < cb.cb_used; i++)
B_FALSE, options) != 0) zfs_close(cb.cb_handles[i]);
ret = 1; free(cb.cb_handles);
zfs_close(dslist[i]);
}
free(dslist);
} else if (argc == 0) { } else if (argc == 0) {
struct mnttab entry; struct mnttab entry;

View File

@ -573,12 +573,11 @@ typedef struct get_all_cb {
zfs_handle_t **cb_handles; zfs_handle_t **cb_handles;
size_t cb_alloc; size_t cb_alloc;
size_t cb_used; size_t cb_used;
boolean_t cb_verbose;
int (*cb_getone)(zfs_handle_t *, void *);
} get_all_cb_t; } get_all_cb_t;
void zfs_foreach_mountpoint(libzfs_handle_t *, zfs_handle_t **, size_t,
zfs_iter_f, void *, boolean_t);
void libzfs_add_handle(get_all_cb_t *, zfs_handle_t *); void libzfs_add_handle(get_all_cb_t *, zfs_handle_t *);
int libzfs_dataset_cmp(const void *, const void *);
/* /*
* Functions to create and destroy datasets. * Functions to create and destroy datasets.

View File

@ -21,7 +21,7 @@
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2015 by Delphix. All rights reserved. * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2018 Datto Inc. * Copyright (c) 2018 Datto Inc.
*/ */
@ -60,6 +60,13 @@ struct libzfs_handle {
void *libzfs_sharehdl; /* libshare handle */ void *libzfs_sharehdl; /* libshare handle */
uint_t libzfs_shareflags; uint_t libzfs_shareflags;
boolean_t libzfs_mnttab_enable; boolean_t libzfs_mnttab_enable;
/*
* We need a lock to handle the case where parallel mount
* threads are populating the mnttab cache simultaneously. The
* lock only protects the integrity of the avl tree, and does
* not protect the contents of the mnttab entries themselves.
*/
pthread_mutex_t libzfs_mnttab_cache_lock;
avl_tree_t libzfs_mnttab_cache; avl_tree_t libzfs_mnttab_cache;
int libzfs_pool_iter; int libzfs_pool_iter;
char libzfs_chassis_id[256]; char libzfs_chassis_id[256];

View File

@ -791,6 +791,7 @@ libzfs_mnttab_cache_compare(const void *arg1, const void *arg2)
void void
libzfs_mnttab_init(libzfs_handle_t *hdl) libzfs_mnttab_init(libzfs_handle_t *hdl)
{ {
pthread_mutex_init(&hdl->libzfs_mnttab_cache_lock, NULL);
assert(avl_numnodes(&hdl->libzfs_mnttab_cache) == 0); assert(avl_numnodes(&hdl->libzfs_mnttab_cache) == 0);
avl_create(&hdl->libzfs_mnttab_cache, libzfs_mnttab_cache_compare, avl_create(&hdl->libzfs_mnttab_cache, libzfs_mnttab_cache_compare,
sizeof (mnttab_node_t), offsetof(mnttab_node_t, mtn_node)); sizeof (mnttab_node_t), offsetof(mnttab_node_t, mtn_node));
@ -849,6 +850,7 @@ libzfs_mnttab_fini(libzfs_handle_t *hdl)
free(mtn); free(mtn);
} }
avl_destroy(&hdl->libzfs_mnttab_cache); avl_destroy(&hdl->libzfs_mnttab_cache);
(void) pthread_mutex_destroy(&hdl->libzfs_mnttab_cache_lock);
} }
void void
@ -863,7 +865,7 @@ libzfs_mnttab_find(libzfs_handle_t *hdl, const char *fsname,
{ {
mnttab_node_t find; mnttab_node_t find;
mnttab_node_t *mtn; mnttab_node_t *mtn;
int error; int ret = ENOENT;
if (!hdl->libzfs_mnttab_enable) { if (!hdl->libzfs_mnttab_enable) {
struct mnttab srch = { 0 }; struct mnttab srch = { 0 };
@ -883,17 +885,24 @@ libzfs_mnttab_find(libzfs_handle_t *hdl, const char *fsname,
return (ENOENT); return (ENOENT);
} }
if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0) pthread_mutex_lock(&hdl->libzfs_mnttab_cache_lock);
if ((error = libzfs_mnttab_update(hdl)) != 0) if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0) {
int error;
if ((error = libzfs_mnttab_update(hdl)) != 0) {
pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock);
return (error); return (error);
}
}
find.mtn_mt.mnt_special = (char *)fsname; find.mtn_mt.mnt_special = (char *)fsname;
mtn = avl_find(&hdl->libzfs_mnttab_cache, &find, NULL); mtn = avl_find(&hdl->libzfs_mnttab_cache, &find, NULL);
if (mtn) { if (mtn) {
*entry = mtn->mtn_mt; *entry = mtn->mtn_mt;
return (0); ret = 0;
} }
return (ENOENT); pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock);
return (ret);
} }
void void
@ -902,14 +911,23 @@ libzfs_mnttab_add(libzfs_handle_t *hdl, const char *special,
{ {
mnttab_node_t *mtn; mnttab_node_t *mtn;
if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0) pthread_mutex_lock(&hdl->libzfs_mnttab_cache_lock);
return; if (avl_numnodes(&hdl->libzfs_mnttab_cache) != 0) {
mtn = zfs_alloc(hdl, sizeof (mnttab_node_t)); mtn = zfs_alloc(hdl, sizeof (mnttab_node_t));
mtn->mtn_mt.mnt_special = zfs_strdup(hdl, special); mtn->mtn_mt.mnt_special = zfs_strdup(hdl, special);
mtn->mtn_mt.mnt_mountp = zfs_strdup(hdl, mountp); mtn->mtn_mt.mnt_mountp = zfs_strdup(hdl, mountp);
mtn->mtn_mt.mnt_fstype = zfs_strdup(hdl, MNTTYPE_ZFS); mtn->mtn_mt.mnt_fstype = zfs_strdup(hdl, MNTTYPE_ZFS);
mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, mntopts); mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, mntopts);
/*
* Another thread may have already added this entry
* via libzfs_mnttab_update. If so we should skip it.
*/
if (avl_find(&hdl->libzfs_mnttab_cache, mtn, NULL) != NULL)
free(mtn);
else
avl_add(&hdl->libzfs_mnttab_cache, mtn); avl_add(&hdl->libzfs_mnttab_cache, mtn);
}
pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock);
} }
void void
@ -918,6 +936,7 @@ libzfs_mnttab_remove(libzfs_handle_t *hdl, const char *fsname)
mnttab_node_t find; mnttab_node_t find;
mnttab_node_t *ret; mnttab_node_t *ret;
pthread_mutex_lock(&hdl->libzfs_mnttab_cache_lock);
find.mtn_mt.mnt_special = (char *)fsname; find.mtn_mt.mnt_special = (char *)fsname;
if ((ret = avl_find(&hdl->libzfs_mnttab_cache, (void *)&find, NULL)) if ((ret = avl_find(&hdl->libzfs_mnttab_cache, (void *)&find, NULL))
!= NULL) { != NULL) {
@ -928,6 +947,7 @@ libzfs_mnttab_remove(libzfs_handle_t *hdl, const char *fsname)
free(ret->mtn_mt.mnt_mntopts); free(ret->mtn_mt.mnt_mntopts);
free(ret); free(ret);
} }
pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock);
} }
int int

View File

@ -22,7 +22,7 @@
/* /*
* Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2015 by Delphix. All rights reserved. * Copyright (c) 2014, 2017 by Delphix. All rights reserved.
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com> * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
* Copyright 2017 RackTop Systems. * Copyright 2017 RackTop Systems.
* Copyright (c) 2018 Datto Inc. * Copyright (c) 2018 Datto Inc.
@ -84,11 +84,15 @@
#include <libzfs.h> #include <libzfs.h>
#include "libzfs_impl.h" #include "libzfs_impl.h"
#include <thread_pool.h>
#include <libshare.h> #include <libshare.h>
#include <sys/systeminfo.h> #include <sys/systeminfo.h>
#define MAXISALEN 257 /* based on sysinfo(2) man page */ #define MAXISALEN 257 /* based on sysinfo(2) man page */
static int mount_tp_nthr = 512; /* tpool threads for multi-threaded mounting */
static void zfs_mount_task(void *);
static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *); static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *);
zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **, zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **,
zfs_share_proto_t); zfs_share_proto_t);
@ -1146,25 +1150,32 @@ remove_mountpoint(zfs_handle_t *zhp)
} }
} }
/*
* Add the given zfs handle to the cb_handles array, dynamically reallocating
* the array if it is out of space.
*/
void void
libzfs_add_handle(get_all_cb_t *cbp, zfs_handle_t *zhp) libzfs_add_handle(get_all_cb_t *cbp, zfs_handle_t *zhp)
{ {
if (cbp->cb_alloc == cbp->cb_used) { if (cbp->cb_alloc == cbp->cb_used) {
size_t newsz; size_t newsz;
void *ptr; zfs_handle_t **newhandles;
newsz = cbp->cb_alloc ? cbp->cb_alloc * 2 : 64; newsz = cbp->cb_alloc != 0 ? cbp->cb_alloc * 2 : 64;
ptr = zfs_realloc(zhp->zfs_hdl, newhandles = zfs_realloc(zhp->zfs_hdl,
cbp->cb_handles, cbp->cb_alloc * sizeof (void *), cbp->cb_handles, cbp->cb_alloc * sizeof (zfs_handle_t *),
newsz * sizeof (void *)); newsz * sizeof (zfs_handle_t *));
cbp->cb_handles = ptr; cbp->cb_handles = newhandles;
cbp->cb_alloc = newsz; cbp->cb_alloc = newsz;
} }
cbp->cb_handles[cbp->cb_used++] = zhp; cbp->cb_handles[cbp->cb_used++] = zhp;
} }
/*
* Recursive helper function used during file system enumeration
*/
static int static int
mount_cb(zfs_handle_t *zhp, void *data) zfs_iter_cb(zfs_handle_t *zhp, void *data)
{ {
get_all_cb_t *cbp = data; get_all_cb_t *cbp = data;
@ -1196,112 +1207,351 @@ mount_cb(zfs_handle_t *zhp, void *data)
} }
libzfs_add_handle(cbp, zhp); libzfs_add_handle(cbp, zhp);
if (zfs_iter_filesystems(zhp, mount_cb, cbp) != 0) { if (zfs_iter_filesystems(zhp, zfs_iter_cb, cbp) != 0) {
zfs_close(zhp); zfs_close(zhp);
return (-1); return (-1);
} }
return (0); return (0);
} }
/*
* Sort comparator that compares two mountpoint paths. We sort these paths so
* that subdirectories immediately follow their parents. This means that we
* effectively treat the '/' character as the lowest value non-nul char. An
* example sorted list using this comparator would look like:
*
* /foo
* /foo/bar
* /foo/bar/baz
* /foo/baz
* /foo.bar
*
* The mounting code depends on this ordering to deterministically iterate
* over filesystems in order to spawn parallel mount tasks.
*/
int int
libzfs_dataset_cmp(const void *a, const void *b) mountpoint_cmp(const void *arga, const void *argb)
{ {
zfs_handle_t **za = (zfs_handle_t **)a; zfs_handle_t *const *zap = arga;
zfs_handle_t **zb = (zfs_handle_t **)b; zfs_handle_t *za = *zap;
zfs_handle_t *const *zbp = argb;
zfs_handle_t *zb = *zbp;
char mounta[MAXPATHLEN]; char mounta[MAXPATHLEN];
char mountb[MAXPATHLEN]; char mountb[MAXPATHLEN];
const char *a = mounta;
const char *b = mountb;
boolean_t gota, gotb; boolean_t gota, gotb;
if ((gota = (zfs_get_type(*za) == ZFS_TYPE_FILESYSTEM)) != 0) gota = (zfs_get_type(za) == ZFS_TYPE_FILESYSTEM);
verify(zfs_prop_get(*za, ZFS_PROP_MOUNTPOINT, mounta, if (gota) {
verify(zfs_prop_get(za, ZFS_PROP_MOUNTPOINT, mounta,
sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0); sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);
if ((gotb = (zfs_get_type(*zb) == ZFS_TYPE_FILESYSTEM)) != 0) }
verify(zfs_prop_get(*zb, ZFS_PROP_MOUNTPOINT, mountb, gotb = (zfs_get_type(zb) == ZFS_TYPE_FILESYSTEM);
if (gotb) {
verify(zfs_prop_get(zb, ZFS_PROP_MOUNTPOINT, mountb,
sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0); sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);
}
if (gota && gotb) if (gota && gotb) {
return (strcmp(mounta, mountb)); while (*a != '\0' && (*a == *b)) {
a++;
b++;
}
if (*a == *b)
return (0);
if (*a == '\0')
return (-1);
if (*b == '\0')
return (1);
if (*a == '/')
return (-1);
if (*b == '/')
return (1);
return (*a < *b ? -1 : *a > *b);
}
if (gota) if (gota)
return (-1); return (-1);
if (gotb) if (gotb)
return (1); return (1);
return (strcmp(zfs_get_name(*za), zfs_get_name(*zb))); /*
* If neither filesystem has a mountpoint, revert to sorting by
* dataset name.
*/
return (strcmp(zfs_get_name(za), zfs_get_name(zb)));
}
/*
 * Return true if path2 is a child of path1, i.e. path2 is path1 followed by
 * a '/'-separated remainder.
 *
 * The root directory needs special treatment: "/" already ends in the
 * separator, so for path1 == "/" the character following the prefix is the
 * first character of the child's name rather than a '/'. Without this case
 * a filesystem mounted at "/" would never be recognized as the parent of
 * e.g. "/foo", and both would be handled as unrelated mountpoints.
 *
 * Two identical paths are not considered parent and child.
 */
static boolean_t
libzfs_path_contains(const char *path1, const char *path2)
{
	size_t len = strlen(path1);
	int parent_is_root = (len == 1 && path1[0] == '/');

	/*
	 * strncmp() stops at the first mismatch or at path2's terminator, so
	 * path2[len] is only read once path2 is known to hold at least len
	 * characters.
	 */
	return (strncmp(path2, path1, len) == 0 &&
	    (path2[len] == '/' || (parent_is_root && path2[len] != '\0')));
}
/*
 * Starting just after handles[idx], walk forward through the handles array
 * and return the index of the first entry whose mountpoint is not a
 * descendant of the mountpoint of handles[idx]. If every remaining entry is
 * a descendant, num_handles is returned.
 *
 * Only a forward scan is needed because the array is expected to be ordered
 * by mountpoint_cmp(), which places descendants directly after their parent.
 */
static int
non_descendant_idx(zfs_handle_t **handles, size_t num_handles, int idx)
{
	char ancestor[ZFS_MAXPROPLEN];
	char candidate[ZFS_MAXPROPLEN];
	int next = idx + 1;

	verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, ancestor,
	    sizeof (ancestor), NULL, NULL, 0, B_FALSE) == 0);

	while (next < num_handles) {
		verify(zfs_prop_get(handles[next], ZFS_PROP_MOUNTPOINT,
		    candidate, sizeof (candidate), NULL, NULL, 0,
		    B_FALSE) == 0);

		if (!libzfs_path_contains(ancestor, candidate))
			return (next);

		next++;
	}

	return (next);
}
/*
 * Argument block for one zfs_mount_task() invocation. It is allocated and
 * filled in by zfs_dispatch_mount(); zfs_mount_task() is responsible for
 * freeing it.
 */
typedef struct mnt_param {
libzfs_handle_t *mnt_hdl; /* library handle, passed on when dispatching children */
tpool_t *mnt_tp; /* thread pool that child tasks are dispatched to */
zfs_handle_t **mnt_zhps; /* filesystems to mount */
size_t mnt_num_handles; /* number of entries in mnt_zhps */
int mnt_idx; /* Index of selected entry to mount */
zfs_iter_f mnt_func; /* callback invoked on mnt_zhps[mnt_idx] */
void *mnt_data; /* opaque argument handed to mnt_func */
} mnt_param_t;
/*
 * Queue a zfs_mount_task() on thread pool tp for the entry handles[idx].
 *
 * A fresh mnt_param_t describing the work (library handle, pool, the full
 * handles array and its length, the selected index, and the callback with
 * its argument) is allocated here and handed to the task.
 */
static void
zfs_dispatch_mount(libzfs_handle_t *hdl, zfs_handle_t **handles,
    size_t num_handles, int idx, zfs_iter_f func, void *data, tpool_t *tp)
{
	mnt_param_t *param = zfs_alloc(hdl, sizeof (mnt_param_t));

	*param = (mnt_param_t){
		.mnt_hdl = hdl,
		.mnt_tp = tp,
		.mnt_zhps = handles,
		.mnt_num_handles = num_handles,
		.mnt_idx = idx,
		.mnt_func = func,
		.mnt_data = data,
	};

	(void) tpool_dispatch(tp, zfs_mount_task, param);
}
/*
 * This is the structure used to keep state of mounting or sharing operations
 * during a call to zpool_enable_datasets(). It is passed as the opaque
 * argument to zfs_mount_one() and zfs_share_one().
 */
typedef struct mount_state {
/*
 * ms_mntstatus is set to -1 if any mount fails (zfs_mount_one()) or any
 * share fails (zfs_share_one()). While multiple threads could update
 * this variable concurrently, no synchronization is needed as the
 * callbacks only ever set it to -1.
 */
int ms_mntstatus;
int ms_mntflags; /* flags argument handed to zfs_mount() */
const char *ms_mntopts; /* options argument handed to zfs_mount() */
} mount_state_t;
/*
 * zfs_foreach_mountpoint() callback that mounts one filesystem using the
 * options and flags stored in the mount_state_t passed as arg.
 *
 * Returns 0 on success or when the dataset is skipped, and -1 (also recorded
 * in ms_mntstatus) when zfs_mount() fails.
 */
static int
zfs_mount_one(zfs_handle_t *zhp, void *arg)
{
	mount_state_t *state = arg;

	/*
	 * An encrypted dataset whose key is not loaded is skipped and is
	 * not counted as a failure.
	 */
	if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) ==
	    ZFS_KEYSTATUS_UNAVAILABLE) {
		return (0);
	}

	if (zfs_mount(zhp, state->ms_mntopts, state->ms_mntflags) == 0)
		return (0);

	state->ms_mntstatus = -1;
	return (-1);
}
/*
 * zfs_foreach_mountpoint() callback that shares one filesystem.
 *
 * Returns 0 when zfs_share() succeeds; otherwise records -1 in the
 * mount_state_t passed as arg and returns -1.
 */
static int
zfs_share_one(zfs_handle_t *zhp, void *arg)
{
	mount_state_t *state = arg;

	if (zfs_share(zhp) == 0)
		return (0);

	state->ms_mntstatus = -1;
	return (-1);
}
/*
 * Thread pool function to mount one file system. On completion, it finds and
 * schedules its children to be mounted. This depends on the sorting done in
 * zfs_foreach_mountpoint(). Note that the degenerate case (chain of entries
 * each descending from the previous) will have no parallelism since we always
 * have to wait for the parent to finish mounting before we can schedule
 * its children.
 *
 * arg is the mnt_param_t allocated by zfs_dispatch_mount(). This function
 * owns it and frees it on every exit path, including when the callback
 * fails.
 */
static void
zfs_mount_task(void *arg)
{
	mnt_param_t *mp = arg;
	int idx = mp->mnt_idx;
	zfs_handle_t **handles = mp->mnt_zhps;
	size_t num_handles = mp->mnt_num_handles;
	char mountpoint[ZFS_MAXPROPLEN];

	verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, mountpoint,
	    sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0);

	/*
	 * If the callback fails for this filesystem, none of the filesystems
	 * with mountpoints underneath it are dispatched. The parameter block
	 * must still be released, hence the jump to the common exit.
	 */
	if (mp->mnt_func(handles[idx], mp->mnt_data) != 0)
		goto out;

	/*
	 * We dispatch tasks to mount filesystems with mountpoints underneath
	 * this one. We do this by dispatching the next filesystem with a
	 * descendant mountpoint of the one we just mounted, then skip all of
	 * its descendants, dispatch the next descendant mountpoint, and so on.
	 * The non_descendant_idx() function skips over filesystems that are
	 * descendants of the filesystem we just dispatched.
	 */
	for (int i = idx + 1; i < num_handles;
	    i = non_descendant_idx(handles, num_handles, i)) {
		char child[ZFS_MAXPROPLEN];

		verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT,
		    child, sizeof (child), NULL, NULL, 0, B_FALSE) == 0);

		if (!libzfs_path_contains(mountpoint, child))
			break; /* not a descendant, return */

		zfs_dispatch_mount(mp->mnt_hdl, handles, num_handles, i,
		    mp->mnt_func, mp->mnt_data, mp->mnt_tp);
	}

out:
	free(mp);
}
/*
* Issue the func callback for each ZFS handle contained in the handles
* array. This function is used to mount all datasets, and so this function
* guarantees that filesystems for parent mountpoints are called before their
* children. As such, before issuing any callbacks, we first sort the array
* of handles by mountpoint.
*
* Callbacks are issued in one of two ways:
*
* 1. Sequentially: If the parallel argument is B_FALSE or the ZFS_SERIAL_MOUNT
* environment variable is set, then we issue callbacks sequentially.
*
* 2. In parallel: If the parallel argument is B_TRUE and the ZFS_SERIAL_MOUNT
* environment variable is not set, then we use a tpool to dispatch threads
* to mount filesystems in parallel. This function dispatches tasks to mount
* the filesystems at the top-level mountpoints, and these tasks in turn
* are responsible for recursively mounting filesystems in their children
* mountpoints.
*/
void
zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles,
size_t num_handles, zfs_iter_f func, void *data, boolean_t parallel)
{
/*
* The ZFS_SERIAL_MOUNT environment variable is an undocumented
* variable that can be used as a convenience to do a/b comparison
* of serial vs. parallel mounting.
*/
boolean_t serial_mount = !parallel ||
(getenv("ZFS_SERIAL_MOUNT") != NULL);
/*
* Sort the datasets by mountpoint. See mountpoint_cmp for details
* of how these are sorted. Note that the caller's handles array is
* reordered in place.
*/
qsort(handles, num_handles, sizeof (zfs_handle_t *), mountpoint_cmp);
if (serial_mount) {
/*
 * Serial path: the callback's return value is not examined here, so
 * every handle is visited even if an earlier callback failed.
 */
for (int i = 0; i < num_handles; i++) {
func(handles[i], data);
}
return;
}
/*
* Issue the callback function for each dataset using a parallel
* algorithm that uses a thread pool to manage threads.
*/
/*
 * NOTE(review): the result of tpool_create() is used below without a
 * NULL check -- confirm whether it can fail here.
 */
tpool_t *tp = tpool_create(1, mount_tp_nthr, 0, NULL);
/*
* There may be multiple "top level" mountpoints outside of the pool's
* root mountpoint, e.g.: /foo /bar. Dispatch a mount task for each of
* these.
*/
for (int i = 0; i < num_handles;
i = non_descendant_idx(handles, num_handles, i)) {
zfs_dispatch_mount(hdl, handles, num_handles, i, func, data,
tp);
}
tpool_wait(tp); /* wait for all scheduled mounts to complete */
tpool_destroy(tp);
} }
/* /*
* Mount and share all datasets within the given pool. This assumes that no * Mount and share all datasets within the given pool. This assumes that no
* datasets within the pool are currently mounted. Because users can create * datasets within the pool are currently mounted.
* complicated nested hierarchies of mountpoints, we first gather all the
* datasets and mountpoints within the pool, and sort them by mountpoint. Once
* we have the list of all filesystems, we iterate over them in order and mount
* and/or share each one.
*/ */
#pragma weak zpool_mount_datasets = zpool_enable_datasets #pragma weak zpool_mount_datasets = zpool_enable_datasets
int int
zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags) zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
{ {
get_all_cb_t cb = { 0 }; get_all_cb_t cb = { 0 };
libzfs_handle_t *hdl = zhp->zpool_hdl; mount_state_t ms = { 0 };
zfs_handle_t *zfsp; zfs_handle_t *zfsp;
int i, ret = -1; int ret = 0;
int *good;
/* if ((zfsp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
* Gather all non-snap datasets within the pool. ZFS_TYPE_DATASET)) == NULL)
*/
if ((zfsp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_DATASET)) == NULL)
goto out; goto out;
/*
* Gather all non-snapshot datasets within the pool. Start by adding
* the root filesystem for this pool to the list, and then iterate
* over all child filesystems.
*/
libzfs_add_handle(&cb, zfsp); libzfs_add_handle(&cb, zfsp);
if (zfs_iter_filesystems(zfsp, mount_cb, &cb) != 0) if (zfs_iter_filesystems(zfsp, zfs_iter_cb, &cb) != 0)
goto out;
/*
* Sort the datasets by mountpoint.
*/
qsort(cb.cb_handles, cb.cb_used, sizeof (void *),
libzfs_dataset_cmp);
/*
* And mount all the datasets, keeping track of which ones
* succeeded or failed.
*/
if ((good = zfs_alloc(zhp->zpool_hdl,
cb.cb_used * sizeof (int))) == NULL)
goto out; goto out;
ret = 0;
for (i = 0; i < cb.cb_used; i++) {
/* /*
* don't attempt to mount encrypted datasets with * Mount all filesystems
* unloaded keys
*/ */
if (zfs_prop_get_int(cb.cb_handles[i], ZFS_PROP_KEYSTATUS) == ms.ms_mntopts = mntopts;
ZFS_KEYSTATUS_UNAVAILABLE) ms.ms_mntflags = flags;
continue; zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,
zfs_mount_one, &ms, B_TRUE);
if (zfs_mount(cb.cb_handles[i], mntopts, flags) != 0) if (ms.ms_mntstatus != 0)
ret = -1; ret = ms.ms_mntstatus;
else
good[i] = 1;
}
/* /*
* Then share all the ones that need to be shared. This needs * Share all filesystems that need to be shared. This needs to be
* to be a separate pass in order to avoid excessive reloading * a separate pass because libshare is not mt-safe, and so we need
* of the configuration. Good should never be NULL since * to share serially.
* zfs_alloc is supposed to exit if memory isn't available.
*/ */
for (i = 0; i < cb.cb_used; i++) { ms.ms_mntstatus = 0;
if (good[i] && zfs_share(cb.cb_handles[i]) != 0) zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,
ret = -1; zfs_share_one, &ms, B_FALSE);
} if (ms.ms_mntstatus != 0)
ret = ms.ms_mntstatus;
free(good);
out: out:
for (i = 0; i < cb.cb_used; i++) for (int i = 0; i < cb.cb_used; i++)
zfs_close(cb.cb_handles[i]); zfs_close(cb.cb_handles[i]);
free(cb.cb_handles); free(cb.cb_handles);

View File

@ -181,7 +181,7 @@ tests = ['zfs_mount_001_pos', 'zfs_mount_002_pos', 'zfs_mount_003_pos',
'zfs_mount_007_pos', 'zfs_mount_008_pos', 'zfs_mount_009_neg', 'zfs_mount_007_pos', 'zfs_mount_008_pos', 'zfs_mount_009_neg',
'zfs_mount_010_neg', 'zfs_mount_011_neg', 'zfs_mount_012_neg', 'zfs_mount_010_neg', 'zfs_mount_011_neg', 'zfs_mount_012_neg',
'zfs_mount_all_001_pos', 'zfs_mount_encrypted', 'zfs_mount_remount', 'zfs_mount_all_001_pos', 'zfs_mount_encrypted', 'zfs_mount_remount',
'zfs_multi_mount'] 'zfs_multi_mount', 'zfs_mount_all_fail', 'zfs_mount_all_mountpoints']
tags = ['functional', 'cli_root', 'zfs_mount'] tags = ['functional', 'cli_root', 'zfs_mount']
[tests/functional/cli_root/zfs_program] [tests/functional/cli_root/zfs_program]

View File

@ -14,8 +14,10 @@ dist_pkgdata_SCRIPTS = \
zfs_mount_010_neg.ksh \ zfs_mount_010_neg.ksh \
zfs_mount_011_neg.ksh \ zfs_mount_011_neg.ksh \
zfs_mount_012_neg.ksh \ zfs_mount_012_neg.ksh \
zfs_mount_encrypted.ksh \
zfs_mount_all_001_pos.ksh \ zfs_mount_all_001_pos.ksh \
zfs_mount_all_fail.ksh \
zfs_mount_all_mountpoints.ksh \
zfs_mount_encrypted.ksh \
zfs_mount_remount.ksh \ zfs_mount_remount.ksh \
zfs_multi_mount.ksh zfs_multi_mount.ksh

View File

@ -25,7 +25,7 @@
# #
# #
# Copyright (c) 2016 by Delphix. All rights reserved. # Copyright (c) 2017 by Delphix. All rights reserved.
# #
. $STF_SUITE/include/libtest.shlib . $STF_SUITE/include/libtest.shlib
@ -84,14 +84,12 @@ function setup_filesystem #disklist #pool #fs #mntpoint #type #vdev
fi fi
case "$type" in case "$type" in
'ctr') log_must zfs create $pool/$fs 'ctr') log_must zfs create -o mountpoint=$mntpoint $pool/$fs
log_must zfs set mountpoint=$mntpoint $pool/$fs
;; ;;
'vol') log_must zfs create -V $VOLSIZE $pool/$fs 'vol') log_must zfs create -V $VOLSIZE $pool/$fs
block_device_wait block_device_wait
;; ;;
*) log_must zfs create $pool/$fs *) log_must zfs create -o mountpoint=$mntpoint $pool/$fs
log_must zfs set mountpoint=$mntpoint $pool/$fs
;; ;;
esac esac

View File

@ -0,0 +1,96 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#
#
# Copyright (c) 2017 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib
# DESCRIPTION:
# Verify that if 'zfs mount -a' fails to mount one filesystem,
# the command fails with a non-zero error code, but all other
# filesystems are mounted.
#
# STRATEGY:
# 1. Create zfs filesystems
# 2. Unmount a leaf filesystem
# 3. Create a file in the above filesystem's mountpoint
# 4. Verify that 'zfs mount -a' fails to mount the above
# 5. Verify that all other filesystems were mounted
#
verify_runnable "both"
# NOTE(review): 'filesystems' is declared here but is not referenced anywhere
# else in the visible part of this script.
typeset -a filesystems
# Directory under which each test filesystem gets its mountpoint
typeset path=${TEST_BASE_DIR%%/}/testroot$$/$TESTPOOL
# Number of filesystems created by setup_all
typeset fscount=10
#
# Create $fscount filesystems named 0 .. fscount-1 in $TESTPOOL, each with
# its mountpoint directly beneath $path, then list the pool's datasets.
#
function setup_all
{
	i=0
	while ((i < fscount)); do
		setup_filesystem "$DISKS" "$TESTPOOL" $i "$path/$i" ctr
		((i += 1))
	done

	zfs list -r $TESTPOOL

	return 0
}
#
# Unmount everything in $TESTPOOL (the pool restriction is only in effect
# for the duration of the unmount) and remove this test's scratch directory
# if it exists.
#
function cleanup_all
{
	typeset testroot=${TEST_BASE_DIR%%/}/testroot$$

	__ZFS_POOL_RESTRICT="$TESTPOOL"
	export __ZFS_POOL_RESTRICT
	log_must zfs $unmountall
	unset __ZFS_POOL_RESTRICT

	[[ -d $testroot ]] && rm -rf $testroot
}
# Register cleanup first so it runs even if setup aborts part-way through
log_onexit cleanup_all
log_must setup_all
#
# Unmount all of the above so that we can create the stray file
# in one of the mountpoint directories.
#
export __ZFS_POOL_RESTRICT="$TESTPOOL"
log_must zfs $unmountall
unset __ZFS_POOL_RESTRICT
# All of our filesystems should be unmounted at this point
for ((i=0; i<$fscount; i++)); do
log_mustnot mounted "$TESTPOOL/$i"
done
# Create a stray file in one filesystem's mountpoint, making that
# directory non-empty.
# NOTE(review): this touch is not wrapped in log_must, so a failure to
# create the file is not reported at this point.
touch $path/0/strayfile
# Verify that zfs mount -a fails
export __ZFS_POOL_RESTRICT="$TESTPOOL"
log_mustnot zfs $mountall
unset __ZFS_POOL_RESTRICT
# All filesystems except for "0" should be mounted
log_mustnot mounted "$TESTPOOL/0"
for ((i=1; i<$fscount; i++)); do
log_must mounted "$TESTPOOL/$i"
done
log_pass "'zfs $mountall' failed as expected."

View File

@ -0,0 +1,162 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#
#
# Copyright (c) 2017 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib
# DESCRIPTION:
# Verify that 'zfs mount -a' succeeds given a set of filesystems
# whose mountpoints have a parent/child relationship which is
# counter to the filesystem parent/child relationship.
#
# STRATEGY:
# 1. Create zfs filesystems within the given pool.
# 2. Unmount all the filesystems.
# 3. Verify that the 'zfs mount -a' command succeeds,
# and all available ZFS filesystems are mounted.
# 4. Verify that 'zfs mount' is identical with 'df -F zfs'
#
verify_runnable "both"
# Dataset names (relative to $TESTPOOL) filled in by setup_all and read by
# cleanup_all and verify_all
typeset -a filesystems
#
# Build a chain of nested filesystems and then assign their mountpoints in
# the reverse order of the dataset hierarchy, leaving everything unmounted.
#
function setup_all
{
	typeset path=${TEST_BASE_DIR%%/}/testroot$$/$TESTPOOL
	typeset fscount=10

	#
	# Fill the filesystems array with names that form a single deep
	# chain, each one nested inside the previous:
	#
	# 0
	# 0/1
	# 0/1/2
	# 0/1/2/3
	# 0/1/2/3/4
	# ...
	#
	fs=0
	i=0
	while ((i < fscount)); do
		((i > 0)) && fs=$fs/$i
		filesystems+=($fs)
		((i += 1))
	done

	# Create every filesystem in the chain, initially mounted at $path/<index>
	i=0
	while ((i < fscount)); do
		fs=${filesystems[$i]}
		setup_filesystem "$DISKS" "$TESTPOOL" "$fs" "$path/$i" ctr
		((i += 1))
	done

	zfs list -r $TESTPOOL

	#
	# Everything must be unmounted before the mountpoints are rearranged
	# below.
	#
	export __ZFS_POOL_RESTRICT="$TESTPOOL"
	log_must zfs $unmountall
	unset __ZFS_POOL_RESTRICT

	#
	# Pair the deepest dataset with the shallowest mount path and so on,
	# so that each dataset's mountpoint lies inside the mountpoint of one
	# of its own descendants. With seven filesystems the result would
	# look like this:
	#
	# NAME                    MOUNTPOINT
	# testpool                /testpool
	# testpool/0              /testroot<pid>/testpool/0/1/2/3/4/5/6
	# testpool/0/1            /testroot<pid>/testpool/0/1/2/3/4/5
	# testpool/0/1/2          /testroot<pid>/testpool/0/1/2/3/4
	# testpool/0/1/2/3        /testroot<pid>/testpool/0/1/2/3
	# testpool/0/1/2/3/4      /testroot<pid>/testpool/0/1/2
	# testpool/0/1/2/3/4/5    /testroot<pid>/testpool/0/1
	# testpool/0/1/2/3/4/5/6  /testroot<pid>/testpool/0
	#
	i=0
	while ((i < fscount)); do
		fs=$TESTPOOL/${filesystems[$((fscount - i - 1))]}
		mnt=$path/${filesystems[$i]}
		zfs set mountpoint=$mnt $fs
		((i += 1))
	done

	zfs list -r $TESTPOOL

	return 0
}
#
# Unmount everything in $TESTPOOL, run cleanup_filesystem on every dataset
# recorded in the filesystems array, and remove this test's scratch
# directory if it exists.
#
function cleanup_all
{
	typeset testroot=${TEST_BASE_DIR%%/}/testroot$$

	__ZFS_POOL_RESTRICT="$TESTPOOL"
	export __ZFS_POOL_RESTRICT
	log_must zfs $unmountall
	unset __ZFS_POOL_RESTRICT

	for fs in ${filesystems[@]}; do
		cleanup_filesystem "$TESTPOOL" "$fs"
	done

	[[ -d $testroot ]] && rm -rf $testroot
}
#
# Check the mount state of every dataset in the filesystems array.
# $1 is the command 'true' or 'false': with 'true' each dataset must be
# mounted, with 'false' each dataset must not be mounted.
#
function verify_all
{
	# Pick the assertion wrapper that matches the expected state
	$1 && logfunc=log_must || logfunc=log_mustnot

	for fs in ${filesystems[@]}; do
		$logfunc mounted "$TESTPOOL/$fs"
	done

	return 0
}
# Register cleanup first so it runs even if setup aborts part-way through
log_onexit cleanup_all
log_must setup_all
# Start from a state in which none of the test filesystems are mounted
export __ZFS_POOL_RESTRICT="$TESTPOOL"
log_must zfs $unmountall
unset __ZFS_POOL_RESTRICT
verify_all false
# Mount everything in the pool; this must succeed despite the mountpoint
# hierarchy running counter to the dataset hierarchy
export __ZFS_POOL_RESTRICT="$TESTPOOL"
log_must zfs $mountall
unset __ZFS_POOL_RESTRICT
verify_all true
log_note "Verify that 'zfs $mountcmd' will display " \
"all ZFS filesystems currently mounted."
verify_mount_display
log_pass "'zfs $mountall' succeeds as root, " \
"and all available ZFS filesystems are mounted."