Better control the thread pool size when mounting datasets

Ever since a10d50f999, ZFS has mounted file systems in parallel when
importing a pool.  It uses a fixed size of 512 for the thread pool.  But
since c183d164aa, it has also imported pools in parallel.  So the total
number of threads at one time is 513 * npools + 1.  That can easily
exceed the system's limit on the number of threads per process, which
will cause one or more pools to be unable to allocate any worker
threads, forcing them to fallback to slow serial mounting .  To
forestall that, manage the threadpool size in /sbin/zpool, not libzfs.
Use the same size (512), but divided by the number of pools.

This is a backwards-incompatible change to the libzfs abi.

Sponsored by: Axcient
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: George Wilson <gwilson@delphix.com>
Signed-off-by: Alan Somers <asomers@FreeBSD.org>
Closes #16178
This commit is contained in:
Alan Somers 2024-05-08 10:01:22 -06:00 committed by Brian Behlendorf
parent eced2e2f1e
commit b64afa41d5
6 changed files with 38 additions and 23 deletions

View File

@ -702,7 +702,7 @@ zfs_enable_ds(void *arg)
{ {
unavailpool_t *pool = (unavailpool_t *)arg; unavailpool_t *pool = (unavailpool_t *)arg;
(void) zpool_enable_datasets(pool->uap_zhp, NULL, 0); (void) zpool_enable_datasets(pool->uap_zhp, NULL, 0, 512);
zpool_close(pool->uap_zhp); zpool_close(pool->uap_zhp);
free(pool); free(pool);
} }

View File

@ -7192,6 +7192,8 @@ share_mount(int op, int argc, char **argv)
int c, ret = 0; int c, ret = 0;
char *options = NULL; char *options = NULL;
int flags = 0; int flags = 0;
const uint_t mount_nthr = 512;
uint_t nthr;
/* check options */ /* check options */
while ((c = getopt(argc, argv, op == OP_MOUNT ? ":aRlvo:Of" : "al")) while ((c = getopt(argc, argv, op == OP_MOUNT ? ":aRlvo:Of" : "al"))
@ -7310,9 +7312,9 @@ share_mount(int op, int argc, char **argv)
* be serialized so that we can prompt the user for their keys * be serialized so that we can prompt the user for their keys
* in a consistent manner. * in a consistent manner.
*/ */
nthr = op == OP_MOUNT && !(flags & MS_CRYPT) ? mount_nthr : 1;
zfs_foreach_mountpoint(g_zfs, cb.cb_handles, cb.cb_used, zfs_foreach_mountpoint(g_zfs, cb.cb_handles, cb.cb_used,
share_mount_one_cb, &share_mount_state, share_mount_one_cb, &share_mount_state, nthr);
op == OP_MOUNT && !(flags & MS_CRYPT));
zfs_commit_shares(NULL); zfs_commit_shares(NULL);
ret = share_mount_state.sm_status; ret = share_mount_state.sm_status;

View File

@ -80,6 +80,8 @@
libzfs_handle_t *g_zfs; libzfs_handle_t *g_zfs;
static int mount_tp_nthr = 512; /* tpool threads for multi-threaded mounting */
static int zpool_do_create(int, char **); static int zpool_do_create(int, char **);
static int zpool_do_destroy(int, char **); static int zpool_do_destroy(int, char **);
@ -3418,7 +3420,7 @@ zfs_force_import_required(nvlist_t *config)
*/ */
static int static int
do_import(nvlist_t *config, const char *newname, const char *mntopts, do_import(nvlist_t *config, const char *newname, const char *mntopts,
nvlist_t *props, int flags) nvlist_t *props, int flags, uint_t mntthreads)
{ {
int ret = 0; int ret = 0;
int ms_status = 0; int ms_status = 0;
@ -3518,7 +3520,7 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL && if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL &&
!(flags & ZFS_IMPORT_ONLY)) { !(flags & ZFS_IMPORT_ONLY)) {
ms_status = zpool_enable_datasets(zhp, mntopts, 0); ms_status = zpool_enable_datasets(zhp, mntopts, 0, mntthreads);
if (ms_status == EZFS_SHAREFAILED) { if (ms_status == EZFS_SHAREFAILED) {
(void) fprintf(stderr, gettext("Import was " (void) fprintf(stderr, gettext("Import was "
"successful, but unable to share some datasets\n")); "successful, but unable to share some datasets\n"));
@ -3537,6 +3539,7 @@ typedef struct import_parameters {
const char *ip_mntopts; const char *ip_mntopts;
nvlist_t *ip_props; nvlist_t *ip_props;
int ip_flags; int ip_flags;
uint_t ip_mntthreads;
int *ip_err; int *ip_err;
} import_parameters_t; } import_parameters_t;
@ -3545,7 +3548,7 @@ do_import_task(void *arg)
{ {
import_parameters_t *ip = arg; import_parameters_t *ip = arg;
*ip->ip_err |= do_import(ip->ip_config, NULL, ip->ip_mntopts, *ip->ip_err |= do_import(ip->ip_config, NULL, ip->ip_mntopts,
ip->ip_props, ip->ip_flags); ip->ip_props, ip->ip_flags, ip->ip_mntthreads);
free(ip); free(ip);
} }
@ -3559,6 +3562,7 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,
uint64_t pool_state; uint64_t pool_state;
boolean_t pool_specified = (import->poolname != NULL || boolean_t pool_specified = (import->poolname != NULL ||
import->guid != 0); import->guid != 0);
uint_t npools = 0;
tpool_t *tp = NULL; tpool_t *tp = NULL;
@ -3576,6 +3580,10 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,
int err = 0; int err = 0;
nvpair_t *elem = NULL; nvpair_t *elem = NULL;
boolean_t first = B_TRUE; boolean_t first = B_TRUE;
if (!pool_specified && import->do_all) {
while ((elem = nvlist_next_nvpair(pools, elem)) != NULL)
npools++;
}
while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) { while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
verify(nvpair_value_nvlist(elem, &config) == 0); verify(nvpair_value_nvlist(elem, &config) == 0);
@ -3606,6 +3614,7 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,
ip->ip_mntopts = mntopts; ip->ip_mntopts = mntopts;
ip->ip_props = props; ip->ip_props = props;
ip->ip_flags = flags; ip->ip_flags = flags;
ip->ip_mntthreads = mount_tp_nthr / npools;
ip->ip_err = &err; ip->ip_err = &err;
(void) tpool_dispatch(tp, do_import_task, (void) tpool_dispatch(tp, do_import_task,
@ -3673,7 +3682,7 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,
err = B_TRUE; err = B_TRUE;
} else { } else {
err |= do_import(found_config, new_name, err |= do_import(found_config, new_name,
mntopts, props, flags); mntopts, props, flags, mount_tp_nthr);
} }
} }
@ -7217,7 +7226,8 @@ zpool_do_split(int argc, char **argv)
} }
if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL) { if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL) {
ms_status = zpool_enable_datasets(zhp, mntopts, 0); ms_status = zpool_enable_datasets(zhp, mntopts, 0,
mount_tp_nthr);
if (ms_status == EZFS_SHAREFAILED) { if (ms_status == EZFS_SHAREFAILED) {
(void) fprintf(stderr, gettext("Split was successful, " (void) fprintf(stderr, gettext("Split was successful, "
"datasets are mounted but sharing of some datasets " "datasets are mounted but sharing of some datasets "

View File

@ -716,7 +716,7 @@ typedef struct get_all_cb {
} get_all_cb_t; } get_all_cb_t;
_LIBZFS_H void zfs_foreach_mountpoint(libzfs_handle_t *, zfs_handle_t **, _LIBZFS_H void zfs_foreach_mountpoint(libzfs_handle_t *, zfs_handle_t **,
size_t, zfs_iter_f, void *, boolean_t); size_t, zfs_iter_f, void *, uint_t);
_LIBZFS_H void libzfs_add_handle(get_all_cb_t *, zfs_handle_t *); _LIBZFS_H void libzfs_add_handle(get_all_cb_t *, zfs_handle_t *);
/* /*
@ -1004,7 +1004,8 @@ _LIBZFS_H int zfs_smb_acl_rename(libzfs_handle_t *, char *, char *, char *,
* Enable and disable datasets within a pool by mounting/unmounting and * Enable and disable datasets within a pool by mounting/unmounting and
* sharing/unsharing them. * sharing/unsharing them.
*/ */
_LIBZFS_H int zpool_enable_datasets(zpool_handle_t *, const char *, int); _LIBZFS_H int zpool_enable_datasets(zpool_handle_t *, const char *, int,
uint_t);
_LIBZFS_H int zpool_disable_datasets(zpool_handle_t *, boolean_t); _LIBZFS_H int zpool_disable_datasets(zpool_handle_t *, boolean_t);
_LIBZFS_H void zpool_disable_datasets_os(zpool_handle_t *, boolean_t); _LIBZFS_H void zpool_disable_datasets_os(zpool_handle_t *, boolean_t);
_LIBZFS_H void zpool_disable_volume_os(const char *); _LIBZFS_H void zpool_disable_volume_os(const char *);

View File

@ -5532,13 +5532,14 @@
<parameter type-id='b59d7dce' name='num_handles'/> <parameter type-id='b59d7dce' name='num_handles'/>
<parameter type-id='d8e49ab9' name='func'/> <parameter type-id='d8e49ab9' name='func'/>
<parameter type-id='eaa32e2f' name='data'/> <parameter type-id='eaa32e2f' name='data'/>
<parameter type-id='c19b74c3' name='parallel'/> <parameter type-id='3502e3ff' name='nthr'/>
<return type-id='48b5725f'/> <return type-id='48b5725f'/>
</function-decl> </function-decl>
<function-decl name='zpool_enable_datasets' mangled-name='zpool_enable_datasets' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_enable_datasets'> <function-decl name='zpool_enable_datasets' mangled-name='zpool_enable_datasets' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_enable_datasets'>
<parameter type-id='4c81de99' name='zhp'/> <parameter type-id='4c81de99' name='zhp'/>
<parameter type-id='80f4b756' name='mntopts'/> <parameter type-id='80f4b756' name='mntopts'/>
<parameter type-id='95e97e5e' name='flags'/> <parameter type-id='95e97e5e' name='flags'/>
<parameter type-id='3502e3ff' name='nthr'/>
<return type-id='95e97e5e'/> <return type-id='95e97e5e'/>
</function-decl> </function-decl>
<function-decl name='zpool_disable_datasets' mangled-name='zpool_disable_datasets' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_disable_datasets'> <function-decl name='zpool_disable_datasets' mangled-name='zpool_disable_datasets' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_disable_datasets'>

View File

@ -83,8 +83,6 @@
#include <sys/systeminfo.h> #include <sys/systeminfo.h>
#define MAXISALEN 257 /* based on sysinfo(2) man page */ #define MAXISALEN 257 /* based on sysinfo(2) man page */
static int mount_tp_nthr = 512; /* tpool threads for multi-threaded mounting */
static void zfs_mount_task(void *); static void zfs_mount_task(void *);
static const proto_table_t proto_table[SA_PROTOCOL_COUNT] = { static const proto_table_t proto_table[SA_PROTOCOL_COUNT] = {
@ -1205,19 +1203,20 @@ out:
* *
* Callbacks are issued in one of two ways: * Callbacks are issued in one of two ways:
* *
* 1. Sequentially: If the parallel argument is B_FALSE or the ZFS_SERIAL_MOUNT * 1. Sequentially: If the nthr argument is <= 1 or the ZFS_SERIAL_MOUNT
* environment variable is set, then we issue callbacks sequentially. * environment variable is set, then we issue callbacks sequentially.
* *
* 2. In parallel: If the parallel argument is B_TRUE and the ZFS_SERIAL_MOUNT * 2. In parallel: If the nthr argument is > 1 and the ZFS_SERIAL_MOUNT
* environment variable is not set, then we use a tpool to dispatch threads * environment variable is not set, then we use a tpool to dispatch threads
* to mount filesystems in parallel. This function dispatches tasks to mount * to mount filesystems in parallel. This function dispatches tasks to mount
* the filesystems at the top-level mountpoints, and these tasks in turn * the filesystems at the top-level mountpoints, and these tasks in turn
* are responsible for recursively mounting filesystems in their children * are responsible for recursively mounting filesystems in their children
* mountpoints. * mountpoints. The value of the nthr argument will be the number of worker
* threads for the thread pool.
*/ */
void void
zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles, zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles,
size_t num_handles, zfs_iter_f func, void *data, boolean_t parallel) size_t num_handles, zfs_iter_f func, void *data, uint_t nthr)
{ {
zoneid_t zoneid = getzoneid(); zoneid_t zoneid = getzoneid();
@ -1226,7 +1225,7 @@ zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles,
* variable that can be used as a convenience to do a/b comparison * variable that can be used as a convenience to do a/b comparison
* of serial vs. parallel mounting. * of serial vs. parallel mounting.
*/ */
boolean_t serial_mount = !parallel || boolean_t serial_mount = nthr <= 1 ||
(getenv("ZFS_SERIAL_MOUNT") != NULL); (getenv("ZFS_SERIAL_MOUNT") != NULL);
/* /*
@ -1246,7 +1245,7 @@ zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles,
* Issue the callback function for each dataset using a parallel * Issue the callback function for each dataset using a parallel
* algorithm that uses a thread pool to manage threads. * algorithm that uses a thread pool to manage threads.
*/ */
tpool_t *tp = tpool_create(1, mount_tp_nthr, 0, NULL); tpool_t *tp = tpool_create(1, nthr, 0, NULL);
/* /*
* There may be multiple "top level" mountpoints outside of the pool's * There may be multiple "top level" mountpoints outside of the pool's
@ -1273,10 +1272,12 @@ zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles,
/* /*
* Mount and share all datasets within the given pool. This assumes that no * Mount and share all datasets within the given pool. This assumes that no
* datasets within the pool are currently mounted. * datasets within the pool are currently mounted. nthr will be number of
* worker threads to use while mounting datasets.
*/ */
int int
zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags) zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags,
uint_t nthr)
{ {
get_all_cb_t cb = { 0 }; get_all_cb_t cb = { 0 };
mount_state_t ms = { 0 }; mount_state_t ms = { 0 };
@ -1302,7 +1303,7 @@ zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
ms.ms_mntopts = mntopts; ms.ms_mntopts = mntopts;
ms.ms_mntflags = flags; ms.ms_mntflags = flags;
zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used, zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,
zfs_mount_one, &ms, B_TRUE); zfs_mount_one, &ms, nthr);
if (ms.ms_mntstatus != 0) if (ms.ms_mntstatus != 0)
ret = EZFS_MOUNTFAILED; ret = EZFS_MOUNTFAILED;
@ -1313,7 +1314,7 @@ zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
*/ */
ms.ms_mntstatus = 0; ms.ms_mntstatus = 0;
zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used, zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,
zfs_share_one, &ms, B_FALSE); zfs_share_one, &ms, 1);
if (ms.ms_mntstatus != 0) if (ms.ms_mntstatus != 0)
ret = EZFS_SHAREFAILED; ret = EZFS_SHAREFAILED;
else else