Parallel pool import

This commit allows spa_load() to drop the spa_namespace_lock so
that imports can happen concurrently. Prior to dropping the
spa_namespace_lock, the import logic will set the spa_load_thread
value to track the thread which is doing the import.

Consumers of spa_lookup() retain the same behavior by blocking
when either a thread is holding the spa_namespace_lock or the
spa_load_thread value is set. This will ensure that critical
concurrent operations cannot take place while a pool is being
imported.

The zpool command is also enhanced to provide multi-threaded support
when invoking zpool import -a.

Lastly, zinject provides a mechanism to insert artificial delays
when importing a pool and new zfs tests are added to verify parallel
import functionality.

Contributions-by: Don Brady <don.brady@klarasystems.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: George Wilson <gwilson@delphix.com>
Closes #16093
This commit is contained in:
George Wilson 2024-04-22 12:42:38 -04:00 committed by GitHub
parent f4f156157d
commit c183d164aa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
19 changed files with 818 additions and 72 deletions

View File

@ -22,7 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2017, Intel Corporation. * Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2024, Klara Inc. * Copyright (c) 2023-2024, Klara Inc.
*/ */
/* /*
@ -310,6 +310,11 @@ usage(void)
"\t\tcreate 3 lanes on the device; one lane with a latency\n" "\t\tcreate 3 lanes on the device; one lane with a latency\n"
"\t\tof 10 ms and two lanes with a 25 ms latency.\n" "\t\tof 10 ms and two lanes with a 25 ms latency.\n"
"\n" "\n"
"\tzinject -P import|export -s <seconds> pool\n"
"\t\tAdd an artificial delay to a future pool import or export,\n"
"\t\tsuch that the operation takes a minimum of supplied seconds\n"
"\t\tto complete.\n"
"\n"
"\tzinject -I [-s <seconds> | -g <txgs>] pool\n" "\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
"\t\tCause the pool to stop writing blocks yet not\n" "\t\tCause the pool to stop writing blocks yet not\n"
"\t\treport errors for a duration. Simulates buggy hardware\n" "\t\treport errors for a duration. Simulates buggy hardware\n"
@ -392,8 +397,10 @@ print_data_handler(int id, const char *pool, zinject_record_t *record,
{ {
int *count = data; int *count = data;
if (record->zi_guid != 0 || record->zi_func[0] != '\0') if (record->zi_guid != 0 || record->zi_func[0] != '\0' ||
record->zi_duration != 0) {
return (0); return (0);
}
if (*count == 0) { if (*count == 0) {
(void) printf("%3s %-15s %-6s %-6s %-8s %3s %-4s " (void) printf("%3s %-15s %-6s %-6s %-8s %3s %-4s "
@ -507,6 +514,33 @@ print_panic_handler(int id, const char *pool, zinject_record_t *record,
return (0); return (0);
} }
/*
 * Handler callback that prints one table row per pool import/export delay
 * record.  Records carrying any other command are skipped.  'data' points
 * to an int counting the rows printed so far; the column header is emitted
 * just before the first row.  Always returns 0.
 */
static int
print_pool_delay_handler(int id, const char *pool, zinject_record_t *record,
    void *data)
{
	int *nprinted = data;
	const char *cmdname;

	/* Only the two pool delay commands are of interest here. */
	switch (record->zi_cmd) {
	case ZINJECT_DELAY_IMPORT:
		cmdname = "import";
		break;
	case ZINJECT_DELAY_EXPORT:
		cmdname = "export";
		break;
	default:
		return (0);
	}

	/* First matching record: print the table header. */
	if (*nprinted == 0) {
		(void) printf("%3s %-19s %-11s %s\n",
		    "ID", "POOL", "DELAY (sec)", "COMMAND");
		(void) printf("--- ------------------- -----------"
		    " -------\n");
	}
	(*nprinted)++;

	(void) printf("%3d %-19s %-11llu %s\n",
	    id, pool, (u_longlong_t)record->zi_duration, cmdname);

	return (0);
}
/* /*
* Print all registered error handlers. Returns the number of handlers * Print all registered error handlers. Returns the number of handlers
* registered. * registered.
@ -537,6 +571,13 @@ print_all_handlers(void)
count = 0; count = 0;
} }
(void) iter_handlers(print_pool_delay_handler, &count);
if (count > 0) {
total += count;
(void) printf("\n");
count = 0;
}
(void) iter_handlers(print_panic_handler, &count); (void) iter_handlers(print_panic_handler, &count);
return (count + total); return (count + total);
@ -609,9 +650,27 @@ register_handler(const char *pool, int flags, zinject_record_t *record,
zc.zc_guid = flags; zc.zc_guid = flags;
if (zfs_ioctl(g_zfs, ZFS_IOC_INJECT_FAULT, &zc) != 0) { if (zfs_ioctl(g_zfs, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
(void) fprintf(stderr, "failed to add handler: %s\n", const char *errmsg = strerror(errno);
errno == EDOM ? "block level exceeds max level of object" :
strerror(errno)); switch (errno) {
case EDOM:
errmsg = "block level exceeds max level of object";
break;
case EEXIST:
if (record->zi_cmd == ZINJECT_DELAY_IMPORT)
errmsg = "pool already imported";
if (record->zi_cmd == ZINJECT_DELAY_EXPORT)
errmsg = "a handler already exists";
break;
case ENOENT:
/* import delay injector running on older zfs module */
if (record->zi_cmd == ZINJECT_DELAY_IMPORT)
errmsg = "import delay injector not supported";
break;
default:
break;
}
(void) fprintf(stderr, "failed to add handler: %s\n", errmsg);
return (1); return (1);
} }
@ -636,6 +695,9 @@ register_handler(const char *pool, int flags, zinject_record_t *record,
} else if (record->zi_duration < 0) { } else if (record->zi_duration < 0) {
(void) printf(" txgs: %lld \n", (void) printf(" txgs: %lld \n",
(u_longlong_t)-record->zi_duration); (u_longlong_t)-record->zi_duration);
} else if (record->zi_timer > 0) {
(void) printf(" timer: %lld ms\n",
(u_longlong_t)NSEC2MSEC(record->zi_timer));
} else { } else {
(void) printf("objset: %llu\n", (void) printf("objset: %llu\n",
(u_longlong_t)record->zi_objset); (u_longlong_t)record->zi_objset);
@ -834,7 +896,7 @@ main(int argc, char **argv)
} }
while ((c = getopt(argc, argv, while ((c = getopt(argc, argv,
":aA:b:C:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:")) != -1) { ":aA:b:C:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:P:")) != -1) {
switch (c) { switch (c) {
case 'a': case 'a':
flags |= ZINJECT_FLUSH_ARC; flags |= ZINJECT_FLUSH_ARC;
@ -952,6 +1014,19 @@ main(int argc, char **argv)
sizeof (record.zi_func)); sizeof (record.zi_func));
record.zi_cmd = ZINJECT_PANIC; record.zi_cmd = ZINJECT_PANIC;
break; break;
case 'P':
if (strcasecmp(optarg, "import") == 0) {
record.zi_cmd = ZINJECT_DELAY_IMPORT;
} else if (strcasecmp(optarg, "export") == 0) {
record.zi_cmd = ZINJECT_DELAY_EXPORT;
} else {
(void) fprintf(stderr, "invalid command '%s': "
"must be 'import' or 'export'\n", optarg);
usage();
libzfs_fini(g_zfs);
return (1);
}
break;
case 'q': case 'q':
quiet = 1; quiet = 1;
break; break;
@ -1033,7 +1108,7 @@ main(int argc, char **argv)
argc -= optind; argc -= optind;
argv += optind; argv += optind;
if (record.zi_duration != 0) if (record.zi_duration != 0 && record.zi_cmd == 0)
record.zi_cmd = ZINJECT_IGNORED_WRITES; record.zi_cmd = ZINJECT_IGNORED_WRITES;
if (cancel != NULL) { if (cancel != NULL) {
@ -1179,8 +1254,8 @@ main(int argc, char **argv)
if (raw != NULL || range != NULL || type != TYPE_INVAL || if (raw != NULL || range != NULL || type != TYPE_INVAL ||
level != 0 || device != NULL || record.zi_freq > 0 || level != 0 || device != NULL || record.zi_freq > 0 ||
dvas != 0) { dvas != 0) {
(void) fprintf(stderr, "panic (-p) incompatible with " (void) fprintf(stderr, "%s incompatible with other "
"other options\n"); "options\n", "import|export delay (-P)");
usage(); usage();
libzfs_fini(g_zfs); libzfs_fini(g_zfs);
return (2); return (2);
@ -1198,6 +1273,28 @@ main(int argc, char **argv)
if (argv[1] != NULL) if (argv[1] != NULL)
record.zi_type = atoi(argv[1]); record.zi_type = atoi(argv[1]);
dataset[0] = '\0'; dataset[0] = '\0';
} else if (record.zi_cmd == ZINJECT_DELAY_IMPORT ||
record.zi_cmd == ZINJECT_DELAY_EXPORT) {
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
level != 0 || device != NULL || record.zi_freq > 0 ||
dvas != 0) {
(void) fprintf(stderr, "%s incompatible with other "
"options\n", "import|export delay (-P)");
usage();
libzfs_fini(g_zfs);
return (2);
}
if (argc != 1 || record.zi_duration <= 0) {
(void) fprintf(stderr, "import|export delay (-P) "
"injection requires a duration (-s) and a single "
"pool name\n");
usage();
libzfs_fini(g_zfs);
return (2);
}
(void) strlcpy(pool, argv[0], sizeof (pool));
} else if (record.zi_cmd == ZINJECT_IGNORED_WRITES) { } else if (record.zi_cmd == ZINJECT_IGNORED_WRITES) {
if (raw != NULL || range != NULL || type != TYPE_INVAL || if (raw != NULL || range != NULL || type != TYPE_INVAL ||
level != 0 || record.zi_freq > 0 || dvas != 0) { level != 0 || record.zi_freq > 0 || dvas != 0) {

View File

@ -50,6 +50,7 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <thread_pool.h>
#include <time.h> #include <time.h>
#include <unistd.h> #include <unistd.h>
#include <pwd.h> #include <pwd.h>
@ -3455,15 +3456,40 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
return (ret); return (ret);
} }
/*
 * Argument bundle handed to do_import_task() for one dispatched pool import.
 * The task frees the structure itself when it is done.
 */
typedef struct import_parameters {
nvlist_t *ip_config;		/* config passed to do_import() */
const char *ip_mntopts;		/* mount options passed to do_import() */
nvlist_t *ip_props;		/* properties passed to do_import() */
int ip_flags;			/* flags passed to do_import() */
int *ip_err;			/* do_import() result is OR-ed into *ip_err */
} import_parameters_t;
/*
 * Thread-pool task that imports a single pool.
 *
 * 'arg' is a heap-allocated import_parameters_t owned by this task; it is
 * freed here once the import attempt has completed.
 *
 * The do_import() result is OR-ed into the error word referenced by ip_err.
 * Every dispatched task points at the same word and tasks may run
 * concurrently, so the update is performed atomically; a plain "|=" is an
 * unsynchronised read-modify-write that could drop a failure reported by
 * another task.
 */
static void
do_import_task(void *arg)
{
	import_parameters_t *ip = arg;
	int ret;

	ret = do_import(ip->ip_config, NULL, ip->ip_mntopts,
	    ip->ip_props, ip->ip_flags);

	/* OR-ing in zero is a no-op, so only touch the shared word on error. */
	if (ret != 0)
		(void) __atomic_fetch_or(ip->ip_err, ret, __ATOMIC_SEQ_CST);

	free(ip);
}
static int static int
import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags, import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,
char *orig_name, char *new_name, char *orig_name, char *new_name, importargs_t *import)
boolean_t do_destroyed, boolean_t pool_specified, boolean_t do_all,
importargs_t *import)
{ {
nvlist_t *config = NULL; nvlist_t *config = NULL;
nvlist_t *found_config = NULL; nvlist_t *found_config = NULL;
uint64_t pool_state; uint64_t pool_state;
boolean_t pool_specified = (import->poolname != NULL ||
import->guid != 0);
tpool_t *tp = NULL;
if (import->do_all) {
tp = tpool_create(1, 5 * sysconf(_SC_NPROCESSORS_ONLN),
0, NULL);
}
/* /*
* At this point we have a list of import candidate configs. Even if * At this point we have a list of import candidate configs. Even if
@ -3480,9 +3506,11 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,
verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
&pool_state) == 0); &pool_state) == 0);
if (!do_destroyed && pool_state == POOL_STATE_DESTROYED) if (!import->do_destroyed &&
pool_state == POOL_STATE_DESTROYED)
continue; continue;
if (do_destroyed && pool_state != POOL_STATE_DESTROYED) if (import->do_destroyed &&
pool_state != POOL_STATE_DESTROYED)
continue; continue;
verify(nvlist_add_nvlist(config, ZPOOL_LOAD_POLICY, verify(nvlist_add_nvlist(config, ZPOOL_LOAD_POLICY,
@ -3491,12 +3519,21 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,
if (!pool_specified) { if (!pool_specified) {
if (first) if (first)
first = B_FALSE; first = B_FALSE;
else if (!do_all) else if (!import->do_all)
(void) fputc('\n', stdout); (void) fputc('\n', stdout);
if (do_all) { if (import->do_all) {
err |= do_import(config, NULL, mntopts, import_parameters_t *ip = safe_malloc(
props, flags); sizeof (import_parameters_t));
ip->ip_config = config;
ip->ip_mntopts = mntopts;
ip->ip_props = props;
ip->ip_flags = flags;
ip->ip_err = &err;
(void) tpool_dispatch(tp, do_import_task,
(void *)ip);
} else { } else {
/* /*
* If we're importing from cachefile, then * If we're importing from cachefile, then
@ -3544,6 +3581,10 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,
found_config = config; found_config = config;
} }
} }
if (import->do_all) {
tpool_wait(tp);
tpool_destroy(tp);
}
/* /*
* If we were searching for a specific pool, verify that we found a * If we were searching for a specific pool, verify that we found a
@ -3773,7 +3814,6 @@ zpool_do_import(int argc, char **argv)
boolean_t xtreme_rewind = B_FALSE; boolean_t xtreme_rewind = B_FALSE;
boolean_t do_scan = B_FALSE; boolean_t do_scan = B_FALSE;
boolean_t pool_exists = B_FALSE; boolean_t pool_exists = B_FALSE;
boolean_t pool_specified = B_FALSE;
uint64_t txg = -1ULL; uint64_t txg = -1ULL;
char *cachefile = NULL; char *cachefile = NULL;
importargs_t idata = { 0 }; importargs_t idata = { 0 };
@ -3972,7 +4012,6 @@ zpool_do_import(int argc, char **argv)
searchname = argv[0]; searchname = argv[0];
searchguid = 0; searchguid = 0;
} }
pool_specified = B_TRUE;
/* /*
* User specified a name or guid. Ensure it's unique. * User specified a name or guid. Ensure it's unique.
@ -4005,6 +4044,8 @@ zpool_do_import(int argc, char **argv)
idata.cachefile = cachefile; idata.cachefile = cachefile;
idata.scan = do_scan; idata.scan = do_scan;
idata.policy = policy; idata.policy = policy;
idata.do_destroyed = do_destroyed;
idata.do_all = do_all;
libpc_handle_t lpch = { libpc_handle_t lpch = {
.lpc_lib_handle = g_zfs, .lpc_lib_handle = g_zfs,
@ -4047,9 +4088,7 @@ zpool_do_import(int argc, char **argv)
} }
err = import_pools(pools, props, mntopts, flags, err = import_pools(pools, props, mntopts, flags,
argc >= 1 ? argv[0] : NULL, argc >= 1 ? argv[0] : NULL, argc >= 2 ? argv[1] : NULL, &idata);
argc >= 2 ? argv[1] : NULL,
do_destroyed, pool_specified, do_all, &idata);
/* /*
* If we're using the cachefile and we failed to import, then * If we're using the cachefile and we failed to import, then
@ -4070,9 +4109,8 @@ zpool_do_import(int argc, char **argv)
pools = zpool_search_import(&lpch, &idata); pools = zpool_search_import(&lpch, &idata);
err = import_pools(pools, props, mntopts, flags, err = import_pools(pools, props, mntopts, flags,
argc >= 1 ? argv[0] : NULL, argc >= 1 ? argv[0] : NULL, argc >= 2 ? argv[1] : NULL,
argc >= 2 ? argv[1] : NULL, &idata);
do_destroyed, pool_specified, do_all, &idata);
} }
error: error:

View File

@ -20,7 +20,7 @@
*/ */
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018 by Delphix. All rights reserved. * Copyright (c) 2018, 2024 by Delphix. All rights reserved.
*/ */
#ifndef _LIBZUTIL_H #ifndef _LIBZUTIL_H
@ -79,6 +79,8 @@ typedef struct importargs {
boolean_t can_be_active; /* can the pool be active? */ boolean_t can_be_active; /* can the pool be active? */
boolean_t scan; /* prefer scanning to libblkid cache */ boolean_t scan; /* prefer scanning to libblkid cache */
nvlist_t *policy; /* load policy (max txg, rewind, etc.) */ nvlist_t *policy; /* load policy (max txg, rewind, etc.) */
boolean_t do_destroyed;
boolean_t do_all;
} importargs_t; } importargs_t;
typedef struct libpc_handle { typedef struct libpc_handle {

View File

@ -833,6 +833,8 @@ void spa_select_allocator(zio_t *zio);
/* spa namespace global mutex */ /* spa namespace global mutex */
extern kmutex_t spa_namespace_lock; extern kmutex_t spa_namespace_lock;
extern avl_tree_t spa_namespace_avl;
extern kcondvar_t spa_namespace_cv;
/* /*
* SPA configuration functions in spa_config.c * SPA configuration functions in spa_config.c

View File

@ -20,7 +20,7 @@
*/ */
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2019 by Delphix. All rights reserved. * Copyright (c) 2011, 2024 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved.
@ -237,6 +237,7 @@ struct spa {
dsl_pool_t *spa_dsl_pool; dsl_pool_t *spa_dsl_pool;
boolean_t spa_is_initializing; /* true while opening pool */ boolean_t spa_is_initializing; /* true while opening pool */
boolean_t spa_is_exporting; /* true while exporting pool */ boolean_t spa_is_exporting; /* true while exporting pool */
kthread_t *spa_load_thread; /* loading, no namespace lock */
metaslab_class_t *spa_normal_class; /* normal data class */ metaslab_class_t *spa_normal_class; /* normal data class */
metaslab_class_t *spa_log_class; /* intent log data class */ metaslab_class_t *spa_log_class; /* intent log data class */
metaslab_class_t *spa_embedded_log_class; /* log on normal vdevs */ metaslab_class_t *spa_embedded_log_class; /* log on normal vdevs */

View File

@ -20,7 +20,7 @@
*/ */
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2020 by Delphix. All rights reserved. * Copyright (c) 2012, 2024 by Delphix. All rights reserved.
* Copyright 2016 RackTop Systems. * Copyright 2016 RackTop Systems.
* Copyright (c) 2017, Intel Corporation. * Copyright (c) 2017, Intel Corporation.
*/ */
@ -454,6 +454,8 @@ typedef enum zinject_type {
ZINJECT_PANIC, ZINJECT_PANIC,
ZINJECT_DELAY_IO, ZINJECT_DELAY_IO,
ZINJECT_DECRYPT_FAULT, ZINJECT_DECRYPT_FAULT,
ZINJECT_DELAY_IMPORT,
ZINJECT_DELAY_EXPORT,
} zinject_type_t; } zinject_type_t;
typedef struct zfs_share { typedef struct zfs_share {

View File

@ -22,7 +22,7 @@
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, 2020 by Delphix. All rights reserved. * Copyright (c) 2012, 2024 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright 2016 Toomas Soome <tsoome@me.com> * Copyright 2016 Toomas Soome <tsoome@me.com>
@ -686,6 +686,8 @@ extern int zio_handle_device_injections(vdev_t *vd, zio_t *zio, int err1,
extern int zio_handle_label_injection(zio_t *zio, int error); extern int zio_handle_label_injection(zio_t *zio, int error);
extern void zio_handle_ignored_writes(zio_t *zio); extern void zio_handle_ignored_writes(zio_t *zio);
extern hrtime_t zio_handle_io_delay(zio_t *zio); extern hrtime_t zio_handle_io_delay(zio_t *zio);
extern void zio_handle_import_delay(spa_t *spa, hrtime_t elapsed);
extern void zio_handle_export_delay(spa_t *spa, hrtime_t elapsed);
/* /*
* Checksum ereport functions * Checksum ereport functions

View File

@ -129,6 +129,14 @@ Force a vdev error.
. .
.It Xo .It Xo
.Nm zinject .Nm zinject
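.Fl P Cm import Ns | Ns Cm export
.Fl s Ar seconds
.Ar pool
.Xc
Add an artificial delay to a future import or export of a pool,
such that the operation takes a minimum of the supplied number of seconds
to complete.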
This injector is automatically cleared after the import is finished.
.
.It Xo
.Nm zinject
.Fl I .Fl I
.Op Fl s Ar seconds Ns | Ns Fl g Ar txgs .Op Fl s Ar seconds Ns | Ns Fl g Ar txgs
.Ar pool .Ar pool

View File

@ -3273,8 +3273,6 @@ spa_spawn_aux_threads(spa_t *spa)
{ {
ASSERT(spa_writeable(spa)); ASSERT(spa_writeable(spa));
ASSERT(MUTEX_HELD(&spa_namespace_lock));
spa_start_raidz_expansion_thread(spa); spa_start_raidz_expansion_thread(spa);
spa_start_indirect_condensing_thread(spa); spa_start_indirect_condensing_thread(spa);
spa_start_livelist_destroy_thread(spa); spa_start_livelist_destroy_thread(spa);
@ -4981,7 +4979,8 @@ spa_ld_read_checkpoint_txg(spa_t *spa)
int error = 0; int error = 0;
ASSERT0(spa->spa_checkpoint_txg); ASSERT0(spa->spa_checkpoint_txg);
ASSERT(MUTEX_HELD(&spa_namespace_lock)); ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
spa->spa_load_thread == curthread);
error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t), DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t),
@ -5228,6 +5227,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
boolean_t checkpoint_rewind = boolean_t checkpoint_rewind =
(spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT); (spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT);
boolean_t update_config_cache = B_FALSE; boolean_t update_config_cache = B_FALSE;
hrtime_t load_start = gethrtime();
ASSERT(MUTEX_HELD(&spa_namespace_lock)); ASSERT(MUTEX_HELD(&spa_namespace_lock));
ASSERT(spa->spa_config_source != SPA_CONFIG_SRC_NONE); ASSERT(spa->spa_config_source != SPA_CONFIG_SRC_NONE);
@ -5272,13 +5272,19 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
return (error); return (error);
} }
/*
* Drop the namespace lock for the rest of the function.
*/
spa->spa_load_thread = curthread;
mutex_exit(&spa_namespace_lock);
/* /*
* Retrieve the checkpoint txg if the pool has a checkpoint. * Retrieve the checkpoint txg if the pool has a checkpoint.
*/ */
spa_import_progress_set_notes(spa, "Loading checkpoint txg"); spa_import_progress_set_notes(spa, "Loading checkpoint txg");
error = spa_ld_read_checkpoint_txg(spa); error = spa_ld_read_checkpoint_txg(spa);
if (error != 0) if (error != 0)
return (error); goto fail;
/* /*
* Retrieve the mapping of indirect vdevs. Those vdevs were removed * Retrieve the mapping of indirect vdevs. Those vdevs were removed
@ -5291,7 +5297,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
spa_import_progress_set_notes(spa, "Loading indirect vdev metadata"); spa_import_progress_set_notes(spa, "Loading indirect vdev metadata");
error = spa_ld_open_indirect_vdev_metadata(spa); error = spa_ld_open_indirect_vdev_metadata(spa);
if (error != 0) if (error != 0)
return (error); goto fail;
/* /*
* Retrieve the full list of active features from the MOS and check if * Retrieve the full list of active features from the MOS and check if
@ -5300,7 +5306,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
spa_import_progress_set_notes(spa, "Checking feature flags"); spa_import_progress_set_notes(spa, "Checking feature flags");
error = spa_ld_check_features(spa, &missing_feat_write); error = spa_ld_check_features(spa, &missing_feat_write);
if (error != 0) if (error != 0)
return (error); goto fail;
/* /*
* Load several special directories from the MOS needed by the dsl_pool * Load several special directories from the MOS needed by the dsl_pool
@ -5309,7 +5315,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
spa_import_progress_set_notes(spa, "Loading special MOS directories"); spa_import_progress_set_notes(spa, "Loading special MOS directories");
error = spa_ld_load_special_directories(spa); error = spa_ld_load_special_directories(spa);
if (error != 0) if (error != 0)
return (error); goto fail;
/* /*
* Retrieve pool properties from the MOS. * Retrieve pool properties from the MOS.
@ -5317,7 +5323,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
spa_import_progress_set_notes(spa, "Loading properties"); spa_import_progress_set_notes(spa, "Loading properties");
error = spa_ld_get_props(spa); error = spa_ld_get_props(spa);
if (error != 0) if (error != 0)
return (error); goto fail;
/* /*
* Retrieve the list of auxiliary devices - cache devices and spares - * Retrieve the list of auxiliary devices - cache devices and spares -
@ -5326,7 +5332,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
spa_import_progress_set_notes(spa, "Loading AUX vdevs"); spa_import_progress_set_notes(spa, "Loading AUX vdevs");
error = spa_ld_open_aux_vdevs(spa, type); error = spa_ld_open_aux_vdevs(spa, type);
if (error != 0) if (error != 0)
return (error); goto fail;
/* /*
* Load the metadata for all vdevs. Also check if unopenable devices * Load the metadata for all vdevs. Also check if unopenable devices
@ -5335,17 +5341,17 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
spa_import_progress_set_notes(spa, "Loading vdev metadata"); spa_import_progress_set_notes(spa, "Loading vdev metadata");
error = spa_ld_load_vdev_metadata(spa); error = spa_ld_load_vdev_metadata(spa);
if (error != 0) if (error != 0)
return (error); goto fail;
spa_import_progress_set_notes(spa, "Loading dedup tables"); spa_import_progress_set_notes(spa, "Loading dedup tables");
error = spa_ld_load_dedup_tables(spa); error = spa_ld_load_dedup_tables(spa);
if (error != 0) if (error != 0)
return (error); goto fail;
spa_import_progress_set_notes(spa, "Loading BRT"); spa_import_progress_set_notes(spa, "Loading BRT");
error = spa_ld_load_brt(spa); error = spa_ld_load_brt(spa);
if (error != 0) if (error != 0)
return (error); goto fail;
/* /*
* Verify the logs now to make sure we don't have any unexpected errors * Verify the logs now to make sure we don't have any unexpected errors
@ -5354,7 +5360,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
spa_import_progress_set_notes(spa, "Verifying Log Devices"); spa_import_progress_set_notes(spa, "Verifying Log Devices");
error = spa_ld_verify_logs(spa, type, ereport); error = spa_ld_verify_logs(spa, type, ereport);
if (error != 0) if (error != 0)
return (error); goto fail;
if (missing_feat_write) { if (missing_feat_write) {
ASSERT(spa->spa_load_state == SPA_LOAD_TRYIMPORT); ASSERT(spa->spa_load_state == SPA_LOAD_TRYIMPORT);
@ -5364,8 +5370,9 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
* read-only mode but not read-write mode. We now have enough * read-only mode but not read-write mode. We now have enough
* information and can return to userland. * information and can return to userland.
*/ */
return (spa_vdev_err(spa->spa_root_vdev, VDEV_AUX_UNSUP_FEAT, error = spa_vdev_err(spa->spa_root_vdev, VDEV_AUX_UNSUP_FEAT,
ENOTSUP)); ENOTSUP);
goto fail;
} }
/* /*
@ -5376,7 +5383,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
spa_import_progress_set_notes(spa, "Verifying pool data"); spa_import_progress_set_notes(spa, "Verifying pool data");
error = spa_ld_verify_pool_data(spa); error = spa_ld_verify_pool_data(spa);
if (error != 0) if (error != 0)
return (error); goto fail;
/* /*
* Calculate the deflated space for the pool. This must be done before * Calculate the deflated space for the pool. This must be done before
@ -5501,13 +5508,19 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
spa_config_exit(spa, SCL_CONFIG, FTAG); spa_config_exit(spa, SCL_CONFIG, FTAG);
spa_import_progress_set_notes(spa, "Finished importing"); spa_import_progress_set_notes(spa, "Finished importing");
} }
zio_handle_import_delay(spa, gethrtime() - load_start);
spa_import_progress_remove(spa_guid(spa)); spa_import_progress_remove(spa_guid(spa));
spa_async_request(spa, SPA_ASYNC_L2CACHE_REBUILD); spa_async_request(spa, SPA_ASYNC_L2CACHE_REBUILD);
spa_load_note(spa, "LOADED"); spa_load_note(spa, "LOADED");
fail:
mutex_enter(&spa_namespace_lock);
spa->spa_load_thread = NULL;
cv_broadcast(&spa_namespace_cv);
return (error);
return (0);
} }
static int static int
@ -6757,9 +6770,14 @@ spa_tryimport(nvlist_t *tryconfig)
/* /*
* Create and initialize the spa structure. * Create and initialize the spa structure.
*/ */
char *name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
(void) snprintf(name, MAXPATHLEN, "%s-%llx-%s",
TRYIMPORT_NAME, (u_longlong_t)curthread, poolname);
mutex_enter(&spa_namespace_lock); mutex_enter(&spa_namespace_lock);
spa = spa_add(TRYIMPORT_NAME, tryconfig, NULL); spa = spa_add(name, tryconfig, NULL);
spa_activate(spa, SPA_MODE_READ); spa_activate(spa, SPA_MODE_READ);
kmem_free(name, MAXPATHLEN);
/* /*
* Rewind pool if a max txg was provided. * Rewind pool if a max txg was provided.
@ -6874,6 +6892,7 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
{ {
int error; int error;
spa_t *spa; spa_t *spa;
hrtime_t export_start = gethrtime();
if (oldconfig) if (oldconfig)
*oldconfig = NULL; *oldconfig = NULL;
@ -7018,6 +7037,9 @@ export_spa:
spa->spa_is_exporting = B_FALSE; spa->spa_is_exporting = B_FALSE;
} }
if (new_state == POOL_STATE_EXPORTED)
zio_handle_export_delay(spa, gethrtime() - export_start);
mutex_exit(&spa_namespace_lock); mutex_exit(&spa_namespace_lock);
return (0); return (0);

View File

@ -20,7 +20,7 @@
*/ */
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2019 by Delphix. All rights reserved. * Copyright (c) 2011, 2024 by Delphix. All rights reserved.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved.
@ -82,7 +82,8 @@
* - Check if spa_refcount is zero * - Check if spa_refcount is zero
* - Rename a spa_t * - Rename a spa_t
* - add/remove/attach/detach devices * - add/remove/attach/detach devices
* - Held for the duration of create/destroy/import/export * - Held for the duration of create/destroy/export
* - Held at the start and end of import
* *
* It does not need to handle recursion. A create or destroy may * It does not need to handle recursion. A create or destroy may
* reference objects (files or zvols) in other pools, but by * reference objects (files or zvols) in other pools, but by
@ -235,9 +236,9 @@
* locking is, always, based on spa_namespace_lock and spa_config_lock[]. * locking is, always, based on spa_namespace_lock and spa_config_lock[].
*/ */
static avl_tree_t spa_namespace_avl; avl_tree_t spa_namespace_avl;
kmutex_t spa_namespace_lock; kmutex_t spa_namespace_lock;
static kcondvar_t spa_namespace_cv; kcondvar_t spa_namespace_cv;
static const int spa_max_replication_override = SPA_DVAS_PER_BP; static const int spa_max_replication_override = SPA_DVAS_PER_BP;
static kmutex_t spa_spare_lock; static kmutex_t spa_spare_lock;
@ -619,6 +620,7 @@ spa_lookup(const char *name)
ASSERT(MUTEX_HELD(&spa_namespace_lock)); ASSERT(MUTEX_HELD(&spa_namespace_lock));
retry:
(void) strlcpy(search.spa_name, name, sizeof (search.spa_name)); (void) strlcpy(search.spa_name, name, sizeof (search.spa_name));
/* /*
@ -630,6 +632,14 @@ spa_lookup(const char *name)
*cp = '\0'; *cp = '\0';
spa = avl_find(&spa_namespace_avl, &search, &where); spa = avl_find(&spa_namespace_avl, &search, &where);
if (spa == NULL)
return (NULL);
if (spa->spa_load_thread != NULL &&
spa->spa_load_thread != curthread) {
cv_wait(&spa_namespace_cv, &spa_namespace_lock);
goto retry;
}
return (spa); return (spa);
} }
@ -728,6 +738,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
spa_config_lock_init(spa); spa_config_lock_init(spa);
spa_stats_init(spa); spa_stats_init(spa);
ASSERT(MUTEX_HELD(&spa_namespace_lock));
avl_add(&spa_namespace_avl, spa); avl_add(&spa_namespace_avl, spa);
/* /*
@ -826,7 +837,6 @@ spa_remove(spa_t *spa)
nvlist_free(spa->spa_config_splitting); nvlist_free(spa->spa_config_splitting);
avl_remove(&spa_namespace_avl, spa); avl_remove(&spa_namespace_avl, spa);
cv_broadcast(&spa_namespace_cv);
if (spa->spa_root) if (spa->spa_root)
spa_strfree(spa->spa_root); spa_strfree(spa->spa_root);
@ -920,7 +930,8 @@ void
spa_open_ref(spa_t *spa, const void *tag) spa_open_ref(spa_t *spa, const void *tag)
{ {
ASSERT(zfs_refcount_count(&spa->spa_refcount) >= spa->spa_minref || ASSERT(zfs_refcount_count(&spa->spa_refcount) >= spa->spa_minref ||
MUTEX_HELD(&spa_namespace_lock)); MUTEX_HELD(&spa_namespace_lock) ||
spa->spa_load_thread == curthread);
(void) zfs_refcount_add(&spa->spa_refcount, tag); (void) zfs_refcount_add(&spa->spa_refcount, tag);
} }
@ -932,7 +943,8 @@ void
spa_close(spa_t *spa, const void *tag) spa_close(spa_t *spa, const void *tag)
{ {
ASSERT(zfs_refcount_count(&spa->spa_refcount) > spa->spa_minref || ASSERT(zfs_refcount_count(&spa->spa_refcount) > spa->spa_minref ||
MUTEX_HELD(&spa_namespace_lock)); MUTEX_HELD(&spa_namespace_lock) ||
spa->spa_load_thread == curthread);
(void) zfs_refcount_remove(&spa->spa_refcount, tag); (void) zfs_refcount_remove(&spa->spa_refcount, tag);
} }

View File

@ -20,7 +20,7 @@
*/ */
/* /*
* Copyright (c) 2016, 2019 by Delphix. All rights reserved. * Copyright (c) 2016, 2024 by Delphix. All rights reserved.
*/ */
#include <sys/spa.h> #include <sys/spa.h>
@ -775,7 +775,8 @@ vdev_initialize_stop_all(vdev_t *vd, vdev_initializing_state_t tgt_state)
void void
vdev_initialize_restart(vdev_t *vd) vdev_initialize_restart(vdev_t *vd)
{ {
ASSERT(MUTEX_HELD(&spa_namespace_lock)); ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
vd->vdev_spa->spa_load_thread == curthread);
ASSERT(!spa_config_held(vd->vdev_spa, SCL_ALL, RW_WRITER)); ASSERT(!spa_config_held(vd->vdev_spa, SCL_ALL, RW_WRITER));
if (vd->vdev_leaf_zap != 0) { if (vd->vdev_leaf_zap != 0) {

View File

@ -23,6 +23,7 @@
* Copyright (c) 2018, Intel Corporation. * Copyright (c) 2018, Intel Corporation.
* Copyright (c) 2020 by Lawrence Livermore National Security, LLC. * Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
* Copyright (c) 2022 Hewlett Packard Enterprise Development LP. * Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
* Copyright (c) 2024 by Delphix. All rights reserved.
*/ */
#include <sys/vdev_impl.h> #include <sys/vdev_impl.h>
@ -1071,7 +1072,8 @@ vdev_rebuild_restart_impl(vdev_t *vd)
void void
vdev_rebuild_restart(spa_t *spa) vdev_rebuild_restart(spa_t *spa)
{ {
ASSERT(MUTEX_HELD(&spa_namespace_lock)); ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
spa->spa_load_thread == curthread);
vdev_rebuild_restart_impl(spa->spa_root_vdev); vdev_rebuild_restart_impl(spa->spa_root_vdev);
} }

View File

@ -20,7 +20,7 @@
*/ */
/* /*
* Copyright (c) 2016 by Delphix. All rights reserved. * Copyright (c) 2016, 2024 by Delphix. All rights reserved.
* Copyright (c) 2019 by Lawrence Livermore National Security, LLC. * Copyright (c) 2019 by Lawrence Livermore National Security, LLC.
* Copyright (c) 2021 Hewlett Packard Enterprise Development LP * Copyright (c) 2021 Hewlett Packard Enterprise Development LP
* Copyright 2023 RackTop Systems, Inc. * Copyright 2023 RackTop Systems, Inc.
@ -1148,7 +1148,8 @@ vdev_trim_stop_all(vdev_t *vd, vdev_trim_state_t tgt_state)
void void
vdev_trim_restart(vdev_t *vd) vdev_trim_restart(vdev_t *vd)
{ {
ASSERT(MUTEX_HELD(&spa_namespace_lock)); ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
vd->vdev_spa->spa_load_thread == curthread);
ASSERT(!spa_config_held(vd->vdev_spa, SCL_ALL, RW_WRITER)); ASSERT(!spa_config_held(vd->vdev_spa, SCL_ALL, RW_WRITER));
if (vd->vdev_leaf_zap != 0) { if (vd->vdev_leaf_zap != 0) {
@ -1568,8 +1569,8 @@ vdev_autotrim_stop_all(spa_t *spa)
void void
vdev_autotrim_restart(spa_t *spa) vdev_autotrim_restart(spa_t *spa)
{ {
ASSERT(MUTEX_HELD(&spa_namespace_lock)); ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
spa->spa_load_thread == curthread);
if (spa->spa_autotrim) if (spa->spa_autotrim)
vdev_autotrim(spa); vdev_autotrim(spa);
} }

View File

@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2017, Intel Corporation. * Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2024, Klara Inc.
*/ */
/* /*
@ -59,6 +60,7 @@ uint32_t zio_injection_enabled = 0;
typedef struct inject_handler { typedef struct inject_handler {
int zi_id; int zi_id;
spa_t *zi_spa; spa_t *zi_spa;
char *zi_spa_name; /* ZINJECT_DELAY_IMPORT only */
zinject_record_t zi_record; zinject_record_t zi_record;
uint64_t *zi_lanes; uint64_t *zi_lanes;
int zi_next_lane; int zi_next_lane;
@ -703,6 +705,63 @@ zio_handle_io_delay(zio_t *zio)
return (min_target); return (min_target);
} }
/*
 * Apply a one-shot injected delay to a pool import or export.
 *
 * Searches the injection handler list for a handler whose command equals
 * 'command' and whose recorded pool name equals spa_name(spa).  If one is
 * found, the caller is put to sleep for whatever part of the configured
 * duration (zi_duration, in seconds) has not already been consumed by
 * 'elapsed' (in nanoseconds), and the handler is then cleared.
 *
 * Handlers for other commands are skipped rather than terminating the
 * search, so a pool delay handler is found no matter where it sits in the
 * list relative to other kinds of handlers.
 */
static void
zio_handle_pool_delay(spa_t *spa, hrtime_t elapsed, zinject_type_t command)
{
	hrtime_t delay = 0;
	int id = 0;

	rw_enter(&inject_lock, RW_READER);

	for (inject_handler_t *handler = list_head(&inject_handlers);
	    handler != NULL; handler = list_next(&inject_handlers, handler)) {
		/* Not a handler for this command; keep looking. */
		if (handler->zi_record.zi_cmd != command)
			continue;

		ASSERT3P(handler->zi_spa_name, !=, NULL);
		if (strcmp(spa_name(spa), handler->zi_spa_name) == 0) {
			uint64_t pause =
			    SEC2NSEC(handler->zi_record.zi_duration);
			/* Only sleep for the time not already spent. */
			if (pause > elapsed) {
				delay = pause - elapsed;
			}
			id = handler->zi_id;
			break;
		}
	}

	/* Drop the lock before sleeping and before clearing the handler. */
	rw_exit(&inject_lock);

	if (delay) {
		if (command == ZINJECT_DELAY_IMPORT) {
			spa_import_progress_set_notes(spa, "injecting %llu "
			    "sec delay", (u_longlong_t)NSEC2SEC(delay));
		}
		zfs_sleep_until(gethrtime() + delay);
	}

	if (id) {
		/* all done with this one-shot handler */
		zio_clear_fault(id);
	}
}
/*
 * For testing, inject a delay during an import.
 *
 * Forwards 'spa' and 'elapsed' unchanged to zio_handle_pool_delay() with
 * the ZINJECT_DELAY_IMPORT command.
 */
void
zio_handle_import_delay(spa_t *spa, hrtime_t elapsed)
{
zio_handle_pool_delay(spa, elapsed, ZINJECT_DELAY_IMPORT);
}
/*
 * For testing, inject a delay during an export.
 *
 * Forwards 'spa' and 'elapsed' unchanged to zio_handle_pool_delay() with
 * the ZINJECT_DELAY_EXPORT command.
 */
void
zio_handle_export_delay(spa_t *spa, hrtime_t elapsed)
{
zio_handle_pool_delay(spa, elapsed, ZINJECT_DELAY_EXPORT);
}
static int static int
zio_calculate_range(const char *pool, zinject_record_t *record) zio_calculate_range(const char *pool, zinject_record_t *record)
{ {
@ -760,6 +819,28 @@ zio_calculate_range(const char *pool, zinject_record_t *record)
return (0); return (0);
} }
/*
 * Report whether an injection handler for 'command' is already registered
 * against the pool called 'name'.
 *
 * A handler's pool name is taken from zi_spa_name when that is set, and
 * from spa_name(zi_spa) otherwise.  The handler list is walked under the
 * inject_lock held as reader.
 */
static boolean_t
zio_pool_handler_exists(const char *name, zinject_type_t command)
{
	inject_handler_t *ih;
	boolean_t found = B_FALSE;

	rw_enter(&inject_lock, RW_READER);

	ih = list_head(&inject_handlers);
	while (ih != NULL) {
		if (ih->zi_record.zi_cmd == command) {
			const char *owner = ih->zi_spa_name;

			if (owner == NULL)
				owner = spa_name(ih->zi_spa);

			if (strcmp(name, owner) == 0) {
				found = B_TRUE;
				break;
			}
		}
		ih = list_next(&inject_handlers, ih);
	}

	rw_exit(&inject_lock);

	return (found);
}
/* /*
* Create a new handler for the given record. We add it to the list, adding * Create a new handler for the given record. We add it to the list, adding
* a reference to the spa_t in the process. We increment zio_injection_enabled, * a reference to the spa_t in the process. We increment zio_injection_enabled,
@ -810,16 +891,42 @@ zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record)
if (!(flags & ZINJECT_NULL)) { if (!(flags & ZINJECT_NULL)) {
/* /*
* spa_inject_ref() will add an injection reference, which will * Pool delays for import or export don't take an
* prevent the pool from being removed from the namespace while * injection reference on the spa. Instead they
* still allowing it to be unloaded. * rely on matching by name.
*/
if (record->zi_cmd == ZINJECT_DELAY_IMPORT ||
record->zi_cmd == ZINJECT_DELAY_EXPORT) {
if (record->zi_duration <= 0)
return (SET_ERROR(EINVAL));
/*
* Only one import | export delay handler per pool.
*/
if (zio_pool_handler_exists(name, record->zi_cmd))
return (SET_ERROR(EEXIST));
mutex_enter(&spa_namespace_lock);
boolean_t has_spa = spa_lookup(name) != NULL;
mutex_exit(&spa_namespace_lock);
if (record->zi_cmd == ZINJECT_DELAY_IMPORT && has_spa)
return (SET_ERROR(EEXIST));
if (record->zi_cmd == ZINJECT_DELAY_EXPORT && !has_spa)
return (SET_ERROR(ENOENT));
spa = NULL;
} else {
/*
* spa_inject_ref() will add an injection reference,
* which will prevent the pool from being removed
* from the namespace while still allowing it to be
* unloaded.
*/ */
if ((spa = spa_inject_addref(name)) == NULL) if ((spa = spa_inject_addref(name)) == NULL)
return (SET_ERROR(ENOENT)); return (SET_ERROR(ENOENT));
}
handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP); handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP);
handler->zi_spa = spa; /* note: can be NULL */
handler->zi_spa = spa;
handler->zi_record = *record; handler->zi_record = *record;
if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) { if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) {
@ -832,6 +939,11 @@ zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record)
handler->zi_next_lane = 0; handler->zi_next_lane = 0;
} }
if (handler->zi_spa == NULL)
handler->zi_spa_name = spa_strdup(name);
else
handler->zi_spa_name = NULL;
rw_enter(&inject_lock, RW_WRITER); rw_enter(&inject_lock, RW_WRITER);
/* /*
@ -891,7 +1003,11 @@ zio_inject_list_next(int *id, char *name, size_t buflen,
if (handler) { if (handler) {
*record = handler->zi_record; *record = handler->zi_record;
*id = handler->zi_id; *id = handler->zi_id;
ASSERT(handler->zi_spa || handler->zi_spa_name);
if (handler->zi_spa != NULL)
(void) strlcpy(name, spa_name(handler->zi_spa), buflen); (void) strlcpy(name, spa_name(handler->zi_spa), buflen);
else
(void) strlcpy(name, handler->zi_spa_name, buflen);
ret = 0; ret = 0;
} else { } else {
ret = SET_ERROR(ENOENT); ret = SET_ERROR(ENOENT);
@ -941,6 +1057,10 @@ zio_clear_fault(int id)
ASSERT3P(handler->zi_lanes, ==, NULL); ASSERT3P(handler->zi_lanes, ==, NULL);
} }
if (handler->zi_spa_name != NULL)
spa_strfree(handler->zi_spa_name);
if (handler->zi_spa != NULL)
spa_inject_delref(handler->zi_spa); spa_inject_delref(handler->zi_spa);
kmem_free(handler, sizeof (inject_handler_t)); kmem_free(handler, sizeof (inject_handler_t));
atomic_dec_32(&zio_injection_enabled); atomic_dec_32(&zio_injection_enabled);

View File

@ -466,7 +466,8 @@ tests = ['zpool_import_001_pos', 'zpool_import_002_pos',
'import_paths_changed', 'import_paths_changed',
'import_rewind_config_changed', 'import_rewind_config_changed',
'import_rewind_device_replaced', 'import_rewind_device_replaced',
'zpool_import_status'] 'zpool_import_status', 'zpool_import_parallel_pos',
'zpool_import_parallel_neg', 'zpool_import_parallel_admin']
tags = ['functional', 'cli_root', 'zpool_import'] tags = ['functional', 'cli_root', 'zpool_import']
timeout = 1200 timeout = 1200

View File

@ -1144,6 +1144,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zpool_import/zpool_import_missing_003_pos.ksh \ functional/cli_root/zpool_import/zpool_import_missing_003_pos.ksh \
functional/cli_root/zpool_import/zpool_import_rename_001_pos.ksh \ functional/cli_root/zpool_import/zpool_import_rename_001_pos.ksh \
functional/cli_root/zpool_import/zpool_import_status.ksh \ functional/cli_root/zpool_import/zpool_import_status.ksh \
functional/cli_root/zpool_import/zpool_import_parallel_admin.ksh \
functional/cli_root/zpool_import/zpool_import_parallel_neg.ksh \
functional/cli_root/zpool_import/zpool_import_parallel_pos.ksh \
functional/cli_root/zpool_initialize/cleanup.ksh \ functional/cli_root/zpool_initialize/cleanup.ksh \
functional/cli_root/zpool_initialize/zpool_initialize_attach_detach_add_remove.ksh \ functional/cli_root/zpool_initialize/zpool_initialize_attach_detach_add_remove.ksh \
functional/cli_root/zpool_initialize/zpool_initialize_fault_export_import_online.ksh \ functional/cli_root/zpool_initialize/zpool_initialize_fault_export_import_online.ksh \

View File

@ -0,0 +1,165 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#
# Copyright (c) 2023 Klara, Inc.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg
. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib
#
# DESCRIPTION:
# Verify that admin commands to a different pool are not blocked by import
#
# STRATEGY:
# 1. Create 2 pools
# 2. Export one of the pools
# 3. Import the pool with an injected delay
# 4. Execute some admin commands against both pools
# 5. Verify that the admin commands to the non-imported pool don't stall
#
verify_runnable "global"
function cleanup
{
	typeset pool

	# Clear every injection handler, then destroy both test pools.
	zinject -c all
	for pool in $TESTPOOL1 $TESTPOOL2; do
		destroy_pool $pool
	done
}
#
# Import pool $2 using device directory $1 and time the operation.
# On success the elapsed seconds are written to ${DEVICE_DIR}/<pool>-import;
# on failure only a message is printed and no file is written.
#
function pool_import
{
	typeset searchdir=$1
	typeset poolname=$2

	SECONDS=0
	if errmsg=$(zpool import -d $searchdir -f $poolname 2>&1 > /dev/null)
	then
		echo ${poolname}: imported in $SECONDS secs
		echo $SECONDS > ${DEVICE_DIR}/${poolname}-import
	else
		echo ${poolname}: import failed $errmsg in $SECONDS secs
	fi
}
#
# Add device $2 as a vdev of type $3 to pool $1 and time the operation.
# On success the elapsed seconds are written to ${DEVICE_DIR}/<pool>-add;
# on failure only a message is printed and no file is written.
#
function pool_add_device
{
	typeset poolname=$1
	typeset vdev=$2
	typeset vdevtype=$3

	SECONDS=0
	if errmsg=$(zpool add $poolname $vdevtype $vdev 2>&1 > /dev/null)
	then
		echo ${poolname}: added $vdevtype vdev in $SECONDS secs
		echo $SECONDS > ${DEVICE_DIR}/${poolname}-add
	else
		echo ${poolname}: add $vdevtype vdev failed ${errmsg}, in $SECONDS secs
	fi
}
#
# Run "zpool $1 $2" (e.g. status or list) and time the operation.
# On success the elapsed seconds are written to
# ${DEVICE_DIR}/<pool>-<subcommand>; on failure only a message is printed.
#
function pool_stats
{
	typeset subcmd=$1
	typeset poolname=$2

	SECONDS=0
	if errmsg=$(zpool $subcmd $poolname 2>&1 > /dev/null)
	then
		echo ${poolname}: $subcmd in $SECONDS secs
		echo $SECONDS > ${DEVICE_DIR}/${poolname}-${subcmd}
	else
		echo ${poolname}: $subcmd failed ${errmsg}, in $SECONDS secs
	fi
}
#
# Create pool $1 on device $2 and time the operation.
# On success the elapsed seconds are written to ${DEVICE_DIR}/<pool>-create;
# on failure only a message is printed and no file is written.
#
function pool_create
{
	typeset poolname=$1
	typeset vdev=$2

	SECONDS=0
	if errmsg=$(zpool create $poolname $vdev 2>&1 > /dev/null)
	then
		echo ${poolname}: created in $SECONDS secs
		echo $SECONDS > ${DEVICE_DIR}/${poolname}-create
	else
		echo ${poolname}: create failed ${errmsg}, in $SECONDS secs
	fi
}
log_assert "Simple admin commands to different pool not blocked by import"
log_onexit cleanup
#
# create two pools and export one
#
log_must zpool create $TESTPOOL1 $VDEV0
log_must zpool export $TESTPOOL1
log_must zpool create $TESTPOOL2 $VDEV1
#
# import pool asynchronously with an injected 10 second delay
#
log_must zinject -P import -s 10 $TESTPOOL1
pool_import $DEVICE_DIR $TESTPOOL1 &
# give the background import a moment to start before issuing commands
sleep 2
#
# run some admin commands on the pools while the import is in progress
#
# Each helper runs in the background and, on success only, records its
# elapsed seconds in a per-pool file under $DEVICE_DIR.
pool_add_device $TESTPOOL1 $VDEV2 "log" &
pool_add_device $TESTPOOL2 $VDEV3 "cache" &
pool_stats "status" $TESTPOOL1 &
pool_stats "status" $TESTPOOL2 &
pool_stats "list" $TESTPOOL1 &
pool_stats "list" $TESTPOOL2 &
pool_create $TESTPOOL1 $VDEV4 &
# wait for the import and every admin command to finish
wait
log_must zpool sync $TESTPOOL1 $TESTPOOL2
zpool history $TESTPOOL1
zpool history $TESTPOOL2
# the import itself must have taken more than 5 seconds
log_must test "5" -lt $(<${DEVICE_DIR}/${TESTPOOL1}-import)
#
# verify that commands to second pool did not wait for import to finish
#
# each recorded time for the second pool must be under 2 seconds
log_must test "2" -gt $(<${DEVICE_DIR}/${TESTPOOL2}-status)
log_must test "2" -gt $(<${DEVICE_DIR}/${TESTPOOL2}-list)
log_must test "2" -gt $(<${DEVICE_DIR}/${TESTPOOL2}-add)
# pool_create only writes this file on success, which is not expected here
[[ -e ${DEVICE_DIR}/${TESTPOOL1}-create ]] && log_fail "unexpected pool create"
log_pass "Simple admin commands to different pool not blocked by import"

View File

@ -0,0 +1,130 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#
# Copyright (c) 2023 Klara, Inc.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg
. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib
#
# DESCRIPTION:
# Verify that pool imports by same name only have one winner
#
# STRATEGY:
# 1. Create 4 single disk pools with the same name
# 2. Generate some ZIL records (for a longer import)
# 3. Export the pools
# 4. Import the pools in parallel
# 5. Repeat using matching guids
#
verify_runnable "global"
# All four pools share this one name; only their device directories differ.
POOLNAME="import_pool"
DEV_DIR_PREFIX="$DEVICE_DIR/$POOLNAME"
# size in bytes of each backing file (512 * 1024 * 1024)
VDEVSIZE=$((512 * 1024 * 1024))
log_assert "parallel pool imports by same name only have one winner"
# each pool has its own device directory
for i in {0..3}; do
log_must mkdir -p ${DEV_DIR_PREFIX}$i
log_must truncate -s $VDEVSIZE ${DEV_DIR_PREFIX}$i/${DEVICE_FILE}$i
done
function cleanup
{
	typeset tunable

	# Clear every injection handler before touching the pool.
	zinject -c all

	# Put both tunables back to 0.
	for tunable in KEEP_LOG_SPACEMAPS_AT_EXPORT METASLAB_DEBUG_LOAD; do
		log_must set_tunable64 $tunable 0
	done

	# Destroy the pool and remove every per-pool device directory.
	destroy_pool $POOLNAME
	log_must rm -rf $DEV_DIR_PREFIX*
}
# Register cleanup first: it sets both tunables below back to 0 on exit.
log_onexit cleanup
log_must set_tunable64 KEEP_LOG_SPACEMAPS_AT_EXPORT 1
log_must set_tunable64 METASLAB_DEBUG_LOAD 1
#
# Import pool $2 (optionally renamed to $3) from device directory $1
# without mounting (-N).  The outcome is recorded as a marker file in the
# device directory: "imported" on success, "failed" when the error output
# contains "cannot import".  Any other failure leaves no marker.
#
function import_pool
{
	typeset searchdir=$1
	typeset poolname=$2
	typeset newname=$3

	SECONDS=0
	if errmsg=$(zpool import -N -d $searchdir -f $poolname $newname 2>&1 > /dev/null)
	then
		touch $searchdir/imported
		echo "imported $poolname in $SECONDS secs"
		return
	fi

	case $errmsg in
	*"cannot import"*)
		echo "pool import failed: $errmsg, waited $SECONDS secs"
		touch $searchdir/failed
		;;
	esac
}
#
# create four exported pools with the same name
#
# Each pool is exported before the next is created, so the shared name is
# free again for the following iteration.
for i in {0..3}; do
log_must zpool create $POOLNAME ${DEV_DIR_PREFIX}$i/${DEVICE_FILE}$i
log_must zpool export $POOLNAME
done
# inject a 10 second import delay for this pool name
log_must zinject -P import -s 10 $POOLNAME
#
# import the pools in parallel, expecting only one winner
#
for i in {0..3}; do
import_pool ${DEV_DIR_PREFIX}$i $POOLNAME &
done
wait
# check the result of background imports
# import_pool leaves an "imported" or "failed" marker file in each directory
typeset num_imports=0
typeset num_cannot=0
for i in {0..3}; do
if [[ -f ${DEV_DIR_PREFIX}$i/imported ]]; then
((num_imports += 1))
fi
if [[ -f ${DEV_DIR_PREFIX}$i/failed ]]; then
((num_cannot += 1))
# NOTE(review): loser is assigned here but not read later in this script
loser=$i
fi
done
# exactly one import must succeed and the other three must fail
[[ $num_imports -eq "1" ]] || log_fail "expecting an import"
[[ $num_cannot -eq "3" ]] || \
log_fail "expecting 3 pool exists errors, found $num_cannot"
log_note "$num_imports imported and $num_cannot failed (expected)"
log_pass "parallel pool imports by same name only have one winner"

View File

@ -0,0 +1,137 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#
# Copyright (c) 2023 Klara, Inc.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg
. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib
# test uses 8 vdevs
export MAX_NUM=8
#
# DESCRIPTION:
# Verify that pool imports can occur in parallel
#
# STRATEGY:
# 1. Create 8 pools
# 2. Generate some ZIL records
# 3. Export the pools
# 4. Import half of the pools synchronously to baseline sequential cost
# 5. Import the other half asynchronously to demonstrate parallel savings
# 6. Export 4 pools
# 7. Test zpool import -a
#
verify_runnable "global"
#
# override the minimum sized vdevs
#
# size in bytes passed to increase_device_sizes (512 * 1024 * 1024)
VDEVSIZE=$((512 * 1024 * 1024))
increase_device_sizes $VDEVSIZE
# pools are named $POOLNAME-<index>
POOLNAME="import_pool"
function cleanup
{
	typeset tunable
	typeset -i idx=0

	# Clear every injection handler before touching the pools.
	zinject -c all

	# Put both tunables back to 0.
	for tunable in KEEP_LOG_SPACEMAPS_AT_EXPORT METASLAB_DEBUG_LOAD; do
		log_must set_tunable64 $tunable 0
	done

	# Destroy pools 0 .. MAX_NUM-1.
	while ((idx < MAX_NUM)); do
		destroy_pool $POOLNAME-$idx
		((idx += 1))
	done

	# reset the devices
	increase_device_sizes 0
	increase_device_sizes $FILE_SIZE
}
log_assert "Pool imports can occur in parallel"
# Register cleanup first: it sets both tunables below back to 0 on exit.
log_onexit cleanup
log_must set_tunable64 KEEP_LOG_SPACEMAPS_AT_EXPORT 1
log_must set_tunable64 METASLAB_DEBUG_LOAD 1
#
# create some exported pools with import delay injectors
#
# every pool gets its own 12 second import delay handler
for i in {0..$(($MAX_NUM - 1))}; do
log_must zpool create $POOLNAME-$i $DEVICE_DIR/${DEVICE_FILE}$i
log_must zpool export $POOLNAME-$i
log_must zinject -P import -s 12 $POOLNAME-$i
done
wait
#
# import half of the pools synchronously
#
# SECONDS is reset so it measures only this loop; the result is the baseline
SECONDS=0
for i in {0..3}; do
log_must zpool import -d $DEVICE_DIR -f $POOLNAME-$i
done
sequential_time=$SECONDS
log_note "sequentially imported 4 pools in $sequential_time seconds"
#
# import half of the pools in parallel
#
SECONDS=0
for i in {4..7}; do
log_must zpool import -d $DEVICE_DIR -f $POOLNAME-$i &
done
wait
parallel_time=$SECONDS
log_note "asyncronously imported 4 pools in $parallel_time seconds"
# the parallel imports must take less than a third of the sequential time
log_must test $parallel_time -lt $(($sequential_time / 3))
#
# export pools with import delay injectors
#
# pools 4-7 are exported again and each gets a new 12 second delay handler
for i in {4..7}; do
log_must zpool export $POOLNAME-$i
log_must zinject -P import -s 12 $POOLNAME-$i
done
wait
#
# now test zpool import -a
#
SECONDS=0
log_must zpool import -a -d $DEVICE_DIR -f
parallel_time=$SECONDS
log_note "asyncronously imported 4 pools in $parallel_time seconds"
# same threshold as above, against the sequential baseline
log_must test $parallel_time -lt $(($sequential_time / 3))
log_pass "Pool imports occur in parallel"