commit 483b71247b
@@ -37,7 +37,7 @@ import re

 bhdr = ["pool", "objset", "object", "level", "blkid", "offset", "dbsize"]
 bxhdr = ["pool", "objset", "object", "level", "blkid", "offset", "dbsize",
-        "meta", "state", "dbholds", "dbc", "list", "atype", "flags",
+        "usize", "meta", "state", "dbholds", "dbc", "list", "atype", "flags",
         "count", "asize", "access", "mru", "gmru", "mfu", "gmfu", "l2",
         "l2_dattr", "l2_asize", "l2_comp", "aholds", "dtype", "btype",
         "data_bs", "meta_bs", "bsize", "lvls", "dholds", "blocks", "dsize"]
@@ -47,17 +47,17 @@ dhdr = ["pool", "objset", "object", "dtype", "cached"]
 dxhdr = ["pool", "objset", "object", "dtype", "btype", "data_bs", "meta_bs",
         "bsize", "lvls", "dholds", "blocks", "dsize", "cached", "direct",
         "indirect", "bonus", "spill"]
-dincompat = ["level", "blkid", "offset", "dbsize", "meta", "state", "dbholds",
-             "dbc", "list", "atype", "flags", "count", "asize", "access",
-             "mru", "gmru", "mfu", "gmfu", "l2", "l2_dattr", "l2_asize",
-             "l2_comp", "aholds"]
+dincompat = ["level", "blkid", "offset", "dbsize", "usize", "meta", "state",
+             "dbholds", "dbc", "list", "atype", "flags", "count", "asize",
+             "access", "mru", "gmru", "mfu", "gmfu", "l2", "l2_dattr",
+             "l2_asize", "l2_comp", "aholds"]

 thdr = ["pool", "objset", "dtype", "cached"]
 txhdr = ["pool", "objset", "dtype", "cached", "direct", "indirect",
         "bonus", "spill"]
-tincompat = ["object", "level", "blkid", "offset", "dbsize", "meta", "state",
-             "dbc", "dbholds", "list", "atype", "flags", "count", "asize",
-             "access", "mru", "gmru", "mfu", "gmfu", "l2", "l2_dattr",
+tincompat = ["object", "level", "blkid", "offset", "dbsize", "usize", "meta",
+             "state", "dbc", "dbholds", "list", "atype", "flags", "count",
+             "asize", "access", "mru", "gmru", "mfu", "gmfu", "l2", "l2_dattr",
              "l2_asize", "l2_comp", "aholds", "btype", "data_bs", "meta_bs",
              "bsize", "lvls", "dholds", "blocks", "dsize"]

@@ -70,6 +70,7 @@ cols = {
     "blkid": [8, -1, "block number of buffer"],
     "offset": [12, 1024, "offset in object of buffer"],
     "dbsize": [7, 1024, "size of buffer"],
+    "usize": [7, 1024, "size of attached user data"],
     "meta": [4, -1, "is this buffer metadata?"],
     "state": [5, -1, "state of buffer (read, cached, etc)"],
     "dbholds": [7, 1000, "number of holds on buffer"],
@@ -399,6 +400,7 @@ def update_dict(d, k, line, labels):
     key = line[labels[k]]

     dbsize = int(line[labels['dbsize']])
+    usize = int(line[labels['usize']])
     blkid = int(line[labels['blkid']])
     level = int(line[labels['level']])

@@ -416,7 +418,7 @@ def update_dict(d, k, line, labels):
         d[pool][objset][key]['indirect'] = 0
         d[pool][objset][key]['spill'] = 0

-    d[pool][objset][key]['cached'] += dbsize
+    d[pool][objset][key]['cached'] += dbsize + usize

     if blkid == -1:
         d[pool][objset][key]['bonus'] += dbsize

@@ -22,6 +22,7 @@
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
  * Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2023-2024, Klara Inc.
  */

 /*
@@ -276,6 +277,11 @@ usage(void)
     "\t\tcreate 3 lanes on the device; one lane with a latency\n"
     "\t\tof 10 ms and two lanes with a 25 ms latency.\n"
     "\n"
+    "\tzinject -P import|export -s <seconds> pool\n"
+    "\t\tAdd an artificial delay to a future pool import or export,\n"
+    "\t\tsuch that the operation takes a minimum of supplied seconds\n"
+    "\t\tto complete.\n"
+    "\n"
     "\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
     "\t\tCause the pool to stop writing blocks yet not\n"
     "\t\treport errors for a duration. Simulates buggy hardware\n"
@@ -358,8 +364,10 @@ print_data_handler(int id, const char *pool, zinject_record_t *record,
 {
     int *count = data;

-    if (record->zi_guid != 0 || record->zi_func[0] != '\0')
+    if (record->zi_guid != 0 || record->zi_func[0] != '\0' ||
+        record->zi_duration != 0) {
         return (0);
+    }

     if (*count == 0) {
         (void) printf("%3s %-15s %-6s %-6s %-8s %3s %-4s "
@@ -462,6 +470,33 @@ print_panic_handler(int id, const char *pool, zinject_record_t *record,
     return (0);
 }

+static int
+print_pool_delay_handler(int id, const char *pool, zinject_record_t *record,
+    void *data)
+{
+    int *count = data;
+
+    if (record->zi_cmd != ZINJECT_DELAY_IMPORT &&
+        record->zi_cmd != ZINJECT_DELAY_EXPORT) {
+        return (0);
+    }
+
+    if (*count == 0) {
+        (void) printf("%3s %-19s %-11s %s\n",
+            "ID", "POOL", "DELAY (sec)", "COMMAND");
+        (void) printf("--- ------------------- -----------"
+            " -------\n");
+    }
+
+    *count += 1;
+
+    (void) printf("%3d %-19s %-11llu %s\n",
+        id, pool, (u_longlong_t)record->zi_duration,
+        record->zi_cmd == ZINJECT_DELAY_IMPORT ? "import" : "export");
+
+    return (0);
+}
+
 /*
  * Print all registered error handlers. Returns the number of handlers
  * registered.
@@ -492,6 +527,13 @@ print_all_handlers(void)
         count = 0;
     }

+    (void) iter_handlers(print_pool_delay_handler, &count);
+    if (count > 0) {
+        total += count;
+        (void) printf("\n");
+        count = 0;
+    }
+
     (void) iter_handlers(print_panic_handler, &count);

     return (count + total);
@@ -564,9 +606,27 @@ register_handler(const char *pool, int flags, zinject_record_t *record,
     zc.zc_guid = flags;

     if (zfs_ioctl(g_zfs, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
-        (void) fprintf(stderr, "failed to add handler: %s\n",
-            errno == EDOM ? "block level exceeds max level of object" :
-            strerror(errno));
+        const char *errmsg = strerror(errno);
+
+        switch (errno) {
+        case EDOM:
+            errmsg = "block level exceeds max level of object";
+            break;
+        case EEXIST:
+            if (record->zi_cmd == ZINJECT_DELAY_IMPORT)
+                errmsg = "pool already imported";
+            if (record->zi_cmd == ZINJECT_DELAY_EXPORT)
+                errmsg = "a handler already exists";
+            break;
+        case ENOENT:
+            /* import delay injector running on older zfs module */
+            if (record->zi_cmd == ZINJECT_DELAY_IMPORT)
+                errmsg = "import delay injector not supported";
+            break;
+        default:
+            break;
+        }
+        (void) fprintf(stderr, "failed to add handler: %s\n", errmsg);
         return (1);
     }

@@ -591,6 +651,9 @@ register_handler(const char *pool, int flags, zinject_record_t *record,
     } else if (record->zi_duration < 0) {
         (void) printf(" txgs: %lld \n",
             (u_longlong_t)-record->zi_duration);
+    } else if (record->zi_timer > 0) {
+        (void) printf(" timer: %lld ms\n",
+            (u_longlong_t)NSEC2MSEC(record->zi_timer));
     } else {
         (void) printf("objset: %llu\n",
             (u_longlong_t)record->zi_objset);
@@ -789,7 +852,7 @@ main(int argc, char **argv)
     }

     while ((c = getopt(argc, argv,
-        ":aA:b:C:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:")) != -1) {
+        ":aA:b:C:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:P:")) != -1) {
         switch (c) {
         case 'a':
             flags |= ZINJECT_FLUSH_ARC;
@@ -919,6 +982,19 @@ main(int argc, char **argv)
                 sizeof (record.zi_func));
             record.zi_cmd = ZINJECT_PANIC;
             break;
+        case 'P':
+            if (strcasecmp(optarg, "import") == 0) {
+                record.zi_cmd = ZINJECT_DELAY_IMPORT;
+            } else if (strcasecmp(optarg, "export") == 0) {
+                record.zi_cmd = ZINJECT_DELAY_EXPORT;
+            } else {
+                (void) fprintf(stderr, "invalid command '%s': "
+                    "must be 'import' or 'export'\n", optarg);
+                usage();
+                libzfs_fini(g_zfs);
+                return (1);
+            }
+            break;
         case 'q':
             quiet = 1;
             break;
@@ -998,7 +1074,7 @@ main(int argc, char **argv)
     argc -= optind;
     argv += optind;

-    if (record.zi_duration != 0)
+    if (record.zi_duration != 0 && record.zi_cmd == 0)
         record.zi_cmd = ZINJECT_IGNORED_WRITES;

     if (cancel != NULL) {
@@ -1128,8 +1204,8 @@ main(int argc, char **argv)
         if (raw != NULL || range != NULL || type != TYPE_INVAL ||
             level != 0 || device != NULL || record.zi_freq > 0 ||
             dvas != 0) {
-            (void) fprintf(stderr, "panic (-p) incompatible with "
-                "other options\n");
+            (void) fprintf(stderr, "%s incompatible with other "
+                "options\n", "import|export delay (-P)");
             usage();
             libzfs_fini(g_zfs);
             return (2);
@@ -1147,6 +1223,28 @@ main(int argc, char **argv)
         if (argv[1] != NULL)
             record.zi_type = atoi(argv[1]);
         dataset[0] = '\0';
+    } else if (record.zi_cmd == ZINJECT_DELAY_IMPORT ||
+        record.zi_cmd == ZINJECT_DELAY_EXPORT) {
+        if (raw != NULL || range != NULL || type != TYPE_INVAL ||
+            level != 0 || device != NULL || record.zi_freq > 0 ||
+            dvas != 0) {
+            (void) fprintf(stderr, "%s incompatible with other "
+                "options\n", "import|export delay (-P)");
+            usage();
+            libzfs_fini(g_zfs);
+            return (2);
+        }
+
+        if (argc != 1 || record.zi_duration <= 0) {
+            (void) fprintf(stderr, "import|export delay (-P) "
+                "injection requires a duration (-s) and a single "
+                "pool name\n");
+            usage();
+            libzfs_fini(g_zfs);
+            return (2);
+        }
+
+        (void) strlcpy(pool, argv[0], sizeof (pool));
     } else if (record.zi_cmd == ZINJECT_IGNORED_WRITES) {
         if (raw != NULL || range != NULL || type != TYPE_INVAL ||
             level != 0 || record.zi_freq > 0 || dvas != 0) {

@@ -50,6 +50,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <strings.h>
+#include <thread_pool.h>
 #include <time.h>
 #include <unistd.h>
 #include <pwd.h>
@@ -1848,10 +1849,19 @@ zpool_do_destroy(int argc, char **argv)
 }

 typedef struct export_cbdata {
+    tpool_t *tpool;
+    pthread_mutex_t mnttab_lock;
     boolean_t force;
     boolean_t hardforce;
+    int retval;
 } export_cbdata_t;

+
+typedef struct {
+    char *aea_poolname;
+    export_cbdata_t *aea_cbdata;
+} async_export_args_t;
+
 /*
  * Export one pool
  */
@@ -1860,11 +1870,20 @@ zpool_export_one(zpool_handle_t *zhp, void *data)
 {
     export_cbdata_t *cb = data;

-    if (zpool_disable_datasets(zhp, cb->force, cb->hardforce) != 0)
-        return (1);
+    /*
+     * zpool_disable_datasets() is not thread-safe for mnttab access.
+     * So we serialize access here for 'zpool export -a' parallel case.
+     */
+    if (cb->tpool != NULL)
+        pthread_mutex_lock(&cb->mnttab_lock);

-    /* The history must be logged as part of the export */
-    log_history = B_FALSE;
+    int retval = zpool_disable_datasets(zhp, cb->force, cb->hardforce);
+
+    if (cb->tpool != NULL)
+        pthread_mutex_unlock(&cb->mnttab_lock);
+
+    if (retval)
+        return (1);

     if (cb->hardforce) {
         if (zpool_export_force(zhp, history_str) != 0)
@@ -1876,6 +1895,48 @@ zpool_export_one(zpool_handle_t *zhp, void *data)
     return (0);
 }

+/*
+ * Asynchronous export request
+ */
+static void
+zpool_export_task(void *arg)
+{
+    async_export_args_t *aea = arg;
+
+    zpool_handle_t *zhp = zpool_open(g_zfs, aea->aea_poolname);
+    if (zhp != NULL) {
+        int ret = zpool_export_one(zhp, aea->aea_cbdata);
+        if (ret != 0)
+            aea->aea_cbdata->retval = ret;
+        zpool_close(zhp);
+    } else {
+        aea->aea_cbdata->retval = 1;
+    }
+
+    free(aea->aea_poolname);
+    free(aea);
+}
+
+/*
+ * Process an export request in parallel
+ */
+static int
+zpool_export_one_async(zpool_handle_t *zhp, void *data)
+{
+    tpool_t *tpool = ((export_cbdata_t *)data)->tpool;
+    async_export_args_t *aea = safe_malloc(sizeof (async_export_args_t));
+
+    /* save pool name since zhp will go out of scope */
+    aea->aea_poolname = strdup(zpool_get_name(zhp));
+    aea->aea_cbdata = data;
+
+    /* ship off actual export to another thread */
+    if (tpool_dispatch(tpool, zpool_export_task, (void *)aea) != 0)
+        return (errno);    /* unlikely */
+    else
+        return (0);
+}
+
 /*
  * zpool export [-f] <pool> ...
  *
@@ -1919,17 +1980,33 @@ zpool_do_export(int argc, char **argv)

     cb.force = force;
     cb.hardforce = hardforce;
+    cb.tpool = NULL;
+    cb.retval = 0;
     argc -= optind;
     argv += optind;

+    /* The history will be logged as part of the export itself */
+    log_history = B_FALSE;
+
     if (do_all) {
         if (argc != 0) {
             (void) fprintf(stderr, gettext("too many arguments\n"));
             usage(B_FALSE);
         }

-        return (for_each_pool(argc, argv, B_TRUE, NULL,
-            B_FALSE, zpool_export_one, &cb));
+        cb.tpool = tpool_create(1, 5 * sysconf(_SC_NPROCESSORS_ONLN),
+            0, NULL);
+        pthread_mutex_init(&cb.mnttab_lock, NULL);
+
+        /* Asynchronously call zpool_export_one using thread pool */
+        ret = for_each_pool(argc, argv, B_TRUE, NULL, B_FALSE,
+            zpool_export_one_async, &cb);
+
+        tpool_wait(cb.tpool);
+        tpool_destroy(cb.tpool);
+        (void) pthread_mutex_destroy(&cb.mnttab_lock);
+
+        return (ret | cb.retval);
     }

     /* check arguments */
@@ -3068,12 +3145,21 @@ zfs_force_import_required(nvlist_t *config)
     nvlist_t *nvinfo;

     state = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE);
-    (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
+    nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO);
+
+    /*
+     * The hostid on LOAD_INFO comes from the MOS label via
+     * spa_tryimport(). If it's not there then we're likely talking to an
+     * older kernel, so use the top one, which will be from the label
+     * discovered in zpool_find_import(), or if a cachefile is in use, the
+     * local hostid.
+     */
+    if (nvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_HOSTID, &hostid) != 0)
+        nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);

     if (state != POOL_STATE_EXPORTED && hostid != get_system_hostid())
         return (B_TRUE);

-    nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO);
     if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_STATE)) {
         mmp_state_t mmp_state = fnvlist_lookup_uint64(nvinfo,
             ZPOOL_CONFIG_MMP_STATE);
@@ -3143,7 +3229,10 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
     uint64_t timestamp = 0;
     uint64_t hostid = 0;

-    if (nvlist_exists(config, ZPOOL_CONFIG_HOSTNAME))
+    if (nvlist_exists(nvinfo, ZPOOL_CONFIG_HOSTNAME))
+        hostname = fnvlist_lookup_string(nvinfo,
+            ZPOOL_CONFIG_HOSTNAME);
+    else if (nvlist_exists(config, ZPOOL_CONFIG_HOSTNAME))
         hostname = fnvlist_lookup_string(config,
             ZPOOL_CONFIG_HOSTNAME);

@@ -3151,7 +3240,10 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
         timestamp = fnvlist_lookup_uint64(config,
             ZPOOL_CONFIG_TIMESTAMP);

-    if (nvlist_exists(config, ZPOOL_CONFIG_HOSTID))
+    if (nvlist_exists(nvinfo, ZPOOL_CONFIG_HOSTID))
+        hostid = fnvlist_lookup_uint64(nvinfo,
+            ZPOOL_CONFIG_HOSTID);
+    else if (nvlist_exists(config, ZPOOL_CONFIG_HOSTID))
         hostid = fnvlist_lookup_uint64(config,
             ZPOOL_CONFIG_HOSTID);

@@ -3196,15 +3288,40 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
     return (ret);
 }

+typedef struct import_parameters {
+    nvlist_t *ip_config;
+    const char *ip_mntopts;
+    nvlist_t *ip_props;
+    int ip_flags;
+    int *ip_err;
+} import_parameters_t;
+
+static void
+do_import_task(void *arg)
+{
+    import_parameters_t *ip = arg;
+    *ip->ip_err |= do_import(ip->ip_config, NULL, ip->ip_mntopts,
+        ip->ip_props, ip->ip_flags);
+    free(ip);
+}
+
+
 static int
 import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,
-    char *orig_name, char *new_name,
-    boolean_t do_destroyed, boolean_t pool_specified, boolean_t do_all,
-    importargs_t *import)
+    char *orig_name, char *new_name, importargs_t *import)
 {
     nvlist_t *config = NULL;
     nvlist_t *found_config = NULL;
     uint64_t pool_state;
+    boolean_t pool_specified = (import->poolname != NULL ||
+        import->guid != 0);
+
+
+    tpool_t *tp = NULL;
+    if (import->do_all) {
+        tp = tpool_create(1, 5 * sysconf(_SC_NPROCESSORS_ONLN),
+            0, NULL);
+    }

     /*
      * At this point we have a list of import candidate configs. Even if
@@ -3221,9 +3338,11 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,

         verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
             &pool_state) == 0);
-        if (!do_destroyed && pool_state == POOL_STATE_DESTROYED)
+        if (!import->do_destroyed &&
+            pool_state == POOL_STATE_DESTROYED)
             continue;
-        if (do_destroyed && pool_state != POOL_STATE_DESTROYED)
+        if (import->do_destroyed &&
+            pool_state != POOL_STATE_DESTROYED)
             continue;

         verify(nvlist_add_nvlist(config, ZPOOL_LOAD_POLICY,
@@ -3232,12 +3351,21 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,
         if (!pool_specified) {
             if (first)
                 first = B_FALSE;
-            else if (!do_all)
+            else if (!import->do_all)
                 (void) printf("\n");

-            if (do_all) {
-                err |= do_import(config, NULL, mntopts,
-                    props, flags);
+            if (import->do_all) {
+                import_parameters_t *ip = safe_malloc(
+                    sizeof (import_parameters_t));
+
+                ip->ip_config = config;
+                ip->ip_mntopts = mntopts;
+                ip->ip_props = props;
+                ip->ip_flags = flags;
+                ip->ip_err = &err;
+
+                (void) tpool_dispatch(tp, do_import_task,
+                    (void *)ip);
             } else {
                 /*
                  * If we're importing from cachefile, then
@@ -3285,6 +3413,10 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,
             found_config = config;
         }
     }
+    if (import->do_all) {
+        tpool_wait(tp);
+        tpool_destroy(tp);
+    }

     /*
      * If we were searching for a specific pool, verify that we found a
@@ -3514,7 +3646,6 @@ zpool_do_import(int argc, char **argv)
     boolean_t xtreme_rewind = B_FALSE;
     boolean_t do_scan = B_FALSE;
    boolean_t pool_exists = B_FALSE;
-    boolean_t pool_specified = B_FALSE;
     uint64_t txg = -1ULL;
     char *cachefile = NULL;
     importargs_t idata = { 0 };
@@ -3722,7 +3853,6 @@ zpool_do_import(int argc, char **argv)
         searchname = argv[0];
         searchguid = 0;
     }
-    pool_specified = B_TRUE;

     /*
      * User specified a name or guid. Ensure it's unique.
@@ -3763,6 +3893,8 @@ zpool_do_import(int argc, char **argv)
     idata.cachefile = cachefile;
     idata.scan = do_scan;
     idata.policy = policy;
+    idata.do_destroyed = do_destroyed;
+    idata.do_all = do_all;

     pools = zpool_search_import(g_zfs, &idata, &libzfs_config_ops);

@@ -3802,9 +3934,7 @@ zpool_do_import(int argc, char **argv)
     }

     err = import_pools(pools, props, mntopts, flags,
-        argc >= 1 ? argv[0] : NULL,
-        argc >= 2 ? argv[1] : NULL,
-        do_destroyed, pool_specified, do_all, &idata);
+        argc >= 1 ? argv[0] : NULL, argc >= 2 ? argv[1] : NULL, &idata);

     /*
      * If we're using the cachefile and we failed to import, then
@@ -3825,9 +3955,8 @@ zpool_do_import(int argc, char **argv)
         pools = zpool_search_import(g_zfs, &idata, &libzfs_config_ops);

         err = import_pools(pools, props, mntopts, flags,
-            argc >= 1 ? argv[0] : NULL,
-            argc >= 2 ? argv[1] : NULL,
-            do_destroyed, pool_specified, do_all, &idata);
+            argc >= 1 ? argv[0] : NULL, argc >= 2 ? argv[1] : NULL,
+            &idata);
     }

 error:
@@ -8411,7 +8540,7 @@ status_callback(zpool_handle_t *zhp, void *data)
         printf_color(ANSI_BOLD, gettext("action: "));
         printf_color(ANSI_YELLOW, gettext("Make sure the pool's devices"
             " are connected, then reboot your system and\n\timport the "
-            "pool.\n"));
+            "pool or run 'zpool clear' to resume the pool.\n"));
         break;

     case ZPOOL_STATUS_IO_FAILURE_WAIT:

@@ -0,0 +1,17 @@
+AC_DEFUN([ZFS_AC_KERNEL_SRC_MM_PAGE_SIZE], [
+	ZFS_LINUX_TEST_SRC([page_size], [
+		#include <linux/mm.h>
+	],[
+		unsigned long s;
+		s = page_size(NULL);
+	])
+])
+AC_DEFUN([ZFS_AC_KERNEL_MM_PAGE_SIZE], [
+	AC_MSG_CHECKING([whether page_size() is available])
+	ZFS_LINUX_TEST_RESULT([page_size], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_MM_PAGE_SIZE, 1, [page_size() is available])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])

@@ -144,6 +144,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 	ZFS_AC_KERNEL_SRC_KTHREAD
 	ZFS_AC_KERNEL_SRC_ZERO_PAGE
 	ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC
+	ZFS_AC_KERNEL_SRC_MM_PAGE_SIZE

 	AC_MSG_CHECKING([for available kernel interfaces])
 	ZFS_LINUX_TEST_COMPILE_ALL([kabi])
@@ -261,6 +262,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
 	ZFS_AC_KERNEL_KTHREAD
 	ZFS_AC_KERNEL_ZERO_PAGE
 	ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC
+	ZFS_AC_KERNEL_MM_PAGE_SIZE
 	])

 	dnl #

@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2018, 2024 by Delphix. All rights reserved.
  */

 #ifndef _LIBZUTIL_H
@@ -68,6 +68,8 @@ typedef struct importargs {
     boolean_t can_be_active;	/* can the pool be active? */
     boolean_t scan;		/* prefer scanning to libblkid cache */
     nvlist_t *policy;		/* load policy (max txg, rewind, etc.) */
+    boolean_t do_destroyed;
+    boolean_t do_all;
 } importargs_t;

 extern nvlist_t *zpool_search_import(void *, importargs_t *,

@@ -92,6 +92,12 @@
 #define	param_set_max_auto_ashift_args(var) \
     CTLTYPE_U64, &var, 0, param_set_max_auto_ashift, "QU"

+#define	spa_taskq_read_param_set_args(var) \
+    CTLTYPE_STRING, NULL, 0, spa_taskq_read_param, "A"
+
+#define	spa_taskq_write_param_set_args(var) \
+    CTLTYPE_STRING, NULL, 0, spa_taskq_write_param, "A"
+
 #define	fletcher_4_param_set_args(var) \
     CTLTYPE_STRING, NULL, 0, fletcher_4_param, "A"

@@ -91,6 +91,8 @@ extern taskqid_t taskq_dispatch_delay(taskq_t *, task_func_t, void *,
     uint_t, clock_t);
 extern void taskq_dispatch_ent(taskq_t *, task_func_t, void *, uint_t,
     taskq_ent_t *);
+extern boolean_t taskq_try_dispatch_ent(taskq_t *, task_func_t, void *, uint_t,
+    taskq_ent_t *);
 extern int taskq_empty_ent(taskq_ent_t *);
 taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
 taskq_t *taskq_create_instance(const char *, int, int, pri_t, int, int, uint_t);

@@ -36,7 +36,11 @@ struct xucred;
 typedef struct flock flock64_t;
 typedef struct vnode vnode_t;
 typedef struct vattr vattr_t;
+#if __FreeBSD_version < 1400093
 typedef enum vtype vtype_t;
+#else
+#define	vtype_t	__enum_uint8(vtype)
+#endif

 #include <sys/types.h>
 #include <sys/queue.h>

@@ -10,6 +10,7 @@ KERNEL_H = \
 	simd_x86.h \
 	simd_aarch64.h \
 	simd_powerpc.h \
+	mm_compat.h \
 	mod_compat.h \
 	page_compat.h \
 	compiler_compat.h

@@ -0,0 +1,36 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2023, 2024, Klara Inc.
+ */
+
+#ifndef _ZFS_MM_COMPAT_H
+#define _ZFS_MM_COMPAT_H
+
+#include <linux/mm.h>
+
+/* 5.4 introduced page_size(). Older kernels can use a trivial macro instead */
+#ifndef HAVE_MM_PAGE_SIZE
+#define page_size(p) ((unsigned long)(PAGE_SIZE << compound_order(p)))
+#endif
+
+#endif /* _ZFS_MM_COMPAT_H */
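
The fallback macro above mirrors the kernel's own definition: page_size() is the total byte size of a possibly-compound page. A minimal sketch of how that composes, assuming a hypothetical order-2 compound allocation (the helper name page_size_demo is illustrative, not part of the patch):

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/mm_compat.h>

static void
page_size_demo(void)
{
	/* order-2, __GFP_COMP: one head page covering four PAGE_SIZE pages */
	struct page *p = alloc_pages(GFP_KERNEL | __GFP_COMP, 2);

	if (p == NULL)
		return;

	/* both the 5.4+ helper and the compat macro yield PAGE_SIZE << 2 */
	WARN_ON(page_size(p) != (PAGE_SIZE << 2));

	__free_pages(p, 2);
}
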
@@ -146,6 +146,8 @@ extern taskqid_t taskq_dispatch_delay(taskq_t *, task_func_t, void *,
     uint_t, clock_t);
 extern void taskq_dispatch_ent(taskq_t *, task_func_t, void *, uint_t,
     taskq_ent_t *);
+extern boolean_t taskq_try_dispatch_ent(taskq_t *, task_func_t, void *, uint_t,
+    taskq_ent_t *);
 extern int taskq_empty_ent(taskq_ent_t *);
 extern void taskq_init_ent(taskq_ent_t *);
 extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);

@@ -11,6 +11,7 @@ KERNEL_H = \
 	trace_dnode.h \
 	trace_multilist.h \
 	trace_rrwlock.h \
+	trace_spa_taskqs.h \
 	trace_txg.h \
 	trace_vdev.h \
 	trace_zil.h \

@@ -0,0 +1,74 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#if defined(_KERNEL)
+#if defined(HAVE_DECLARE_EVENT_CLASS)
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM zfs
+
+#undef TRACE_SYSTEM_VAR
+#define TRACE_SYSTEM_VAR zfs_spa_taskqs
+
+#if !defined(_TRACE_SPA_TASKQS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SPA_TASKQS_H
+
+#include <linux/tracepoint.h>
+#include <sys/types.h>
+
+/*
+ * Generic support for two argument tracepoints of the form:
+ *
+ * DTRACE_PROBE2(...,
+ *     spa_taskqs_t *stqs, ...,
+ *     taskq_ent_t *ent, ...);
+ */
+/* BEGIN CSTYLED */
+DECLARE_EVENT_CLASS(zfs_spa_taskqs_ent_class,
+	TP_PROTO(spa_taskqs_t *stqs, taskq_ent_t *ent),
+	TP_ARGS(stqs, ent),
+);
+/* END CSTYLED */
+
+/* BEGIN CSTYLED */
+#define DEFINE_SPA_TASKQS_ENT_EVENT(name) \
+DEFINE_EVENT(zfs_spa_taskqs_ent_class, name, \
+	TP_PROTO(spa_taskqs_t *stqs, taskq_ent_t *ent), \
+	TP_ARGS(stqs, ent))
+/* END CSTYLED */
+DEFINE_SPA_TASKQS_ENT_EVENT(zfs_spa_taskqs_ent__dispatch);
+DEFINE_SPA_TASKQS_ENT_EVENT(zfs_spa_taskqs_ent__dispatched);
+
+#endif /* _TRACE_SPA_TASKQS_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH sys
+#define TRACE_INCLUDE_FILE trace_spa_taskqs
+#include <trace/define_trace.h>
+
+#else
+
+DEFINE_DTRACE_PROBE2(spa_taskqs_ent__dispatch);
+DEFINE_DTRACE_PROBE2(spa_taskqs_ent__dispatched);
+
+#endif /* HAVE_DECLARE_EVENT_CLASS */
+#endif /* _KERNEL */
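
This follows the pattern of the other OpenZFS trace headers: with DECLARE_EVENT_CLASS available the event class expands to real Linux tracepoints, otherwise the probe sites compile down to the no-op stubs defined at the bottom. A sketch of what a call site would look like (the variable names stqs and ent here are placeholders):

/* e.g. in the spa taskq dispatch path */
DTRACE_PROBE2(spa_taskqs_ent__dispatch, spa_taskqs_t *, stqs,
    taskq_ent_t *, ent);

/* ... and after the entry has been handed to the underlying taskq ... */
DTRACE_PROBE2(spa_taskqs_ent__dispatched, spa_taskqs_t *, stqs,
    taskq_ent_t *, ent);
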
@@ -44,6 +44,7 @@
 #include <sys/trace_dnode.h>
 #include <sys/trace_multilist.h>
 #include <sys/trace_rrwlock.h>
+#include <sys/trace_spa_taskqs.h>
 #include <sys/trace_txg.h>
 #include <sys/trace_vdev.h>
 #include <sys/trace_zil.h>

@@ -79,6 +79,9 @@ typedef struct abd {

 typedef int abd_iter_func_t(void *buf, size_t len, void *priv);
 typedef int abd_iter_func2_t(void *bufa, void *bufb, size_t len, void *priv);
+#if defined(__linux__) && defined(_KERNEL)
+typedef int abd_iter_page_func_t(struct page *, size_t, size_t, void *);
+#endif

 extern int zfs_abd_scatter_enabled;

@@ -119,6 +122,10 @@ void abd_release_ownership_of_buf(abd_t *);
 int abd_iterate_func(abd_t *, size_t, size_t, abd_iter_func_t *, void *);
 int abd_iterate_func2(abd_t *, abd_t *, size_t, size_t, size_t,
     abd_iter_func2_t *, void *);
+#if defined(__linux__) && defined(_KERNEL)
+int abd_iterate_page_func(abd_t *, size_t, size_t, abd_iter_page_func_t *,
+    void *);
+#endif
 void abd_copy_off(abd_t *, abd_t *, size_t, size_t, size_t);
 void abd_copy_from_buf_off(abd_t *, const void *, size_t, size_t);
 void abd_copy_to_buf_off(void *, abd_t *, size_t, size_t);
@@ -207,6 +214,8 @@ void abd_fini(void);

 /*
  * Linux ABD bio functions
+ * Note: these are only needed to support vdev_classic. See comment in
+ * vdev_disk.c.
  */
 #if defined(__linux__) && defined(_KERNEL)
 unsigned int abd_bio_map_off(struct bio *, abd_t *, unsigned int, size_t);
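
A hedged sketch of how the new per-page iteration interface might be consumed, under the usual OpenZFS callback convention that a nonzero return stops the walk early (abd_count_pages and its callback are hypothetical names, not part of this change):

#if defined(__linux__) && defined(_KERNEL)
static int
abd_count_pages_cb(struct page *pg, size_t off, size_t len, void *priv)
{
	(void) pg;
	(void) off;
	(void) len;
	(*(uint64_t *)priv)++;		/* one callback per page-sized chunk */
	return (0);			/* nonzero would abort the walk */
}

static uint64_t
abd_count_pages(abd_t *abd, size_t off, size_t size)
{
	uint64_t npages = 0;

	(void) abd_iterate_page_func(abd, off, size, abd_count_pages_cb,
	    &npages);
	return (npages);
}
#endif
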
@@ -21,6 +21,7 @@
 /*
  * Copyright (c) 2014 by Chunwei Chen. All rights reserved.
  * Copyright (c) 2016, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2023, 2024, Klara Inc.
  */

 #ifndef _ABD_IMPL_H
@@ -38,12 +39,30 @@ typedef enum abd_stats_op {
 	ABDSTAT_DECR /* Decrease abdstat values */
 } abd_stats_op_t;

-struct scatterlist; /* forward declaration */
+/* forward declarations */
+struct scatterlist;
+struct page;

 struct abd_iter {
 	/* public interface */
-	void *iter_mapaddr;	/* addr corresponding to iter_pos */
-	size_t iter_mapsize;	/* length of data valid at mapaddr */
+	union {
+		/* for abd_iter_map()/abd_iter_unmap() */
+		struct {
+			/* addr corresponding to iter_pos */
+			void *iter_mapaddr;
+			/* length of data valid at mapaddr */
+			size_t iter_mapsize;
+		};
+		/* for abd_iter_page() */
+		struct {
+			/* current page */
+			struct page *iter_page;
+			/* offset of data in page */
+			size_t iter_page_doff;
+			/* size of data in page */
+			size_t iter_page_dsize;
+		};
+	};

 	/* private */
 	abd_t *iter_abd;	/* ABD being iterated through */
@@ -79,6 +98,7 @@ boolean_t abd_iter_at_end(struct abd_iter *);
 void abd_iter_advance(struct abd_iter *, size_t);
 void abd_iter_map(struct abd_iter *);
 void abd_iter_unmap(struct abd_iter *);
+void abd_iter_page(struct abd_iter *);

 /*
  * Helper macros

@@ -639,6 +639,9 @@ typedef struct dmu_buf_user {
 	 */
 	taskq_ent_t dbu_tqent;

+	/* Size of user data, for inclusion in dbuf_cache accounting. */
+	uint64_t dbu_size;
+
 	/*
 	 * This instance's eviction function pointers.
 	 *
@@ -721,6 +724,16 @@ void *dmu_buf_replace_user(dmu_buf_t *db,
 */
 void *dmu_buf_remove_user(dmu_buf_t *db, dmu_buf_user_t *user);

+/*
+ * User data size accounting. This can be used to artificially inflate the size
+ * of the dbuf during cache accounting, so that dbuf_evict_thread evicts enough
+ * to satisfy memory reclaim requests. It's not used for anything else, and
+ * defaults to 0.
+ */
+uint64_t dmu_buf_user_size(dmu_buf_t *db);
+void dmu_buf_add_user_size(dmu_buf_t *db, uint64_t nadd);
+void dmu_buf_sub_user_size(dmu_buf_t *db, uint64_t nsub);
+
 /*
  * Returns the user data (dmu_buf_user_t *) associated with this dbuf.
 */
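
A short sketch of how a dbuf user might keep this accounting in step with the data it attaches. The my_user_t type and the helper names are hypothetical; dmu_buf_set_user() is the existing attach interface:

typedef struct my_user {
	dmu_buf_user_t mu_dbu;	/* embedded eviction bookkeeping */
	void *mu_data;
} my_user_t;

static void
my_user_attach(dmu_buf_t *db, my_user_t *mu, uint64_t bytes)
{
	/* attach, then tell the dbuf cache how much memory we pinned */
	VERIFY3P(dmu_buf_set_user(db, &mu->mu_dbu), ==, NULL);
	dmu_buf_add_user_size(db, bytes);
}

static void
my_user_resize(dmu_buf_t *db, uint64_t obytes, uint64_t nbytes)
{
	if (nbytes > obytes)
		dmu_buf_add_user_size(db, nbytes - obytes);
	else if (nbytes < obytes)
		dmu_buf_sub_user_size(db, obytes - nbytes);
}
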
@@ -798,7 +798,7 @@ extern void spa_add_feature_stats(spa_t *spa, nvlist_t *config);

 #define	SPA_ASYNC_CONFIG_UPDATE	0x01
 #define	SPA_ASYNC_REMOVE	0x02
-#define	SPA_ASYNC_PROBE		0x04
+#define	SPA_ASYNC_FAULT_VDEV	0x04
 #define	SPA_ASYNC_RESILVER_DONE	0x08
 #define	SPA_ASYNC_RESILVER	0x10
 #define	SPA_ASYNC_AUTOEXPAND	0x20
@@ -854,6 +854,8 @@ extern int zfs_sync_pass_deferred_free;

 /* spa namespace global mutex */
 extern kmutex_t spa_namespace_lock;
+extern avl_tree_t spa_namespace_avl;
+extern kcondvar_t spa_namespace_cv;

 /*
  * SPA configuration functions in spa_config.c
@@ -1004,6 +1006,10 @@ extern int spa_import_progress_set_max_txg(uint64_t pool_guid,
     uint64_t max_txg);
 extern int spa_import_progress_set_state(uint64_t pool_guid,
     spa_load_state_t spa_load_state);
+extern void spa_import_progress_set_notes(spa_t *spa,
+    const char *fmt, ...) __printflike(2, 3);
+extern void spa_import_progress_set_notes_nolog(spa_t *spa,
+    const char *fmt, ...) __printflike(2, 3);

 /* Pool configuration locks */
 extern int spa_config_tryenter(spa_t *spa, int locks, const void *tag,
@@ -1147,6 +1153,8 @@ extern uint32_t spa_get_hostid(spa_t *spa);
 extern void spa_activate_allocation_classes(spa_t *, dmu_tx_t *);
 extern boolean_t spa_livelist_delete_check(spa_t *spa);

+extern boolean_t spa_mmp_remote_host_activity(spa_t *spa);
+
 extern spa_mode_t spa_mode(spa_t *spa);
 extern uint64_t zfs_strtonum(const char *str, char **nptr);
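
The __printflike(2, 3) annotation on the new progress-notes functions lets the compiler type-check the varargs against the format string. A call is then an ordinary printf-style invocation; the txg variable here is illustrative:

spa_import_progress_set_notes(spa, "Loading checkpoint txg %llu",
    (u_longlong_t)checkpoint_txg);
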
@@ -20,7 +20,7 @@
 */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2024 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  * Copyright 2013 Saso Kiselkov. All rights reserved.
@@ -183,6 +183,8 @@ typedef enum spa_proc_state {
 } spa_proc_state_t;

 typedef struct spa_taskqs {
+	zio_taskq_type_t stqs_type;
+	zio_type_t stqs_zio_type;
 	uint_t stqs_count;
 	taskq_t **stqs_taskq;
 } spa_taskqs_t;
@@ -229,6 +231,8 @@ struct spa {
 	dsl_pool_t *spa_dsl_pool;
 	boolean_t spa_is_initializing;	/* true while opening pool */
 	boolean_t spa_is_exporting;	/* true while exporting pool */
+	kthread_t *spa_export_thread;	/* valid during pool export */
+	kthread_t *spa_load_thread;	/* loading, no namespace lock */
 	metaslab_class_t *spa_normal_class;	/* normal data class */
 	metaslab_class_t *spa_log_class;	/* intent log data class */
 	metaslab_class_t *spa_embedded_log_class; /* log on normal vdevs */

@@ -50,20 +50,20 @@ extern "C" {
 #define	MMP_SEQ_VALID_BIT		0x02
 #define	MMP_FAIL_INT_VALID_BIT		0x04

-#define	MMP_VALID(ubp)		(ubp->ub_magic == UBERBLOCK_MAGIC && \
-				    ubp->ub_mmp_magic == MMP_MAGIC)
-#define	MMP_INTERVAL_VALID(ubp)	(MMP_VALID(ubp) && (ubp->ub_mmp_config & \
+#define	MMP_VALID(ubp)		((ubp)->ub_magic == UBERBLOCK_MAGIC && \
+				    (ubp)->ub_mmp_magic == MMP_MAGIC)
+#define	MMP_INTERVAL_VALID(ubp)	(MMP_VALID(ubp) && ((ubp)->ub_mmp_config & \
 				    MMP_INTERVAL_VALID_BIT))
-#define	MMP_SEQ_VALID(ubp)	(MMP_VALID(ubp) && (ubp->ub_mmp_config & \
+#define	MMP_SEQ_VALID(ubp)	(MMP_VALID(ubp) && ((ubp)->ub_mmp_config & \
 				    MMP_SEQ_VALID_BIT))
-#define	MMP_FAIL_INT_VALID(ubp)	(MMP_VALID(ubp) && (ubp->ub_mmp_config & \
+#define	MMP_FAIL_INT_VALID(ubp)	(MMP_VALID(ubp) && ((ubp)->ub_mmp_config & \
 				    MMP_FAIL_INT_VALID_BIT))

-#define	MMP_INTERVAL(ubp)	((ubp->ub_mmp_config & 0x00000000FFFFFF00) \
+#define	MMP_INTERVAL(ubp)	(((ubp)->ub_mmp_config & 0x00000000FFFFFF00) \
 				    >> 8)
-#define	MMP_SEQ(ubp)		((ubp->ub_mmp_config & 0x0000FFFF00000000) \
+#define	MMP_SEQ(ubp)		(((ubp)->ub_mmp_config & 0x0000FFFF00000000) \
 				    >> 32)
-#define	MMP_FAIL_INT(ubp)	((ubp->ub_mmp_config & 0xFFFF000000000000) \
+#define	MMP_FAIL_INT(ubp)	(((ubp)->ub_mmp_config & 0xFFFF000000000000) \
 				    >> 48)

 #define	MMP_INTERVAL_SET(write)	\
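
This hunk is purely about macro hygiene: every use of ubp gains parentheses so the -> operator binds to the whole argument expression. Without them, passing anything other than a plain identifier breaks, as this standalone sketch (not from the patch) shows:

struct ub { unsigned long ub_mmp_config; };

#define	SEQ_NOPAREN(ubp) ((ubp->ub_mmp_config & 0x0000FFFF00000000) >> 32)
#define	SEQ_PAREN(ubp)	 ((((ubp)->ub_mmp_config) & 0x0000FFFF00000000) >> 32)

void
demo(struct ub *tab)
{
	/*
	 * SEQ_NOPAREN(tab + 1) expands to (tab + 1->ub_mmp_config ...),
	 * a syntax error, because -> binds tighter than +.
	 */
	unsigned long seq = SEQ_PAREN(tab + 1);	/* fine: (tab + 1)->... */
	(void) seq;
}
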
@@ -290,7 +290,7 @@ struct vdev {
 	txg_list_t vdev_dtl_list;	/* per-txg dirty DTL lists */
 	txg_node_t vdev_txg_node;	/* per-txg dirty vdev linkage */
 	boolean_t vdev_remove_wanted;	/* async remove wanted? */
-	boolean_t vdev_probe_wanted;	/* async probe wanted? */
+	boolean_t vdev_fault_wanted;	/* async faulted wanted? */
 	list_node_t vdev_config_dirty_node; /* config dirty list */
 	list_node_t vdev_state_dirty_node; /* state dirty list */
 	uint64_t vdev_deflate_ratio;	/* deflation ratio (x512) */

@@ -503,6 +503,8 @@ extern taskqid_t taskq_dispatch_delay(taskq_t *, task_func_t, void *, uint_t,
     clock_t);
 extern void taskq_dispatch_ent(taskq_t *, task_func_t, void *, uint_t,
     taskq_ent_t *);
+extern boolean_t taskq_try_dispatch_ent(taskq_t *, task_func_t, void *, uint_t,
+    taskq_ent_t *);
 extern int taskq_empty_ent(taskq_ent_t *);
 extern void taskq_init_ent(taskq_ent_t *);
 extern void taskq_destroy(taskq_t *);

@@ -20,7 +20,7 @@
 */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2024 by Delphix. All rights reserved.
  * Copyright 2016 RackTop Systems.
  * Copyright (c) 2017, Intel Corporation.
 */
@@ -447,6 +447,8 @@ typedef enum zinject_type {
 	ZINJECT_PANIC,
 	ZINJECT_DELAY_IO,
 	ZINJECT_DECRYPT_FAULT,
+	ZINJECT_DELAY_IMPORT,
+	ZINJECT_DELAY_EXPORT,
 } zinject_type_t;

 typedef struct zfs_share {

@@ -22,7 +22,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2024 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  * Copyright 2016 Toomas Soome <tsoome@me.com>
@@ -685,6 +685,8 @@ extern int zio_handle_device_injections(vdev_t *vd, zio_t *zio, int err1,
 extern int zio_handle_label_injection(zio_t *zio, int error);
 extern void zio_handle_ignored_writes(zio_t *zio);
 extern hrtime_t zio_handle_io_delay(zio_t *zio);
+extern void zio_handle_import_delay(spa_t *spa, hrtime_t elapsed);
+extern void zio_handle_export_delay(spa_t *spa, hrtime_t elapsed);

 /*
  * Checksum ereport functions

@@ -182,6 +182,8 @@ extern int lz4_decompress_zfs(void *src, void *dst, size_t s_len, size_t d_len,
     int level);
 extern size_t slack_compress(void *src, void *dst, size_t s_len, size_t d_len,
     int level);
+extern int slack_decompress(void *src, void *dst, size_t s_len, size_t d_len,
+    int level);

 /*
  * Compress and decompress data if necessary.

@@ -29,6 +29,7 @@
  * Copyright (c) 2017, Intel Corporation.
  * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>
  * Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
+ * Copyright (c) 2021, 2023, Klara Inc.
  */

 #include <errno.h>
@@ -265,6 +266,7 @@ zpool_get_state_str(zpool_handle_t *zhp)
     } else if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
         str = gettext("FAULTED");
     } else if (status == ZPOOL_STATUS_IO_FAILURE_WAIT ||
+        status == ZPOOL_STATUS_IO_FAILURE_CONTINUE ||
         status == ZPOOL_STATUS_IO_FAILURE_MMP) {
         str = gettext("SUSPENDED");
     } else {

@@ -156,8 +156,8 @@ taskq_init_ent(taskq_ent_t *t)
 	t->tqent_flags = 0;
 }

-void
-taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
+static void
+taskq_dispatch_ent_impl(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
     taskq_ent_t *t)
 {
 	ASSERT(func != NULL);
@@ -170,7 +170,6 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
 	/*
 	 * Enqueue the task to the underlying queue.
 	 */
-	mutex_enter(&tq->tq_lock);

 	if (flags & TQ_FRONT) {
 		t->tqent_next = tq->tq_task.tqent_next;
@@ -184,9 +183,28 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
 	t->tqent_func = func;
 	t->tqent_arg = arg;
 	cv_signal(&tq->tq_dispatch_cv);
+}
+
+void
+taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
+    taskq_ent_t *t)
+{
+	mutex_enter(&tq->tq_lock);
+	taskq_dispatch_ent_impl(tq, func, arg, flags, t);
 	mutex_exit(&tq->tq_lock);
 }

+boolean_t
+taskq_try_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
+    taskq_ent_t *t)
+{
+	if (!mutex_tryenter(&tq->tq_lock))
+		return (B_FALSE);
+	taskq_dispatch_ent_impl(tq, func, arg, flags, t);
+	mutex_exit(&tq->tq_lock);
+	return (B_TRUE);
+}
+
 void
 taskq_wait(taskq_t *tq)
 {
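
taskq_try_dispatch_ent() is the non-blocking variant: it fails fast instead of sleeping on tq_lock. A hypothetical caller that degrades to the blocking path under contention might look like this:

static void
dispatch_or_block(taskq_t *tq, task_func_t func, void *arg, taskq_ent_t *t)
{
	/* try to dispatch without waiting for tq_lock ... */
	if (taskq_try_dispatch_ent(tq, func, arg, 0, t))
		return;

	/* ... and fall back to the blocking dispatch if it is contended */
	taskq_dispatch_ent(tq, func, arg, 0, t);
}
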
@@ -2,6 +2,7 @@
 .\" Copyright (c) 2013 by Turbo Fredriksson <turbo@bayour.com>. All rights reserved.
 .\" Copyright (c) 2019, 2021 by Delphix. All rights reserved.
 .\" Copyright (c) 2019 Datto Inc.
+.\" Copyright (c) 2023, 2024 Klara, Inc.
 .\" The contents of this file are subject to the terms of the Common Development
 .\" and Distribution License (the "License"). You may not use this file except
 .\" in compliance with the License. You can obtain a copy of the license at
@@ -15,7 +16,7 @@
 .\" own identifying information:
 .\" Portions Copyright [yyyy] [name of copyright owner]
 .\"
-.Dd January 10, 2023
+.Dd January 9, 2024
 .Dt ZFS 4
 .Os
 .
@@ -1305,6 +1306,29 @@ as fuller devices will tend to be slower than empty devices.
 Also see
 .Sy zio_dva_throttle_enabled .
 .
+.It Sy zfs_vdev_disk_max_segs Ns = Ns Sy 0 Pq uint
+Maximum number of segments to add to a BIO (min 4).
+If this is higher than the maximum allowed by the device queue or the kernel
+itself, it will be clamped.
+Setting it to zero will cause the kernel's ideal size to be used.
+This parameter only applies on Linux.
+This parameter is ignored if
+.Sy zfs_vdev_disk_classic Ns = Ns Sy 1 .
+.
+.It Sy zfs_vdev_disk_classic Ns = Ns Sy 0 Ns | Ns 1 Pq uint
+If set to 1, OpenZFS will submit IO to Linux using the method it used in 2.2
+and earlier.
+This "classic" method has known issues with highly fragmented IO requests and
+is slower on many workloads, but it has been in use for many years and is known
+to be very stable.
+If you set this parameter, please also open a bug report why you did so,
+including the workload involved and any error messages.
+.Pp
+This parameter and the classic submission method will be removed once we have
+total confidence in the new method.
+.Pp
+This parameter only applies on Linux, and can only be set at module load time.
+.
 .It Sy zfs_expire_snapshot Ns = Ns Sy 300 Ns s Pq int
 Time before expiring
 .Pa .zfs/snapshot .
@@ -2167,6 +2191,16 @@ If
 .Sy 0 ,
 generate a system-dependent value close to 6 threads per taskq.
 .
+.It Sy zio_taskq_read Ns = Ns Sy fixed,1,8 null scale null Pq charp
+Set the queue and thread configuration for the IO read queues.
+This is an advanced debugging parameter.
+Don't change this unless you understand what it does.
+.
+.It Sy zio_taskq_write Ns = Ns Sy batch fixed,1,5 scale fixed,1,5 Pq charp
+Set the queue and thread configuration for the IO write queues.
+This is an advanced debugging parameter.
+Don't change this unless you understand what it does.
+.
 .It Sy zvol_inhibit_dev Ns = Ns Sy 0 Ns | Ns 1 Pq uint
 Do not create zvol device nodes.
 This may slightly improve startup time on

@@ -127,6 +127,14 @@ Force a vdev error.
 .
 .It Xo
 .Nm zinject
+.Fl i Ar seconds
+.Ar pool
+.Xc
+Add an artificial delay during the future import of a pool.
+This injector is automatically cleared after the import is finished.
+.
+.It Xo
+.Nm zinject
 .Fl I
 .Op Fl s Ar seconds Ns | Ns Fl g Ar txgs
 .Ar pool

@@ -49,9 +49,10 @@ If the pool was suspended it will be brought back online provided the
 devices can be accessed.
 Pools with
 .Sy multihost
-enabled which have been suspended cannot be resumed.
-While the pool was suspended, it may have been imported on
-another host, and resuming I/O could result in pool damage.
+enabled which have been suspended cannot be resumed when there is evidence
+that the pool was imported by another host.
+The same checks performed during an import will be applied before the clear
+proceeds.
 .
 .Sh SEE ALSO
 .Xr zdb 8 ,

@@ -411,6 +411,15 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint32_t flags,
 	taskqueue_enqueue(tq->tq_queue, &task->tqent_task);
 }

+boolean_t
+taskq_try_dispatch_ent(taskq_t *tq, task_func_t func, void *arg,
+    uint32_t flags, taskq_ent_t *task)
+{
+	/* XXX: implement me -- robn, 2023-10-23 */
+	taskq_dispatch_ent(tq, func, arg, flags, task);
+	return (B_TRUE);
+}
+
 void
 taskq_wait(taskq_t *tq)
 {

@@ -417,10 +417,8 @@ abd_iter_init(struct abd_iter *aiter, abd_t *abd)
 {
 	ASSERT(!abd_is_gang(abd));
 	abd_verify(abd);
+	memset(aiter, 0, sizeof (struct abd_iter));
 	aiter->iter_abd = abd;
-	aiter->iter_pos = 0;
-	aiter->iter_mapaddr = NULL;
-	aiter->iter_mapsize = 0;
 }

 /*

@@ -673,17 +673,13 @@ out:
 }
 EXPORT_SYMBOL(taskq_dispatch_delay);

-void
-taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
+static void
+taskq_dispatch_ent_impl(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
     taskq_ent_t *t)
 {
-	unsigned long irqflags;
 	ASSERT(tq);
 	ASSERT(func);

-	spin_lock_irqsave_nested(&tq->tq_lock, irqflags,
-	    tq->tq_lock_class);
-
 	/* Taskq being destroyed and all tasks drained */
 	if (!(tq->tq_flags & TASKQ_ACTIVE)) {
 		t->tqent_id = TASKQID_INVALID;
@@ -694,7 +690,7 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
 		/* Dynamic taskq may be able to spawn another thread */
 		if (!(tq->tq_flags & TASKQ_DYNAMIC) ||
 		    taskq_thread_spawn(tq) == 0)
-			goto out2;
+			return;
 		flags |= TQ_FRONT;
 	}

@@ -734,11 +730,45 @@ out:
 	/* Spawn additional taskq threads if required. */
 	if (tq->tq_nactive == tq->tq_nthreads)
 		(void) taskq_thread_spawn(tq);
-out2:
+}
+
+void
+taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
+    taskq_ent_t *t)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave_nested(&tq->tq_lock, irqflags,
+	    tq->tq_lock_class);
+
+	taskq_dispatch_ent_impl(tq, func, arg, flags, t);
+
 	spin_unlock_irqrestore(&tq->tq_lock, irqflags);
 }
 EXPORT_SYMBOL(taskq_dispatch_ent);

+boolean_t
+taskq_try_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
+    taskq_ent_t *t)
+{
+	unsigned long irqflags;
+
+	/*
+	 * XXX I don't _think_ losing _nested matters, because I think its
+	 * only related to lockdep, and we don't have access to that anyway
+	 * -- robn, 2023-10-23
+	 */
+	if (!spin_trylock_irqsave(&tq->tq_lock, irqflags))
+		return (B_FALSE);
+
+	taskq_dispatch_ent_impl(tq, func, arg, flags, t);
+
+	spin_unlock_irqrestore(&tq->tq_lock, irqflags);
+
+	return (B_TRUE);
+}
+EXPORT_SYMBOL(taskq_try_dispatch_ent);
+
 int
 taskq_empty_ent(taskq_ent_t *t)
 {

@ -21,6 +21,7 @@
|
|||
/*
|
||||
* Copyright (c) 2014 by Chunwei Chen. All rights reserved.
|
||||
* Copyright (c) 2019 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2023, 2024, Klara Inc.
|
||||
*/
|
||||
|
||||
/*
|
||||
|
@ -59,7 +60,9 @@
|
|||
#include <sys/zfs_znode.h>
|
||||
#ifdef _KERNEL
|
||||
#include <linux/kmap_compat.h>
|
||||
#include <linux/mm_compat.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <linux/version.h>
|
||||
#else
|
||||
#define MAX_ORDER 1
|
||||
#endif
|
||||
|
@ -884,14 +887,9 @@ abd_iter_init(struct abd_iter *aiter, abd_t *abd)
|
|||
{
|
||||
ASSERT(!abd_is_gang(abd));
|
||||
abd_verify(abd);
|
||||
memset(aiter, 0, sizeof (struct abd_iter));
|
||||
aiter->iter_abd = abd;
|
||||
aiter->iter_mapaddr = NULL;
|
||||
aiter->iter_mapsize = 0;
|
||||
aiter->iter_pos = 0;
|
||||
if (abd_is_linear(abd)) {
|
||||
aiter->iter_offset = 0;
|
||||
aiter->iter_sg = NULL;
|
||||
} else {
|
||||
if (!abd_is_linear(abd)) {
|
||||
aiter->iter_offset = ABD_SCATTER(abd).abd_offset;
|
||||
aiter->iter_sg = ABD_SCATTER(abd).abd_sgl;
|
||||
}
|
||||
|
@ -904,6 +902,7 @@ abd_iter_init(struct abd_iter *aiter, abd_t *abd)
|
|||
boolean_t
|
||||
abd_iter_at_end(struct abd_iter *aiter)
|
||||
{
|
||||
ASSERT3U(aiter->iter_pos, <=, aiter->iter_abd->abd_size);
|
||||
return (aiter->iter_pos == aiter->iter_abd->abd_size);
|
||||
}
|
||||
|
||||
|
@ -915,8 +914,15 @@ abd_iter_at_end(struct abd_iter *aiter)
|
|||
void
|
||||
abd_iter_advance(struct abd_iter *aiter, size_t amount)
|
||||
{
|
||||
/*
|
||||
* Ensure that last chunk is not in use. abd_iterate_*() must clear
|
||||
* this state (directly or abd_iter_unmap()) before advancing.
|
||||
*/
|
||||
ASSERT3P(aiter->iter_mapaddr, ==, NULL);
|
||||
ASSERT0(aiter->iter_mapsize);
|
||||
ASSERT3P(aiter->iter_page, ==, NULL);
|
||||
ASSERT0(aiter->iter_page_doff);
|
||||
ASSERT0(aiter->iter_page_dsize);
|
||||
|
||||
/* There's nothing left to advance to, so do nothing */
|
||||
if (abd_iter_at_end(aiter))
|
||||
|
@ -998,6 +1004,106 @@ abd_cache_reap_now(void)
|
|||
}
|
||||
|
||||
#if defined(_KERNEL)
|
||||
/*
|
||||
* Yield the next page struct and data offset and size within it, without
|
||||
* mapping it into the address space.
|
||||
*/
|
||||
void
|
||||
abd_iter_page(struct abd_iter *aiter)
|
||||
{
|
||||
if (abd_iter_at_end(aiter)) {
|
||||
aiter->iter_page = NULL;
|
||||
aiter->iter_page_doff = 0;
|
||||
aiter->iter_page_dsize = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
struct page *page;
|
||||
size_t doff, dsize;
|
||||
|
||||
if (abd_is_linear(aiter->iter_abd)) {
|
||||
ASSERT3U(aiter->iter_pos, ==, aiter->iter_offset);
|
||||
|
||||
/* memory address at iter_pos */
|
||||
void *paddr = ABD_LINEAR_BUF(aiter->iter_abd) + aiter->iter_pos;
|
||||
|
||||
/* struct page for address */
|
||||
page = is_vmalloc_addr(paddr) ?
|
||||
vmalloc_to_page(paddr) : virt_to_page(paddr);
|
||||
|
||||
/* offset of address within the page */
|
||||
doff = offset_in_page(paddr);
|
||||
|
||||
/* total data remaining in abd from this position */
|
||||
dsize = aiter->iter_abd->abd_size - aiter->iter_offset;
|
||||
} else {
|
||||
ASSERT(!abd_is_gang(aiter->iter_abd));
|
||||
|
||||
/* current scatter page */
|
||||
page = sg_page(aiter->iter_sg);
|
||||
|
||||
/* position within page */
|
||||
doff = aiter->iter_offset;
|
||||
|
||||
/* remaining data in scatterlist */
|
||||
dsize = MIN(aiter->iter_sg->length - aiter->iter_offset,
|
||||
aiter->iter_abd->abd_size - aiter->iter_pos);
|
||||
}
|
||||
ASSERT(page);
|
||||
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0)
|
||||
if (PageTail(page)) {
|
||||
/*
|
||||
* This page is part of a "compound page", which is a group of
|
||||
* pages that can be referenced from a single struct page *.
|
||||
* Its organised as a "head" page, followed by a series of
|
||||
* "tail" pages.
|
||||
*
|
||||
* In OpenZFS, compound pages are allocated using the
|
||||
* __GFP_COMP flag, which we get from scatter ABDs and SPL
|
||||
* vmalloc slabs (ie >16K allocations). So a great many of the
|
||||
* IO buffers we get are going to be of this type.
|
||||
*
|
||||
* The tail pages are just regular PAGE_SIZE pages, and can be
|
||||
* safely used as-is. However, the head page has length
|
||||
* covering itself and all the tail pages. If this ABD chunk
|
||||
* spans multiple pages, then we can use the head page and a
|
||||
* >PAGE_SIZE length, which is far more efficient.
|
||||
*
|
||||
* To do this, we need to adjust the offset to be counted from
|
||||
* the head page. struct page for compound pages are stored
|
||||
* contiguously, so we can just adjust by a simple offset.
|
||||
*
|
||||
* Before kernel 4.5, compound page heads were refcounted
|
||||
* separately, such that moving back to the head page would
|
||||
* require us to take a reference to it and releasing it once
|
||||
* we're completely finished with it. In practice, that means
|
||||
* when our caller is done with the ABD, which we have no
|
||||
* insight into from here. Rather than contort this API to
|
||||
* track head page references on such ancient kernels, we just
|
||||
* compile this block out and use the tail pages directly. This
|
||||
* is slightly less efficient, but makes everything far
|
||||
* simpler.
|
||||
*/
|
||||
struct page *head = compound_head(page);
|
||||
doff += ((page - head) * PAGESIZE);
|
||||
page = head;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* final page and position within it */
|
||||
aiter->iter_page = page;
|
||||
aiter->iter_page_doff = doff;
|
||||
|
||||
/* amount of data in the chunk, up to the end of the page */
|
||||
aiter->iter_page_dsize = MIN(dsize, page_size(page) - doff);
|
||||
}

/*
 * Note: ABD BIO functions only needed to support vdev_classic. See comments in
 * vdev_disk.c.
 */

/*
 * bio_nr_pages for ABD.
 * @off is the offset in @abd
@ -1220,4 +1326,5 @@ MODULE_PARM_DESC(zfs_abd_scatter_min_size,
module_param(zfs_abd_scatter_max_order, uint, 0644);
MODULE_PARM_DESC(zfs_abd_scatter_max_order,
	"Maximum order allocation used for a scatter ABD.");
#endif

#endif /* _KERNEL */

@ -47,6 +47,7 @@
#include <sys/trace_dnode.h>
#include <sys/trace_multilist.h>
#include <sys/trace_rrwlock.h>
#include <sys/trace_spa_taskqs.h>
#include <sys/trace_txg.h>
#include <sys/trace_vdev.h>
#include <sys/trace_zil.h>

@ -24,6 +24,7 @@
 * Rewritten for Linux by Brian Behlendorf <behlendorf1@llnl.gov>.
 * LLNL-CODE-403049.
 * Copyright (c) 2012, 2019 by Delphix. All rights reserved.
 * Copyright (c) 2023, 2024, Klara Inc.
 */

#include <sys/zfs_context.h>

@ -49,11 +50,11 @@ typedef struct vdev_disk {
int zio_suppress_zero_writes = B_TRUE;

/*
 * Maximum number of segments to add to a bio. If this is higher than the
 * maximum allowed by the device queue or the kernel itself, it will be
 * Maximum number of segments to add to a bio (min 4). If this is higher than
 * the maximum allowed by the device queue or the kernel itself, it will be
 * clamped. Setting it to zero will cause the kernel's ideal size to be used.
 */
unsigned long vdev_disk_max_segs = 0;
uint_t zfs_vdev_disk_max_segs = 0;

/*
 * Unique identifier for the exclusive vdev holder.

@ -72,20 +73,22 @@ static unsigned zfs_vdev_open_timeout_ms = 1000;
 */
#define	EFI_MIN_RESV_SIZE	(16 * 1024)

/*
 * Virtual device vector for disks.
 */
typedef struct dio_request {
	zio_t		*dr_zio;	/* Parent ZIO */
	atomic_t	dr_ref;		/* References */
	int		dr_error;	/* Bio error */
	int		dr_bio_count;	/* Count of bio's */
	struct bio	*dr_bio[0];	/* Attached bio's */
} dio_request_t;

#ifdef HAVE_BLK_MODE_T
static blk_mode_t
#else
static fmode_t
#endif
vdev_bdev_mode(spa_mode_t spa_mode)
{
#ifdef HAVE_BLK_MODE_T
	blk_mode_t mode = 0;

	if (spa_mode & SPA_MODE_READ)
		mode |= BLK_OPEN_READ;

	if (spa_mode & SPA_MODE_WRITE)
		mode |= BLK_OPEN_WRITE;
#else
	fmode_t mode = 0;

	if (spa_mode & SPA_MODE_READ)

@ -93,6 +96,7 @@ vdev_bdev_mode(spa_mode_t spa_mode)
	if (spa_mode & SPA_MODE_WRITE)
		mode |= FMODE_WRITE;
#endif

	return (mode);
}

@ -355,98 +359,15 @@ vdev_disk_close(vdev_t *v)
	if (v->vdev_reopening || vd == NULL)
		return;

	if (vd->vd_bdev != NULL) {
	if (vd->vd_bdev != NULL)
		blkdev_put(vd->vd_bdev,
		    vdev_bdev_mode(spa_mode(v->vdev_spa)) | FMODE_EXCL);
	}

	rw_destroy(&vd->vd_lock);
	kmem_free(vd, sizeof (vdev_disk_t));
	v->vdev_tsd = NULL;
}

static dio_request_t *
vdev_disk_dio_alloc(int bio_count)
{
	dio_request_t *dr = kmem_zalloc(sizeof (dio_request_t) +
	    sizeof (struct bio *) * bio_count, KM_SLEEP);
	atomic_set(&dr->dr_ref, 0);
	dr->dr_bio_count = bio_count;
	dr->dr_error = 0;

	for (int i = 0; i < dr->dr_bio_count; i++)
		dr->dr_bio[i] = NULL;

	return (dr);
}

static void
vdev_disk_dio_free(dio_request_t *dr)
{
	int i;

	for (i = 0; i < dr->dr_bio_count; i++)
		if (dr->dr_bio[i])
			bio_put(dr->dr_bio[i]);

	kmem_free(dr, sizeof (dio_request_t) +
	    sizeof (struct bio *) * dr->dr_bio_count);
}

static void
vdev_disk_dio_get(dio_request_t *dr)
{
	atomic_inc(&dr->dr_ref);
}

static int
vdev_disk_dio_put(dio_request_t *dr)
{
	int rc = atomic_dec_return(&dr->dr_ref);

	/*
	 * Free the dio_request when the last reference is dropped and
	 * ensure zio_interpret is called only once with the correct zio
	 */
	if (rc == 0) {
		zio_t *zio = dr->dr_zio;
		int error = dr->dr_error;

		vdev_disk_dio_free(dr);

		if (zio) {
			zio->io_error = error;
			ASSERT3S(zio->io_error, >=, 0);
			if (zio->io_error)
				vdev_disk_error(zio);

			zio_delay_interrupt(zio);
		}
	}

	return (rc);
}

BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, error)
{
	dio_request_t *dr = bio->bi_private;
	int rc;

	if (dr->dr_error == 0) {
#ifdef HAVE_1ARG_BIO_END_IO_T
		dr->dr_error = BIO_END_IO_ERROR(bio);
#else
		if (error)
			dr->dr_error = -(error);
		else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
			dr->dr_error = EIO;
#endif
	}

	/* Drop reference acquired by __vdev_disk_physio */
	rc = vdev_disk_dio_put(dr);
}

static inline void
vdev_submit_bio_impl(struct bio *bio)
{

@ -598,13 +519,17 @@ vdev_bio_alloc(struct block_device *bdev, gfp_t gfp_mask,
	return (bio);
}

static inline unsigned int
vdev_bio_max_segs(struct block_device *bdev) {
	const unsigned long tune_max_segs =
	    vdev_disk_max_segs > 0 ? vdev_disk_max_segs : ULONG_MAX;
	const unsigned long dev_max_segs =
	    queue_max_segments(bdev_get_queue(bdev));
	const unsigned long max_segs = MIN(tune_max_segs, dev_max_segs);
static inline uint_t
vdev_bio_max_segs(struct block_device *bdev)
{
	/*
	 * Smallest of the device max segs and the tuneable max segs. Minimum
	 * 4, so there's room to finish split pages if they come up.
	 */
	const uint_t dev_max_segs = queue_max_segments(bdev_get_queue(bdev));
	const uint_t tune_max_segs = (zfs_vdev_disk_max_segs > 0) ?
	    MAX(4, zfs_vdev_disk_max_segs) : dev_max_segs;
	const uint_t max_segs = MIN(tune_max_segs, dev_max_segs);

#ifdef HAVE_BIO_MAX_SEGS
	return (bio_max_segs(max_segs));

@ -613,10 +538,461 @@ vdev_bio_max_segs(struct block_device *bdev) {
#endif
}

static int
__vdev_disk_physio(struct block_device *bdev, zio_t *zio,
    size_t io_size, uint64_t io_offset, int rw, int flags)
static inline uint_t
vdev_bio_max_bytes(struct block_device *bdev)
{
	return (queue_max_sectors(bdev_get_queue(bdev)) << 9);
}


/*
 * Virtual block IO object (VBIO)
 *
 * Linux block IO (BIO) objects have a limit on how many data segments (pages)
 * they can hold. Depending on how they're allocated and structured, a large
 * ZIO can require more than one BIO to be submitted to the kernel, which then
 * all have to complete before we can return the completed ZIO back to ZFS.
 *
 * A VBIO is a wrapper around multiple BIOs, carrying everything needed to
 * translate a ZIO down into the kernel block layer and back again.
 *
 * Note that these are only used for data ZIOs (read/write). Meta-operations
 * (flush/trim) don't need multiple BIOs and so can just make the call
 * directly.
 */
typedef struct {
	zio_t		*vbio_zio;	/* parent zio */

	struct block_device *vbio_bdev;	/* blockdev to submit bios to */

	abd_t		*vbio_abd;	/* abd carrying borrowed linear buf */

	uint_t		vbio_max_segs;	/* max segs per bio */

	uint_t		vbio_max_bytes;	/* max bytes per bio */
	uint_t		vbio_lbs_mask;	/* logical block size mask */

	uint64_t	vbio_offset;	/* start offset of next bio */

	struct bio	*vbio_bio;	/* pointer to the current bio */
	int		vbio_flags;	/* bio flags */
} vbio_t;

static vbio_t *
vbio_alloc(zio_t *zio, struct block_device *bdev, int flags)
{
	vbio_t *vbio = kmem_zalloc(sizeof (vbio_t), KM_SLEEP);

	vbio->vbio_zio = zio;
	vbio->vbio_bdev = bdev;
	vbio->vbio_abd = NULL;
	vbio->vbio_max_segs = vdev_bio_max_segs(bdev);
	vbio->vbio_max_bytes = vdev_bio_max_bytes(bdev);
	vbio->vbio_lbs_mask = bdev_logical_block_size(bdev)-1;
	vbio->vbio_offset = zio->io_offset;
	vbio->vbio_bio = NULL;
	vbio->vbio_flags = flags;

	return (vbio);
}

BIO_END_IO_PROTO(vbio_completion, bio, error);

static int
vbio_add_page(vbio_t *vbio, struct page *page, uint_t size, uint_t offset)
{
	struct bio *bio = vbio->vbio_bio;
	uint_t ssize;

	while (size > 0) {
		if (bio == NULL) {
			/* New BIO, allocate and set up */
			bio = vdev_bio_alloc(vbio->vbio_bdev, GFP_NOIO,
			    vbio->vbio_max_segs);
			VERIFY(bio);

			BIO_BI_SECTOR(bio) = vbio->vbio_offset >> 9;
			bio_set_op_attrs(bio,
			    vbio->vbio_zio->io_type == ZIO_TYPE_WRITE ?
			    WRITE : READ, vbio->vbio_flags);

			if (vbio->vbio_bio) {
				bio_chain(vbio->vbio_bio, bio);
				vdev_submit_bio(vbio->vbio_bio);
			}
			vbio->vbio_bio = bio;
		}

		/*
		 * Only load as much of the current page data as will fit in
		 * the space left in the BIO, respecting lbs alignment. Older
		 * kernels will error if we try to overfill the BIO, while
		 * newer ones will accept it and split the BIO. This ensures
		 * everything works on older kernels, and avoids an additional
		 * overhead on the new.
		 */
		ssize = MIN(size, (vbio->vbio_max_bytes - BIO_BI_SIZE(bio)) &
		    ~(vbio->vbio_lbs_mask));
		if (ssize > 0 &&
		    bio_add_page(bio, page, ssize, offset) == ssize) {
			/* Accepted, adjust and load any remaining. */
			size -= ssize;
			offset += ssize;
			continue;
		}

		/* No room, set up for a new BIO and loop */
		vbio->vbio_offset += BIO_BI_SIZE(bio);

		/* Signal new BIO allocation wanted */
		bio = NULL;
	}

	return (0);
}
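
To make the alignment mask above concrete, here is a small worked sketch of the ssize computation with hypothetical numbers (a 4K logical block size and a nearly-full BIO); it shows only the arithmetic, not the kernel calls:

#include <assert.h>
#include <stdio.h>

#define	MIN(a, b)	((a) < (b) ? (a) : (b))

int
main(void)
{
	unsigned int max_bytes = 1 << 20;	/* BIO capacity: 1 MiB */
	unsigned int bio_size = 1044480;	/* bytes already in the BIO */
	unsigned int lbs_mask = 4096 - 1;	/* 4K logical block size */
	unsigned int size = 16384;		/* page data still to load */

	/* space left in the BIO, rounded down to a whole logical block */
	unsigned int ssize = MIN(size, (max_bytes - bio_size) & ~lbs_mask);

	/* 4096 bytes of headroom remain, so exactly one block fits */
	assert(max_bytes - bio_size == 4096);
	assert(ssize == 4096);
	printf("loading %u of %u bytes; the rest goes to a new BIO\n",
	    ssize, size);
	return (0);
}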

/* Iterator callback to submit ABD pages to the vbio. */
static int
vbio_fill_cb(struct page *page, size_t off, size_t len, void *priv)
{
	vbio_t *vbio = priv;
	return (vbio_add_page(vbio, page, len, off));
}

/* Create some BIOs, fill them with data and submit them */
static void
vbio_submit(vbio_t *vbio, abd_t *abd, uint64_t size)
{
	/*
	 * We plug so we can submit the BIOs as we go and only unplug them when
	 * they are fully created and submitted. This is important; if we don't
	 * plug, then the kernel may start executing earlier BIOs while we're
	 * still creating and executing later ones, and if the device goes
	 * away while that's happening, older kernels can get confused and
	 * trample memory.
	 */
	struct blk_plug plug;
	blk_start_plug(&plug);

	(void) abd_iterate_page_func(abd, 0, size, vbio_fill_cb, vbio);
	ASSERT(vbio->vbio_bio);

	vbio->vbio_bio->bi_end_io = vbio_completion;
	vbio->vbio_bio->bi_private = vbio;

	/*
	 * Once submitted, vbio_bio now owns vbio (through bi_private) and we
	 * can't touch it again. The bio may complete and vbio_completion() be
	 * called and free the vbio before this task is run again, so we must
	 * consider it invalid from this point.
	 */
	vdev_submit_bio(vbio->vbio_bio);

	blk_finish_plug(&plug);
}

/* IO completion callback */
BIO_END_IO_PROTO(vbio_completion, bio, error)
{
	vbio_t *vbio = bio->bi_private;
	zio_t *zio = vbio->vbio_zio;

	ASSERT(zio);

	/* Capture and log any errors */
#ifdef HAVE_1ARG_BIO_END_IO_T
	zio->io_error = BIO_END_IO_ERROR(bio);
#else
	zio->io_error = 0;
	if (error)
		zio->io_error = -(error);
	else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
		zio->io_error = EIO;
#endif
	ASSERT3U(zio->io_error, >=, 0);

	if (zio->io_error)
		vdev_disk_error(zio);

	/* Return the BIO to the kernel */
	bio_put(bio);

	/*
	 * If we copied the ABD before issuing it, clean up and return the copy
	 * to the ABD, with changes if appropriate.
	 */
	if (vbio->vbio_abd != NULL) {
		void *buf = abd_to_buf(vbio->vbio_abd);
		abd_free(vbio->vbio_abd);
		vbio->vbio_abd = NULL;

		if (zio->io_type == ZIO_TYPE_READ)
			abd_return_buf_copy(zio->io_abd, buf, zio->io_size);
		else
			abd_return_buf(zio->io_abd, buf, zio->io_size);
	}

	/* Final cleanup */
	kmem_free(vbio, sizeof (vbio_t));

	/* All done, submit for processing */
	zio_delay_interrupt(zio);
}

/*
 * Iterator callback to count ABD pages and check their size & alignment.
 *
 * On Linux, each BIO segment can take a page pointer, and an offset+length of
 * the data within that page. A page can be arbitrarily large ("compound"
 * pages) but we still have to ensure the data portion is correctly sized and
 * aligned to the logical block size, to ensure that if the kernel wants to
 * split the BIO, the two halves will still be properly aligned.
 */
typedef struct {
	uint_t	bmask;
	uint_t	npages;
	uint_t	end;
} vdev_disk_check_pages_t;

static int
vdev_disk_check_pages_cb(struct page *page, size_t off, size_t len, void *priv)
{
	vdev_disk_check_pages_t *s = priv;

	/*
	 * If we didn't finish on a block size boundary last time, then there
	 * would be a gap if we tried to use this ABD as-is, so abort.
	 */
	if (s->end != 0)
		return (1);

	/*
	 * Note if we're taking less than a full block, so we can check it
	 * above on the next call.
	 */
	s->end = (off+len) & s->bmask;

	/* All blocks after the first must start on a block size boundary. */
	if (s->npages != 0 && (off & s->bmask) != 0)
		return (1);

	s->npages++;
	return (0);
}

/*
 * Check if we can submit the pages in this ABD to the kernel as-is. Returns
 * B_TRUE if they can be submitted directly, B_FALSE if not.
 */
static boolean_t
vdev_disk_check_pages(abd_t *abd, uint64_t size, struct block_device *bdev)
{
	vdev_disk_check_pages_t s = {
		.bmask = bdev_logical_block_size(bdev)-1,
		.npages = 0,
		.end = 0,
	};

	if (abd_iterate_page_func(abd, 0, size, vdev_disk_check_pages_cb, &s))
		return (B_FALSE);

	return (B_TRUE);
}
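
The rule the callback enforces can be restated: the first chunk may end short of a block boundary, but then nothing may follow it, and every later chunk must start on a boundary. A standalone sketch of that check over a hypothetical chunk list (plain C, no ABD types):

#include <stdio.h>

/* mirror of the callback state: bmask, npages, end */
typedef struct { unsigned bmask, npages, end; } check_t;

/* returns 1 to abort (misaligned), 0 to continue */
static int
check_chunk(check_t *s, unsigned off, unsigned len)
{
	if (s->end != 0)			/* gap after a short chunk */
		return (1);
	s->end = (off + len) & s->bmask;	/* short tail? note it */
	if (s->npages != 0 && (off & s->bmask) != 0)
		return (1);			/* later chunk off-boundary */
	s->npages++;
	return (0);
}

int
main(void)
{
	/* hypothetical 4K-LBS chunks: (off, len) pairs within pages */
	unsigned chunks[][2] = { { 0, 4096 }, { 0, 8192 }, { 0, 2048 } };
	check_t s = { .bmask = 4096 - 1, .npages = 0, .end = 0 };

	int ok = 1;
	for (int i = 0; i < 3; i++)
		if (check_chunk(&s, chunks[i][0], chunks[i][1])) {
			ok = 0;
			break;
		}

	/* the trailing 2048-byte chunk passes only because nothing follows */
	printf("submittable as-is: %s\n", ok ? "yes" : "no");
	return (0);
}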

static int
vdev_disk_io_rw(zio_t *zio)
{
	vdev_t *v = zio->io_vd;
	vdev_disk_t *vd = v->vdev_tsd;
	struct block_device *bdev = vd->vd_bdev;
	int flags = 0;

	/*
	 * Accessing outside the block device is never allowed.
	 */
	if (zio->io_offset + zio->io_size > bdev->bd_inode->i_size) {
		vdev_dbgmsg(zio->io_vd,
		    "Illegal access %llu size %llu, device size %llu",
		    (u_longlong_t)zio->io_offset,
		    (u_longlong_t)zio->io_size,
		    (u_longlong_t)i_size_read(bdev->bd_inode));
		return (SET_ERROR(EIO));
	}

	if (zio && !(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
		bio_set_flags_failfast(bdev, &flags);

	/*
	 * Check alignment of the incoming ABD. If any part of it would require
	 * submitting a page that is not aligned to the logical block size,
	 * then we take a copy into a linear buffer and submit that instead.
	 * This should be impossible on a 512b LBS, and fairly rare on 4K,
	 * usually requiring abnormally-small data blocks (eg gang blocks)
	 * mixed into the same ABD as larger ones (eg aggregated).
	 */
	abd_t *abd = zio->io_abd;
	if (!vdev_disk_check_pages(abd, zio->io_size, bdev)) {
		void *buf;
		if (zio->io_type == ZIO_TYPE_READ)
			buf = abd_borrow_buf(zio->io_abd, zio->io_size);
		else
			buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size);

		/*
		 * Wrap the copy in an abd_t, so we can use the same iterators
		 * to count and fill the vbio later.
		 */
		abd = abd_get_from_buf(buf, zio->io_size);

		/*
		 * False here would mean the borrowed copy has an invalid
		 * alignment too, which would mean we've somehow been passed a
		 * linear ABD with an interior page that has a non-zero offset
		 * or a size not a multiple of PAGE_SIZE. This is not possible.
		 * It would mean either zio_buf_alloc() or its underlying
		 * allocators have done something extremely strange, or our
		 * math in vdev_disk_check_pages() is wrong. In either case,
		 * something is seriously wrong and it's not safe to continue.
		 */
		VERIFY(vdev_disk_check_pages(abd, zio->io_size, bdev));
	}

	/* Allocate vbio, with a pointer to the borrowed ABD if necessary */
	vbio_t *vbio = vbio_alloc(zio, bdev, flags);
	if (abd != zio->io_abd)
		vbio->vbio_abd = abd;

	/* Fill it with data pages and submit it to the kernel */
	vbio_submit(vbio, abd, zio->io_size);
	return (0);
}

/* ========== */

/*
 * This is the classic, battle-tested BIO submission code. Until we're totally
 * sure that the new code is safe and correct in all cases, this will remain
 * available and can be enabled by setting zfs_vdev_disk_classic=1 at module
 * load time.
 *
 * These functions have been renamed to vdev_classic_* to make it clear what
 * they belong to, but their implementations are unchanged.
 */

/*
 * Virtual device vector for disks.
 */
typedef struct dio_request {
	zio_t		*dr_zio;	/* Parent ZIO */
	atomic_t	dr_ref;		/* References */
	int		dr_error;	/* Bio error */
	int		dr_bio_count;	/* Count of bio's */
	struct bio	*dr_bio[];	/* Attached bio's */
} dio_request_t;

static dio_request_t *
vdev_classic_dio_alloc(int bio_count)
{
	dio_request_t *dr = kmem_zalloc(sizeof (dio_request_t) +
	    sizeof (struct bio *) * bio_count, KM_SLEEP);
	atomic_set(&dr->dr_ref, 0);
	dr->dr_bio_count = bio_count;
	dr->dr_error = 0;

	for (int i = 0; i < dr->dr_bio_count; i++)
		dr->dr_bio[i] = NULL;

	return (dr);
}

static void
vdev_classic_dio_free(dio_request_t *dr)
{
	int i;

	for (i = 0; i < dr->dr_bio_count; i++)
		if (dr->dr_bio[i])
			bio_put(dr->dr_bio[i]);

	kmem_free(dr, sizeof (dio_request_t) +
	    sizeof (struct bio *) * dr->dr_bio_count);
}

static void
vdev_classic_dio_get(dio_request_t *dr)
{
	atomic_inc(&dr->dr_ref);
}

static void
vdev_classic_dio_put(dio_request_t *dr)
{
	int rc = atomic_dec_return(&dr->dr_ref);

	/*
	 * Free the dio_request when the last reference is dropped and
	 * ensure zio_interpret is called only once with the correct zio
	 */
	if (rc == 0) {
		zio_t *zio = dr->dr_zio;
		int error = dr->dr_error;

		vdev_classic_dio_free(dr);

		if (zio) {
			zio->io_error = error;
			ASSERT3S(zio->io_error, >=, 0);
			if (zio->io_error)
				vdev_disk_error(zio);

			zio_delay_interrupt(zio);
		}
	}
}

BIO_END_IO_PROTO(vdev_classic_physio_completion, bio, error)
{
	dio_request_t *dr = bio->bi_private;

	if (dr->dr_error == 0) {
#ifdef HAVE_1ARG_BIO_END_IO_T
		dr->dr_error = BIO_END_IO_ERROR(bio);
#else
		if (error)
			dr->dr_error = -(error);
		else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
			dr->dr_error = EIO;
#endif
	}

	/* Drop reference acquired by vdev_classic_physio */
	vdev_classic_dio_put(dr);
}

static inline unsigned int
vdev_classic_bio_max_segs(zio_t *zio, int bio_size, uint64_t abd_offset)
{
	unsigned long nr_segs = abd_nr_pages_off(zio->io_abd,
	    bio_size, abd_offset);

#ifdef HAVE_BIO_MAX_SEGS
	return (bio_max_segs(nr_segs));
#else
	return (MIN(nr_segs, BIO_MAX_PAGES));
#endif
}

static int
vdev_classic_physio(zio_t *zio)
{
	vdev_t *v = zio->io_vd;
	vdev_disk_t *vd = v->vdev_tsd;
	struct block_device *bdev = vd->vd_bdev;
	size_t io_size = zio->io_size;
	uint64_t io_offset = zio->io_offset;
	int rw = zio->io_type == ZIO_TYPE_READ ? READ : WRITE;
	int flags = 0;

	dio_request_t *dr;
	uint64_t abd_offset;
	uint64_t bio_offset;

@ -637,7 +1013,7 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio,
	}

retry:
	dr = vdev_disk_dio_alloc(bio_count);
	dr = vdev_classic_dio_alloc(bio_count);

	if (zio && !(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
		bio_set_flags_failfast(bdev, &flags);

@ -669,23 +1045,23 @@ retry:
	 * this should be rare - see the comment above.
	 */
	if (dr->dr_bio_count == i) {
		vdev_disk_dio_free(dr);
		vdev_classic_dio_free(dr);
		bio_count *= 2;
		goto retry;
	}

	nr_vecs = vdev_bio_max_segs(bdev);
	nr_vecs = vdev_classic_bio_max_segs(zio, bio_size, abd_offset);
	dr->dr_bio[i] = vdev_bio_alloc(bdev, GFP_NOIO, nr_vecs);
	if (unlikely(dr->dr_bio[i] == NULL)) {
		vdev_disk_dio_free(dr);
		vdev_classic_dio_free(dr);
		return (SET_ERROR(ENOMEM));
	}

	/* Matching put called by vdev_disk_physio_completion */
	vdev_disk_dio_get(dr);
	/* Matching put called by vdev_classic_physio_completion */
	vdev_classic_dio_get(dr);

	BIO_BI_SECTOR(dr->dr_bio[i]) = bio_offset >> 9;
	dr->dr_bio[i]->bi_end_io = vdev_disk_physio_completion;
	dr->dr_bio[i]->bi_end_io = vdev_classic_physio_completion;
	dr->dr_bio[i]->bi_private = dr;
	bio_set_op_attrs(dr->dr_bio[i], rw, flags);

@ -707,7 +1083,7 @@ retry:
	}

	/* Extra reference to protect dio_request during vdev_submit_bio */
	vdev_disk_dio_get(dr);
	vdev_classic_dio_get(dr);

	if (dr->dr_bio_count > 1)
		blk_start_plug(&plug);

@ -721,11 +1097,13 @@ retry:
	if (dr->dr_bio_count > 1)
		blk_finish_plug(&plug);

	(void) vdev_disk_dio_put(dr);
	(void) vdev_classic_dio_put(dr);

	return (error);
}

/* ========== */

BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, error)
{
	zio_t *zio = bio->bi_private;

@ -795,12 +1173,14 @@ vdev_disk_io_trim(zio_t *zio)
#endif
}

int (*vdev_disk_io_rw_fn)(zio_t *zio) = NULL;

static void
vdev_disk_io_start(zio_t *zio)
{
	vdev_t *v = zio->io_vd;
	vdev_disk_t *vd = v->vdev_tsd;
	int rw, error;
	int error;

	/*
	 * If the vdev is closed, it's likely in the REMOVED or FAULTED state.

@ -879,13 +1259,6 @@ vdev_disk_io_start(zio_t *zio)
		rw_exit(&vd->vd_lock);
		zio_execute(zio);
		return;
	case ZIO_TYPE_WRITE:
		rw = WRITE;
		break;

	case ZIO_TYPE_READ:
		rw = READ;
		break;

	case ZIO_TYPE_TRIM:
		zio->io_error = vdev_disk_io_trim(zio);

@ -893,23 +1266,34 @@ vdev_disk_io_start(zio_t *zio)
		zio_interrupt(zio);
		return;

	case ZIO_TYPE_READ:
	case ZIO_TYPE_WRITE:
		zio->io_target_timestamp = zio_handle_io_delay(zio);
		error = vdev_disk_io_rw_fn(zio);
		rw_exit(&vd->vd_lock);
		if (error) {
			zio->io_error = error;
			zio_interrupt(zio);
		}
		return;

	default:
		/*
		 * Getting here means our parent vdev has made a very strange
		 * request of us, and shouldn't happen. Assert here to force a
		 * crash in dev builds, but in production return the IO
		 * unhandled. The pool will likely suspend anyway but that's
		 * nicer than crashing the kernel.
		 */
		ASSERT3S(zio->io_type, ==, -1);

		rw_exit(&vd->vd_lock);
		zio->io_error = SET_ERROR(ENOTSUP);
		zio_interrupt(zio);
		return;
	}

	zio->io_target_timestamp = zio_handle_io_delay(zio);
	error = __vdev_disk_physio(vd->vd_bdev, zio,
	    zio->io_size, zio->io_offset, rw, 0);
	rw_exit(&vd->vd_lock);

	if (error) {
		zio->io_error = error;
		zio_interrupt(zio);
		return;
	}
	__builtin_unreachable();
}

static void

@ -958,8 +1342,49 @@ vdev_disk_rele(vdev_t *vd)
	/* XXX: Implement me as a vnode rele for the device */
}

/*
 * BIO submission method. See comment above about vdev_classic.
 * Set zfs_vdev_disk_classic=0 for new, =1 for classic
 */
static uint_t zfs_vdev_disk_classic = 0;	/* default new */

/* Set submission function from module parameter */
static int
vdev_disk_param_set_classic(const char *buf, zfs_kernel_param_t *kp)
{
	int err = param_set_uint(buf, kp);
	if (err < 0)
		return (SET_ERROR(err));

	vdev_disk_io_rw_fn =
	    zfs_vdev_disk_classic ? vdev_classic_physio : vdev_disk_io_rw;

	printk(KERN_INFO "ZFS: forcing %s BIO submission\n",
	    zfs_vdev_disk_classic ? "classic" : "new");

	return (0);
}

/*
 * At first vdev use, set the submission function from the default value if
 * it hasn't been set already.
 */
static int
vdev_disk_init(spa_t *spa, nvlist_t *nv, void **tsd)
{
	(void) spa;
	(void) nv;
	(void) tsd;

	if (vdev_disk_io_rw_fn == NULL)
		vdev_disk_io_rw_fn = zfs_vdev_disk_classic ?
		    vdev_classic_physio : vdev_disk_io_rw;

	return (0);
}

vdev_ops_t vdev_disk_ops = {
	.vdev_op_init = NULL,
	.vdev_op_init = vdev_disk_init,
	.vdev_op_fini = NULL,
	.vdev_op_open = vdev_disk_open,
	.vdev_op_close = vdev_disk_close,

@ -1049,5 +1474,12 @@ param_set_max_auto_ashift(const char *buf, zfs_kernel_param_t *kp)
ZFS_MODULE_PARAM(zfs_zio, zio_, suppress_zero_writes, INT, ZMOD_RW,
	"Do not send zero byte writes to hardware");

ZFS_MODULE_PARAM(zfs_vdev_disk, vdev_disk_, max_segs, ULONG, ZMOD_RW,
	"Maximum number of data segments to add to an IO request");
ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, open_timeout_ms, UINT, ZMOD_RW,
	"Timeout before determining that a device is missing");

ZFS_MODULE_PARAM(zfs_vdev_disk, zfs_vdev_disk_, max_segs, UINT, ZMOD_RW,
	"Maximum number of data segments to add to an IO request (min 4)");

ZFS_MODULE_PARAM_CALL(zfs_vdev_disk, zfs_vdev_disk_, classic,
	vdev_disk_param_set_classic, param_get_uint, ZMOD_RD,
	"Use classic BIO submission method");

@ -816,6 +816,48 @@ abd_iterate_func(abd_t *abd, size_t off, size_t size,
	return (ret);
}

#if defined(__linux__) && defined(_KERNEL)
int
abd_iterate_page_func(abd_t *abd, size_t off, size_t size,
    abd_iter_page_func_t *func, void *private)
{
	struct abd_iter aiter;
	int ret = 0;

	if (size == 0)
		return (0);

	abd_verify(abd);
	ASSERT3U(off + size, <=, abd->abd_size);

	abd_t *c_abd = abd_init_abd_iter(abd, &aiter, off);

	while (size > 0) {
		IMPLY(abd_is_gang(abd), c_abd != NULL);

		abd_iter_page(&aiter);

		size_t len = MIN(aiter.iter_page_dsize, size);
		ASSERT3U(len, >, 0);

		ret = func(aiter.iter_page, aiter.iter_page_doff,
		    len, private);

		aiter.iter_page = NULL;
		aiter.iter_page_doff = 0;
		aiter.iter_page_dsize = 0;

		if (ret != 0)
			break;

		size -= len;
		c_abd = abd_advance_abd_iter(abd, c_abd, &aiter, len);
	}

	return (ret);
}
#endif

struct buf_arg {
	void *arg_buf;
};

@ -8491,11 +8491,11 @@ l2arc_dev_get_next(void)
			break;

	} while (vdev_is_dead(next->l2ad_vdev) || next->l2ad_rebuild ||
	    next->l2ad_trim_all);
	    next->l2ad_trim_all || next->l2ad_spa->spa_is_exporting);

	/* if we were unable to find any usable vdevs, return NULL */
	if (vdev_is_dead(next->l2ad_vdev) || next->l2ad_rebuild ||
	    next->l2ad_trim_all)
	    next->l2ad_trim_all || next->l2ad_spa->spa_is_exporting)
		next = NULL;

	l2arc_dev_last = next;

@ -10145,7 +10145,8 @@ l2arc_spa_rebuild_start(spa_t *spa)
void
l2arc_spa_rebuild_stop(spa_t *spa)
{
	ASSERT(MUTEX_HELD(&spa_namespace_lock));
	ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
	    spa->spa_export_thread == curthread);

	/*
	 * Locate the spa's l2arc devices and kick off rebuild threads.

@ -554,6 +554,21 @@ dbuf_evict_user(dmu_buf_impl_t *db)
	*dbu->dbu_clear_on_evict_dbufp = NULL;
#endif

	if (db->db_caching_status != DB_NO_CACHE) {
		/*
		 * This is a cached dbuf, so the size of the user data is
		 * included in its cached amount. We adjust it here because the
		 * user data has already been detached from the dbuf, and the
		 * sync functions are not supposed to touch it (the dbuf might
		 * not exist anymore by the time the sync functions run).
		 */
		uint64_t size = dbu->dbu_size;
		(void) zfs_refcount_remove_many(
		    &dbuf_caches[db->db_caching_status].size, size, db);
		if (db->db_caching_status == DB_DBUF_CACHE)
			DBUF_STAT_DECR(cache_levels_bytes[db->db_level], size);
	}

	/*
	 * There are two eviction callbacks - one that we call synchronously
	 * and one that we invoke via a taskq. The async one is useful for

@ -693,12 +708,12 @@ dbuf_evict_one(void)
	if (db != NULL) {
		multilist_sublist_remove(mls, db);
		multilist_sublist_unlock(mls);
		uint64_t size = db->db.db_size + dmu_buf_user_size(&db->db);
		(void) zfs_refcount_remove_many(
		    &dbuf_caches[DB_DBUF_CACHE].size, db->db.db_size, db);
		    &dbuf_caches[DB_DBUF_CACHE].size, size, db);
		DBUF_STAT_BUMPDOWN(cache_levels[db->db_level]);
		DBUF_STAT_BUMPDOWN(cache_count);
		DBUF_STAT_DECR(cache_levels_bytes[db->db_level],
		    db->db.db_size);
		DBUF_STAT_DECR(cache_levels_bytes[db->db_level], size);
		ASSERT3U(db->db_caching_status, ==, DB_DBUF_CACHE);
		db->db_caching_status = DB_NO_CACHE;
		dbuf_destroy(db);

@ -2808,6 +2823,8 @@ dbuf_destroy(dmu_buf_impl_t *db)
		    db->db_caching_status == DB_DBUF_METADATA_CACHE);

		multilist_remove(&dbuf_caches[db->db_caching_status].cache, db);

		ASSERT0(dmu_buf_user_size(&db->db));
		(void) zfs_refcount_remove_many(
		    &dbuf_caches[db->db_caching_status].size,
		    db->db.db_size, db);

@ -3540,17 +3557,17 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid,
		    db->db_caching_status == DB_DBUF_METADATA_CACHE);

		multilist_remove(&dbuf_caches[db->db_caching_status].cache, db);

		uint64_t size = db->db.db_size + dmu_buf_user_size(&db->db);
		(void) zfs_refcount_remove_many(
		    &dbuf_caches[db->db_caching_status].size,
		    db->db.db_size, db);
		    &dbuf_caches[db->db_caching_status].size, size, db);

		if (db->db_caching_status == DB_DBUF_METADATA_CACHE) {
			DBUF_STAT_BUMPDOWN(metadata_cache_count);
		} else {
			DBUF_STAT_BUMPDOWN(cache_levels[db->db_level]);
			DBUF_STAT_BUMPDOWN(cache_count);
			DBUF_STAT_DECR(cache_levels_bytes[db->db_level],
			    db->db.db_size);
			DBUF_STAT_DECR(cache_levels_bytes[db->db_level], size);
		}
		db->db_caching_status = DB_NO_CACHE;
	}

@ -3782,7 +3799,8 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting)
			db->db_caching_status = dcs;

			multilist_insert(&dbuf_caches[dcs].cache, db);
			uint64_t db_size = db->db.db_size;
			uint64_t db_size = db->db.db_size +
			    dmu_buf_user_size(&db->db);
			size = zfs_refcount_add_many(
			    &dbuf_caches[dcs].size, db_size, db);
			uint8_t db_level = db->db_level;

@ -3885,6 +3903,35 @@ dmu_buf_get_user(dmu_buf_t *db_fake)
	return (db->db_user);
}

uint64_t
dmu_buf_user_size(dmu_buf_t *db_fake)
{
	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
	if (db->db_user == NULL)
		return (0);
	return (atomic_load_64(&db->db_user->dbu_size));
}

void
dmu_buf_add_user_size(dmu_buf_t *db_fake, uint64_t nadd)
{
	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
	ASSERT3U(db->db_caching_status, ==, DB_NO_CACHE);
	ASSERT3P(db->db_user, !=, NULL);
	ASSERT3U(atomic_load_64(&db->db_user->dbu_size), <, UINT64_MAX - nadd);
	atomic_add_64(&db->db_user->dbu_size, nadd);
}

void
dmu_buf_sub_user_size(dmu_buf_t *db_fake, uint64_t nsub)
{
	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
	ASSERT3U(db->db_caching_status, ==, DB_NO_CACHE);
	ASSERT3P(db->db_user, !=, NULL);
	ASSERT3U(atomic_load_64(&db->db_user->dbu_size), >=, nsub);
	atomic_sub_64(&db->db_user->dbu_size, nsub);
}
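
A short userspace sketch of the same accounting pattern using C11 atomics; the mock_user_t type and its dbu_size field are hypothetical stand-ins for the descriptor above, not the OpenZFS types:

#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* hypothetical stand-in for the user-data descriptor */
typedef struct { _Atomic uint64_t dbu_size; } mock_user_t;

int
main(void)
{
	mock_user_t u = { 0 };

	/* attach two 512-byte dnode-like objects, then release one */
	atomic_fetch_add(&u.dbu_size, 512);
	atomic_fetch_add(&u.dbu_size, 512);
	assert(atomic_load(&u.dbu_size) >= 512);	/* underflow guard */
	atomic_fetch_sub(&u.dbu_size, 512);

	printf("attached user size: %llu\n",
	    (unsigned long long)atomic_load(&u.dbu_size));
	return (0);
}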

void
dmu_buf_user_evict_wait(void)
{

@ -46,14 +46,14 @@ static int
dbuf_stats_hash_table_headers(char *buf, size_t size)
{
	(void) snprintf(buf, size,
	    "%-96s | %-119s | %s\n"
	    "%-16s %-8s %-8s %-8s %-8s %-10s %-8s %-5s %-5s %-7s %3s | "
	    "%-105s | %-119s | %s\n"
	    "%-16s %-8s %-8s %-8s %-8s %-10s %-8s %-8s %-5s %-5s %-7s %3s | "
	    "%-5s %-5s %-9s %-6s %-8s %-12s "
	    "%-6s %-6s %-6s %-6s %-6s %-8s %-8s %-8s %-6s | "
	    "%-6s %-6s %-8s %-8s %-6s %-6s %-6s %-8s %-8s\n",
	    "dbuf", "arcbuf", "dnode", "pool", "objset", "object", "level",
	    "blkid", "offset", "dbsize", "meta", "state", "dbholds", "dbc",
	    "list", "atype", "flags", "count", "asize", "access",
	    "blkid", "offset", "dbsize", "usize", "meta", "state", "dbholds",
	    "dbc", "list", "atype", "flags", "count", "asize", "access",
	    "mru", "gmru", "mfu", "gmfu", "l2", "l2_dattr", "l2_asize",
	    "l2_comp", "aholds", "dtype", "btype", "data_bs", "meta_bs",
	    "bsize", "lvls", "dholds", "blocks", "dsize");

@ -75,8 +75,8 @@ __dbuf_stats_hash_table_data(char *buf, size_t size, dmu_buf_impl_t *db)
	__dmu_object_info_from_dnode(dn, &doi);

	nwritten = snprintf(buf, size,
	    "%-16s %-8llu %-8lld %-8lld %-8lld %-10llu %-8llu %-5d %-5d "
	    "%-7lu %-3d | %-5d %-5d 0x%-7x %-6lu %-8llu %-12llu "
	    "%-16s %-8llu %-8lld %-8lld %-8lld %-10llu %-8llu %-8llu "
	    "%-5d %-5d %-7lu %-3d | %-5d %-5d 0x%-7x %-6lu %-8llu %-12llu "
	    "%-6lu %-6lu %-6lu %-6lu %-6lu %-8llu %-8llu %-8d %-6lu | "
	    "%-6d %-6d %-8lu %-8lu %-6llu %-6lu %-6lu %-8llu %-8llu\n",
	    /* dmu_buf_impl_t */

@ -87,6 +87,7 @@ __dbuf_stats_hash_table_data(char *buf, size_t size, dmu_buf_impl_t *db)
	    (longlong_t)db->db_blkid,
	    (u_longlong_t)db->db.db_offset,
	    (u_longlong_t)db->db.db_size,
	    (u_longlong_t)dmu_buf_user_size(&db->db),
	    !!dbuf_is_metadata(db),
	    db->db_state,
	    (ulong_t)zfs_refcount_count(&db->db_holds),

@ -1120,9 +1120,11 @@ dnode_check_slots_free(dnode_children_t *children, int idx, int slots)
	return (B_TRUE);
}

static void
static uint_t
dnode_reclaim_slots(dnode_children_t *children, int idx, int slots)
{
	uint_t reclaimed = 0;

	ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK);

	for (int i = idx; i < idx + slots; i++) {

@ -1134,8 +1136,11 @@ dnode_reclaim_slots(dnode_children_t *children, int idx, int slots)
			ASSERT3S(dnh->dnh_dnode->dn_type, ==, DMU_OT_NONE);
			dnode_destroy(dnh->dnh_dnode);
			dnh->dnh_dnode = DN_SLOT_FREE;
			reclaimed++;
		}
	}

	return (reclaimed);
}

void

@ -1448,6 +1453,8 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
			} else {
				dn = dnode_create(os, dn_block + idx, db,
				    object, dnh);
				dmu_buf_add_user_size(&db->db,
				    sizeof (dnode_t));
			}
		}

@ -1505,8 +1512,13 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
		 * to be freed. Single slot dnodes can be safely
		 * re-purposed as a performance optimization.
		 */
		if (slots > 1)
			dnode_reclaim_slots(dnc, idx + 1, slots - 1);
		if (slots > 1) {
			uint_t reclaimed =
			    dnode_reclaim_slots(dnc, idx + 1, slots - 1);
			if (reclaimed > 0)
				dmu_buf_sub_user_size(&db->db,
				    reclaimed * sizeof (dnode_t));
		}

		dnh = &dnc->dnc_children[idx];
		if (DN_SLOT_IS_PTR(dnh->dnh_dnode)) {

@ -1514,6 +1526,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
		} else {
			dn = dnode_create(os, dn_block + idx, db,
			    object, dnh);
			dmu_buf_add_user_size(&db->db, sizeof (dnode_t));
		}

		mutex_enter(&dn->dn_mtx);

@ -662,12 +662,13 @@ mmp_thread(void *arg)
		    (gethrtime() - mmp->mmp_last_write) > mmp_fail_ns) {
			zfs_dbgmsg("MMP suspending pool '%s': gethrtime %llu "
			    "mmp_last_write %llu mmp_interval %llu "
			    "mmp_fail_intervals %llu mmp_fail_ns %llu",
			    "mmp_fail_intervals %llu mmp_fail_ns %llu txg %llu",
			    spa_name(spa), (u_longlong_t)gethrtime(),
			    (u_longlong_t)mmp->mmp_last_write,
			    (u_longlong_t)mmp_interval,
			    (u_longlong_t)mmp_fail_intervals,
			    (u_longlong_t)mmp_fail_ns);
			    (u_longlong_t)mmp_fail_ns,
			    (u_longlong_t)spa->spa_uberblock.ub_txg);
			cmn_err(CE_WARN, "MMP writes to pool '%s' have not "
			    "succeeded in over %llu ms; suspending pool. "
			    "Hrtime %llu",

@ -54,3 +54,11 @@ slack_compress(void *src, void *dst, size_t s_len, size_t d_len, int level)
	memcpy(dst, src, c_len);
	return (c_len);
}

int
slack_decompress(void *src, void *dst, size_t s_len, size_t d_len, int level)
{
	ASSERT3U(d_len, >=, s_len);
	memcpy(dst, src, s_len);
	return (0);
}

module/zfs/spa.c
@ -33,6 +33,7 @@
 * Copyright 2017 Joyent, Inc.
 * Copyright (c) 2017, Intel Corporation.
 * Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
 * Copyright (c) 2024, Klara Inc.
 */

/*

@ -87,6 +88,7 @@
#include <sys/zfeature.h>
#include <sys/dsl_destroy.h>
#include <sys/zvol.h>
#include <sys/trace_zfs.h>

#ifdef _KERNEL
#include <sys/fm/protocol.h>

@ -150,7 +152,7 @@ static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
 * and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that
 * need to be handled with minimum delay.
 */
const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
static zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
	/* ISSUE	ISSUE_HIGH	INTR		INTR_HIGH */
	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL }, /* NULL */
	{ ZTI_N(8),	ZTI_NULL,	ZTI_SCALE,	ZTI_NULL }, /* READ */

@ -172,6 +174,14 @@ uint_t zio_taskq_batch_tpq;		/* threads per taskq */
boolean_t zio_taskq_sysdc = B_TRUE;	/* use SDC scheduling class */
uint_t zio_taskq_basedc = 80;		/* base duty cycle */

/*
 * If enabled, try to find an unlocked IO taskq to dispatch an IO onto before
 * falling back to waiting on a lock. This should only be enabled in
 * conjunction with careful performance testing, and will likely require
 * zio_taskq_read/zio_taskq_write to be adjusted as well.
 */
boolean_t zio_taskq_trylock = B_FALSE;

boolean_t spa_create_process = B_TRUE;	/* no process ==> no sysdc */

/*

@ -982,6 +992,9 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
	uint_t cpus, flags = TASKQ_DYNAMIC;
	boolean_t batch = B_FALSE;

	tqs->stqs_type = q;
	tqs->stqs_zio_type = t;

	switch (mode) {
	case ZTI_MODE_FIXED:
		ASSERT3U(value, >, 0);

@ -1114,29 +1127,313 @@ spa_taskqs_fini(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
	tqs->stqs_taskq = NULL;
}

#ifdef _KERNEL
/*
 * The READ and WRITE rows of zio_taskqs are configurable at module load time
 * by setting zio_taskq_read or zio_taskq_write.
 *
 * Example (the defaults for READ and WRITE)
 *   zio_taskq_read='fixed,1,8 null scale null'
 *   zio_taskq_write='batch fixed,1,5 scale fixed,1,5'
 *
 * Each sets the entire row at a time.
 *
 * 'fixed' is parameterised: fixed,Q,T where Q is number of taskqs, T is number
 * of threads per taskq.
 *
 * 'null' can only be set on the high-priority queues (queue selection for
 * high-priority queues will fall back to the regular queue if the high-pri
 * is NULL).
 */
static const char *const modes[ZTI_NMODES] = {
	"fixed", "batch", "scale", "null"
};

/* Parse the incoming config string. Modifies cfg */
static int
spa_taskq_param_set(zio_type_t t, char *cfg)
{
	int err = 0;

	zio_taskq_info_t row[ZIO_TASKQ_TYPES] = {{0}};

	char *next = cfg, *tok, *c;

	/*
	 * Parse out each element from the string and fill `row`. The entire
	 * row has to be set at once, so any errors are flagged by just
	 * breaking out of this loop early.
	 */
	uint_t q;
	for (q = 0; q < ZIO_TASKQ_TYPES; q++) {
		/* `next` is the start of the config */
		if (next == NULL)
			break;

		/* Eat up leading space */
		while (isspace(*next))
			next++;
		if (*next == '\0')
			break;

		/* Mode ends at space or end of string */
		tok = next;
		next = strchr(tok, ' ');
		if (next != NULL) *next++ = '\0';

		/* Parameters start after a comma */
		c = strchr(tok, ',');
		if (c != NULL) *c++ = '\0';

		/* Match mode string */
		uint_t mode;
		for (mode = 0; mode < ZTI_NMODES; mode++)
			if (strcmp(tok, modes[mode]) == 0)
				break;
		if (mode == ZTI_NMODES)
			break;

		/* Invalid canary */
		row[q].zti_mode = ZTI_NMODES;

		/* Per-mode setup */
		switch (mode) {

		/*
		 * FIXED is parameterised: number of queues, and number of
		 * threads per queue.
		 */
		case ZTI_MODE_FIXED: {
			/* No parameters? */
			if (c == NULL || *c == '\0')
				break;

			/* Find next parameter */
			tok = c;
			c = strchr(tok, ',');
			if (c == NULL)
				break;

			/* Take digits and convert */
			unsigned long long nq;
			if (!(isdigit(*tok)))
				break;
			err = ddi_strtoull(tok, &tok, 10, &nq);
			/* Must succeed and also end at the next param sep */
			if (err != 0 || tok != c)
				break;

			/* Move past the comma */
			tok++;
			/* Need another number */
			if (!(isdigit(*tok)))
				break;
			/* Remember start to make sure we moved */
			c = tok;

			/* Take digits */
			unsigned long long ntpq;
			err = ddi_strtoull(tok, &tok, 10, &ntpq);
			/* Must succeed, and moved forward */
			if (err != 0 || tok == c || *tok != '\0')
				break;

			/*
			 * sanity; zero queues/threads make no sense, and
			 * 16K is almost certainly more than anyone will ever
			 * need and avoids silly numbers like UINT32_MAX
			 */
			if (nq == 0 || nq >= 16384 ||
			    ntpq == 0 || ntpq >= 16384)
				break;

			const zio_taskq_info_t zti = ZTI_P(ntpq, nq);
			row[q] = zti;
			break;
		}

		case ZTI_MODE_BATCH: {
			const zio_taskq_info_t zti = ZTI_BATCH;
			row[q] = zti;
			break;
		}

		case ZTI_MODE_SCALE: {
			const zio_taskq_info_t zti = ZTI_SCALE;
			row[q] = zti;
			break;
		}

		case ZTI_MODE_NULL: {
			/*
			 * Can only null the high-priority queues; the general-
			 * purpose ones have to exist.
			 */
			if (q != ZIO_TASKQ_ISSUE_HIGH &&
			    q != ZIO_TASKQ_INTERRUPT_HIGH)
				break;

			const zio_taskq_info_t zti = ZTI_NULL;
			row[q] = zti;
			break;
		}

		default:
			break;
		}

		/* Ensure we set a mode */
		if (row[q].zti_mode == ZTI_NMODES)
			break;
	}

	/* Didn't get a full row, fail */
	if (q < ZIO_TASKQ_TYPES)
		return (SET_ERROR(EINVAL));

	/* Eat trailing space */
	if (next != NULL)
		while (isspace(*next))
			next++;

	/* If there's anything left over then fail */
	if (next != NULL && *next != '\0')
		return (SET_ERROR(EINVAL));

	/* Success! Copy it into the real config */
	for (q = 0; q < ZIO_TASKQ_TYPES; q++)
		zio_taskqs[t][q] = row[q];

	return (0);
}

static int
spa_taskq_param_get(zio_type_t t, char *buf, boolean_t add_newline)
{
	int pos = 0;

	/* Build parameter string from live config */
	const char *sep = "";
	for (uint_t q = 0; q < ZIO_TASKQ_TYPES; q++) {
		const zio_taskq_info_t *zti = &zio_taskqs[t][q];
		if (zti->zti_mode == ZTI_MODE_FIXED)
			pos += sprintf(&buf[pos], "%s%s,%u,%u", sep,
			    modes[zti->zti_mode], zti->zti_count,
			    zti->zti_value);
		else
			pos += sprintf(&buf[pos], "%s%s", sep,
			    modes[zti->zti_mode]);
		sep = " ";
	}

	if (add_newline)
		buf[pos++] = '\n';
	buf[pos] = '\0';

	return (pos);
}
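
A compact userspace sketch of tokenizing one such row string; it mimics the mode/parameter split in the parser above, but is illustrative only and skips the range checks and ZTI table setup:

#include <stdio.h>
#include <string.h>

int
main(void)
{
	/* the documented READ default; each token sets one queue */
	char cfg[] = "fixed,1,8 null scale null";
	const char *qname[] = { "ISSUE", "ISSUE_HIGH", "INTR", "INTR_HIGH" };

	char *save = NULL;
	char *tok = strtok_r(cfg, " ", &save);
	for (int q = 0; q < 4 && tok != NULL; q++) {
		char *params = strchr(tok, ',');
		if (params != NULL)
			*params++ = '\0';	/* mode name, then "Q,T" */
		printf("%-10s mode=%s params=%s\n", qname[q], tok,
		    params ? params : "-");
		tok = strtok_r(NULL, " ", &save);
	}
	return (0);
}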

#ifdef __linux__
static int
spa_taskq_read_param_set(const char *val, zfs_kernel_param_t *kp)
{
	char *cfg = kmem_strdup(val);
	int err = spa_taskq_param_set(ZIO_TYPE_READ, cfg);
	kmem_free(cfg, strlen(val)+1);
	return (-err);
}
static int
spa_taskq_read_param_get(char *buf, zfs_kernel_param_t *kp)
{
	return (spa_taskq_param_get(ZIO_TYPE_READ, buf, TRUE));
}

static int
spa_taskq_write_param_set(const char *val, zfs_kernel_param_t *kp)
{
	char *cfg = kmem_strdup(val);
	int err = spa_taskq_param_set(ZIO_TYPE_WRITE, cfg);
	kmem_free(cfg, strlen(val)+1);
	return (-err);
}
static int
spa_taskq_write_param_get(char *buf, zfs_kernel_param_t *kp)
{
	return (spa_taskq_param_get(ZIO_TYPE_WRITE, buf, TRUE));
}
#else
/*
 * On FreeBSD load-time parameters can be set up before malloc() is available,
 * so we have to do all the parsing work on the stack.
 */
#define	SPA_TASKQ_PARAM_MAX	(128)

static int
spa_taskq_read_param(ZFS_MODULE_PARAM_ARGS)
{
	char buf[SPA_TASKQ_PARAM_MAX];
	int err;

	(void) spa_taskq_param_get(ZIO_TYPE_READ, buf, FALSE);
	err = sysctl_handle_string(oidp, buf, sizeof (buf), req);
	if (err || req->newptr == NULL)
		return (err);
	return (spa_taskq_param_set(ZIO_TYPE_READ, buf));
}

static int
spa_taskq_write_param(ZFS_MODULE_PARAM_ARGS)
{
	char buf[SPA_TASKQ_PARAM_MAX];
	int err;

	(void) spa_taskq_param_get(ZIO_TYPE_WRITE, buf, FALSE);
	err = sysctl_handle_string(oidp, buf, sizeof (buf), req);
	if (err || req->newptr == NULL)
		return (err);
	return (spa_taskq_param_set(ZIO_TYPE_WRITE, buf));
}
#endif
#endif /* _KERNEL */

/*
 * Dispatch a task to the appropriate taskq for the ZFS I/O type and priority.
 * Note that a type may have multiple discrete taskqs to avoid lock contention
 * on the taskq itself. In that case we choose which taskq at random by using
 * the low bits of gethrtime().
 * on the taskq itself. In that case we try each one until it goes in, before
 * falling back to waiting on a lock.
 */
void
spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
    task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent)
{
	spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
	taskq_t *tq;

	ASSERT3P(tqs->stqs_taskq, !=, NULL);
	ASSERT3U(tqs->stqs_count, !=, 0);

	DTRACE_PROBE2(spa_taskqs_ent__dispatch,
	    spa_taskqs_t *, tqs, taskq_ent_t *, ent);

	if (tqs->stqs_count == 1) {
		tq = tqs->stqs_taskq[0];
	} else {
		tq = tqs->stqs_taskq[((uint64_t)gethrtime()) % tqs->stqs_count];
		taskq_dispatch_ent(tqs->stqs_taskq[0], func, arg, flags, ent);
		goto out;
	}

	taskq_dispatch_ent(tq, func, arg, flags, ent);
	int select = ((uint64_t)gethrtime()) % tqs->stqs_count;
	if (zio_taskq_trylock) {
		for (int i = 0; i < tqs->stqs_count; i++) {
			if (taskq_try_dispatch_ent(
			    tqs->stqs_taskq[select], func, arg, flags, ent))
				goto out;
			select = (select+1) % tqs->stqs_count;
		}
	}

	taskq_dispatch_ent(tqs->stqs_taskq[select], func, arg, flags, ent);

out:
	DTRACE_PROBE2(spa_taskqs_ent__dispatched,
	    spa_taskqs_t *, tqs, taskq_ent_t *, ent);
}
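
The selection policy reduces to: start at a pseudo-random index, try each taskq once without blocking, and only then queue on whichever index the rotation lands back on. A sketch of just that rotation, with a stubbed try-dispatch (all names here are hypothetical):

#include <stdbool.h>
#include <stdio.h>

#define	NQUEUES	4

/* stub: pretend only queue 2 can accept work without blocking */
static bool
try_dispatch(int q)
{
	return (q == 2);
}

int
main(void)
{
	int select = 3;		/* stand-in for gethrtime() % count */

	for (int i = 0; i < NQUEUES; i++) {
		if (try_dispatch(select)) {
			printf("dispatched without blocking on taskq %d\n",
			    select);
			return (0);
		}
		select = (select + 1) % NQUEUES;	/* same rotation */
	}
	printf("all busy; blocking dispatch on taskq %d\n", select);
	return (0);
}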

/*

@ -1619,7 +1916,8 @@ spa_unload(spa_t *spa, txg_wait_flag_t txg_how)
	vdev_t *vd;
	uint64_t t, txg;

	ASSERT(MUTEX_HELD(&spa_namespace_lock));
	ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
	    spa->spa_export_thread == curthread);
	ASSERT(spa_state(spa) != POOL_STATE_UNINITIALIZED);

	spa_import_progress_remove(spa_guid(spa));

@ -2931,8 +3229,6 @@ spa_spawn_aux_threads(spa_t *spa)
{
	ASSERT(spa_writeable(spa));

	ASSERT(MUTEX_HELD(&spa_namespace_lock));

	spa_start_indirect_condensing_thread(spa);
	spa_start_livelist_destroy_thread(spa);
	spa_start_livelist_condensing_thread(spa);

@ -3035,6 +3331,7 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type)
	spa->spa_load_state = state;
	(void) spa_import_progress_set_state(spa_guid(spa),
	    spa_load_state(spa));
	spa_import_progress_set_notes(spa, "spa_load()");

	gethrestime(&spa->spa_loaded_ts);
	error = spa_load_impl(spa, type, &ereport);

@ -3244,18 +3541,23 @@ spa_activity_check_duration(spa_t *spa, uberblock_t *ub)
}

/*
 * Perform the import activity check. If the user canceled the import or
 * we detected activity then fail.
 * Remote host activity check.
 *
 * error results:
 *   0 - no activity detected
 *   EREMOTEIO - remote activity detected
 *   EINTR - user canceled the operation
 */
static int
spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config)
spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config,
    boolean_t importing)
{
	uint64_t txg = ub->ub_txg;
	uint64_t timestamp = ub->ub_timestamp;
	uint64_t mmp_config = ub->ub_mmp_config;
	uint16_t mmp_seq = MMP_SEQ_VALID(ub) ? MMP_SEQ(ub) : 0;
	uint64_t import_delay;
	hrtime_t import_expire;
	hrtime_t import_expire, now;
	nvlist_t *mmp_label = NULL;
	vdev_t *rvd = spa->spa_root_vdev;
	kcondvar_t cv;

@ -3293,9 +3595,23 @@ spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config)

	import_expire = gethrtime() + import_delay;

	while (gethrtime() < import_expire) {
		(void) spa_import_progress_set_mmp_check(spa_guid(spa),
		    NSEC2SEC(import_expire - gethrtime()));
	if (importing) {
		spa_import_progress_set_notes(spa, "Checking MMP activity, "
		    "waiting %llu ms", (u_longlong_t)NSEC2MSEC(import_delay));
	}

	int iterations = 0;
	while ((now = gethrtime()) < import_expire) {
		if (importing && iterations++ % 30 == 0) {
			spa_import_progress_set_notes(spa, "Checking MMP "
			    "activity, %llu ms remaining",
			    (u_longlong_t)NSEC2MSEC(import_expire - now));
		}

		if (importing) {
			(void) spa_import_progress_set_mmp_check(spa_guid(spa),
			    NSEC2SEC(import_expire - gethrtime()));
		}

		vdev_uberblock_load(rvd, ub, &mmp_label);

@ -3377,6 +3693,61 @@ out:
	return (error);
}

/*
 * Called from zfs_ioc_clear for a pool that was suspended
 * after failing mmp write checks.
 */
boolean_t
spa_mmp_remote_host_activity(spa_t *spa)
{
	ASSERT(spa_multihost(spa) && spa_suspended(spa));

	nvlist_t *best_label;
	uberblock_t best_ub;

	/*
	 * Locate the best uberblock on disk
	 */
	vdev_uberblock_load(spa->spa_root_vdev, &best_ub, &best_label);
	if (best_label) {
		/*
		 * confirm that the best hostid matches our hostid
		 */
		if (nvlist_exists(best_label, ZPOOL_CONFIG_HOSTID) &&
		    spa_get_hostid(spa) !=
		    fnvlist_lookup_uint64(best_label, ZPOOL_CONFIG_HOSTID)) {
			nvlist_free(best_label);
			return (B_TRUE);
		}
		nvlist_free(best_label);
	} else {
		return (B_TRUE);
	}

	if (!MMP_VALID(&best_ub) ||
	    !MMP_FAIL_INT_VALID(&best_ub) ||
	    MMP_FAIL_INT(&best_ub) == 0) {
		return (B_TRUE);
	}

	if (best_ub.ub_txg != spa->spa_uberblock.ub_txg ||
	    best_ub.ub_timestamp != spa->spa_uberblock.ub_timestamp) {
		zfs_dbgmsg("txg mismatch detected during pool clear "
		    "txg %llu ub_txg %llu timestamp %llu ub_timestamp %llu",
		    (u_longlong_t)spa->spa_uberblock.ub_txg,
		    (u_longlong_t)best_ub.ub_txg,
		    (u_longlong_t)spa->spa_uberblock.ub_timestamp,
		    (u_longlong_t)best_ub.ub_timestamp);
		return (B_TRUE);
	}

	/*
	 * Perform an activity check looking for any remote writer
	 */
	return (spa_activity_check(spa, &spa->spa_uberblock, spa->spa_config,
	    B_FALSE) != 0);
}

static int
spa_verify_host(spa_t *spa, nvlist_t *mos_config)
{

@ -3697,7 +4068,8 @@ spa_ld_select_uberblock(spa_t *spa, spa_import_type_t type)
			return (spa_vdev_err(rvd, VDEV_AUX_ACTIVE, EREMOTEIO));
		}

		int error = spa_activity_check(spa, ub, spa->spa_config);
		int error =
		    spa_activity_check(spa, ub, spa->spa_config, B_TRUE);
		if (error) {
			nvlist_free(label);
			return (error);

@ -3904,6 +4276,24 @@ spa_ld_trusted_config(spa_t *spa, spa_import_type_t type,
|
|||
rvd = mrvd;
|
||||
spa_config_exit(spa, SCL_ALL, FTAG);
|
||||
|
||||
/*
|
||||
* If 'zpool import' used a cached config, then the on-disk hostid and
|
||||
* hostname may be different to the cached config in ways that should
|
||||
* prevent import. Userspace can't discover this without a scan, but
|
||||
* we know, so we add these values to LOAD_INFO so the caller can know
|
||||
* the difference.
|
||||
*
|
||||
* Note that we have to do this before the config is regenerated,
|
||||
* because the new config will have the hostid and hostname for this
|
||||
* host, in readiness for import.
|
||||
*/
|
||||
if (nvlist_exists(mos_config, ZPOOL_CONFIG_HOSTID))
|
||||
fnvlist_add_uint64(spa->spa_load_info, ZPOOL_CONFIG_HOSTID,
|
||||
fnvlist_lookup_uint64(mos_config, ZPOOL_CONFIG_HOSTID));
|
||||
if (nvlist_exists(mos_config, ZPOOL_CONFIG_HOSTNAME))
|
||||
fnvlist_add_string(spa->spa_load_info, ZPOOL_CONFIG_HOSTNAME,
|
||||
fnvlist_lookup_string(mos_config, ZPOOL_CONFIG_HOSTNAME));
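/* Callers can read these from spa_load_info (e.g. to report a hostid mismatch) without rescanning the devices. */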

/*
* We will use spa_config if we decide to reload the spa or if spa_load
* fails and we rewind. We must thus regenerate the config using the

@@ -4580,7 +4970,8 @@ spa_ld_read_checkpoint_txg(spa_t *spa)
int error = 0;

ASSERT0(spa->spa_checkpoint_txg);
ASSERT(MUTEX_HELD(&spa_namespace_lock));
ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
spa->spa_load_thread == curthread);

error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t),

@@ -4827,6 +5218,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
boolean_t checkpoint_rewind =
(spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT);
boolean_t update_config_cache = B_FALSE;
hrtime_t load_start = gethrtime();

ASSERT(MUTEX_HELD(&spa_namespace_lock));
ASSERT(spa->spa_config_source != SPA_CONFIG_SRC_NONE);

@@ -4871,12 +5263,19 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
return (error);
}

/*
* Drop the namespace lock for the rest of the function.
*/
spa->spa_load_thread = curthread;
mutex_exit(&spa_namespace_lock);
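/* From here on, failures must "goto fail" so the namespace lock is retaken and spa_load_thread is cleared before returning. */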

/*
* Retrieve the checkpoint txg if the pool has a checkpoint.
*/
spa_import_progress_set_notes(spa, "Loading checkpoint txg");
error = spa_ld_read_checkpoint_txg(spa);
if (error != 0)
return (error);
goto fail;

/*
* Retrieve the mapping of indirect vdevs. Those vdevs were removed

@@ -4886,60 +5285,68 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
* initiated. Otherwise we could be reading from indirect vdevs before
* we have loaded their mappings.
*/
spa_import_progress_set_notes(spa, "Loading indirect vdev metadata");
error = spa_ld_open_indirect_vdev_metadata(spa);
if (error != 0)
return (error);
goto fail;

/*
* Retrieve the full list of active features from the MOS and check if
* they are all supported.
*/
spa_import_progress_set_notes(spa, "Checking feature flags");
error = spa_ld_check_features(spa, &missing_feat_write);
if (error != 0)
return (error);
goto fail;

/*
* Load several special directories from the MOS needed by the dsl_pool
* layer.
*/
spa_import_progress_set_notes(spa, "Loading special MOS directories");
error = spa_ld_load_special_directories(spa);
if (error != 0)
return (error);
goto fail;

/*
* Retrieve pool properties from the MOS.
*/
spa_import_progress_set_notes(spa, "Loading properties");
error = spa_ld_get_props(spa);
if (error != 0)
return (error);
goto fail;

/*
* Retrieve the list of auxiliary devices - cache devices and spares -
* and open them.
*/
spa_import_progress_set_notes(spa, "Loading AUX vdevs");
error = spa_ld_open_aux_vdevs(spa, type);
if (error != 0)
return (error);
goto fail;

/*
* Load the metadata for all vdevs. Also check if unopenable devices
* should be autoreplaced.
*/
spa_import_progress_set_notes(spa, "Loading vdev metadata");
error = spa_ld_load_vdev_metadata(spa);
if (error != 0)
return (error);
goto fail;

spa_import_progress_set_notes(spa, "Loading dedup tables");
error = spa_ld_load_dedup_tables(spa);
if (error != 0)
return (error);
goto fail;

/*
* Verify the logs now to make sure we don't have any unexpected errors
* when we claim log blocks later.
*/
spa_import_progress_set_notes(spa, "Verifying Log Devices");
error = spa_ld_verify_logs(spa, type, ereport);
if (error != 0)
return (error);
goto fail;

if (missing_feat_write) {
ASSERT(spa->spa_load_state == SPA_LOAD_TRYIMPORT);

@@ -4949,8 +5356,9 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
* read-only mode but not read-write mode. We now have enough
* information and can return to userland.
*/
return (spa_vdev_err(spa->spa_root_vdev, VDEV_AUX_UNSUP_FEAT,
ENOTSUP));
error = spa_vdev_err(spa->spa_root_vdev, VDEV_AUX_UNSUP_FEAT,
ENOTSUP);
goto fail;
}

/*

@@ -4958,15 +5366,17 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
* state. When performing an extreme rewind, we verify the whole pool,
* which can take a very long time.
*/
spa_import_progress_set_notes(spa, "Verifying pool data");
error = spa_ld_verify_pool_data(spa);
if (error != 0)
return (error);
goto fail;

/*
* Calculate the deflated space for the pool. This must be done before
* we write anything to the pool because we'd need to update the space
* accounting using the deflated sizes.
*/
spa_import_progress_set_notes(spa, "Calculating deflated space");
spa_update_dspace(spa);

/*

@@ -4974,6 +5384,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
* pool. If we are importing the pool in read-write mode, a few
* additional steps must be performed to finish the import.
*/
spa_import_progress_set_notes(spa, "Starting import");
if (spa_writeable(spa) && (spa->spa_load_state == SPA_LOAD_RECOVER ||
spa->spa_load_max_txg == UINT64_MAX)) {
uint64_t config_cache_txg = spa->spa_config_txg;

@@ -4990,6 +5401,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
(u_longlong_t)spa->spa_uberblock.ub_checkpoint_txg);
}

spa_import_progress_set_notes(spa, "Claiming ZIL blocks");
/*
* Traverse the ZIL and claim all blocks.
*/

@@ -5009,6 +5421,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
* will have been set for us by ZIL traversal operations
* performed above.
*/
spa_import_progress_set_notes(spa, "Syncing ZIL claims");
txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg);

/*

@@ -5016,6 +5429,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
* next sync, we would update the config stored in vdev labels
* and the cachefile (by default /etc/zfs/zpool.cache).
*/
spa_import_progress_set_notes(spa, "Updating configs");
spa_ld_check_for_config_update(spa, config_cache_txg,
update_config_cache);

@@ -5024,6 +5438,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
* Then check all DTLs to see if anything needs resilvering.
* The resilver will be deferred if a rebuild was started.
*/
spa_import_progress_set_notes(spa, "Starting resilvers");
if (vdev_rebuild_active(spa->spa_root_vdev)) {
vdev_rebuild_restart(spa);
} else if (!dsl_scan_resilvering(spa->spa_dsl_pool) &&

@@ -5037,6 +5452,8 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
*/
spa_history_log_version(spa, "open", NULL);

spa_import_progress_set_notes(spa,
"Restarting device removals");
spa_restart_removal(spa);
spa_spawn_aux_threads(spa);

@@ -5049,27 +5466,40 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
* auxiliary threads above (from which the livelist
* deletion zthr is part of).
*/
spa_import_progress_set_notes(spa,
"Cleaning up inconsistent objsets");
(void) dmu_objset_find(spa_name(spa),
dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN);

/*
* Clean up any stale temporary dataset userrefs.
*/
spa_import_progress_set_notes(spa,
"Cleaning up temporary userrefs");
dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool);

spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
spa_import_progress_set_notes(spa, "Restarting initialize");
vdev_initialize_restart(spa->spa_root_vdev);
spa_import_progress_set_notes(spa, "Restarting TRIM");
vdev_trim_restart(spa->spa_root_vdev);
vdev_autotrim_restart(spa);
spa_config_exit(spa, SCL_CONFIG, FTAG);
spa_import_progress_set_notes(spa, "Finished importing");
}
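/* Optional fault-injection point: a ZINJECT_DELAY_IMPORT handler armed for this pool stalls the import here. */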
zio_handle_import_delay(spa, gethrtime() - load_start);

spa_import_progress_remove(spa_guid(spa));
spa_async_request(spa, SPA_ASYNC_L2CACHE_REBUILD);

spa_load_note(spa, "LOADED");
fail:
mutex_enter(&spa_namespace_lock);
spa->spa_load_thread = NULL;
cv_broadcast(&spa_namespace_cv);

return (error);

return (0);
}

static int

@@ -6337,9 +6767,14 @@ spa_tryimport(nvlist_t *tryconfig)
/*
* Create and initialize the spa structure.
*/
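/* A per-thread suffix keeps concurrent tryimport placeholders unique in the pool namespace. */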
char *name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
(void) snprintf(name, MAXPATHLEN, "%s-%llx-%s",
TRYIMPORT_NAME, (u_longlong_t)curthread, poolname);

mutex_enter(&spa_namespace_lock);
spa = spa_add(TRYIMPORT_NAME, tryconfig, NULL);
spa = spa_add(name, tryconfig, NULL);
spa_activate(spa, SPA_MODE_READ);
kmem_free(name, MAXPATHLEN);

/*
* Rewind pool if a max txg was provided.

@@ -6476,9 +6911,10 @@ static int
spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
boolean_t force, boolean_t hardforce)
{
int error;
int error = 0;
spa_t *spa;
boolean_t force_removal, modifying;
hrtime_t export_start = gethrtime();

if (oldconfig)
*oldconfig = NULL;

@@ -6509,8 +6945,8 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
new_state == POOL_STATE_EXPORTED);

/*
* Put a hold on the pool, drop the namespace lock, stop async tasks,
* reacquire the namespace lock, and see if we can export.
* Put a hold on the pool, drop the namespace lock, stop async tasks
* and see if we can export.
*/
spa_open_ref(spa, FTAG);

@@ -6547,10 +6983,13 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
taskq_wait(spa->spa_zvol_taskq);
}
mutex_enter(&spa_namespace_lock);
spa->spa_export_thread = curthread;
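/* spa_lookup() callers now block on spa_namespace_cv until this thread finishes the export and clears the marker. */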
spa_close(spa, FTAG);

if (spa->spa_state == POOL_STATE_UNINITIALIZED)
if (spa->spa_state == POOL_STATE_UNINITIALIZED) {
mutex_exit(&spa_namespace_lock);
goto export_spa;
}

/*
* The pool will be in core if it's openable, in which case we can

@@ -6594,6 +7033,14 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
goto fail;
}

mutex_exit(&spa_namespace_lock);
/*
* At this point we no longer hold the spa_namespace_lock and
* there were no references on the spa. Future spa_lookups will
* notice the spa->spa_export_thread and wait until we signal
* that we are finished.
*/

if (spa->spa_sync_on) {
/*
* A pool cannot be exported if it has an active shared spare.

@@ -6604,7 +7051,7 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
if (!force && new_state == POOL_STATE_EXPORTED &&
spa_has_active_shared_spare(spa)) {
error = SET_ERROR(EXDEV);
goto fail;
goto fail_unlocked;
}

/*

@@ -6670,13 +7117,20 @@ export_spa:
error = spa_unload(spa, hardforce ?
TXG_WAIT_F_FORCE_EXPORT : TXG_WAIT_F_NOSUSPEND);
if (error != 0)
goto fail;
goto fail_unlocked;
spa_deactivate(spa);
}

if (oldconfig && spa->spa_config)
VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0);
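/* Optional fault-injection point: a ZINJECT_DELAY_EXPORT handler armed for this pool stalls the export here. */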

if (new_state == POOL_STATE_EXPORTED)
zio_handle_export_delay(spa, gethrtime() - export_start);

/*
* Take the namespace lock for the actual spa_t removal
*/
mutex_enter(&spa_namespace_lock);
if (new_state != POOL_STATE_UNINITIALIZED) {
if (!force_removal)
spa_write_cachefile(spa, B_TRUE, B_TRUE);

@@ -6688,16 +7142,29 @@ export_spa:
* we make sure to reset the exporting flag.
*/
spa->spa_is_exporting = B_FALSE;
spa->spa_export_thread = NULL;
}

/*
* Wake up any waiters in spa_lookup()
*/
cv_broadcast(&spa_namespace_cv);
mutex_exit(&spa_namespace_lock);
return (0);

fail_unlocked:
mutex_enter(&spa_namespace_lock);
fail:
if (force_removal)
spa_set_export_initiator(spa, NULL);
spa->spa_is_exporting = B_FALSE;
spa->spa_export_thread = NULL;

spa_async_resume(spa);
/*
* Wake up any waiters in spa_lookup()
*/
cv_broadcast(&spa_namespace_cv);
mutex_exit(&spa_namespace_lock);
return (error);
}

@@ -8311,15 +8778,16 @@ spa_async_remove(spa_t *spa, vdev_t *vd)
}

static void
spa_async_probe(spa_t *spa, vdev_t *vd)
spa_async_fault_vdev(spa_t *spa, vdev_t *vd)
{
if (vd->vdev_probe_wanted) {
vd->vdev_probe_wanted = B_FALSE;
vdev_reopen(vd); /* vdev_open() does the actual probe */
if (vd->vdev_fault_wanted) {
vd->vdev_fault_wanted = B_FALSE;
vdev_set_state(vd, B_TRUE, VDEV_STATE_FAULTED,
VDEV_AUX_ERR_EXCEEDED);
}

for (int c = 0; c < vd->vdev_children; c++)
spa_async_probe(spa, vd->vdev_child[c]);
spa_async_fault_vdev(spa, vd->vdev_child[c]);
}

static void

@@ -8408,11 +8876,11 @@ spa_async_thread(void *arg)
}

/*
* See if any devices need to be probed.
* See if any devices need to be marked faulted.
*/
if (tasks & SPA_ASYNC_PROBE) {
if (tasks & SPA_ASYNC_FAULT_VDEV) {
spa_vdev_state_enter(spa, SCL_NONE);
spa_async_probe(spa, spa->spa_root_vdev);
spa_async_fault_vdev(spa, spa->spa_root_vdev);
(void) spa_vdev_state_exit(spa, NULL, 0);
}

@@ -10199,6 +10667,9 @@ ZFS_MODULE_PARAM(zfs_zio, zio_, taskq_batch_pct, UINT, ZMOD_RD,
ZFS_MODULE_PARAM(zfs_zio, zio_, taskq_batch_tpq, UINT, ZMOD_RD,
"Number of threads per IO worker taskqueue");

ZFS_MODULE_PARAM(zfs_zio, zio_, taskq_trylock, UINT, ZMOD_RD,
"Try to dispatch IO to an unlocked IO taskqueue before sleeping");

ZFS_MODULE_PARAM(zfs, zfs_, max_missing_tvds, ULONG, ZMOD_RW,
"Allow importing pool with up to this number of missing top-level "
"vdevs (in read-only mode)");

@@ -10218,4 +10689,13 @@ ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, zthr_cancel, INT
ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, new_alloc, INT, ZMOD_RW,
"Whether extra ALLOC blkptrs were added to a livelist entry while it "
"was being condensed");

#ifdef _KERNEL
ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_read,
spa_taskq_read_param_set, spa_taskq_read_param_get, ZMOD_RD,
"Configure IO queues for read IO");
ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_write,
spa_taskq_write_param_set, spa_taskq_write_param_get, ZMOD_RD,
"Configure IO queues for write IO");
#endif
/* END CSTYLED */

@@ -1155,6 +1155,7 @@ spa_ld_log_sm_data(spa_t *spa)

uint_t pn = 0;
uint64_t ps = 0;
uint64_t nsm = 0;
psls = sls = avl_first(&spa->spa_sm_logs_by_txg);
while (sls != NULL) {
/* Prefetch log spacemaps up to 16 TXGs or MBs ahead. */

@@ -1187,6 +1188,10 @@ spa_ld_log_sm_data(spa_t *spa)
summary_add_data(spa, sls->sls_txg,
sls->sls_mscount, 0, sls->sls_nblocks);

spa_import_progress_set_notes_nolog(spa,
"Read %llu of %lu log space maps", (u_longlong_t)nsm,
avl_numnodes(&spa->spa_sm_logs_by_txg));

struct spa_ld_log_sm_arg vla = {
.slls_spa = spa,
.slls_txg = sls->sls_txg

@@ -1202,6 +1207,7 @@ spa_ld_log_sm_data(spa_t *spa)

pn--;
ps -= space_map_length(sls->sls_sm);
nsm++;
space_map_close(sls->sls_sm);
sls->sls_sm = NULL;
sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls);

@@ -1212,11 +1218,11 @@ spa_ld_log_sm_data(spa_t *spa)

hrtime_t read_logs_endtime = gethrtime();
spa_load_note(spa,
"read %llu log space maps (%llu total blocks - blksz = %llu bytes) "
"in %lld ms", (u_longlong_t)avl_numnodes(&spa->spa_sm_logs_by_txg),
"Read %lu log space maps (%llu total blocks - blksz = %llu bytes) "
"in %lld ms", avl_numnodes(&spa->spa_sm_logs_by_txg),
(u_longlong_t)spa_log_sm_nblocks(spa),
(u_longlong_t)zfs_log_sm_blksz,
(longlong_t)((read_logs_endtime - read_logs_starttime) / 1000000));
(longlong_t)NSEC2MSEC(read_logs_endtime - read_logs_starttime));

out:
if (error != 0) {

@@ -20,13 +20,14 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2019 by Delphix. All rights reserved.
* Copyright (c) 2011, 2024 by Delphix. All rights reserved.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2017 Datto Inc.
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
* Copyright (c) 2023, 2024, Klara Inc.
*/

#include <sys/zfs_context.h>

@@ -79,7 +80,8 @@
* - Check if spa_refcount is zero
* - Rename a spa_t
* - add/remove/attach/detach devices
* - Held for the duration of create/destroy/import/export
* - Held for the duration of create/destroy
* - Held at the start and end of import and export
*
* It does not need to handle recursion. A create or destroy may
* reference objects (files or zvols) in other pools, but by

@@ -232,9 +234,9 @@
* locking is, always, based on spa_namespace_lock and spa_config_lock[].
*/

static avl_tree_t spa_namespace_avl;
avl_tree_t spa_namespace_avl;
kmutex_t spa_namespace_lock;
static kcondvar_t spa_namespace_cv;
kcondvar_t spa_namespace_cv;
int spa_max_replication_override = SPA_DVAS_PER_BP;

static kmutex_t spa_spare_lock;

@@ -417,6 +419,8 @@ spa_load_note(spa_t *spa, const char *fmt, ...)

zfs_dbgmsg("spa_load(%s, config %s): %s", spa->spa_name,
spa->spa_trust_config ? "trusted" : "untrusted", buf);

spa_import_progress_set_notes_nolog(spa, "%s", buf);
}

/*

@@ -604,6 +608,7 @@ spa_lookup(const char *name)

ASSERT(MUTEX_HELD(&spa_namespace_lock));

retry:
(void) strlcpy(search.spa_name, name, sizeof (search.spa_name));

/*

@@ -615,6 +620,20 @@ spa_lookup(const char *name)
*cp = '\0';

spa = avl_find(&spa_namespace_avl, &search, &where);
if (spa == NULL)
return (NULL);

/*
* Avoid racing with import/export, which don't hold the namespace
* lock for their entire duration.
*/
if ((spa->spa_load_thread != NULL &&
spa->spa_load_thread != curthread) ||
(spa->spa_export_thread != NULL &&
spa->spa_export_thread != curthread)) {
cv_wait(&spa_namespace_cv, &spa_namespace_lock);
goto retry;
}

return (spa);
}

@@ -712,6 +731,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
spa_config_lock_init(spa);
spa_stats_init(spa);

ASSERT(MUTEX_HELD(&spa_namespace_lock));
avl_add(&spa_namespace_avl, spa);

/*

@@ -806,7 +826,6 @@ spa_remove(spa_t *spa)
nvlist_free(spa->spa_config_splitting);

avl_remove(&spa_namespace_avl, spa);
cv_broadcast(&spa_namespace_cv);

if (spa->spa_root)
spa_strfree(spa->spa_root);

@@ -901,7 +920,8 @@ void
spa_open_ref(spa_t *spa, void *tag)
{
ASSERT(zfs_refcount_count(&spa->spa_refcount) >= spa->spa_minref ||
MUTEX_HELD(&spa_namespace_lock));
MUTEX_HELD(&spa_namespace_lock) ||
spa->spa_load_thread == curthread);
(void) zfs_refcount_add(&spa->spa_refcount, tag);
}

@@ -921,13 +941,15 @@ spa_close_common(spa_t *spa, const void *tag)

/*
* Remove a reference to the given spa_t. Must have at least one reference, or
* have the namespace lock held.
* have the namespace lock held or be part of a pool import/export.
*/
void
spa_close(spa_t *spa, void *tag)
{
ASSERT(zfs_refcount_count(&spa->spa_refcount) > spa->spa_minref ||
MUTEX_HELD(&spa_namespace_lock));
MUTEX_HELD(&spa_namespace_lock) ||
spa->spa_load_thread == curthread ||
spa->spa_export_thread == curthread);
spa_close_common(spa, tag);
}

@@ -947,13 +969,15 @@ spa_async_close(spa_t *spa, void *tag)

/*
* Check to see if the spa refcount is zero. Must be called with
* spa_namespace_lock held. We really compare against spa_minref, which is the
* number of references acquired when opening a pool
* spa_namespace_lock held or be the spa export thread. We really
* compare against spa_minref, which is the number of references
* acquired when opening a pool
*/
boolean_t
spa_refcount_zero(spa_t *spa)
{
ASSERT(MUTEX_HELD(&spa_namespace_lock));
ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
spa->spa_export_thread == curthread);

return (zfs_refcount_count(&spa->spa_refcount) == spa->spa_minref);
}

@@ -1201,6 +1225,8 @@ spa_vdev_enter(spa_t *spa)
mutex_enter(&spa->spa_vdev_top_lock);
mutex_enter(&spa_namespace_lock);

ASSERT0(spa->spa_export_thread);

vdev_autotrim_stop_all(spa);

return (spa_vdev_config_enter(spa));

@@ -1218,6 +1244,8 @@ spa_vdev_detach_enter(spa_t *spa, uint64_t guid)
mutex_enter(&spa->spa_vdev_top_lock);
mutex_enter(&spa_namespace_lock);

ASSERT0(spa->spa_export_thread);

vdev_autotrim_stop_all(spa);

if (guid != 0) {

@@ -2215,6 +2243,7 @@ typedef struct spa_import_progress {
uint64_t pool_guid; /* unique id for updates */
char *pool_name;
spa_load_state_t spa_load_state;
char *spa_load_notes;
uint64_t mmp_sec_remaining; /* MMP activity check */
uint64_t spa_load_max_txg; /* rewind txg */
procfs_list_node_t smh_node;

@@ -2225,9 +2254,9 @@ spa_history_list_t *spa_import_progress_list = NULL;
static int
spa_import_progress_show_header(struct seq_file *f)
{
seq_printf(f, "%-20s %-14s %-14s %-12s %s\n", "pool_guid",
seq_printf(f, "%-20s %-14s %-14s %-12s %-16s %s\n", "pool_guid",
"load_state", "multihost_secs", "max_txg",
"pool_name");
"pool_name", "notes");
return (0);
}

@@ -2236,11 +2265,12 @@ spa_import_progress_show(struct seq_file *f, void *data)
{
spa_import_progress_t *sip = (spa_import_progress_t *)data;

seq_printf(f, "%-20llu %-14llu %-14llu %-12llu %s\n",
seq_printf(f, "%-20llu %-14llu %-14llu %-12llu %-16s %s\n",
(u_longlong_t)sip->pool_guid, (u_longlong_t)sip->spa_load_state,
(u_longlong_t)sip->mmp_sec_remaining,
(u_longlong_t)sip->spa_load_max_txg,
(sip->pool_name ? sip->pool_name : "-"));
(sip->pool_name ? sip->pool_name : "-"),
(sip->spa_load_notes ? sip->spa_load_notes : "-"));

return (0);
}

@@ -2254,6 +2284,8 @@ spa_import_progress_truncate(spa_history_list_t *shl, unsigned int size)
sip = list_remove_head(&shl->procfs_list.pl_list);
if (sip->pool_name)
spa_strfree(sip->pool_name);
if (sip->spa_load_notes)
kmem_strfree(sip->spa_load_notes);
kmem_free(sip, sizeof (spa_import_progress_t));
shl->size--;
}

@@ -2309,6 +2341,10 @@ spa_import_progress_set_state(uint64_t pool_guid,
sip = list_prev(&shl->procfs_list.pl_list, sip)) {
if (sip->pool_guid == pool_guid) {
sip->spa_load_state = load_state;
if (sip->spa_load_notes != NULL) {
kmem_strfree(sip->spa_load_notes);
sip->spa_load_notes = NULL;
}
error = 0;
break;
}

@@ -2318,6 +2354,59 @@ spa_import_progress_set_state(uint64_t pool_guid,
return (error);
}

static void
spa_import_progress_set_notes_impl(spa_t *spa, boolean_t log_dbgmsg,
const char *fmt, va_list adx)
{
spa_history_list_t *shl = spa_import_progress_list;
spa_import_progress_t *sip;
uint64_t pool_guid = spa_guid(spa);

if (shl->size == 0)
return;

char *notes = kmem_vasprintf(fmt, adx);

mutex_enter(&shl->procfs_list.pl_lock);
for (sip = list_tail(&shl->procfs_list.pl_list); sip != NULL;
sip = list_prev(&shl->procfs_list.pl_list, sip)) {
if (sip->pool_guid == pool_guid) {
if (sip->spa_load_notes != NULL) {
kmem_strfree(sip->spa_load_notes);
sip->spa_load_notes = NULL;
}
sip->spa_load_notes = notes;
if (log_dbgmsg)
zfs_dbgmsg("'%s' %s", sip->pool_name, notes);
notes = NULL;
break;
}
}
mutex_exit(&shl->procfs_list.pl_lock);
if (notes != NULL)
kmem_strfree(notes);
}

void
spa_import_progress_set_notes(spa_t *spa, const char *fmt, ...)
{
va_list adx;

va_start(adx, fmt);
spa_import_progress_set_notes_impl(spa, B_TRUE, fmt, adx);
va_end(adx);
}

void
spa_import_progress_set_notes_nolog(spa_t *spa, const char *fmt, ...)
{
va_list adx;

va_start(adx, fmt);
spa_import_progress_set_notes_impl(spa, B_FALSE, fmt, adx);
va_end(adx);
}

int
spa_import_progress_set_max_txg(uint64_t pool_guid, uint64_t load_max_txg)
{

@@ -2386,6 +2475,7 @@ spa_import_progress_add(spa_t *spa)
poolname = spa_name(spa);
sip->pool_name = spa_strdup(poolname);
sip->spa_load_state = spa_load_state(spa);
sip->spa_load_notes = NULL;

mutex_enter(&shl->procfs_list.pl_lock);
procfs_list_add(&shl->procfs_list, sip);

@@ -2405,6 +2495,8 @@ spa_import_progress_remove(uint64_t pool_guid)
if (sip->pool_guid == pool_guid) {
if (sip->pool_name)
spa_strfree(sip->pool_name);
if (sip->spa_load_notes)
spa_strfree(sip->spa_load_notes);
list_remove(&shl->procfs_list.pl_list, sip);
shl->size--;
kmem_free(sip, sizeof (spa_import_progress_t));

@@ -2801,8 +2893,7 @@ spa_state_to_name(spa_t *spa)
vdev_state_t state = rvd->vdev_state;
vdev_aux_t aux = rvd->vdev_stat.vs_aux;

if (spa_suspended(spa) &&
(spa_get_failmode(spa) != ZIO_FAILURE_MODE_CONTINUE))
if (spa_suspended(spa))
return ("SUSPENDED");

switch (state) {

@@ -585,6 +585,15 @@ txg_sync_thread(void *arg)
timer = (delta > timeout ? 0 : timeout - delta);
}

/*
* When we're suspended, nothing should be changing and for
* MMP we don't want to bump anything that would make it
* harder to detect if another host is changing it when
* resuming after a MMP suspend.
*/
if (spa_suspended(spa))
continue;

/*
* Wait until the quiesce thread hands off a txg to us,
* prompting it to do so if necessary.

@@ -1584,6 +1584,7 @@ vdev_metaslab_fini(vdev_t *vd)
typedef struct vdev_probe_stats {
boolean_t vps_readable;
boolean_t vps_writeable;
boolean_t vps_zio_done_probe;
int vps_flags;
} vdev_probe_stats_t;

@@ -1627,6 +1628,17 @@ vdev_probe_done(zio_t *zio)
(void) zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE,
spa, vd, NULL, NULL, 0);
zio->io_error = SET_ERROR(ENXIO);

/*
* If this probe was initiated from zio pipeline, then
* change the state in a spa_async_request. Probes that
* were initiated from a vdev_open can change the state
* as part of the open call.
*/
if (vps->vps_zio_done_probe) {
vd->vdev_fault_wanted = B_TRUE;
spa_async_request(spa, SPA_ASYNC_FAULT_VDEV);
}
}

mutex_enter(&vd->vdev_probe_lock);

@@ -1678,6 +1690,7 @@ vdev_probe(vdev_t *vd, zio_t *zio)
vps->vps_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_PROBE |
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_AGGREGATE |
ZIO_FLAG_TRYHARD;
vps->vps_zio_done_probe = (zio != NULL);

if (spa_config_held(spa, SCL_ZIO, RW_WRITER)) {
/*

@@ -1704,15 +1717,6 @@ vdev_probe(vdev_t *vd, zio_t *zio)
vd->vdev_probe_zio = pio = zio_null(NULL, spa, vd,
vdev_probe_done, vps,
vps->vps_flags | ZIO_FLAG_DONT_PROPAGATE);

/*
* We can't change the vdev state in this context, so we
* kick off an async task to do it on our behalf.
*/
if (zio != NULL) {
vd->vdev_probe_wanted = B_TRUE;
spa_async_request(spa, SPA_ASYNC_PROBE);
}
}

if (zio != NULL)

@@ -20,7 +20,7 @@
*/

/*
* Copyright (c) 2016, 2019 by Delphix. All rights reserved.
* Copyright (c) 2016, 2024 by Delphix. All rights reserved.
*/

#include <sys/spa.h>

@@ -636,7 +636,8 @@ vdev_initialize_stop_wait(spa_t *spa, list_t *vd_list)
(void) spa;
vdev_t *vd;

ASSERT(MUTEX_HELD(&spa_namespace_lock));
ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
spa->spa_export_thread == curthread);

while ((vd = list_remove_head(vd_list)) != NULL) {
mutex_enter(&vd->vdev_initialize_lock);

@@ -678,7 +679,8 @@ vdev_initialize_stop(vdev_t *vd, vdev_initializing_state_t tgt_state,
if (vd_list == NULL) {
vdev_initialize_stop_wait_impl(vd);
} else {
ASSERT(MUTEX_HELD(&spa_namespace_lock));
ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
vd->vdev_spa->spa_export_thread == curthread);
list_insert_tail(vd_list, vd);
}
}

@@ -710,7 +712,8 @@ vdev_initialize_stop_all(vdev_t *vd, vdev_initializing_state_t tgt_state)
spa_t *spa = vd->vdev_spa;
list_t vd_list;

ASSERT(MUTEX_HELD(&spa_namespace_lock));
ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
spa->spa_export_thread == curthread);

list_create(&vd_list, sizeof (vdev_t),
offsetof(vdev_t, vdev_initialize_node));

@@ -729,7 +732,8 @@ vdev_initialize_stop_all(vdev_t *vd, vdev_initializing_state_t tgt_state)
void
vdev_initialize_restart(vdev_t *vd)
{
ASSERT(MUTEX_HELD(&spa_namespace_lock));
ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
vd->vdev_spa->spa_load_thread == curthread);
ASSERT(!spa_config_held(vd->vdev_spa, SCL_ALL, RW_WRITER));

if (vd->vdev_leaf_zap != 0) {

@@ -1894,6 +1894,7 @@ retry:
/*
* If this isn't a resync due to I/O errors,
* and nothing changed in this transaction group,
* and multihost protection isn't enabled,
* and the vdev configuration hasn't changed,
* then there's nothing to do.
*/

@@ -1901,7 +1902,8 @@ retry:
boolean_t changed = uberblock_update(ub, spa->spa_root_vdev,
txg, spa->spa_mmp.mmp_delay);

if (!changed && list_is_empty(&spa->spa_config_dirty_list))
if (!changed && list_is_empty(&spa->spa_config_dirty_list) &&
!spa_multihost(spa))
return (0);
}

@@ -22,6 +22,7 @@
*
* Copyright (c) 2018, Intel Corporation.
* Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
* Copyright (c) 2024 by Delphix. All rights reserved.
*/

#include <sys/vdev_impl.h>

@@ -1067,7 +1068,8 @@ vdev_rebuild_restart_impl(vdev_t *vd)
void
vdev_rebuild_restart(spa_t *spa)
{
ASSERT(MUTEX_HELD(&spa_namespace_lock));
ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
spa->spa_load_thread == curthread);

vdev_rebuild_restart_impl(spa->spa_root_vdev);
}

@@ -1081,7 +1083,8 @@ vdev_rebuild_stop_wait(vdev_t *vd)
{
spa_t *spa = vd->vdev_spa;

ASSERT(MUTEX_HELD(&spa_namespace_lock));
ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
spa->spa_export_thread == curthread);

if (vd == spa->spa_root_vdev) {
for (uint64_t i = 0; i < vd->vdev_children; i++)

@@ -20,7 +20,7 @@
*/

/*
* Copyright (c) 2016 by Delphix. All rights reserved.
* Copyright (c) 2016, 2024 by Delphix. All rights reserved.
* Copyright (c) 2019 by Lawrence Livermore National Security, LLC.
* Copyright (c) 2021 Hewlett Packard Enterprise Development LP
*/

@@ -1021,7 +1021,8 @@ vdev_trim_stop_wait(spa_t *spa, list_t *vd_list)
(void) spa;
vdev_t *vd;

ASSERT(MUTEX_HELD(&spa_namespace_lock));
ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
spa->spa_export_thread == curthread);

while ((vd = list_remove_head(vd_list)) != NULL) {
mutex_enter(&vd->vdev_trim_lock);

@@ -1060,7 +1061,8 @@ vdev_trim_stop(vdev_t *vd, vdev_trim_state_t tgt_state, list_t *vd_list)
if (vd_list == NULL) {
vdev_trim_stop_wait_impl(vd);
} else {
ASSERT(MUTEX_HELD(&spa_namespace_lock));
ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
vd->vdev_spa->spa_export_thread == curthread);
list_insert_tail(vd_list, vd);
}
}

@@ -1096,7 +1098,8 @@ vdev_trim_stop_all(vdev_t *vd, vdev_trim_state_t tgt_state)
list_t vd_list;
vdev_t *vd_l2cache;

ASSERT(MUTEX_HELD(&spa_namespace_lock));
ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
spa->spa_export_thread == curthread);

list_create(&vd_list, sizeof (vdev_t),
offsetof(vdev_t, vdev_trim_node));

@@ -1129,7 +1132,8 @@ vdev_trim_stop_all(vdev_t *vd, vdev_trim_state_t tgt_state)
void
vdev_trim_restart(vdev_t *vd)
{
ASSERT(MUTEX_HELD(&spa_namespace_lock));
ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
vd->vdev_spa->spa_load_thread == curthread);
ASSERT(!spa_config_held(vd->vdev_spa, SCL_ALL, RW_WRITER));

if (vd->vdev_leaf_zap != 0) {

@@ -1523,8 +1527,8 @@ vdev_autotrim_stop_all(spa_t *spa)
void
vdev_autotrim_restart(spa_t *spa)
{
ASSERT(MUTEX_HELD(&spa_namespace_lock));

ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
spa->spa_load_thread == curthread);
if (spa->spa_autotrim)
vdev_autotrim(spa);
}

@@ -241,6 +241,11 @@ unsigned long zfs_max_nvlist_src_size = 0;
*/
unsigned long zfs_history_output_max = 1024 * 1024;

/*
* Whether or not to allow compression=slack to be set on a dataset.
*/
int zfs_slack_compress_enabled = 0;

uint_t zfs_fsyncer_key;
uint_t zfs_allow_log_key;

@@ -4573,6 +4578,9 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
if (compval == ZIO_COMPRESS_SLACK) {
spa_t *spa;

if (!zfs_slack_compress_enabled)
return (SET_ERROR(ENOTSUP));

if ((err = spa_open(dsname, &spa, FTAG)) != 0)
return (err);

@@ -5715,10 +5723,13 @@ zfs_ioc_clear(zfs_cmd_t *zc)

/*
* If multihost is enabled, resuming I/O is unsafe as another
* host may have imported the pool.
* host may have imported the pool. Check for remote activity.
*/
if (spa_multihost(spa) && spa_suspended(spa))
return (SET_ERROR(EINVAL));
if (spa_multihost(spa) && spa_suspended(spa) &&
spa_mmp_remote_host_activity(spa)) {
spa_close(spa, FTAG);
return (SET_ERROR(EREMOTEIO));
}

spa_vdev_state_enter(spa, SCL_NONE);

@@ -7770,4 +7781,7 @@ ZFS_MODULE_PARAM(zfs, zfs_, max_nvlist_src_size, ULONG, ZMOD_RW,

ZFS_MODULE_PARAM(zfs, zfs_, history_output_max, ULONG, ZMOD_RW,
"Maximum size in bytes of ZFS ioctl output that will be logged");

ZFS_MODULE_PARAM(zfs, zfs_, slack_compress_enabled, INT, ZMOD_RW,
"Allow slack compression feature to be set on a dataset");
/* END CSTYLED */

@@ -3607,7 +3607,7 @@ zil_commit(zilog_t *zilog, uint64_t foid)
int
zil_commit_impl(zilog_t *zilog, uint64_t foid)
{
ASSERT0(zil_failed(zilog) || zilog->zl_suspend > 0);
ASSERT0(zil_failed(zilog));

ZIL_STAT_BUMP(zil_commit_count);

@@ -2535,8 +2535,11 @@ zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t reason)
"is set to panic.", spa_name(spa));

if (!spa_suspended(spa)) {
cmn_err(CE_WARN, "Pool '%s' has encountered an uncorrectable "
"I/O failure and has been suspended.\n", spa_name(spa));
if (reason != ZIO_SUSPEND_MMP) {
cmn_err(CE_WARN, "Pool '%s' has encountered an "
"uncorrectable I/O failure and has been "
"suspended.\n", spa_name(spa));
}

(void) zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL,
NULL, NULL, 0);

@@ -68,7 +68,7 @@ zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = {
{"lz4", 0, lz4_compress_zfs, lz4_decompress_zfs, NULL},
{"zstd", ZIO_ZSTD_LEVEL_DEFAULT, zfs_zstd_compress,
zfs_zstd_decompress, zfs_zstd_decompress_level},
{"slack", 0, slack_compress, NULL, NULL },
{"slack", 0, slack_compress, slack_decompress, NULL },
};

uint8_t

@@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2024, Klara Inc.
*/

/*

@@ -59,6 +60,7 @@ uint32_t zio_injection_enabled = 0;
typedef struct inject_handler {
int zi_id;
spa_t *zi_spa;
char *zi_spa_name; /* ZINJECT_DELAY_IMPORT only */
zinject_record_t zi_record;
uint64_t *zi_lanes;
int zi_next_lane;

@@ -699,6 +701,63 @@ zio_handle_io_delay(zio_t *zio)
return (min_target);
}

static void
zio_handle_pool_delay(spa_t *spa, hrtime_t elapsed, zinject_type_t command)
{
inject_handler_t *handler;
hrtime_t delay = 0;
int id = 0;

rw_enter(&inject_lock, RW_READER);

for (handler = list_head(&inject_handlers);
handler != NULL && handler->zi_record.zi_cmd == command;
handler = list_next(&inject_handlers, handler)) {
ASSERT3P(handler->zi_spa_name, !=, NULL);
if (strcmp(spa_name(spa), handler->zi_spa_name) == 0) {
uint64_t pause =
SEC2NSEC(handler->zi_record.zi_duration);
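/* Sleep only for the portion of the configured pause that the operation hasn't already used up. */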
if (pause > elapsed) {
delay = pause - elapsed;
}
id = handler->zi_id;
break;
}
}

rw_exit(&inject_lock);

if (delay) {
if (command == ZINJECT_DELAY_IMPORT) {
spa_import_progress_set_notes(spa, "injecting %llu "
"sec delay", (u_longlong_t)NSEC2SEC(delay));
}
zfs_sleep_until(gethrtime() + delay);
}
if (id) {
/* all done with this one-shot handler */
zio_clear_fault(id);
}
}

/*
* For testing, inject a delay during an import
*/
void
zio_handle_import_delay(spa_t *spa, hrtime_t elapsed)
{
zio_handle_pool_delay(spa, elapsed, ZINJECT_DELAY_IMPORT);
}

/*
* For testing, inject a delay during an export
*/
void
zio_handle_export_delay(spa_t *spa, hrtime_t elapsed)
{
zio_handle_pool_delay(spa, elapsed, ZINJECT_DELAY_EXPORT);
}

static int
zio_calculate_range(const char *pool, zinject_record_t *record)
{

@@ -756,6 +815,28 @@ zio_calculate_range(const char *pool, zinject_record_t *record)
return (0);
}

static boolean_t
zio_pool_handler_exists(const char *name, zinject_type_t command)
{
boolean_t exists = B_FALSE;

rw_enter(&inject_lock, RW_READER);
for (inject_handler_t *handler = list_head(&inject_handlers);
handler != NULL; handler = list_next(&inject_handlers, handler)) {
if (command != handler->zi_record.zi_cmd)
continue;

const char *pool = (handler->zi_spa_name != NULL) ?
handler->zi_spa_name : spa_name(handler->zi_spa);
if (strcmp(name, pool) == 0) {
exists = B_TRUE;
break;
}
}
rw_exit(&inject_lock);

return (exists);
}
/*
* Create a new handler for the given record. We add it to the list, adding
* a reference to the spa_t in the process. We increment zio_injection_enabled,

@@ -806,16 +887,42 @@ zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record)

if (!(flags & ZINJECT_NULL)) {
/*
* spa_inject_ref() will add an injection reference, which will
* prevent the pool from being removed from the namespace while
* still allowing it to be unloaded.
* Pool delays for import or export don't take an
* injection reference on the spa. Instead they
* rely on matching by name.
*/
if ((spa = spa_inject_addref(name)) == NULL)
return (SET_ERROR(ENOENT));
if (record->zi_cmd == ZINJECT_DELAY_IMPORT ||
record->zi_cmd == ZINJECT_DELAY_EXPORT) {
if (record->zi_duration <= 0)
return (SET_ERROR(EINVAL));
/*
* Only one import | export delay handler per pool.
*/
if (zio_pool_handler_exists(name, record->zi_cmd))
return (SET_ERROR(EEXIST));

mutex_enter(&spa_namespace_lock);
boolean_t has_spa = spa_lookup(name) != NULL;
mutex_exit(&spa_namespace_lock);
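/* An import delay is armed before the pool exists in the namespace; an export delay requires that it already does. */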

if (record->zi_cmd == ZINJECT_DELAY_IMPORT && has_spa)
return (SET_ERROR(EEXIST));
if (record->zi_cmd == ZINJECT_DELAY_EXPORT && !has_spa)
return (SET_ERROR(ENOENT));
spa = NULL;
} else {
/*
* spa_inject_ref() will add an injection reference,
* which will prevent the pool from being removed
* from the namespace while still allowing it to be
* unloaded.
*/
if ((spa = spa_inject_addref(name)) == NULL)
return (SET_ERROR(ENOENT));
}

handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP);

handler->zi_spa = spa;
handler->zi_spa = spa; /* note: can be NULL */
handler->zi_record = *record;

if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) {

@@ -828,6 +935,11 @@ zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record)
handler->zi_next_lane = 0;
}

if (handler->zi_spa == NULL)
handler->zi_spa_name = spa_strdup(name);
else
handler->zi_spa_name = NULL;

rw_enter(&inject_lock, RW_WRITER);

/*

@@ -887,7 +999,11 @@ zio_inject_list_next(int *id, char *name, size_t buflen,
if (handler) {
*record = handler->zi_record;
*id = handler->zi_id;
(void) strncpy(name, spa_name(handler->zi_spa), buflen);
ASSERT(handler->zi_spa || handler->zi_spa_name);
if (handler->zi_spa != NULL)
(void) strncpy(name, spa_name(handler->zi_spa), buflen);
else
(void) strncpy(name, handler->zi_spa_name, buflen);
ret = 0;
} else {
ret = SET_ERROR(ENOENT);

@@ -937,7 +1053,11 @@ zio_clear_fault(int id)
ASSERT3P(handler->zi_lanes, ==, NULL);
}

spa_inject_delref(handler->zi_spa);
if (handler->zi_spa_name != NULL)
spa_strfree(handler->zi_spa_name);

if (handler->zi_spa != NULL)
spa_inject_delref(handler->zi_spa);
kmem_free(handler, sizeof (inject_handler_t));
atomic_dec_32(&zio_injection_enabled);

@@ -376,7 +376,8 @@ tags = ['functional', 'cli_root', 'zpool_events']
[tests/functional/cli_root/zpool_export]
tests = ['zpool_export_001_pos', 'zpool_export_002_pos',
'zpool_export_003_neg', 'zpool_export_004_pos', 'zpool_export_005_pos',
'zpool_export_006_pos', 'zpool_export_007_pos']
'zpool_export_006_pos', 'zpool_export_007_pos',
'zpool_export_parallel_pos', 'zpool_export_parallel_admin']
tags = ['functional', 'cli_root', 'zpool_export']

[tests/functional/cli_root/zpool_get]

@@ -401,6 +402,10 @@ tests = ['zpool_import_001_pos', 'zpool_import_002_pos',
'zpool_import_rename_001_pos', 'zpool_import_all_001_pos',
'zpool_import_encrypted', 'zpool_import_encrypted_load',
'zpool_import_errata3', 'zpool_import_errata4',
'zpool_import_hostid_changed',
'zpool_import_hostid_changed_unclean_export',
'zpool_import_hostid_changed_cachefile',
'zpool_import_hostid_changed_cachefile_unclean_export',
'import_cachefile_device_added',
'import_cachefile_device_removed',
'import_cachefile_device_replaced',

@@ -411,7 +416,9 @@ tests = ['zpool_import_001_pos', 'zpool_import_002_pos',
'import_devices_missing',
'import_paths_changed',
'import_rewind_config_changed',
'import_rewind_device_replaced']
'import_rewind_device_replaced',
'zpool_import_status', 'zpool_import_parallel_pos',
'zpool_import_parallel_neg', 'zpool_import_parallel_admin']
tags = ['functional', 'cli_root', 'zpool_import']
timeout = 1200

@@ -127,7 +127,7 @@ tags = ['functional', 'mmap']
tests = ['mmp_on_thread', 'mmp_on_uberblocks', 'mmp_on_off', 'mmp_interval',
'mmp_active_import', 'mmp_inactive_import', 'mmp_exported_import',
'mmp_write_uberblocks', 'mmp_reset_interval', 'multihost_history',
'mmp_on_zdb', 'mmp_write_distribution', 'mmp_hostid']
'mmp_on_zdb', 'mmp_write_distribution', 'mmp_hostid', 'mmp_write_slow_disk']
tags = ['functional', 'mmp']

[tests/functional/mount:Linux]

@@ -8,7 +8,9 @@ dist_pkgdata_SCRIPTS = \
zpool_export_004_pos.ksh \
zpool_export_005_pos.ksh \
zpool_export_006_pos.ksh \
zpool_export_007_pos.ksh
zpool_export_007_pos.ksh \
zpool_export_parallel_admin.ksh \
zpool_export_parallel_pos.ksh

dist_pkgdata_DATA = \
zpool_export.cfg \

@@ -0,0 +1,72 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#

#
# Copyright (c) 2024 Klara, Inc.
#

. $STF_SUITE/include/libtest.shlib

#
# DESCRIPTION:
#	Verify that admin commands cannot race a pool export
#
# STRATEGY:
#	1. Create a pool
#	2. Export the pool with an injected delay in the background
#	3. Execute some admin commands against the pool
#

verify_runnable "global"

DEVICE_DIR=$TEST_BASE_DIR/dev_export-test

function cleanup
{
	zinject -c all
	poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1
	[[ -d $DEVICE_DIR ]] && log_must rm -rf $DEVICE_DIR
}

log_assert "admin commands cannot race a pool export"

log_onexit cleanup

[[ ! -d $DEVICE_DIR ]] && log_must mkdir -p $DEVICE_DIR
log_must truncate -s $MINVDEVSIZE ${DEVICE_DIR}/disk0 ${DEVICE_DIR}/disk1

log_must zpool create -f $TESTPOOL1 mirror ${DEVICE_DIR}/disk0 ${DEVICE_DIR}/disk1

log_must zinject -P export -s 10 $TESTPOOL1
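# the injected 10 second export delay gives the admin commands below a
# window in which to race the export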

log_must zpool export $TESTPOOL1 &

zpool set comment=hello $TESTPOOL1
zpool reguid $TESTPOOL1 &
zpool split $TESTPOOL1 &

log_pass "admin commands cannot race a pool export"

@@ -0,0 +1,129 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#

#
# Copyright (c) 2024 Klara, Inc.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg
. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib

# test uses 8 vdevs
MAX_NUM=8
DEVICE_DIR=$TEST_BASE_DIR/dev_import-test

#
# DESCRIPTION:
#	Verify that pool exports can occur in parallel
#
# STRATEGY:
#	1. Create 8 pools
#	2. Inject an export delay using zinject
#	3. Export half of the pools synchronously to baseline sequential cost
#	4. Export the other half asynchronously to demonstrate parallel savings
#	5. Import 4 pools
#	6. Test zpool export -a
#

verify_runnable "global"

#
# override the minimum sized vdevs
#

POOLNAME="test_pool"

function cleanup
{
	zinject -c all

	for i in {0..$(($MAX_NUM - 1))}; do
		poolexists $POOLNAME-$i && destroy_pool $POOLNAME-$i
	done

	[[ -d $DEVICE_DIR ]] && log_must rm -rf $DEVICE_DIR
}

log_assert "Pool exports can occur in parallel"

log_onexit cleanup

[[ ! -d $DEVICE_DIR ]] && log_must mkdir -p $DEVICE_DIR

#
# Create some pools with export delay injectors
#
for i in {0..$(($MAX_NUM - 1))}; do
	log_must truncate -s $MINVDEVSIZE ${DEVICE_DIR}/disk$i
	log_must zpool create $POOLNAME-$i $DEVICE_DIR/disk$i
	log_must zinject -P export -s 8 $POOLNAME-$i
done

#
# Export half of the pools synchronously
#
SECONDS=0
for i in {0..3}; do
	log_must zpool export $POOLNAME-$i
done
sequential_time=$SECONDS
log_note "sequentially exported 4 pools in $sequential_time seconds"

#
# Export half of the pools in parallel
#
SECONDS=0
for i in {4..7}; do
	log_must zpool export $POOLNAME-$i &
done
wait
parallel_time=$SECONDS
log_note "asynchronously exported 4 pools in $parallel_time seconds"

log_must test $parallel_time -lt $(($sequential_time / 3))
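# with an 8 second injected delay per pool, the four sequential exports take
# about 32 seconds while the four parallel exports should finish in about 8,
# comfortably under a third of the sequential time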
|
||||
|
||||
#
|
||||
# import 4 pools with export delay injectors
|
||||
#
|
||||
for i in {4..7}; do
|
||||
log_must zpool import -d $DEVICE_DIR/disk$i $POOLNAME-$i
|
||||
log_must zinject -P export -s 8 $POOLNAME-$i
|
||||
done
|
||||
|
||||
#
|
||||
# now test zpool export -a
|
||||
#
|
||||
SECONDS=0
|
||||
log_must zpool export -a
|
||||
parallel_time=$SECONDS
|
||||
log_note "asyncronously exported 4 pools, using '-a', in $parallel_time seconds"
|
||||
|
||||
log_must test $parallel_time -lt $(($sequential_time / 3))
|
||||
|
||||
log_pass "Pool exports occur in parallel"
|
|
@ -36,10 +36,17 @@ dist_pkgdata_SCRIPTS = \
|
|||
zpool_import_features_001_pos.ksh \
|
||||
zpool_import_features_002_neg.ksh \
|
||||
zpool_import_features_003_pos.ksh \
|
||||
zpool_import_hostid_changed.ksh \
|
||||
zpool_import_hostid_changed_unclean_export.ksh \
|
||||
zpool_import_hostid_changed_cachefile.ksh \
|
||||
zpool_import_hostid_changed_cachefile_unclean_export.ksh \
|
||||
zpool_import_missing_001_pos.ksh \
|
||||
zpool_import_missing_002_pos.ksh \
|
||||
zpool_import_missing_003_pos.ksh \
|
||||
zpool_import_rename_001_pos.ksh \
|
||||
zpool_import_parallel_admin.ksh \
|
||||
zpool_import_parallel_neg.ksh \
|
||||
zpool_import_parallel_pos.ksh \
|
||||
zpool_import_encrypted.ksh \
|
||||
zpool_import_encrypted_load.ksh \
|
||||
zpool_import_errata3.ksh \
|
||||
|
|
|
@ -26,6 +26,7 @@
#
# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
# Copyright (c) 2023 by Klara, Inc.
#

. $STF_SUITE/include/libtest.shlib

@ -63,3 +64,7 @@ export VDEV4=$DEVICE_DIR/${DEVICE_FILE}4
export VDEV5=$DEVICE_DIR/${DEVICE_FILE}5

export ALTER_ROOT=/alter_import-test

export HOSTID_FILE="/etc/hostid"
export HOSTID1=01234567
export HOSTID2=89abcdef

@ -11,6 +11,7 @@
#
# Copyright (c) 2016 by Delphix. All rights reserved.
# Copyright (c) 2023 by Klara, Inc.
#

. $STF_SUITE/include/libtest.shlib

@ -0,0 +1,59 @@
#!/bin/ksh -p

#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2021 by Delphix. All rights reserved.
# Copyright (c) 2023 by Klara, Inc.
#

. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib

#
# DESCRIPTION:
# A pool that was cleanly exported should be importable without force even if
# the local hostid doesn't match the on-disk hostid.
#
# STRATEGY:
# 1. Set a hostid.
# 2. Create a pool.
# 3. Export the pool.
# 4. Change the hostid.
# 5. Verify that importing the pool without force succeeds.
#

verify_runnable "global"

function custom_cleanup
{
	rm -f $HOSTID_FILE
	cleanup
}

log_onexit custom_cleanup

# 1. Set a hostid.
log_must zgenhostid -f $HOSTID1

# 2. Create a pool.
log_must zpool create $TESTPOOL1 $VDEV0

# 3. Export the pool.
log_must zpool export $TESTPOOL1

# 4. Change the hostid.
log_must zgenhostid -f $HOSTID2

# 5. Verify that importing the pool without force succeeds.
log_must zpool import -d $DEVICE_DIR $TESTPOOL1

log_pass "zpool import can import cleanly exported pool when hostid changes."
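For context, the hostid swap in steps 1 and 4 can be sanity-checked outside the test. A minimal sketch, assuming a Linux host where hostid(1) reads the /etc/hostid file written by zgenhostid:

	log_must zgenhostid -f 01234567
	[[ $(hostid) == "01234567" ]] || log_fail "hostid was not applied"
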
@ -0,0 +1,65 @@
#!/bin/ksh -p

#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2021 by Delphix. All rights reserved.
# Copyright (c) 2023 by Klara, Inc.
#

. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib

#
# DESCRIPTION:
# A pool that was cleanly exported should be importable from a cachefile
# without force even if the local hostid doesn't match the on-disk hostid.
#
# STRATEGY:
# 1. Set a hostid.
# 2. Create a pool with a cachefile.
# 3. Backup the cachefile.
# 4. Export the pool.
# 5. Change the hostid.
# 6. Verify that importing the pool from the cachefile succeeds
#    without force.
#

verify_runnable "global"

function custom_cleanup
{
	rm -f $HOSTID_FILE $CPATH $CPATHBKP
	cleanup
}

log_onexit custom_cleanup

# 1. Set a hostid.
log_must zgenhostid -f $HOSTID1

# 2. Create a pool with a cachefile.
log_must zpool create -o cachefile=$CPATH $TESTPOOL1 $VDEV0

# 3. Backup the cachefile.
log_must cp $CPATH $CPATHBKP

# 4. Export the pool.
log_must zpool export $TESTPOOL1

# 5. Change the hostid.
log_must zgenhostid -f $HOSTID2

# 6. Verify that importing the pool from the cachefile succeeds without force.
log_must zpool import -c $CPATHBKP $TESTPOOL1

log_pass "zpool import can import cleanly exported pool from cachefile " \
	"when hostid changes."
@ -0,0 +1,75 @@
#!/bin/ksh -p

#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2021 by Delphix. All rights reserved.
# Copyright (c) 2023 by Klara, Inc.
#

. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib

#
# DESCRIPTION:
# A pool that wasn't cleanly exported should not be importable from a cachefile
# without force if the local hostid doesn't match the on-disk hostid.
#
# STRATEGY:
# 1. Set a hostid.
# 2. Create a pool.
# 3. Backup the cachefile.
# 4. Simulate the pool being torn down without export:
#    4.1. Copy the underlying device state.
#    4.2. Export the pool.
#    4.3. Restore the device state from the copy.
# 5. Change the hostid.
# 6. Verify that importing the pool from the cachefile fails.
# 7. Verify that importing the pool from the cachefile with force
#    succeeds.
#

verify_runnable "global"

function custom_cleanup
{
	rm -f $HOSTID_FILE $CPATH $CPATHBKP $VDEV0.bak
	cleanup
}

log_onexit custom_cleanup

# 1. Set a hostid.
log_must zgenhostid -f $HOSTID1

# 2. Create a pool.
log_must zpool create -o cachefile=$CPATH $TESTPOOL1 $VDEV0

# 3. Backup the cachefile.
log_must cp $CPATH $CPATHBKP

# 4. Simulate the pool being torn down without export.
log_must cp $VDEV0 $VDEV0.bak
log_must zpool export $TESTPOOL1
log_must cp -f $VDEV0.bak $VDEV0
log_must rm -f $VDEV0.bak

# 5. Change the hostid.
log_must zgenhostid -f $HOSTID2

# 6. Verify that importing the pool from the cachefile fails.
log_mustnot zpool import -c $CPATHBKP $TESTPOOL1

# 7. Verify that importing the pool from the cachefile with force succeeds.
log_must zpool import -f -c $CPATHBKP $TESTPOOL1

log_pass "zpool import from cachefile requires force if not cleanly " \
	"exported and hostid changes."
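The copy/export/restore dance in step 4 is the core trick: the restored label still records the pool as imported, so a later import looks like a takeover from another host. A hedged sketch of the same steps as a reusable helper (hypothetical name, not part of the suite):

	function export_uncleanly
	{
		typeset pool=$1 vdev=$2

		log_must cp $vdev $vdev.bak	# capture the active (imported) label
		log_must zpool export $pool	# export writes a clean label
		log_must cp -f $vdev.bak $vdev	# roll back to the dirty label
		log_must rm -f $vdev.bak
	}
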
@ -0,0 +1,70 @@
#!/bin/ksh -p

#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2021 by Delphix. All rights reserved.
# Copyright (c) 2023 by Klara, Inc.
#

. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib

#
# DESCRIPTION:
# A pool that wasn't cleanly exported should not be importable without force if
# the local hostid doesn't match the on-disk hostid.
#
# STRATEGY:
# 1. Set a hostid.
# 2. Create a pool.
# 3. Simulate the pool being torn down without export:
#    3.1. Copy the underlying device state.
#    3.2. Export the pool.
#    3.3. Restore the device state from the copy.
# 4. Change the hostid.
# 5. Verify that importing the pool fails.
# 6. Verify that importing the pool with force succeeds.
#

verify_runnable "global"

function custom_cleanup
{
	rm -f $HOSTID_FILE $VDEV0.bak
	cleanup
}

log_onexit custom_cleanup

# 1. Set a hostid.
log_must zgenhostid -f $HOSTID1

# 2. Create a pool.
log_must zpool create $TESTPOOL1 $VDEV0

# 3. Simulate the pool being torn down without export.
log_must cp $VDEV0 $VDEV0.bak
log_must zpool export $TESTPOOL1
log_must cp -f $VDEV0.bak $VDEV0
log_must rm -f $VDEV0.bak

# 4. Change the hostid.
log_must zgenhostid -f $HOSTID2

# 5. Verify that importing the pool fails.
log_mustnot zpool import -d $DEVICE_DIR $TESTPOOL1

# 6. Verify that importing the pool with force succeeds.
log_must zpool import -d $DEVICE_DIR -f $TESTPOOL1

log_pass "zpool import requires force if not cleanly exported " \
	"and hostid changed."
@ -0,0 +1,165 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#

#
# Copyright (c) 2023 Klara, Inc.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg
. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib

#
# DESCRIPTION:
# Verify that admin commands to a different pool are not blocked by an import
#
# STRATEGY:
# 1. Create 2 pools
# 2. Export one of the pools
# 3. Import the pool with an injected delay
# 4. Execute some admin commands against both pools
# 5. Verify that the admin commands to the non-imported pool don't stall
#

verify_runnable "global"

function cleanup
{
	zinject -c all
	destroy_pool $TESTPOOL1
	destroy_pool $TESTPOOL2
}

function pool_import
{
	typeset dir=$1
	typeset pool=$2

	SECONDS=0
	errmsg=$(zpool import -d $dir -f $pool 2>&1 > /dev/null)
	if [[ $? -eq 0 ]]; then
		echo ${pool}: imported in $SECONDS secs
		echo $SECONDS > ${DEVICE_DIR}/${pool}-import
	else
		echo ${pool}: import failed $errmsg in $SECONDS secs
	fi
}

function pool_add_device
{
	typeset pool=$1
	typeset device=$2
	typeset devtype=$3

	SECONDS=0
	errmsg=$(zpool add $pool $devtype $device 2>&1 > /dev/null)
	if [[ $? -eq 0 ]]; then
		echo ${pool}: added $devtype vdev in $SECONDS secs
		echo $SECONDS > ${DEVICE_DIR}/${pool}-add
	else
		echo ${pool}: add $devtype vdev failed ${errmsg}, in $SECONDS secs
	fi
}

function pool_stats
{
	typeset stats=$1
	typeset pool=$2

	SECONDS=0
	errmsg=$(zpool $stats $pool 2>&1 > /dev/null)
	if [[ $? -eq 0 ]]; then
		echo ${pool}: $stats in $SECONDS secs
		echo $SECONDS > ${DEVICE_DIR}/${pool}-${stats}
	else
		echo ${pool}: $stats failed ${errmsg}, in $SECONDS secs
	fi
}

function pool_create
{
	typeset pool=$1
	typeset device=$2

	SECONDS=0
	errmsg=$(zpool create $pool $device 2>&1 > /dev/null)
	if [[ $? -eq 0 ]]; then
		echo ${pool}: created in $SECONDS secs
		echo $SECONDS > ${DEVICE_DIR}/${pool}-create
	else
		echo ${pool}: create failed ${errmsg}, in $SECONDS secs
	fi
}

log_assert "Simple admin commands to a different pool not blocked by import"

log_onexit cleanup

#
# create two pools and export one
#
log_must zpool create $TESTPOOL1 $VDEV0
log_must zpool export $TESTPOOL1
log_must zpool create $TESTPOOL2 $VDEV1

#
# import pool asynchronously with an injected 10 second delay
#
log_must zinject -P import -s 10 $TESTPOOL1
pool_import $DEVICE_DIR $TESTPOOL1 &

sleep 2

#
# run some admin commands on the pools while the import is in progress
#

pool_add_device $TESTPOOL1 $VDEV2 "log" &
pool_add_device $TESTPOOL2 $VDEV3 "cache" &
pool_stats "status" $TESTPOOL1 &
pool_stats "status" $TESTPOOL2 &
pool_stats "list" $TESTPOOL1 &
pool_stats "list" $TESTPOOL2 &
pool_create $TESTPOOL1 $VDEV4 &
wait

log_must zpool sync $TESTPOOL1 $TESTPOOL2

zpool history $TESTPOOL1
zpool history $TESTPOOL2

log_must test "5" -lt $(<${DEVICE_DIR}/${TESTPOOL1}-import)

#
# verify that commands to the second pool did not wait for the import to finish
#
log_must test "2" -gt $(<${DEVICE_DIR}/${TESTPOOL2}-status)
log_must test "2" -gt $(<${DEVICE_DIR}/${TESTPOOL2}-list)
log_must test "2" -gt $(<${DEVICE_DIR}/${TESTPOOL2}-add)
[[ -e ${DEVICE_DIR}/${TESTPOOL1}-create ]] && log_fail "unexpected pool create"

log_pass "Simple admin commands to a different pool not blocked by import"
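Each helper above records its elapsed time in a file because backgrounded functions run in subshells and cannot set variables in the parent shell. A minimal sketch of the pattern with hypothetical names and threshold:

	( SECONDS=0; zpool status demo >/dev/null 2>&1; \
	    echo $SECONDS > /tmp/demo-status ) &
	wait
	log_must test "2" -gt $(</tmp/demo-status)	# fails if status took 2+ secs
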
@ -0,0 +1,130 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#

#
# Copyright (c) 2023 Klara, Inc.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg
. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib

#
# DESCRIPTION:
# Verify that pool imports by same name only have one winner
#
# STRATEGY:
# 1. Create 4 single disk pools with the same name
# 2. Generate some ZIL records (for a longer import)
# 3. Export the pools
# 4. Import the pools in parallel
# 5. Repeat using matching guids
#

verify_runnable "global"

POOLNAME="import_pool"
DEV_DIR_PREFIX="$DEVICE_DIR/$POOLNAME"
VDEVSIZE=$((512 * 1024 * 1024))

log_assert "parallel pool imports by same name only have one winner"

# each pool has its own device directory
for i in {0..3}; do
	log_must mkdir -p ${DEV_DIR_PREFIX}$i
	log_must truncate -s $VDEVSIZE ${DEV_DIR_PREFIX}$i/${DEVICE_FILE}$i
done

function cleanup
{
	zinject -c all
	log_must set_tunable64 KEEP_LOG_SPACEMAPS_AT_EXPORT 0
	log_must set_tunable64 METASLAB_DEBUG_LOAD 0

	destroy_pool $POOLNAME

	log_must rm -rf $DEV_DIR_PREFIX*
}

log_onexit cleanup

log_must set_tunable64 KEEP_LOG_SPACEMAPS_AT_EXPORT 1
log_must set_tunable64 METASLAB_DEBUG_LOAD 1

function import_pool
{
	typeset dir=$1
	typeset pool=$2
	typeset newname=$3

	SECONDS=0
	errmsg=$(zpool import -N -d $dir -f $pool $newname 2>&1 > /dev/null)
	if [[ $? -eq 0 ]]; then
		touch $dir/imported
		echo "imported $pool in $SECONDS secs"
	elif [[ $errmsg == *"cannot import"* ]]; then
		echo "pool import failed: $errmsg, waited $SECONDS secs"
		touch $dir/failed
	fi
}

#
# create four exported pools with the same name
#
for i in {0..3}; do
	log_must zpool create $POOLNAME ${DEV_DIR_PREFIX}$i/${DEVICE_FILE}$i
	log_must zpool export $POOLNAME
done
log_must zinject -P import -s 10 $POOLNAME

#
# import the pools in parallel, expecting only one winner
#
for i in {0..3}; do
	import_pool ${DEV_DIR_PREFIX}$i $POOLNAME &
done
wait

# check the result of background imports
typeset num_imports=0
typeset num_cannot=0
for i in {0..3}; do
	if [[ -f ${DEV_DIR_PREFIX}$i/imported ]]; then
		((num_imports += 1))
	fi
	if [[ -f ${DEV_DIR_PREFIX}$i/failed ]]; then
		((num_cannot += 1))
		loser=$i
	fi
done
[[ $num_imports -eq "1" ]] || log_fail "expecting an import"
[[ $num_cannot -eq "3" ]] || \
	log_fail "expecting 3 pool exists errors, found $num_cannot"

log_note "$num_imports imported and $num_cannot failed (expected)"

log_pass "parallel pool imports by same name only have one winner"
@ -0,0 +1,137 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#

#
# Copyright (c) 2023 Klara, Inc.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg
. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib

# test uses 8 vdevs
export MAX_NUM=8

#
# DESCRIPTION:
# Verify that pool imports can occur in parallel
#
# STRATEGY:
# 1. Create 8 pools
# 2. Generate some ZIL records
# 3. Export the pools
# 4. Import half of the pools synchronously to baseline sequential cost
# 5. Import the other half asynchronously to demonstrate parallel savings
# 6. Export 4 pools
# 7. Test zpool import -a
#

verify_runnable "global"

#
# override the minimum sized vdevs
#
VDEVSIZE=$((512 * 1024 * 1024))
increase_device_sizes $VDEVSIZE

POOLNAME="import_pool"

function cleanup
{
	zinject -c all
	log_must set_tunable64 KEEP_LOG_SPACEMAPS_AT_EXPORT 0
	log_must set_tunable64 METASLAB_DEBUG_LOAD 0

	for i in {0..$(($MAX_NUM - 1))}; do
		destroy_pool $POOLNAME-$i
	done
	# reset the devices
	increase_device_sizes 0
	increase_device_sizes $FILE_SIZE
}

log_assert "Pool imports can occur in parallel"

log_onexit cleanup

log_must set_tunable64 KEEP_LOG_SPACEMAPS_AT_EXPORT 1
log_must set_tunable64 METASLAB_DEBUG_LOAD 1


#
# create some exported pools with import delay injectors
#
for i in {0..$(($MAX_NUM - 1))}; do
	log_must zpool create $POOLNAME-$i $DEVICE_DIR/${DEVICE_FILE}$i
	log_must zpool export $POOLNAME-$i
	log_must zinject -P import -s 12 $POOLNAME-$i
done
wait

#
# import half of the pools synchronously
#
SECONDS=0
for i in {0..3}; do
	log_must zpool import -d $DEVICE_DIR -f $POOLNAME-$i
done
sequential_time=$SECONDS
log_note "sequentially imported 4 pools in $sequential_time seconds"

#
# import half of the pools in parallel
#
SECONDS=0
for i in {4..7}; do
	log_must zpool import -d $DEVICE_DIR -f $POOLNAME-$i &
done
wait
parallel_time=$SECONDS
log_note "asynchronously imported 4 pools in $parallel_time seconds"

log_must test $parallel_time -lt $(($sequential_time / 3))

#
# export pools with import delay injectors
#
for i in {4..7}; do
	log_must zpool export $POOLNAME-$i
	log_must zinject -P import -s 12 $POOLNAME-$i
done
wait

#
# now test zpool import -a
#
SECONDS=0
log_must zpool import -a -d $DEVICE_DIR -f
parallel_time=$SECONDS
log_note "asynchronously imported 4 pools, using '-a', in $parallel_time seconds"

log_must test $parallel_time -lt $(($sequential_time / 3))

log_pass "Pool imports occur in parallel"
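Rough arithmetic behind the two assertions above, assuming the injected 12-second delay dominates each import: four sequential imports cost about 4 * 12 = 48 seconds, while four parallel imports (and likewise 'zpool import -a') finish in roughly 12 seconds, comfortably under the 48 / 3 = 16 second bound.
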
@ -0,0 +1,132 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#

#
# Copyright (c) 2023 Klara, Inc.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg

#
# DESCRIPTION:
# During a pool import, the 'import_progress' kstat contains details
# on the import progress.
#
# STRATEGY:
# 1. Create test pool with several devices
# 2. Generate some ZIL records and spacemap logs
# 3. Export the pool
# 4. Import the pool in the background and monitor the kstat content
# 5. Check the zfs debug messages for import progress
#

verify_runnable "global"

function cleanup
{
	log_must set_tunable64 KEEP_LOG_SPACEMAPS_AT_EXPORT 0
	log_must set_tunable64 METASLAB_DEBUG_LOAD 0

	destroy_pool $TESTPOOL1
}

log_assert "During a pool import, the 'import_progress' kstat contains " \
	"notes on the progress"

log_onexit cleanup

log_must zpool create $TESTPOOL1 $VDEV0 $VDEV1 $VDEV2
typeset guid=$(zpool get -H -o value guid $TESTPOOL1)

log_must zfs create -o recordsize=8k $TESTPOOL1/fs
#
# This dd command works around an issue where ZIL records aren't created
# after freezing the pool unless a ZIL header already exists. Create a file
# synchronously to force ZFS to write one out.
#
log_must dd if=/dev/zero of=/$TESTPOOL1/fs/sync conv=fsync bs=1 count=1

#
# Overwrite some blocks to populate spacemap logs
#
log_must dd if=/dev/urandom of=/$TESTPOOL1/fs/00 bs=1M count=200
sync_all_pools
log_must dd if=/dev/urandom of=/$TESTPOOL1/fs/00 bs=1M count=200
sync_all_pools

#
# Freeze the pool to retain intent log records
#
log_must zpool freeze $TESTPOOL1

# fill_fs [destdir] [dirnum] [filenum] [bytes] [num_writes] [data]
log_must fill_fs /$TESTPOOL1/fs 1 2000 100 1024 R

log_must zpool list -v $TESTPOOL1

#
# Unmount filesystem and export the pool
#
# At this stage the zfs intent log contains
# a set of records to replay.
#
log_must zfs unmount /$TESTPOOL1/fs

log_must set_tunable64 KEEP_LOG_SPACEMAPS_AT_EXPORT 1
log_must zpool export $TESTPOOL1

log_must set_tunable64 METASLAB_DEBUG_LOAD 1
log_note "Starting zpool import in background at" $(date +'%H:%M:%S')
zpool import -d $DEVICE_DIR -f $guid &
pid=$!

#
# capture progress until import is finished
#
log_note waiting for pid $pid to exit
kstat import_progress
while [[ -d /proc/"$pid" ]]; do
	line=$(kstat import_progress | grep -v pool_guid)
	if [[ -n $line ]]; then
		echo $line
	fi
	if [[ -f /$TESTPOOL1/fs/00 ]]; then
		break;
	fi
	sleep 0.0001
done
log_note "zpool import completed at" $(date +'%H:%M:%S')

entries=$(kstat dbgmsg | grep "spa_import_progress_set_notes_impl(): 'testpool1'" | wc -l)
log_note "found $entries progress notes in dbgmsg"
log_must test $entries -gt 20

log_must zpool status $TESTPOOL1

log_pass "During a pool import, the 'import_progress' kstat contains " \
	"notes on the progress"
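The loop above polls the kstat through the suite's kstat wrapper for portability. On Linux the same data can be read from procfs directly; a sketch, with the path assumed from the usual /proc/spl/kstat/zfs layout:

	# one row per in-flight import; the header line includes pool_guid,
	# which the polling loop above filters out
	cat /proc/spl/kstat/zfs/import_progress
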
@ -67,7 +67,15 @@ log_must zpool create -f $TESTPOOL $VDEV cache $VDEV_CACHE
log_must fio $FIO_SCRIPTS/mkfiles.fio
log_must fio $FIO_SCRIPTS/random_reads.fio

timeout_handler() {
	log_fail "${TIMEOUT_MESSAGE}"
}

TIMEOUT_MESSAGE="Timed out waiting for arcstat_quiescence_noecho l2_size before zpool offline"
trap timeout_handler USR1
ppid="$$" && (sleep 600 && kill -USR1 "$ppid") & timeout_pid="$!"
arcstat_quiescence_noecho l2_size
trap - USR1
log_must zpool offline $TESTPOOL $VDEV_CACHE
arcstat_quiescence_noecho l2_size

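The hunk above adds a watchdog so a hung arcstat wait fails the test instead of hanging the run. A generic sketch of the pattern (the step name is hypothetical, and the explicit kill of the timer is an addition here; the test simply lets its 600-second timer expire):

	trap 'log_fail "step timed out"' USR1
	ppid="$$" && (sleep 60 && kill -USR1 "$ppid") & watchdog="$!"
	possibly_hanging_step			# hypothetical long-running step
	trap - USR1
	kill "$watchdog" 2>/dev/null		# cancel the timer once done
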
@ -8,6 +8,7 @@ dist_pkgdata_SCRIPTS = \
	mmp_active_import.ksh \
	mmp_inactive_import.ksh \
	mmp_exported_import.ksh \
	mmp_write_slow_disk.ksh \
	mmp_write_uberblocks.ksh \
	mmp_reset_interval.ksh \
	mmp_on_zdb.ksh \

@ -0,0 +1,97 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#

#
# Copyright (c) 2024, Klara Inc
#

# DESCRIPTION:
# Verify that long VDEV probes do not cause MMP checks to suspend the pool
# Note: without the PR-15839 fix, this test will suspend the pool.
#
# A device that is returning unexpected errors will trigger a vdev_probe.
# When the device additionally has slow response times, the probe can hold
# the spa config lock as a writer for a long period of time such that the
# mmp uberblock updates stall when trying to acquire the spa config lock.
#
# STRATEGY:
# 1. Create a pool with multiple leaf vdevs
# 2. Enable multihost and multihost_history
# 3. Delay for MMP writes to occur
# 4. Verify that a long VDEV probe didn't cause the MMP check to suspend
#    the pool
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/mmp/mmp.cfg
. $STF_SUITE/tests/functional/mmp/mmp.kshlib

verify_runnable "both"

function cleanup
{
	log_must zinject -c all

	if [[ $(zpool list -H -o health $MMP_POOL) == "SUSPENDED" ]]; then
		log_must zpool clear $MMP_POOL
		zpool get state $MMP_POOL $MMP_DIR/file.3
		zpool events | grep ".fs.zfs." | grep -v "history_event"
	fi

	poolexists $MMP_POOL && destroy_pool $MMP_POOL
	log_must rm -r $MMP_DIR
	log_must mmp_clear_hostid
}

log_assert "A long VDEV probe doesn't cause an MMP check suspend"
log_onexit cleanup

MMP_HISTORY_URL=/proc/spl/kstat/zfs/$MMP_POOL/multihost

# Create a multiple drive pool
log_must zpool events -c
log_must mkdir -p $MMP_DIR
log_must truncate -s 128M $MMP_DIR/file.{0,1,2,3,4,5}
log_must zpool create -f $MMP_POOL \
	mirror $MMP_DIR/file.{0,1,2} \
	mirror $MMP_DIR/file.{3,4,5}

# Enable MMP
log_must mmp_set_hostid $HOSTID1
log_must zpool set multihost=on $MMP_POOL
clear_mmp_history

# Inject vdev write error along with a delay
log_must zinject -f 33 -e io -L pad2 -T write -d $MMP_DIR/file.3 $MMP_POOL
log_must zinject -f 50 -e io -L uber -T write -d $MMP_DIR/file.3 $MMP_POOL
log_must zinject -D 2000:4 -T write -d $MMP_DIR/file.3 $MMP_POOL

log_must dd if=/dev/urandom of=/$MMP_POOL/data bs=1M count=5
sleep 10
sync_pool $MMP_POOL

# Confirm mmp writes to the non-slow disks have taken place
for x in {0,1,2,4}; do
	write_count=$(grep -c file.${x} $MMP_HISTORY_URL)
	[[ $write_count -gt 0 ]] || log_fail "expecting mmp writes"
done

# Expect that the pool was not suspended
log_must check_state $MMP_POOL "" "ONLINE"
health=$(zpool list -H -o health $MMP_POOL)
log_note "$MMP_POOL health is $health"
[[ "$health" == "SUSPENDED" ]] && log_fail "$MMP_POOL $health unexpected"

log_pass "A long VDEV probe doesn't cause an MMP check suspend"
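The '-D 2000:4' injection above reads as <latency in ms>:<lanes>: writes to file.3 take roughly 2 seconds each with up to 4 in flight at once, which is what keeps the vdev probe, and with it the spa config lock, held long enough to matter. A milder, hypothetical variant for experimentation (values are illustrative only):

	# 500 ms latency, 2 lanes
	log_must zinject -D 500:2 -T write -d $MMP_DIR/file.3 $MMP_POOL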