2018-11-05 19:22:33 +00:00
|
|
|
/*
|
|
|
|
* CDDL HEADER START
|
|
|
|
*
|
|
|
|
* The contents of this file are subject to the terms of the
|
|
|
|
* Common Development and Distribution License (the "License").
|
|
|
|
* You may not use this file except in compliance with the License.
|
|
|
|
*
|
|
|
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
2022-07-11 21:16:13 +00:00
|
|
|
* or https://opensource.org/licenses/CDDL-1.0.
|
2018-11-05 19:22:33 +00:00
|
|
|
* See the License for the specific language governing permissions
|
|
|
|
* and limitations under the License.
|
|
|
|
*
|
|
|
|
* When distributing Covered Code, include this CDDL HEADER in each
|
|
|
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
|
|
* If applicable, add the following below this CDDL HEADER, with the
|
|
|
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
|
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
|
|
*
|
|
|
|
* CDDL HEADER END
|
|
|
|
*/
|
|
|
|
/*
|
|
|
|
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
|
|
|
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
|
|
|
|
* Copyright 2015 RackTop Systems.
|
|
|
|
* Copyright (c) 2016, Intel Corporation.
|
2021-02-18 05:30:45 +00:00
|
|
|
* Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
|
2018-11-05 19:22:33 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Pool import support functions.
|
|
|
|
*
|
|
|
|
* Used by zpool, ztest, zdb, and zhack to locate importable configs. Since
|
|
|
|
* these commands are expected to run in the global zone, we can assume
|
|
|
|
* that the devices are all readable when called.
|
|
|
|
*
|
|
|
|
* To import a pool, we rely on reading the configuration information from the
|
|
|
|
* ZFS label of each device. If we successfully read the label, then we
|
|
|
|
* organize the configuration information in the following hierarchy:
|
|
|
|
*
|
|
|
|
* pool guid -> toplevel vdev guid -> label txg
|
|
|
|
*
|
|
|
|
* Duplicate entries matching this same tuple will be discarded. Once we have
|
|
|
|
* examined every device, we pick the best label txg config for each toplevel
|
|
|
|
* vdev. We then arrange these toplevel vdevs into a complete pool config, and
|
|
|
|
* update any paths that have changed. Finally, we attempt to import the pool
|
|
|
|
* using our derived config, and record the results.
|
|
|
|
*/
|
|
|
|
|
2022-03-28 17:24:22 +00:00
|
|
|
#ifdef HAVE_AIO_H
|
2021-01-13 17:00:12 +00:00
|
|
|
#include <aio.h>
|
2022-03-28 17:24:22 +00:00
|
|
|
#endif
|
2018-11-05 19:22:33 +00:00
|
|
|
#include <ctype.h>
|
|
|
|
#include <dirent.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <libintl.h>
|
|
|
|
#include <libgen.h>
|
|
|
|
#include <stddef.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <sys/dktp/fdisk.h>
|
|
|
|
#include <sys/vdev_impl.h>
|
|
|
|
#include <sys/fs/zfs.h>
|
|
|
|
|
|
|
|
#include <thread_pool.h>
|
|
|
|
#include <libzutil.h>
|
|
|
|
#include <libnvpair.h>
|
|
|
|
|
2019-10-03 17:20:44 +00:00
|
|
|
#include "zutil_import.h"
|
2018-11-05 19:22:33 +00:00
|
|
|
|
2022-09-26 13:40:43 +00:00
|
|
|
const char *
|
|
|
|
libpc_error_description(libpc_handle_t *hdl)
|
|
|
|
{
|
|
|
|
if (hdl->lpc_desc[0] != '\0')
|
|
|
|
return (hdl->lpc_desc);
|
|
|
|
|
|
|
|
switch (hdl->lpc_error) {
|
|
|
|
case LPC_BADCACHE:
|
|
|
|
return (dgettext(TEXT_DOMAIN, "invalid or missing cache file"));
|
|
|
|
case LPC_BADPATH:
|
|
|
|
return (dgettext(TEXT_DOMAIN, "must be an absolute path"));
|
|
|
|
case LPC_NOMEM:
|
|
|
|
return (dgettext(TEXT_DOMAIN, "out of memory"));
|
|
|
|
case LPC_EACCESS:
|
|
|
|
return (dgettext(TEXT_DOMAIN, "some devices require root "
|
|
|
|
"privileges"));
|
|
|
|
case LPC_UNKNOWN:
|
|
|
|
return (dgettext(TEXT_DOMAIN, "unknown error"));
|
|
|
|
default:
|
|
|
|
assert(hdl->lpc_error == 0);
|
|
|
|
return (dgettext(TEXT_DOMAIN, "no error"));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-06-05 11:14:12 +00:00
|
|
|
static __attribute__((format(printf, 2, 3))) void
|
2019-10-03 17:20:44 +00:00
|
|
|
zutil_error_aux(libpc_handle_t *hdl, const char *fmt, ...)
|
2018-11-05 19:22:33 +00:00
|
|
|
{
|
|
|
|
va_list ap;
|
|
|
|
|
|
|
|
va_start(ap, fmt);
|
|
|
|
|
|
|
|
(void) vsnprintf(hdl->lpc_desc, sizeof (hdl->lpc_desc), fmt, ap);
|
|
|
|
hdl->lpc_desc_active = B_TRUE;
|
|
|
|
|
|
|
|
va_end(ap);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2022-09-26 13:40:43 +00:00
|
|
|
zutil_verror(libpc_handle_t *hdl, lpc_error_t error, const char *fmt,
|
2019-10-03 17:20:44 +00:00
|
|
|
va_list ap)
|
2018-11-05 19:22:33 +00:00
|
|
|
{
|
|
|
|
char action[1024];
|
|
|
|
|
|
|
|
(void) vsnprintf(action, sizeof (action), fmt, ap);
|
2022-09-26 13:40:43 +00:00
|
|
|
hdl->lpc_error = error;
|
2018-11-05 19:22:33 +00:00
|
|
|
|
|
|
|
if (hdl->lpc_desc_active)
|
|
|
|
hdl->lpc_desc_active = B_FALSE;
|
|
|
|
else
|
|
|
|
hdl->lpc_desc[0] = '\0';
|
|
|
|
|
2022-09-26 13:40:43 +00:00
|
|
|
if (hdl->lpc_printerr)
|
|
|
|
(void) fprintf(stderr, "%s: %s\n", action,
|
|
|
|
libpc_error_description(hdl));
|
2018-11-05 19:22:33 +00:00
|
|
|
}
|
|
|
|
|
2021-06-05 11:14:12 +00:00
|
|
|
static __attribute__((format(printf, 3, 4))) int
|
2022-09-26 13:40:43 +00:00
|
|
|
zutil_error_fmt(libpc_handle_t *hdl, lpc_error_t error,
|
|
|
|
const char *fmt, ...)
|
2018-11-05 19:22:33 +00:00
|
|
|
{
|
|
|
|
va_list ap;
|
|
|
|
|
|
|
|
va_start(ap, fmt);
|
|
|
|
|
2019-10-03 17:20:44 +00:00
|
|
|
zutil_verror(hdl, error, fmt, ap);
|
2018-11-05 19:22:33 +00:00
|
|
|
|
|
|
|
va_end(ap);
|
|
|
|
|
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2022-09-26 13:40:43 +00:00
|
|
|
zutil_error(libpc_handle_t *hdl, lpc_error_t error, const char *msg)
|
2018-11-05 19:22:33 +00:00
|
|
|
{
|
2019-10-03 17:20:44 +00:00
|
|
|
return (zutil_error_fmt(hdl, error, "%s", msg));
|
2018-11-05 19:22:33 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2019-10-03 17:20:44 +00:00
|
|
|
zutil_no_memory(libpc_handle_t *hdl)
|
2018-11-05 19:22:33 +00:00
|
|
|
{
|
2022-09-26 13:40:43 +00:00
|
|
|
zutil_error(hdl, LPC_NOMEM, "internal error");
|
2018-11-05 19:22:33 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2019-10-03 17:20:44 +00:00
|
|
|
void *
|
|
|
|
zutil_alloc(libpc_handle_t *hdl, size_t size)
|
2018-11-05 19:22:33 +00:00
|
|
|
{
|
|
|
|
void *data;
|
|
|
|
|
|
|
|
if ((data = calloc(1, size)) == NULL)
|
2019-10-03 17:20:44 +00:00
|
|
|
(void) zutil_no_memory(hdl);
|
2018-11-05 19:22:33 +00:00
|
|
|
|
|
|
|
return (data);
|
|
|
|
}
|
|
|
|
|
2019-10-03 17:20:44 +00:00
|
|
|
char *
|
|
|
|
zutil_strdup(libpc_handle_t *hdl, const char *str)
|
2018-11-05 19:22:33 +00:00
|
|
|
{
|
|
|
|
char *ret;
|
|
|
|
|
|
|
|
if ((ret = strdup(str)) == NULL)
|
2019-10-03 17:20:44 +00:00
|
|
|
(void) zutil_no_memory(hdl);
|
2018-11-05 19:22:33 +00:00
|
|
|
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
2021-05-22 15:19:14 +00:00
|
|
|
static char *
|
|
|
|
zutil_strndup(libpc_handle_t *hdl, const char *str, size_t n)
|
|
|
|
{
|
|
|
|
char *ret;
|
|
|
|
|
|
|
|
if ((ret = strndup(str, n)) == NULL)
|
|
|
|
(void) zutil_no_memory(hdl);
|
|
|
|
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
2018-11-05 19:22:33 +00:00
|
|
|
/*
|
|
|
|
* Intermediate structures used to gather configuration information.
|
|
|
|
*/
|
|
|
|
typedef struct config_entry {
|
|
|
|
uint64_t ce_txg;
|
|
|
|
nvlist_t *ce_config;
|
|
|
|
struct config_entry *ce_next;
|
|
|
|
} config_entry_t;
|
|
|
|
|
|
|
|
typedef struct vdev_entry {
|
|
|
|
uint64_t ve_guid;
|
|
|
|
config_entry_t *ve_configs;
|
|
|
|
struct vdev_entry *ve_next;
|
|
|
|
} vdev_entry_t;
|
|
|
|
|
|
|
|
typedef struct pool_entry {
|
|
|
|
uint64_t pe_guid;
|
|
|
|
vdev_entry_t *pe_vdevs;
|
|
|
|
struct pool_entry *pe_next;
|
|
|
|
} pool_entry_t;
|
|
|
|
|
|
|
|
/* Mapping from a vdev guid to a device path at which its label was read. */
typedef struct name_entry {
	char			*ne_name;	/* device path (owned copy) */
	uint64_t		ne_guid;	/* vdev guid from the label */
	uint64_t		ne_order;	/* search order; lower is earlier */
	uint64_t		ne_num_labels;	/* label count found at path */
	struct name_entry	*ne_next;	/* next mapping */
} name_entry_t;
|
|
|
|
|
|
|
|
typedef struct pool_list {
|
|
|
|
pool_entry_t *pools;
|
|
|
|
name_entry_t *names;
|
|
|
|
} pool_list_t;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Go through and fix up any path and/or devid information for the given vdev
|
|
|
|
* configuration.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
fix_paths(libpc_handle_t *hdl, nvlist_t *nv, name_entry_t *names)
|
|
|
|
{
|
|
|
|
nvlist_t **child;
|
|
|
|
uint_t c, children;
|
|
|
|
uint64_t guid;
|
|
|
|
name_entry_t *ne, *best;
|
2023-03-11 18:39:24 +00:00
|
|
|
const char *path;
|
2018-11-05 19:22:33 +00:00
|
|
|
|
|
|
|
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
|
|
|
|
&child, &children) == 0) {
|
|
|
|
for (c = 0; c < children; c++)
|
|
|
|
if (fix_paths(hdl, child[c], names) != 0)
|
|
|
|
return (-1);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This is a leaf (file or disk) vdev. In either case, go through
|
|
|
|
* the name list and see if we find a matching guid. If so, replace
|
|
|
|
* the path and see if we can calculate a new devid.
|
|
|
|
*
|
|
|
|
* There may be multiple names associated with a particular guid, in
|
|
|
|
* which case we have overlapping partitions or multiple paths to the
|
|
|
|
* same disk. In this case we prefer to use the path name which
|
|
|
|
* matches the ZPOOL_CONFIG_PATH. If no matching entry is found we
|
|
|
|
* use the lowest order device which corresponds to the first match
|
|
|
|
* while traversing the ZPOOL_IMPORT_PATH search path.
|
|
|
|
*/
|
|
|
|
verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0);
|
|
|
|
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
|
|
|
|
path = NULL;
|
|
|
|
|
|
|
|
best = NULL;
|
|
|
|
for (ne = names; ne != NULL; ne = ne->ne_next) {
|
|
|
|
if (ne->ne_guid == guid) {
|
|
|
|
if (path == NULL) {
|
|
|
|
best = ne;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((strlen(path) == strlen(ne->ne_name)) &&
|
|
|
|
strncmp(path, ne->ne_name, strlen(path)) == 0) {
|
|
|
|
best = ne;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (best == NULL) {
|
|
|
|
best = ne;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Prefer paths with move vdev labels. */
|
|
|
|
if (ne->ne_num_labels > best->ne_num_labels) {
|
|
|
|
best = ne;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Prefer paths earlier in the search order. */
|
|
|
|
if (ne->ne_num_labels == best->ne_num_labels &&
|
|
|
|
ne->ne_order < best->ne_order) {
|
|
|
|
best = ne;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (best == NULL)
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0)
|
|
|
|
return (-1);
|
|
|
|
|
|
|
|
update_vdev_config_dev_strs(nv);
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Add the given configuration to the list of known devices.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
add_config(libpc_handle_t *hdl, pool_list_t *pl, const char *path,
|
|
|
|
int order, int num_labels, nvlist_t *config)
|
|
|
|
{
|
|
|
|
uint64_t pool_guid, vdev_guid, top_guid, txg, state;
|
|
|
|
pool_entry_t *pe;
|
|
|
|
vdev_entry_t *ve;
|
|
|
|
config_entry_t *ce;
|
|
|
|
name_entry_t *ne;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If this is a hot spare not currently in use or level 2 cache
|
|
|
|
* device, add it to the list of names to translate, but don't do
|
|
|
|
* anything else.
|
|
|
|
*/
|
|
|
|
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
|
|
|
|
&state) == 0 &&
|
|
|
|
(state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) &&
|
|
|
|
nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) {
|
2019-10-03 17:20:44 +00:00
|
|
|
if ((ne = zutil_alloc(hdl, sizeof (name_entry_t))) == NULL)
|
2018-11-05 19:22:33 +00:00
|
|
|
return (-1);
|
|
|
|
|
2019-10-03 17:20:44 +00:00
|
|
|
if ((ne->ne_name = zutil_strdup(hdl, path)) == NULL) {
|
2018-11-05 19:22:33 +00:00
|
|
|
free(ne);
|
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
ne->ne_guid = vdev_guid;
|
|
|
|
ne->ne_order = order;
|
|
|
|
ne->ne_num_labels = num_labels;
|
|
|
|
ne->ne_next = pl->names;
|
|
|
|
pl->names = ne;
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we have a valid config but cannot read any of these fields, then
|
|
|
|
* it means we have a half-initialized label. In vdev_label_init()
|
|
|
|
* we write a label with txg == 0 so that we can identify the device
|
|
|
|
* in case the user refers to the same disk later on. If we fail to
|
|
|
|
* create the pool, we'll be left with a label in this state
|
|
|
|
* which should not be considered part of a valid pool.
|
|
|
|
*/
|
|
|
|
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
|
|
|
|
&pool_guid) != 0 ||
|
|
|
|
nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
|
|
|
|
&vdev_guid) != 0 ||
|
|
|
|
nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID,
|
|
|
|
&top_guid) != 0 ||
|
|
|
|
nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
|
|
|
|
&txg) != 0 || txg == 0) {
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* First, see if we know about this pool. If not, then add it to the
|
|
|
|
* list of known pools.
|
|
|
|
*/
|
|
|
|
for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
|
|
|
|
if (pe->pe_guid == pool_guid)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (pe == NULL) {
|
2019-10-03 17:20:44 +00:00
|
|
|
if ((pe = zutil_alloc(hdl, sizeof (pool_entry_t))) == NULL) {
|
2018-11-05 19:22:33 +00:00
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
pe->pe_guid = pool_guid;
|
|
|
|
pe->pe_next = pl->pools;
|
|
|
|
pl->pools = pe;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Second, see if we know about this toplevel vdev. Add it if its
|
|
|
|
* missing.
|
|
|
|
*/
|
|
|
|
for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
|
|
|
|
if (ve->ve_guid == top_guid)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ve == NULL) {
|
2019-10-03 17:20:44 +00:00
|
|
|
if ((ve = zutil_alloc(hdl, sizeof (vdev_entry_t))) == NULL) {
|
2018-11-05 19:22:33 +00:00
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
ve->ve_guid = top_guid;
|
|
|
|
ve->ve_next = pe->pe_vdevs;
|
|
|
|
pe->pe_vdevs = ve;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Third, see if we have a config with a matching transaction group. If
|
|
|
|
* so, then we do nothing. Otherwise, add it to the list of known
|
|
|
|
* configs.
|
|
|
|
*/
|
|
|
|
for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) {
|
|
|
|
if (ce->ce_txg == txg)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ce == NULL) {
|
2019-10-03 17:20:44 +00:00
|
|
|
if ((ce = zutil_alloc(hdl, sizeof (config_entry_t))) == NULL) {
|
2018-11-05 19:22:33 +00:00
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
ce->ce_txg = txg;
|
|
|
|
ce->ce_config = fnvlist_dup(config);
|
|
|
|
ce->ce_next = ve->ve_configs;
|
|
|
|
ve->ve_configs = ce;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* At this point we've successfully added our config to the list of
|
|
|
|
* known configs. The last thing to do is add the vdev guid -> path
|
|
|
|
* mappings so that we can fix up the configuration as necessary before
|
|
|
|
* doing the import.
|
|
|
|
*/
|
2019-10-03 17:20:44 +00:00
|
|
|
if ((ne = zutil_alloc(hdl, sizeof (name_entry_t))) == NULL)
|
2018-11-05 19:22:33 +00:00
|
|
|
return (-1);
|
|
|
|
|
2019-10-03 17:20:44 +00:00
|
|
|
if ((ne->ne_name = zutil_strdup(hdl, path)) == NULL) {
|
2018-11-05 19:22:33 +00:00
|
|
|
free(ne);
|
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
|
|
|
|
ne->ne_guid = vdev_guid;
|
|
|
|
ne->ne_order = order;
|
|
|
|
ne->ne_num_labels = num_labels;
|
|
|
|
ne->ne_next = pl->names;
|
|
|
|
pl->names = ne;
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2019-10-03 17:20:44 +00:00
|
|
|
zutil_pool_active(libpc_handle_t *hdl, const char *name, uint64_t guid,
|
2018-11-05 19:22:33 +00:00
|
|
|
boolean_t *isactive)
|
|
|
|
{
|
|
|
|
ASSERT(hdl->lpc_ops->pco_pool_active != NULL);
|
|
|
|
|
|
|
|
int error = hdl->lpc_ops->pco_pool_active(hdl->lpc_lib_handle, name,
|
|
|
|
guid, isactive);
|
|
|
|
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
static nvlist_t *
|
2019-10-03 17:20:44 +00:00
|
|
|
zutil_refresh_config(libpc_handle_t *hdl, nvlist_t *tryconfig)
|
2018-11-05 19:22:33 +00:00
|
|
|
{
|
|
|
|
ASSERT(hdl->lpc_ops->pco_refresh_config != NULL);
|
|
|
|
|
|
|
|
return (hdl->lpc_ops->pco_refresh_config(hdl->lpc_lib_handle,
|
|
|
|
tryconfig));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Determine if the vdev id is a hole in the namespace.
|
|
|
|
*/
|
|
|
|
static boolean_t
|
|
|
|
vdev_is_hole(uint64_t *hole_array, uint_t holes, uint_t id)
|
|
|
|
{
|
|
|
|
int c;
|
|
|
|
|
|
|
|
for (c = 0; c < holes; c++) {
|
|
|
|
|
|
|
|
/* Top-level is a hole */
|
|
|
|
if (hole_array[c] == id)
|
|
|
|
return (B_TRUE);
|
|
|
|
}
|
|
|
|
return (B_FALSE);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Convert our list of pools into the definitive set of configurations. We
|
|
|
|
* start by picking the best config for each toplevel vdev. Once that's done,
|
|
|
|
* we assemble the toplevel vdevs into a full config for the pool. We make a
|
|
|
|
* pass to fix up any incorrect paths, and then add it to the main list to
|
|
|
|
* return to the user.
|
|
|
|
*/
|
|
|
|
static nvlist_t *
|
|
|
|
get_configs(libpc_handle_t *hdl, pool_list_t *pl, boolean_t active_ok,
|
|
|
|
nvlist_t *policy)
|
|
|
|
{
|
|
|
|
pool_entry_t *pe;
|
|
|
|
vdev_entry_t *ve;
|
|
|
|
config_entry_t *ce;
|
|
|
|
nvlist_t *ret = NULL, *config = NULL, *tmp = NULL, *nvtop, *nvroot;
|
|
|
|
nvlist_t **spares, **l2cache;
|
|
|
|
uint_t i, nspares, nl2cache;
|
|
|
|
boolean_t config_seen;
|
|
|
|
uint64_t best_txg;
|
2023-03-11 18:39:24 +00:00
|
|
|
const char *name, *hostname = NULL;
|
2018-11-05 19:22:33 +00:00
|
|
|
uint64_t guid;
|
|
|
|
uint_t children = 0;
|
|
|
|
nvlist_t **child = NULL;
|
|
|
|
uint64_t *hole_array, max_id;
|
|
|
|
uint_t c;
|
|
|
|
boolean_t isactive;
|
|
|
|
nvlist_t *nvl;
|
|
|
|
boolean_t valid_top_config = B_FALSE;
|
|
|
|
|
|
|
|
if (nvlist_alloc(&ret, 0, 0) != 0)
|
|
|
|
goto nomem;
|
|
|
|
|
|
|
|
for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
|
2022-10-15 22:27:03 +00:00
|
|
|
uint64_t id, max_txg = 0, hostid = 0;
|
|
|
|
uint_t holes = 0;
|
2018-11-05 19:22:33 +00:00
|
|
|
|
|
|
|
if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
|
|
|
|
goto nomem;
|
|
|
|
config_seen = B_FALSE;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Iterate over all toplevel vdevs. Grab the pool configuration
|
|
|
|
* from the first one we find, and then go through the rest and
|
|
|
|
* add them as necessary to the 'vdevs' member of the config.
|
|
|
|
*/
|
|
|
|
for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Determine the best configuration for this vdev by
|
|
|
|
* selecting the config with the latest transaction
|
|
|
|
* group.
|
|
|
|
*/
|
|
|
|
best_txg = 0;
|
|
|
|
for (ce = ve->ve_configs; ce != NULL;
|
|
|
|
ce = ce->ce_next) {
|
|
|
|
|
|
|
|
if (ce->ce_txg > best_txg) {
|
|
|
|
tmp = ce->ce_config;
|
|
|
|
best_txg = ce->ce_txg;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We rely on the fact that the max txg for the
|
|
|
|
* pool will contain the most up-to-date information
|
|
|
|
* about the valid top-levels in the vdev namespace.
|
|
|
|
*/
|
|
|
|
if (best_txg > max_txg) {
|
|
|
|
(void) nvlist_remove(config,
|
|
|
|
ZPOOL_CONFIG_VDEV_CHILDREN,
|
|
|
|
DATA_TYPE_UINT64);
|
|
|
|
(void) nvlist_remove(config,
|
|
|
|
ZPOOL_CONFIG_HOLE_ARRAY,
|
|
|
|
DATA_TYPE_UINT64_ARRAY);
|
|
|
|
|
|
|
|
max_txg = best_txg;
|
|
|
|
hole_array = NULL;
|
|
|
|
holes = 0;
|
|
|
|
max_id = 0;
|
|
|
|
valid_top_config = B_FALSE;
|
|
|
|
|
|
|
|
if (nvlist_lookup_uint64(tmp,
|
|
|
|
ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) {
|
|
|
|
verify(nvlist_add_uint64(config,
|
|
|
|
ZPOOL_CONFIG_VDEV_CHILDREN,
|
|
|
|
max_id) == 0);
|
|
|
|
valid_top_config = B_TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (nvlist_lookup_uint64_array(tmp,
|
|
|
|
ZPOOL_CONFIG_HOLE_ARRAY, &hole_array,
|
|
|
|
&holes) == 0) {
|
|
|
|
verify(nvlist_add_uint64_array(config,
|
|
|
|
ZPOOL_CONFIG_HOLE_ARRAY,
|
|
|
|
hole_array, holes) == 0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!config_seen) {
|
|
|
|
/*
|
|
|
|
* Copy the relevant pieces of data to the pool
|
|
|
|
* configuration:
|
|
|
|
*
|
|
|
|
* version
|
|
|
|
* pool guid
|
|
|
|
* name
|
|
|
|
* comment (if available)
|
2021-02-18 05:30:45 +00:00
|
|
|
* compatibility features (if available)
|
2018-11-05 19:22:33 +00:00
|
|
|
* pool state
|
|
|
|
* hostid (if available)
|
|
|
|
* hostname (if available)
|
|
|
|
*/
|
|
|
|
uint64_t state, version;
|
2023-03-11 18:39:24 +00:00
|
|
|
const char *comment = NULL;
|
|
|
|
const char *compatibility = NULL;
|
2018-11-05 19:22:33 +00:00
|
|
|
|
|
|
|
version = fnvlist_lookup_uint64(tmp,
|
|
|
|
ZPOOL_CONFIG_VERSION);
|
|
|
|
fnvlist_add_uint64(config,
|
|
|
|
ZPOOL_CONFIG_VERSION, version);
|
|
|
|
guid = fnvlist_lookup_uint64(tmp,
|
|
|
|
ZPOOL_CONFIG_POOL_GUID);
|
|
|
|
fnvlist_add_uint64(config,
|
|
|
|
ZPOOL_CONFIG_POOL_GUID, guid);
|
|
|
|
name = fnvlist_lookup_string(tmp,
|
|
|
|
ZPOOL_CONFIG_POOL_NAME);
|
|
|
|
fnvlist_add_string(config,
|
|
|
|
ZPOOL_CONFIG_POOL_NAME, name);
|
|
|
|
|
|
|
|
if (nvlist_lookup_string(tmp,
|
|
|
|
ZPOOL_CONFIG_COMMENT, &comment) == 0)
|
|
|
|
fnvlist_add_string(config,
|
|
|
|
ZPOOL_CONFIG_COMMENT, comment);
|
|
|
|
|
2021-02-18 05:30:45 +00:00
|
|
|
if (nvlist_lookup_string(tmp,
|
|
|
|
ZPOOL_CONFIG_COMPATIBILITY,
|
|
|
|
&compatibility) == 0)
|
|
|
|
fnvlist_add_string(config,
|
|
|
|
ZPOOL_CONFIG_COMPATIBILITY,
|
|
|
|
compatibility);
|
|
|
|
|
2018-11-05 19:22:33 +00:00
|
|
|
state = fnvlist_lookup_uint64(tmp,
|
|
|
|
ZPOOL_CONFIG_POOL_STATE);
|
|
|
|
fnvlist_add_uint64(config,
|
|
|
|
ZPOOL_CONFIG_POOL_STATE, state);
|
|
|
|
|
|
|
|
hostid = 0;
|
|
|
|
if (nvlist_lookup_uint64(tmp,
|
|
|
|
ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
|
|
|
|
fnvlist_add_uint64(config,
|
|
|
|
ZPOOL_CONFIG_HOSTID, hostid);
|
|
|
|
hostname = fnvlist_lookup_string(tmp,
|
|
|
|
ZPOOL_CONFIG_HOSTNAME);
|
|
|
|
fnvlist_add_string(config,
|
|
|
|
ZPOOL_CONFIG_HOSTNAME, hostname);
|
|
|
|
}
|
|
|
|
|
|
|
|
config_seen = B_TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Add this top-level vdev to the child array.
|
|
|
|
*/
|
|
|
|
verify(nvlist_lookup_nvlist(tmp,
|
|
|
|
ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
|
|
|
|
verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID,
|
|
|
|
&id) == 0);
|
|
|
|
|
|
|
|
if (id >= children) {
|
|
|
|
nvlist_t **newchild;
|
|
|
|
|
2019-10-03 17:20:44 +00:00
|
|
|
newchild = zutil_alloc(hdl, (id + 1) *
|
2018-11-05 19:22:33 +00:00
|
|
|
sizeof (nvlist_t *));
|
|
|
|
if (newchild == NULL)
|
|
|
|
goto nomem;
|
|
|
|
|
|
|
|
for (c = 0; c < children; c++)
|
|
|
|
newchild[c] = child[c];
|
|
|
|
|
|
|
|
free(child);
|
|
|
|
child = newchild;
|
|
|
|
children = id + 1;
|
|
|
|
}
|
|
|
|
if (nvlist_dup(nvtop, &child[id], 0) != 0)
|
|
|
|
goto nomem;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we have information about all the top-levels then
|
|
|
|
* clean up the nvlist which we've constructed. This
|
|
|
|
* means removing any extraneous devices that are
|
|
|
|
* beyond the valid range or adding devices to the end
|
|
|
|
* of our array which appear to be missing.
|
|
|
|
*/
|
|
|
|
if (valid_top_config) {
|
|
|
|
if (max_id < children) {
|
|
|
|
for (c = max_id; c < children; c++)
|
|
|
|
nvlist_free(child[c]);
|
|
|
|
children = max_id;
|
|
|
|
} else if (max_id > children) {
|
|
|
|
nvlist_t **newchild;
|
|
|
|
|
2019-10-03 17:20:44 +00:00
|
|
|
newchild = zutil_alloc(hdl, (max_id) *
|
2018-11-05 19:22:33 +00:00
|
|
|
sizeof (nvlist_t *));
|
|
|
|
if (newchild == NULL)
|
|
|
|
goto nomem;
|
|
|
|
|
|
|
|
for (c = 0; c < children; c++)
|
|
|
|
newchild[c] = child[c];
|
|
|
|
|
|
|
|
free(child);
|
|
|
|
child = newchild;
|
|
|
|
children = max_id;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
|
|
|
|
&guid) == 0);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The vdev namespace may contain holes as a result of
|
|
|
|
* device removal. We must add them back into the vdev
|
|
|
|
* tree before we process any missing devices.
|
|
|
|
*/
|
|
|
|
if (holes > 0) {
|
|
|
|
ASSERT(valid_top_config);
|
|
|
|
|
|
|
|
for (c = 0; c < children; c++) {
|
|
|
|
nvlist_t *holey;
|
|
|
|
|
|
|
|
if (child[c] != NULL ||
|
|
|
|
!vdev_is_hole(hole_array, holes, c))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (nvlist_alloc(&holey, NV_UNIQUE_NAME,
|
|
|
|
0) != 0)
|
|
|
|
goto nomem;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Holes in the namespace are treated as
|
|
|
|
* "hole" top-level vdevs and have a
|
|
|
|
* special flag set on them.
|
|
|
|
*/
|
|
|
|
if (nvlist_add_string(holey,
|
|
|
|
ZPOOL_CONFIG_TYPE,
|
|
|
|
VDEV_TYPE_HOLE) != 0 ||
|
|
|
|
nvlist_add_uint64(holey,
|
|
|
|
ZPOOL_CONFIG_ID, c) != 0 ||
|
|
|
|
nvlist_add_uint64(holey,
|
|
|
|
ZPOOL_CONFIG_GUID, 0ULL) != 0) {
|
|
|
|
nvlist_free(holey);
|
|
|
|
goto nomem;
|
|
|
|
}
|
|
|
|
child[c] = holey;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Look for any missing top-level vdevs. If this is the case,
|
|
|
|
* create a faked up 'missing' vdev as a placeholder. We cannot
|
|
|
|
* simply compress the child array, because the kernel performs
|
|
|
|
* certain checks to make sure the vdev IDs match their location
|
|
|
|
* in the configuration.
|
|
|
|
*/
|
|
|
|
for (c = 0; c < children; c++) {
|
|
|
|
if (child[c] == NULL) {
|
|
|
|
nvlist_t *missing;
|
|
|
|
if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
|
|
|
|
0) != 0)
|
|
|
|
goto nomem;
|
|
|
|
if (nvlist_add_string(missing,
|
|
|
|
ZPOOL_CONFIG_TYPE,
|
|
|
|
VDEV_TYPE_MISSING) != 0 ||
|
|
|
|
nvlist_add_uint64(missing,
|
|
|
|
ZPOOL_CONFIG_ID, c) != 0 ||
|
|
|
|
nvlist_add_uint64(missing,
|
|
|
|
ZPOOL_CONFIG_GUID, 0ULL) != 0) {
|
|
|
|
nvlist_free(missing);
|
|
|
|
goto nomem;
|
|
|
|
}
|
|
|
|
child[c] = missing;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Put all of this pool's top-level vdevs into a root vdev.
|
|
|
|
*/
|
|
|
|
if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
|
|
|
|
goto nomem;
|
|
|
|
if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
|
|
|
|
VDEV_TYPE_ROOT) != 0 ||
|
|
|
|
nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 ||
|
|
|
|
nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 ||
|
|
|
|
nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
|
2021-12-07 01:19:13 +00:00
|
|
|
(const nvlist_t **)child, children) != 0) {
|
2018-11-05 19:22:33 +00:00
|
|
|
nvlist_free(nvroot);
|
|
|
|
goto nomem;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (c = 0; c < children; c++)
|
|
|
|
nvlist_free(child[c]);
|
|
|
|
free(child);
|
|
|
|
children = 0;
|
|
|
|
child = NULL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Go through and fix up any paths and/or devids based on our
|
|
|
|
* known list of vdev GUID -> path mappings.
|
|
|
|
*/
|
|
|
|
if (fix_paths(hdl, nvroot, pl->names) != 0) {
|
|
|
|
nvlist_free(nvroot);
|
|
|
|
goto nomem;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Add the root vdev to this pool's configuration.
|
|
|
|
*/
|
|
|
|
if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
|
|
|
|
nvroot) != 0) {
|
|
|
|
nvlist_free(nvroot);
|
|
|
|
goto nomem;
|
|
|
|
}
|
|
|
|
nvlist_free(nvroot);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* zdb uses this path to report on active pools that were
|
|
|
|
* imported or created using -R.
|
|
|
|
*/
|
|
|
|
if (active_ok)
|
|
|
|
goto add_pool;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Determine if this pool is currently active, in which case we
|
|
|
|
* can't actually import it.
|
|
|
|
*/
|
|
|
|
verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
|
|
|
|
&name) == 0);
|
|
|
|
verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
|
|
|
|
&guid) == 0);
|
|
|
|
|
2019-10-03 17:20:44 +00:00
|
|
|
if (zutil_pool_active(hdl, name, guid, &isactive) != 0)
|
2018-11-05 19:22:33 +00:00
|
|
|
goto error;
|
|
|
|
|
|
|
|
if (isactive) {
|
|
|
|
nvlist_free(config);
|
|
|
|
config = NULL;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (policy != NULL) {
|
|
|
|
if (nvlist_add_nvlist(config, ZPOOL_LOAD_POLICY,
|
|
|
|
policy) != 0)
|
|
|
|
goto nomem;
|
|
|
|
}
|
|
|
|
|
2019-10-03 17:20:44 +00:00
|
|
|
if ((nvl = zutil_refresh_config(hdl, config)) == NULL) {
|
2018-11-05 19:22:33 +00:00
|
|
|
nvlist_free(config);
|
|
|
|
config = NULL;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
nvlist_free(config);
|
|
|
|
config = nvl;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Go through and update the paths for spares, now that we have
|
|
|
|
* them.
|
|
|
|
*/
|
|
|
|
verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
|
|
|
|
&nvroot) == 0);
|
|
|
|
if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
|
|
|
|
&spares, &nspares) == 0) {
|
|
|
|
for (i = 0; i < nspares; i++) {
|
|
|
|
if (fix_paths(hdl, spares[i], pl->names) != 0)
|
|
|
|
goto nomem;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Update the paths for l2cache devices.
|
|
|
|
*/
|
|
|
|
if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
|
|
|
|
&l2cache, &nl2cache) == 0) {
|
|
|
|
for (i = 0; i < nl2cache; i++) {
|
|
|
|
if (fix_paths(hdl, l2cache[i], pl->names) != 0)
|
|
|
|
goto nomem;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Restore the original information read from the actual label.
|
|
|
|
*/
|
|
|
|
(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID,
|
|
|
|
DATA_TYPE_UINT64);
|
|
|
|
(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME,
|
|
|
|
DATA_TYPE_STRING);
|
|
|
|
if (hostid != 0) {
|
|
|
|
verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
|
|
|
|
hostid) == 0);
|
|
|
|
verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
|
|
|
|
hostname) == 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
add_pool:
|
|
|
|
/*
|
|
|
|
* Add this pool to the list of configs.
|
|
|
|
*/
|
|
|
|
verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
|
|
|
|
&name) == 0);
|
|
|
|
|
|
|
|
if (nvlist_add_nvlist(ret, name, config) != 0)
|
|
|
|
goto nomem;
|
|
|
|
|
|
|
|
nvlist_free(config);
|
|
|
|
config = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return (ret);
|
|
|
|
|
|
|
|
nomem:
|
2019-10-03 17:20:44 +00:00
|
|
|
(void) zutil_no_memory(hdl);
|
2018-11-05 19:22:33 +00:00
|
|
|
error:
|
|
|
|
nvlist_free(config);
|
|
|
|
nvlist_free(ret);
|
|
|
|
for (c = 0; c < children; c++)
|
|
|
|
nvlist_free(child[c]);
|
|
|
|
free(child);
|
|
|
|
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Return the offset of the given label.
|
|
|
|
*/
|
|
|
|
static uint64_t
|
|
|
|
label_offset(uint64_t size, int l)
|
|
|
|
{
|
|
|
|
ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0);
|
|
|
|
return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
|
|
|
|
0 : size - VDEV_LABELS * sizeof (vdev_label_t)));
|
|
|
|
}
|
|
|
|
|
2021-05-27 05:07:31 +00:00
|
|
|
/*
|
|
|
|
* The same description applies as to zpool_read_label below,
|
|
|
|
* except here we do it without aio, presumably because an aio call
|
|
|
|
* errored out in a way we think not using it could circumvent.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
zpool_read_label_slow(int fd, nvlist_t **config, int *num_labels)
|
|
|
|
{
|
|
|
|
struct stat64 statbuf;
|
|
|
|
int l, count = 0;
|
|
|
|
vdev_phys_t *label;
|
|
|
|
nvlist_t *expected_config = NULL;
|
|
|
|
uint64_t expected_guid = 0, size;
|
|
|
|
|
|
|
|
*config = NULL;
|
|
|
|
|
|
|
|
if (fstat64_blk(fd, &statbuf) == -1)
|
|
|
|
return (0);
|
|
|
|
size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
|
|
|
|
|
2022-10-26 22:08:31 +00:00
|
|
|
label = (vdev_phys_t *)umem_alloc_aligned(sizeof (*label), PAGESIZE,
|
|
|
|
UMEM_DEFAULT);
|
|
|
|
if (label == NULL)
|
2021-05-27 05:07:31 +00:00
|
|
|
return (-1);
|
|
|
|
|
|
|
|
for (l = 0; l < VDEV_LABELS; l++) {
|
|
|
|
uint64_t state, guid, txg;
|
|
|
|
off_t offset = label_offset(size, l) + VDEV_SKIP_SIZE;
|
|
|
|
|
|
|
|
if (pread64(fd, label, sizeof (vdev_phys_t),
|
|
|
|
offset) != sizeof (vdev_phys_t))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (nvlist_unpack(label->vp_nvlist,
|
|
|
|
sizeof (label->vp_nvlist), config, 0) != 0)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_GUID,
|
|
|
|
&guid) != 0 || guid == 0) {
|
|
|
|
nvlist_free(*config);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
|
|
|
|
&state) != 0 || state > POOL_STATE_L2CACHE) {
|
|
|
|
nvlist_free(*config);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
|
|
|
|
(nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
|
|
|
|
&txg) != 0 || txg == 0)) {
|
|
|
|
nvlist_free(*config);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (expected_guid) {
|
|
|
|
if (expected_guid == guid)
|
|
|
|
count++;
|
|
|
|
|
|
|
|
nvlist_free(*config);
|
|
|
|
} else {
|
|
|
|
expected_config = *config;
|
|
|
|
expected_guid = guid;
|
|
|
|
count++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (num_labels != NULL)
|
|
|
|
*num_labels = count;
|
|
|
|
|
2022-10-26 22:08:31 +00:00
|
|
|
umem_free_aligned(label, sizeof (*label));
|
2021-05-27 05:07:31 +00:00
|
|
|
*config = expected_config;
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2018-11-05 19:22:33 +00:00
|
|
|
/*
|
|
|
|
* Given a file descriptor, read the label information and return an nvlist
|
|
|
|
* describing the configuration, if there is one. The number of valid
|
|
|
|
* labels found will be returned in num_labels when non-NULL.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
zpool_read_label(int fd, nvlist_t **config, int *num_labels)
|
|
|
|
{
|
2022-03-28 17:24:22 +00:00
|
|
|
#ifndef HAVE_AIO_H
|
|
|
|
return (zpool_read_label_slow(fd, config, num_labels));
|
|
|
|
#else
|
2018-11-05 19:22:33 +00:00
|
|
|
struct stat64 statbuf;
|
2021-01-13 17:00:12 +00:00
|
|
|
struct aiocb aiocbs[VDEV_LABELS];
|
|
|
|
struct aiocb *aiocbps[VDEV_LABELS];
|
|
|
|
vdev_phys_t *labels;
|
2018-11-05 19:22:33 +00:00
|
|
|
nvlist_t *expected_config = NULL;
|
|
|
|
uint64_t expected_guid = 0, size;
|
2021-01-13 17:00:12 +00:00
|
|
|
int error, l, count = 0;
|
2018-11-05 19:22:33 +00:00
|
|
|
|
|
|
|
*config = NULL;
|
|
|
|
|
|
|
|
if (fstat64_blk(fd, &statbuf) == -1)
|
|
|
|
return (0);
|
|
|
|
size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
|
|
|
|
|
2022-10-26 22:08:31 +00:00
|
|
|
labels = (vdev_phys_t *)umem_alloc_aligned(
|
|
|
|
VDEV_LABELS * sizeof (*labels), PAGESIZE, UMEM_DEFAULT);
|
|
|
|
if (labels == NULL)
|
2018-11-05 19:22:33 +00:00
|
|
|
return (-1);
|
|
|
|
|
2021-01-13 17:00:12 +00:00
|
|
|
memset(aiocbs, 0, sizeof (aiocbs));
|
2018-11-05 19:22:33 +00:00
|
|
|
for (l = 0; l < VDEV_LABELS; l++) {
|
2021-01-13 16:30:48 +00:00
|
|
|
off_t offset = label_offset(size, l) + VDEV_SKIP_SIZE;
|
2018-11-05 19:22:33 +00:00
|
|
|
|
2021-01-13 17:00:12 +00:00
|
|
|
aiocbs[l].aio_fildes = fd;
|
|
|
|
aiocbs[l].aio_offset = offset;
|
|
|
|
aiocbs[l].aio_buf = &labels[l];
|
|
|
|
aiocbs[l].aio_nbytes = sizeof (vdev_phys_t);
|
|
|
|
aiocbs[l].aio_lio_opcode = LIO_READ;
|
|
|
|
aiocbps[l] = &aiocbs[l];
|
|
|
|
}
|
|
|
|
|
|
|
|
if (lio_listio(LIO_WAIT, aiocbps, VDEV_LABELS, NULL) != 0) {
|
|
|
|
int saved_errno = errno;
|
2021-05-27 05:07:31 +00:00
|
|
|
boolean_t do_slow = B_FALSE;
|
|
|
|
error = -1;
|
2021-01-13 17:00:12 +00:00
|
|
|
|
|
|
|
if (errno == EAGAIN || errno == EINTR || errno == EIO) {
|
|
|
|
/*
|
|
|
|
* A portion of the requests may have been submitted.
|
|
|
|
* Clean them up.
|
|
|
|
*/
|
|
|
|
for (l = 0; l < VDEV_LABELS; l++) {
|
|
|
|
errno = 0;
|
2021-05-27 05:07:31 +00:00
|
|
|
switch (aio_error(&aiocbs[l])) {
|
|
|
|
case EINVAL:
|
|
|
|
break;
|
|
|
|
case EINPROGRESS:
|
2024-01-12 20:35:29 +00:00
|
|
|
/*
|
|
|
|
* This shouldn't be possible to
|
|
|
|
* encounter, die if we do.
|
|
|
|
*/
|
2021-05-27 05:07:31 +00:00
|
|
|
ASSERT(B_FALSE);
|
2022-02-15 16:58:59 +00:00
|
|
|
zfs_fallthrough;
|
2024-01-12 20:35:29 +00:00
|
|
|
case EREMOTEIO:
|
|
|
|
/*
|
|
|
|
* May be returned by an NVMe device
|
|
|
|
* which is visible in /dev/ but due
|
|
|
|
* to a low-level format change, or
|
|
|
|
* other error, needs to be rescanned.
|
|
|
|
* Try the slow method.
|
|
|
|
*/
|
|
|
|
zfs_fallthrough;
|
2021-05-27 05:07:31 +00:00
|
|
|
case EOPNOTSUPP:
|
|
|
|
case ENOSYS:
|
|
|
|
do_slow = B_TRUE;
|
2022-02-15 16:58:59 +00:00
|
|
|
zfs_fallthrough;
|
2021-05-27 05:07:31 +00:00
|
|
|
case 0:
|
|
|
|
default:
|
2021-01-13 17:00:12 +00:00
|
|
|
(void) aio_return(&aiocbs[l]);
|
2021-05-27 05:07:31 +00:00
|
|
|
}
|
2021-01-13 17:00:12 +00:00
|
|
|
}
|
|
|
|
}
|
2021-05-27 05:07:31 +00:00
|
|
|
if (do_slow) {
|
|
|
|
/*
|
|
|
|
* At least some IO involved access unsafe-for-AIO
|
|
|
|
* files. Let's try again, without AIO this time.
|
|
|
|
*/
|
|
|
|
error = zpool_read_label_slow(fd, config, num_labels);
|
|
|
|
saved_errno = errno;
|
|
|
|
}
|
2022-10-26 22:08:31 +00:00
|
|
|
umem_free_aligned(labels, VDEV_LABELS * sizeof (*labels));
|
2021-01-13 17:00:12 +00:00
|
|
|
errno = saved_errno;
|
2021-05-27 05:07:31 +00:00
|
|
|
return (error);
|
2021-01-13 17:00:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for (l = 0; l < VDEV_LABELS; l++) {
|
|
|
|
uint64_t state, guid, txg;
|
|
|
|
|
|
|
|
if (aio_return(&aiocbs[l]) != sizeof (vdev_phys_t))
|
2018-11-05 19:22:33 +00:00
|
|
|
continue;
|
|
|
|
|
2021-01-13 17:00:12 +00:00
|
|
|
if (nvlist_unpack(labels[l].vp_nvlist,
|
|
|
|
sizeof (labels[l].vp_nvlist), config, 0) != 0)
|
2018-11-05 19:22:33 +00:00
|
|
|
continue;
|
|
|
|
|
|
|
|
if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_GUID,
|
|
|
|
&guid) != 0 || guid == 0) {
|
|
|
|
nvlist_free(*config);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
|
|
|
|
&state) != 0 || state > POOL_STATE_L2CACHE) {
|
|
|
|
nvlist_free(*config);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
|
|
|
|
(nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
|
|
|
|
&txg) != 0 || txg == 0)) {
|
|
|
|
nvlist_free(*config);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (expected_guid) {
|
|
|
|
if (expected_guid == guid)
|
|
|
|
count++;
|
|
|
|
|
|
|
|
nvlist_free(*config);
|
|
|
|
} else {
|
|
|
|
expected_config = *config;
|
|
|
|
expected_guid = guid;
|
|
|
|
count++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (num_labels != NULL)
|
|
|
|
*num_labels = count;
|
|
|
|
|
2022-10-26 22:08:31 +00:00
|
|
|
umem_free_aligned(labels, VDEV_LABELS * sizeof (*labels));
|
2018-11-05 19:22:33 +00:00
|
|
|
*config = expected_config;
|
|
|
|
|
|
|
|
return (0);
|
2022-03-28 17:24:22 +00:00
|
|
|
#endif
|
2018-11-05 19:22:33 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2019-02-26 19:13:15 +00:00
|
|
|
* Sorted by full path and then vdev guid to allow for multiple entries with
|
2018-11-05 19:22:33 +00:00
|
|
|
* the same full path name. This is required because it's possible to
|
|
|
|
* have multiple block devices with labels that refer to the same
|
|
|
|
* ZPOOL_CONFIG_PATH yet have different vdev guids. In this case both
|
|
|
|
* entries need to be added to the cache. Scenarios where this can occur
|
|
|
|
* include overwritten pool labels, devices which are visible from multiple
|
|
|
|
* hosts and multipath devices.
|
|
|
|
*/
|
2019-10-03 17:20:44 +00:00
|
|
|
int
|
2018-11-05 19:22:33 +00:00
|
|
|
slice_cache_compare(const void *arg1, const void *arg2)
|
|
|
|
{
|
|
|
|
const char *nm1 = ((rdsk_node_t *)arg1)->rn_name;
|
|
|
|
const char *nm2 = ((rdsk_node_t *)arg2)->rn_name;
|
|
|
|
uint64_t guid1 = ((rdsk_node_t *)arg1)->rn_vdev_guid;
|
|
|
|
uint64_t guid2 = ((rdsk_node_t *)arg2)->rn_vdev_guid;
|
|
|
|
int rv;
|
|
|
|
|
Reduce loaded range tree memory usage
This patch implements a new tree structure for ZFS, and uses it to
store range trees more efficiently.
The new structure is approximately a B-tree, though there are some
small differences from the usual characterizations. The tree has core
nodes and leaf nodes; each contain data elements, which the elements
in the core nodes acting as separators between its children. The
difference between core and leaf nodes is that the core nodes have an
array of children, while leaf nodes don't. Every node in the tree may
be only partially full; in most cases, they are all at least 50% full
(in terms of element count) except for the root node, which can be
less full. Underfull nodes will steal from their neighbors or merge to
remain full enough, while overfull nodes will split in two. The data
elements are contained in tree-controlled buffers; they are copied
into these on insertion, and overwritten on deletion. This means that
the elements are not independently allocated, which reduces overhead,
but also means they can't be shared between trees (and also that
pointers to them are only valid until a side-effectful tree operation
occurs). The overhead varies based on how dense the tree is, but is
usually on the order of about 50% of the element size; the per-node
overheads are very small, and so don't make a significant difference.
The trees can accept arbitrary records; they accept a size and a
comparator to allow them to be used for a variety of purposes.
The new trees replace the AVL trees used in the range trees today.
Currently, the range_seg_t structure contains three 8 byte integers
of payload and two 24 byte avl_tree_node_ts to handle its storage in
both an offset-sorted tree and a size-sorted tree (total size: 64
bytes). In the new model, the range seg structures are usually two 4
byte integers, but a separate one needs to exist for the size-sorted
and offset-sorted tree. Between the raw size, the 50% overhead, and
the double storage, the new btrees are expected to use 8*1.5*2 = 24
bytes per record, or 33.3% as much memory as the AVL trees (this is
for the purposes of storing metaslab range trees; for other purposes,
like scrubs, they use ~50% as much memory).
We reduced the size of the payload in the range segments by teaching
range trees about starting offsets and shifts; since metaslabs have a
fixed starting offset, and they all operate in terms of disk sectors,
we can store the ranges using 4-byte integers as long as the size of
the metaslab divided by the sector size is less than 2^32. For 512-byte
sectors, this is a 2^41 (or 2TB) metaslab, which with the default
settings corresponds to a 256PB disk. 4k sector disks can handle
metaslabs up to 2^46 bytes, or 2^63 byte disks. Since we do not
anticipate disks of this size in the near future, there should be
almost no cases where metaslabs need 64-byte integers to store their
ranges. We do still have the capability to store 64-byte integer ranges
to account for cases where we are storing per-vdev (or per-dnode) trees,
which could reasonably go above the limits discussed. We also do not
store fill information in the compact version of the node, since it
is only used for sorted scrub.
We also optimized the metaslab loading process in various other ways
to offset some inefficiencies in the btree model. While individual
operations (find, insert, remove_from) are faster for the btree than
they are for the avl tree, remove usually requires a find operation,
while in the AVL tree model the element itself suffices. Some clever
changes actually caused an overall speedup in metaslab loading; we use
approximately 40% less cpu to load metaslabs in our tests on Illumos.
Another memory and performance optimization was achieved by changing
what is stored in the size-sorted trees. When a disk is heavily
fragmented, the df algorithm used by default in ZFS will almost always
find a number of small regions in its initial cursor-based search; it
will usually only fall back to the size-sorted tree to find larger
regions. If we increase the size of the cursor-based search slightly,
and don't store segments that are smaller than a tunable size floor
in the size-sorted tree, we can further cut memory usage down to
below 20% of what the AVL trees store. This also results in further
reductions in CPU time spent loading metaslabs.
The 16KiB size floor was chosen because it results in substantial memory
usage reduction while not usually resulting in situations where we can't
find an appropriate chunk with the cursor and are forced to use an
oversized chunk from the size-sorted tree. In addition, even if we do
have to use an oversized chunk from the size-sorted tree, the chunk
would be too small to use for ZIL allocations, so it isn't as big of a
loss as it might otherwise be. And often, more small allocations will
follow the initial one, and the cursor search will now find the
remainder of the chunk we didn't use all of and use it for subsequent
allocations. Practical testing has shown little or no change in
fragmentation as a result of this change.
If the size-sorted tree becomes empty while the offset sorted one still
has entries, it will load all the entries from the offset sorted tree
and disregard the size floor until it is unloaded again. This operation
occurs rarely with the default setting, only on incredibly thoroughly
fragmented pools.
There are some other small changes to zdb to teach it to handle btrees,
but nothing major.
Reviewed-by: George Wilson <gwilson@delphix.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed by: Sebastien Roy seb@delphix.com
Reviewed-by: Igor Kozhukhov <igor@dilos.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #9181
2019-10-09 17:36:03 +00:00
|
|
|
rv = TREE_ISIGN(strcmp(nm1, nm2));
|
2018-11-05 19:22:33 +00:00
|
|
|
if (rv)
|
|
|
|
return (rv);
|
|
|
|
|
Reduce loaded range tree memory usage
This patch implements a new tree structure for ZFS, and uses it to
store range trees more efficiently.
The new structure is approximately a B-tree, though there are some
small differences from the usual characterizations. The tree has core
nodes and leaf nodes; each contain data elements, which the elements
in the core nodes acting as separators between its children. The
difference between core and leaf nodes is that the core nodes have an
array of children, while leaf nodes don't. Every node in the tree may
be only partially full; in most cases, they are all at least 50% full
(in terms of element count) except for the root node, which can be
less full. Underfull nodes will steal from their neighbors or merge to
remain full enough, while overfull nodes will split in two. The data
elements are contained in tree-controlled buffers; they are copied
into these on insertion, and overwritten on deletion. This means that
the elements are not independently allocated, which reduces overhead,
but also means they can't be shared between trees (and also that
pointers to them are only valid until a side-effectful tree operation
occurs). The overhead varies based on how dense the tree is, but is
usually on the order of about 50% of the element size; the per-node
overheads are very small, and so don't make a significant difference.
The trees can accept arbitrary records; they accept a size and a
comparator to allow them to be used for a variety of purposes.
The new trees replace the AVL trees used in the range trees today.
Currently, the range_seg_t structure contains three 8 byte integers
of payload and two 24 byte avl_tree_node_ts to handle its storage in
both an offset-sorted tree and a size-sorted tree (total size: 64
bytes). In the new model, the range seg structures are usually two 4
byte integers, but a separate one needs to exist for the size-sorted
and offset-sorted tree. Between the raw size, the 50% overhead, and
the double storage, the new btrees are expected to use 8*1.5*2 = 24
bytes per record, or 33.3% as much memory as the AVL trees (this is
for the purposes of storing metaslab range trees; for other purposes,
like scrubs, they use ~50% as much memory).
We reduced the size of the payload in the range segments by teaching
range trees about starting offsets and shifts; since metaslabs have a
fixed starting offset, and they all operate in terms of disk sectors,
we can store the ranges using 4-byte integers as long as the size of
the metaslab divided by the sector size is less than 2^32. For 512-byte
sectors, this is a 2^41 (or 2TB) metaslab, which with the default
settings corresponds to a 256PB disk. 4k sector disks can handle
metaslabs up to 2^46 bytes, or 2^63 byte disks. Since we do not
anticipate disks of this size in the near future, there should be
almost no cases where metaslabs need 64-byte integers to store their
ranges. We do still have the capability to store 64-byte integer ranges
to account for cases where we are storing per-vdev (or per-dnode) trees,
which could reasonably go above the limits discussed. We also do not
store fill information in the compact version of the node, since it
is only used for sorted scrub.
We also optimized the metaslab loading process in various other ways
to offset some inefficiencies in the btree model. While individual
operations (find, insert, remove_from) are faster for the btree than
they are for the avl tree, remove usually requires a find operation,
while in the AVL tree model the element itself suffices. Some clever
changes actually caused an overall speedup in metaslab loading; we use
approximately 40% less cpu to load metaslabs in our tests on Illumos.
Another memory and performance optimization was achieved by changing
what is stored in the size-sorted trees. When a disk is heavily
fragmented, the df algorithm used by default in ZFS will almost always
find a number of small regions in its initial cursor-based search; it
will usually only fall back to the size-sorted tree to find larger
regions. If we increase the size of the cursor-based search slightly,
and don't store segments that are smaller than a tunable size floor
in the size-sorted tree, we can further cut memory usage down to
below 20% of what the AVL trees store. This also results in further
reductions in CPU time spent loading metaslabs.
The 16KiB size floor was chosen because it results in substantial memory
usage reduction while not usually resulting in situations where we can't
find an appropriate chunk with the cursor and are forced to use an
oversized chunk from the size-sorted tree. In addition, even if we do
have to use an oversized chunk from the size-sorted tree, the chunk
would be too small to use for ZIL allocations, so it isn't as big of a
loss as it might otherwise be. And often, more small allocations will
follow the initial one, and the cursor search will now find the
remainder of the chunk we didn't use all of and use it for subsequent
allocations. Practical testing has shown little or no change in
fragmentation as a result of this change.
If the size-sorted tree becomes empty while the offset sorted one still
has entries, it will load all the entries from the offset sorted tree
and disregard the size floor until it is unloaded again. This operation
occurs rarely with the default setting, only on incredibly thoroughly
fragmented pools.
There are some other small changes to zdb to teach it to handle btrees,
but nothing major.
Reviewed-by: George Wilson <gwilson@delphix.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed by: Sebastien Roy seb@delphix.com
Reviewed-by: Igor Kozhukhov <igor@dilos.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #9181
2019-10-09 17:36:03 +00:00
|
|
|
return (TREE_CMP(guid1, guid2));
|
2018-11-05 19:22:33 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
label_paths_impl(libpc_handle_t *hdl, nvlist_t *nvroot, uint64_t pool_guid,
|
2023-03-11 18:39:24 +00:00
|
|
|
uint64_t vdev_guid, const char **path, const char **devid)
|
2018-11-05 19:22:33 +00:00
|
|
|
{
|
|
|
|
nvlist_t **child;
|
|
|
|
uint_t c, children;
|
|
|
|
uint64_t guid;
|
2023-03-11 18:39:24 +00:00
|
|
|
const char *val;
|
2018-11-05 19:22:33 +00:00
|
|
|
int error;
|
|
|
|
|
|
|
|
if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
|
|
|
|
&child, &children) == 0) {
|
|
|
|
for (c = 0; c < children; c++) {
|
|
|
|
error = label_paths_impl(hdl, child[c],
|
|
|
|
pool_guid, vdev_guid, path, devid);
|
|
|
|
if (error)
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (nvroot == NULL)
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
error = nvlist_lookup_uint64(nvroot, ZPOOL_CONFIG_GUID, &guid);
|
|
|
|
if ((error != 0) || (guid != vdev_guid))
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
error = nvlist_lookup_string(nvroot, ZPOOL_CONFIG_PATH, &val);
|
|
|
|
if (error == 0)
|
|
|
|
*path = val;
|
|
|
|
|
|
|
|
error = nvlist_lookup_string(nvroot, ZPOOL_CONFIG_DEVID, &val);
|
|
|
|
if (error == 0)
|
|
|
|
*devid = val;
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Given a disk label fetch the ZPOOL_CONFIG_PATH and ZPOOL_CONFIG_DEVID
|
|
|
|
* and store these strings as config_path and devid_path respectively.
|
|
|
|
* The returned pointers are only valid as long as label remains valid.
|
|
|
|
*/
|
2019-10-03 17:20:44 +00:00
|
|
|
int
|
2023-03-11 18:39:24 +00:00
|
|
|
label_paths(libpc_handle_t *hdl, nvlist_t *label, const char **path,
|
|
|
|
const char **devid)
|
2018-11-05 19:22:33 +00:00
|
|
|
{
|
|
|
|
nvlist_t *nvroot;
|
|
|
|
uint64_t pool_guid;
|
|
|
|
uint64_t vdev_guid;
|
2024-01-04 14:35:04 +00:00
|
|
|
uint64_t state;
|
2018-11-05 19:22:33 +00:00
|
|
|
|
|
|
|
*path = NULL;
|
|
|
|
*devid = NULL;
|
2024-01-04 14:35:04 +00:00
|
|
|
if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &vdev_guid) != 0)
|
|
|
|
return (ENOENT);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* In case of spare or l2cache, we directly return path/devid from the
|
|
|
|
* label.
|
|
|
|
*/
|
|
|
|
if (!(nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, &state)) &&
|
|
|
|
(state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE)) {
|
|
|
|
(void) nvlist_lookup_string(label, ZPOOL_CONFIG_PATH, path);
|
|
|
|
(void) nvlist_lookup_string(label, ZPOOL_CONFIG_DEVID, devid);
|
|
|
|
return (0);
|
|
|
|
}
|
2018-11-05 19:22:33 +00:00
|
|
|
|
|
|
|
if (nvlist_lookup_nvlist(label, ZPOOL_CONFIG_VDEV_TREE, &nvroot) ||
|
2024-01-04 14:35:04 +00:00
|
|
|
nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID, &pool_guid))
|
2018-11-05 19:22:33 +00:00
|
|
|
return (ENOENT);
|
|
|
|
|
|
|
|
return (label_paths_impl(hdl, nvroot, pool_guid, vdev_guid, path,
|
|
|
|
devid));
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
zpool_find_import_scan_add_slice(libpc_handle_t *hdl, pthread_mutex_t *lock,
|
|
|
|
avl_tree_t *cache, const char *path, const char *name, int order)
|
|
|
|
{
|
|
|
|
avl_index_t where;
|
|
|
|
rdsk_node_t *slice;
|
|
|
|
|
2019-10-03 17:20:44 +00:00
|
|
|
slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
|
2018-11-05 19:22:33 +00:00
|
|
|
if (asprintf(&slice->rn_name, "%s/%s", path, name) == -1) {
|
|
|
|
free(slice);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
slice->rn_vdev_guid = 0;
|
|
|
|
slice->rn_lock = lock;
|
|
|
|
slice->rn_avl = cache;
|
|
|
|
slice->rn_hdl = hdl;
|
|
|
|
slice->rn_order = order + IMPORT_ORDER_SCAN_OFFSET;
|
|
|
|
slice->rn_labelpaths = B_FALSE;
|
|
|
|
|
|
|
|
pthread_mutex_lock(lock);
|
|
|
|
if (avl_find(cache, slice, &where)) {
|
|
|
|
free(slice->rn_name);
|
|
|
|
free(slice);
|
|
|
|
} else {
|
|
|
|
avl_insert(cache, slice, where);
|
|
|
|
}
|
|
|
|
pthread_mutex_unlock(lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
zpool_find_import_scan_dir(libpc_handle_t *hdl, pthread_mutex_t *lock,
|
|
|
|
avl_tree_t *cache, const char *dir, int order)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
char path[MAXPATHLEN];
|
|
|
|
struct dirent64 *dp;
|
|
|
|
DIR *dirp;
|
|
|
|
|
|
|
|
if (realpath(dir, path) == NULL) {
|
|
|
|
error = errno;
|
|
|
|
if (error == ENOENT)
|
|
|
|
return (0);
|
|
|
|
|
2021-06-05 11:14:12 +00:00
|
|
|
zutil_error_aux(hdl, "%s", strerror(error));
|
2022-09-26 13:40:43 +00:00
|
|
|
(void) zutil_error_fmt(hdl, LPC_BADPATH, dgettext(TEXT_DOMAIN,
|
|
|
|
"cannot resolve path '%s'"), dir);
|
2018-11-05 19:22:33 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
dirp = opendir(path);
|
|
|
|
if (dirp == NULL) {
|
|
|
|
error = errno;
|
2021-06-05 11:14:12 +00:00
|
|
|
zutil_error_aux(hdl, "%s", strerror(error));
|
2022-09-26 13:40:43 +00:00
|
|
|
(void) zutil_error_fmt(hdl, LPC_BADPATH, dgettext(TEXT_DOMAIN,
|
|
|
|
"cannot open '%s'"), path);
|
2018-11-05 19:22:33 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
while ((dp = readdir64(dirp)) != NULL) {
|
|
|
|
const char *name = dp->d_name;
|
2021-05-22 14:29:53 +00:00
|
|
|
if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
|
2018-11-05 19:22:33 +00:00
|
|
|
continue;
|
|
|
|
|
2021-05-22 14:29:53 +00:00
|
|
|
switch (dp->d_type) {
|
|
|
|
case DT_UNKNOWN:
|
|
|
|
case DT_BLK:
|
2021-06-14 16:59:54 +00:00
|
|
|
case DT_LNK:
|
2021-05-22 14:29:53 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
case DT_CHR:
|
|
|
|
#endif
|
|
|
|
case DT_REG:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2018-11-05 19:22:33 +00:00
|
|
|
zpool_find_import_scan_add_slice(hdl, lock, cache, path, name,
|
|
|
|
order);
|
|
|
|
}
|
|
|
|
|
|
|
|
(void) closedir(dirp);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
zpool_find_import_scan_path(libpc_handle_t *hdl, pthread_mutex_t *lock,
|
|
|
|
avl_tree_t *cache, const char *dir, int order)
|
|
|
|
{
|
|
|
|
int error = 0;
|
|
|
|
char path[MAXPATHLEN];
|
2021-05-22 15:19:14 +00:00
|
|
|
char *d = NULL;
|
|
|
|
ssize_t dl;
|
|
|
|
const char *dpath, *name;
|
2018-11-05 19:22:33 +00:00
|
|
|
|
|
|
|
/*
|
2021-05-22 15:19:14 +00:00
|
|
|
* Separate the directory and the basename.
|
|
|
|
* We do this so that we can get the realpath of
|
2018-11-05 19:22:33 +00:00
|
|
|
* the directory. We don't get the realpath on the
|
|
|
|
* whole path because if it's a symlink, we want the
|
|
|
|
* path of the symlink not where it points to.
|
|
|
|
*/
|
2021-05-22 15:19:14 +00:00
|
|
|
name = zfs_basename(dir);
|
|
|
|
if ((dl = zfs_dirnamelen(dir)) == -1)
|
|
|
|
dpath = ".";
|
|
|
|
else
|
|
|
|
dpath = d = zutil_strndup(hdl, dir, dl);
|
2018-11-05 19:22:33 +00:00
|
|
|
|
|
|
|
if (realpath(dpath, path) == NULL) {
|
|
|
|
error = errno;
|
|
|
|
if (error == ENOENT) {
|
|
|
|
error = 0;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2021-06-05 11:14:12 +00:00
|
|
|
zutil_error_aux(hdl, "%s", strerror(error));
|
2022-09-26 13:40:43 +00:00
|
|
|
(void) zutil_error_fmt(hdl, LPC_BADPATH, dgettext(TEXT_DOMAIN,
|
|
|
|
"cannot resolve path '%s'"), dir);
|
2018-11-05 19:22:33 +00:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
zpool_find_import_scan_add_slice(hdl, lock, cache, path, name, order);
|
|
|
|
|
|
|
|
out:
|
|
|
|
free(d);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Scan a list of directories for zfs devices.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
zpool_find_import_scan(libpc_handle_t *hdl, pthread_mutex_t *lock,
|
2019-10-03 17:20:44 +00:00
|
|
|
avl_tree_t **slice_cache, const char * const *dir, size_t dirs)
|
2018-11-05 19:22:33 +00:00
|
|
|
{
|
|
|
|
avl_tree_t *cache;
|
|
|
|
rdsk_node_t *slice;
|
|
|
|
void *cookie;
|
|
|
|
int i, error;
|
|
|
|
|
|
|
|
*slice_cache = NULL;
|
2019-10-03 17:20:44 +00:00
|
|
|
cache = zutil_alloc(hdl, sizeof (avl_tree_t));
|
2018-11-05 19:22:33 +00:00
|
|
|
avl_create(cache, slice_cache_compare, sizeof (rdsk_node_t),
|
|
|
|
offsetof(rdsk_node_t, rn_node));
|
|
|
|
|
|
|
|
for (i = 0; i < dirs; i++) {
|
|
|
|
struct stat sbuf;
|
|
|
|
|
|
|
|
if (stat(dir[i], &sbuf) != 0) {
|
|
|
|
error = errno;
|
|
|
|
if (error == ENOENT)
|
|
|
|
continue;
|
|
|
|
|
2021-06-05 11:14:12 +00:00
|
|
|
zutil_error_aux(hdl, "%s", strerror(error));
|
2022-09-26 13:40:43 +00:00
|
|
|
(void) zutil_error_fmt(hdl, LPC_BADPATH, dgettext(
|
2018-11-05 19:22:33 +00:00
|
|
|
TEXT_DOMAIN, "cannot resolve path '%s'"), dir[i]);
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If dir[i] is a directory, we walk through it and add all
|
2019-10-03 17:20:44 +00:00
|
|
|
* the entries to the cache. If it's not a directory, we just
|
2018-11-05 19:22:33 +00:00
|
|
|
* add it to the cache.
|
|
|
|
*/
|
|
|
|
if (S_ISDIR(sbuf.st_mode)) {
|
|
|
|
if ((error = zpool_find_import_scan_dir(hdl, lock,
|
|
|
|
cache, dir[i], i)) != 0)
|
|
|
|
goto error;
|
|
|
|
} else {
|
|
|
|
if ((error = zpool_find_import_scan_path(hdl, lock,
|
|
|
|
cache, dir[i], i)) != 0)
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
*slice_cache = cache;
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
error:
|
|
|
|
cookie = NULL;
|
|
|
|
while ((slice = avl_destroy_nodes(cache, &cookie)) != NULL) {
|
|
|
|
free(slice->rn_name);
|
|
|
|
free(slice);
|
|
|
|
}
|
|
|
|
free(cache);
|
|
|
|
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Given a list of directories to search, find all pools stored on disk. This
|
|
|
|
* includes partial pools which are not available to import. If no args are
|
|
|
|
* given (argc is 0), then the default directory (/dev/dsk) is searched.
|
|
|
|
* poolname or guid (but not both) are provided by the caller when trying
|
|
|
|
* to import a specific pool.
|
|
|
|
*/
|
|
|
|
static nvlist_t *
|
2021-03-12 23:42:27 +00:00
|
|
|
zpool_find_import_impl(libpc_handle_t *hdl, importargs_t *iarg,
|
|
|
|
pthread_mutex_t *lock, avl_tree_t *cache)
|
2018-11-05 19:22:33 +00:00
|
|
|
{
|
2021-12-12 14:34:00 +00:00
|
|
|
(void) lock;
|
2018-11-05 19:22:33 +00:00
|
|
|
nvlist_t *ret = NULL;
|
|
|
|
pool_list_t pools = { 0 };
|
|
|
|
pool_entry_t *pe, *penext;
|
|
|
|
vdev_entry_t *ve, *venext;
|
|
|
|
config_entry_t *ce, *cenext;
|
|
|
|
name_entry_t *ne, *nenext;
|
|
|
|
rdsk_node_t *slice;
|
|
|
|
void *cookie;
|
|
|
|
tpool_t *t;
|
|
|
|
|
|
|
|
verify(iarg->poolname == NULL || iarg->guid == 0);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Create a thread pool to parallelize the process of reading and
|
|
|
|
* validating labels, a large number of threads can be used due to
|
|
|
|
* minimal contention.
|
|
|
|
*/
|
|
|
|
t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN), 0, NULL);
|
|
|
|
for (slice = avl_first(cache); slice;
|
|
|
|
(slice = avl_walk(cache, slice, AVL_AFTER)))
|
|
|
|
(void) tpool_dispatch(t, zpool_open_func, slice);
|
|
|
|
|
|
|
|
tpool_wait(t);
|
|
|
|
tpool_destroy(t);
|
|
|
|
|
|
|
|
/*
|
2019-09-03 00:53:27 +00:00
|
|
|
* Process the cache, filtering out any entries which are not
|
|
|
|
* for the specified pool then adding matching label configs.
|
2018-11-05 19:22:33 +00:00
|
|
|
*/
|
|
|
|
cookie = NULL;
|
|
|
|
while ((slice = avl_destroy_nodes(cache, &cookie)) != NULL) {
|
|
|
|
if (slice->rn_config != NULL) {
|
|
|
|
nvlist_t *config = slice->rn_config;
|
|
|
|
boolean_t matched = B_TRUE;
|
|
|
|
boolean_t aux = B_FALSE;
|
|
|
|
int fd;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check if it's a spare or l2cache device. If it is,
|
|
|
|
* we need to skip the name and guid check since they
|
|
|
|
* don't exist on aux device label.
|
|
|
|
*/
|
|
|
|
if (iarg->poolname != NULL || iarg->guid != 0) {
|
|
|
|
uint64_t state;
|
|
|
|
aux = nvlist_lookup_uint64(config,
|
|
|
|
ZPOOL_CONFIG_POOL_STATE, &state) == 0 &&
|
|
|
|
(state == POOL_STATE_SPARE ||
|
|
|
|
state == POOL_STATE_L2CACHE);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (iarg->poolname != NULL && !aux) {
|
2023-03-11 18:39:24 +00:00
|
|
|
const char *pname;
|
2018-11-05 19:22:33 +00:00
|
|
|
|
|
|
|
matched = nvlist_lookup_string(config,
|
|
|
|
ZPOOL_CONFIG_POOL_NAME, &pname) == 0 &&
|
|
|
|
strcmp(iarg->poolname, pname) == 0;
|
|
|
|
} else if (iarg->guid != 0 && !aux) {
|
|
|
|
uint64_t this_guid;
|
|
|
|
|
|
|
|
matched = nvlist_lookup_uint64(config,
|
|
|
|
ZPOOL_CONFIG_POOL_GUID, &this_guid) == 0 &&
|
|
|
|
iarg->guid == this_guid;
|
|
|
|
}
|
|
|
|
if (matched) {
|
|
|
|
/*
|
|
|
|
* Verify all remaining entries can be opened
|
|
|
|
* exclusively. This will prune all underlying
|
|
|
|
* multipath devices which otherwise could
|
|
|
|
* result in the vdev appearing as UNAVAIL.
|
|
|
|
*
|
|
|
|
* Under zdb, this step isn't required and
|
|
|
|
* would prevent a zdb -e of active pools with
|
|
|
|
* no cachefile.
|
|
|
|
*/
|
2021-04-08 20:17:38 +00:00
|
|
|
fd = open(slice->rn_name,
|
|
|
|
O_RDONLY | O_EXCL | O_CLOEXEC);
|
2018-11-05 19:22:33 +00:00
|
|
|
if (fd >= 0 || iarg->can_be_active) {
|
|
|
|
if (fd >= 0)
|
|
|
|
close(fd);
|
|
|
|
add_config(hdl, &pools,
|
|
|
|
slice->rn_name, slice->rn_order,
|
|
|
|
slice->rn_num_labels, config);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
nvlist_free(config);
|
|
|
|
}
|
|
|
|
free(slice->rn_name);
|
|
|
|
free(slice);
|
|
|
|
}
|
|
|
|
avl_destroy(cache);
|
|
|
|
free(cache);
|
|
|
|
|
|
|
|
ret = get_configs(hdl, &pools, iarg->can_be_active, iarg->policy);
|
|
|
|
|
|
|
|
for (pe = pools.pools; pe != NULL; pe = penext) {
|
|
|
|
penext = pe->pe_next;
|
|
|
|
for (ve = pe->pe_vdevs; ve != NULL; ve = venext) {
|
|
|
|
venext = ve->ve_next;
|
|
|
|
for (ce = ve->ve_configs; ce != NULL; ce = cenext) {
|
|
|
|
cenext = ce->ce_next;
|
|
|
|
nvlist_free(ce->ce_config);
|
|
|
|
free(ce);
|
|
|
|
}
|
|
|
|
free(ve);
|
|
|
|
}
|
|
|
|
free(pe);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (ne = pools.names; ne != NULL; ne = nenext) {
|
|
|
|
nenext = ne->ne_next;
|
|
|
|
free(ne->ne_name);
|
|
|
|
free(ne);
|
|
|
|
}
|
|
|
|
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
2021-03-12 23:42:27 +00:00
|
|
|
/*
|
|
|
|
* Given a config, discover the paths for the devices which
|
|
|
|
* exist in the config.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
discover_cached_paths(libpc_handle_t *hdl, nvlist_t *nv,
|
|
|
|
avl_tree_t *cache, pthread_mutex_t *lock)
|
|
|
|
{
|
2023-03-11 18:39:24 +00:00
|
|
|
const char *path = NULL;
|
2021-05-22 15:19:14 +00:00
|
|
|
ssize_t dl;
|
2021-03-12 23:42:27 +00:00
|
|
|
uint_t children;
|
|
|
|
nvlist_t **child;
|
|
|
|
|
|
|
|
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
|
|
|
|
&child, &children) == 0) {
|
|
|
|
for (int c = 0; c < children; c++) {
|
|
|
|
discover_cached_paths(hdl, child[c], cache, lock);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Once we have the path, we need to add the directory to
|
2021-04-03 01:38:53 +00:00
|
|
|
* our directory cache.
|
2021-03-12 23:42:27 +00:00
|
|
|
*/
|
|
|
|
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
|
2023-03-12 16:30:21 +00:00
|
|
|
int ret;
|
|
|
|
char c = '\0';
|
|
|
|
if ((dl = zfs_dirnamelen(path)) == -1) {
|
2023-03-11 18:39:24 +00:00
|
|
|
path = ".";
|
2023-03-12 16:30:21 +00:00
|
|
|
} else {
|
|
|
|
c = path[dl];
|
2023-03-11 18:39:24 +00:00
|
|
|
((char *)path)[dl] = '\0';
|
|
|
|
|
2023-03-12 16:30:21 +00:00
|
|
|
}
|
|
|
|
ret = zpool_find_import_scan_dir(hdl, lock, cache,
|
|
|
|
path, 0);
|
|
|
|
if (c != '\0')
|
2023-03-11 18:39:24 +00:00
|
|
|
((char *)path)[dl] = c;
|
|
|
|
|
2023-03-12 16:30:21 +00:00
|
|
|
return (ret);
|
2021-03-12 23:42:27 +00:00
|
|
|
}
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2018-11-05 19:22:33 +00:00
|
|
|
/*
|
|
|
|
* Given a cache file, return the contents as a list of importable pools.
|
|
|
|
* poolname or guid (but not both) are provided by the caller when trying
|
|
|
|
* to import a specific pool.
|
|
|
|
*/
|
|
|
|
static nvlist_t *
|
2021-03-12 23:42:27 +00:00
|
|
|
zpool_find_import_cached(libpc_handle_t *hdl, importargs_t *iarg)
|
2018-11-05 19:22:33 +00:00
|
|
|
{
|
|
|
|
char *buf;
|
|
|
|
int fd;
|
|
|
|
struct stat64 statbuf;
|
|
|
|
nvlist_t *raw, *src, *dst;
|
|
|
|
nvlist_t *pools;
|
|
|
|
nvpair_t *elem;
|
2023-03-11 18:39:24 +00:00
|
|
|
const char *name;
|
2018-11-05 19:22:33 +00:00
|
|
|
uint64_t this_guid;
|
|
|
|
boolean_t active;
|
|
|
|
|
2021-03-12 23:42:27 +00:00
|
|
|
verify(iarg->poolname == NULL || iarg->guid == 0);
|
2018-11-05 19:22:33 +00:00
|
|
|
|
2021-04-08 20:17:38 +00:00
|
|
|
if ((fd = open(iarg->cachefile, O_RDONLY | O_CLOEXEC)) < 0) {
|
2019-10-03 17:20:44 +00:00
|
|
|
zutil_error_aux(hdl, "%s", strerror(errno));
|
2022-09-26 13:40:43 +00:00
|
|
|
(void) zutil_error(hdl, LPC_BADCACHE, dgettext(TEXT_DOMAIN,
|
|
|
|
"failed to open cache file"));
|
2018-11-05 19:22:33 +00:00
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (fstat64(fd, &statbuf) != 0) {
|
2019-10-03 17:20:44 +00:00
|
|
|
zutil_error_aux(hdl, "%s", strerror(errno));
|
2018-11-05 19:22:33 +00:00
|
|
|
(void) close(fd);
|
2022-09-26 13:40:43 +00:00
|
|
|
(void) zutil_error(hdl, LPC_BADCACHE, dgettext(TEXT_DOMAIN,
|
|
|
|
"failed to get size of cache file"));
|
2018-11-05 19:22:33 +00:00
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
2019-10-03 17:20:44 +00:00
|
|
|
if ((buf = zutil_alloc(hdl, statbuf.st_size)) == NULL) {
|
2018-11-05 19:22:33 +00:00
|
|
|
(void) close(fd);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
|
|
|
|
(void) close(fd);
|
|
|
|
free(buf);
|
2022-09-26 13:40:43 +00:00
|
|
|
(void) zutil_error(hdl, LPC_BADCACHE, dgettext(TEXT_DOMAIN,
|
2018-11-05 19:22:33 +00:00
|
|
|
"failed to read cache file contents"));
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
(void) close(fd);
|
|
|
|
|
|
|
|
if (nvlist_unpack(buf, statbuf.st_size, &raw, 0) != 0) {
|
|
|
|
free(buf);
|
2022-09-26 13:40:43 +00:00
|
|
|
(void) zutil_error(hdl, LPC_BADCACHE, dgettext(TEXT_DOMAIN,
|
2018-11-05 19:22:33 +00:00
|
|
|
"invalid or corrupt cache file contents"));
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
free(buf);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Go through and get the current state of the pools and refresh their
|
|
|
|
* state.
|
|
|
|
*/
|
|
|
|
if (nvlist_alloc(&pools, 0, 0) != 0) {
|
2019-10-03 17:20:44 +00:00
|
|
|
(void) zutil_no_memory(hdl);
|
2018-11-05 19:22:33 +00:00
|
|
|
nvlist_free(raw);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
elem = NULL;
|
|
|
|
while ((elem = nvlist_next_nvpair(raw, elem)) != NULL) {
|
|
|
|
src = fnvpair_value_nvlist(elem);
|
|
|
|
|
|
|
|
name = fnvlist_lookup_string(src, ZPOOL_CONFIG_POOL_NAME);
|
2021-03-12 23:42:27 +00:00
|
|
|
if (iarg->poolname != NULL && strcmp(iarg->poolname, name) != 0)
|
2018-11-05 19:22:33 +00:00
|
|
|
continue;
|
|
|
|
|
|
|
|
this_guid = fnvlist_lookup_uint64(src, ZPOOL_CONFIG_POOL_GUID);
|
2021-03-12 23:42:27 +00:00
|
|
|
if (iarg->guid != 0 && iarg->guid != this_guid)
|
2018-11-05 19:22:33 +00:00
|
|
|
continue;
|
|
|
|
|
2019-10-03 17:20:44 +00:00
|
|
|
if (zutil_pool_active(hdl, name, this_guid, &active) != 0) {
|
2018-11-05 19:22:33 +00:00
|
|
|
nvlist_free(raw);
|
|
|
|
nvlist_free(pools);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (active)
|
|
|
|
continue;
|
|
|
|
|
2021-03-12 23:42:27 +00:00
|
|
|
if (iarg->scan) {
|
|
|
|
uint64_t saved_guid = iarg->guid;
|
|
|
|
const char *saved_poolname = iarg->poolname;
|
|
|
|
pthread_mutex_t lock;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Create the device cache that will hold the
|
|
|
|
* devices we will scan based on the cachefile.
|
|
|
|
* This will get destroyed and freed by
|
|
|
|
* zpool_find_import_impl.
|
|
|
|
*/
|
|
|
|
avl_tree_t *cache = zutil_alloc(hdl,
|
|
|
|
sizeof (avl_tree_t));
|
|
|
|
avl_create(cache, slice_cache_compare,
|
|
|
|
sizeof (rdsk_node_t),
|
|
|
|
offsetof(rdsk_node_t, rn_node));
|
|
|
|
nvlist_t *nvroot = fnvlist_lookup_nvlist(src,
|
|
|
|
ZPOOL_CONFIG_VDEV_TREE);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We only want to find the pool with this_guid.
|
|
|
|
* We will reset these values back later.
|
|
|
|
*/
|
|
|
|
iarg->guid = this_guid;
|
|
|
|
iarg->poolname = NULL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We need to build up a cache of devices that exists
|
|
|
|
* in the paths pointed to by the cachefile. This allows
|
|
|
|
* us to preserve the device namespace that was
|
|
|
|
* originally specified by the user but also lets us
|
|
|
|
* scan devices in those directories in case they had
|
|
|
|
* been renamed.
|
|
|
|
*/
|
|
|
|
pthread_mutex_init(&lock, NULL);
|
|
|
|
discover_cached_paths(hdl, nvroot, cache, &lock);
|
|
|
|
nvlist_t *nv = zpool_find_import_impl(hdl, iarg,
|
|
|
|
&lock, cache);
|
|
|
|
pthread_mutex_destroy(&lock);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* zpool_find_import_impl will return back
|
|
|
|
* a list of pools that it found based on the
|
|
|
|
* device cache. There should only be one pool
|
|
|
|
* since we're looking for a specific guid.
|
|
|
|
* We will use that pool to build up the final
|
|
|
|
* pool nvlist which is returned back to the
|
|
|
|
* caller.
|
|
|
|
*/
|
|
|
|
nvpair_t *pair = nvlist_next_nvpair(nv, NULL);
|
2022-08-26 21:04:27 +00:00
|
|
|
if (pair == NULL)
|
|
|
|
continue;
|
2021-03-12 23:42:27 +00:00
|
|
|
fnvlist_add_nvlist(pools, nvpair_name(pair),
|
|
|
|
fnvpair_value_nvlist(pair));
|
|
|
|
|
|
|
|
VERIFY3P(nvlist_next_nvpair(nv, pair), ==, NULL);
|
|
|
|
|
|
|
|
iarg->guid = saved_guid;
|
|
|
|
iarg->poolname = saved_poolname;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2018-11-05 19:22:33 +00:00
|
|
|
if (nvlist_add_string(src, ZPOOL_CONFIG_CACHEFILE,
|
2021-03-12 23:42:27 +00:00
|
|
|
iarg->cachefile) != 0) {
|
2019-10-03 17:20:44 +00:00
|
|
|
(void) zutil_no_memory(hdl);
|
2018-11-05 19:22:33 +00:00
|
|
|
nvlist_free(raw);
|
|
|
|
nvlist_free(pools);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
2021-10-04 19:32:16 +00:00
|
|
|
update_vdevs_config_dev_sysfs_path(src);
|
|
|
|
|
2019-10-03 17:20:44 +00:00
|
|
|
if ((dst = zutil_refresh_config(hdl, src)) == NULL) {
|
2018-11-05 19:22:33 +00:00
|
|
|
nvlist_free(raw);
|
|
|
|
nvlist_free(pools);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (nvlist_add_nvlist(pools, nvpair_name(elem), dst) != 0) {
|
2019-10-03 17:20:44 +00:00
|
|
|
(void) zutil_no_memory(hdl);
|
2018-11-05 19:22:33 +00:00
|
|
|
nvlist_free(dst);
|
|
|
|
nvlist_free(raw);
|
|
|
|
nvlist_free(pools);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
nvlist_free(dst);
|
|
|
|
}
|
|
|
|
nvlist_free(raw);
|
|
|
|
return (pools);
|
|
|
|
}
|
|
|
|
|
2021-03-12 23:42:27 +00:00
|
|
|
static nvlist_t *
|
|
|
|
zpool_find_import(libpc_handle_t *hdl, importargs_t *iarg)
|
|
|
|
{
|
|
|
|
pthread_mutex_t lock;
|
|
|
|
avl_tree_t *cache;
|
|
|
|
nvlist_t *pools = NULL;
|
|
|
|
|
|
|
|
verify(iarg->poolname == NULL || iarg->guid == 0);
|
|
|
|
pthread_mutex_init(&lock, NULL);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Locate pool member vdevs by blkid or by directory scanning.
|
|
|
|
* On success a newly allocated AVL tree which is populated with an
|
|
|
|
* entry for each discovered vdev will be returned in the cache.
|
|
|
|
* It's the caller's responsibility to consume and destroy this tree.
|
|
|
|
*/
|
|
|
|
if (iarg->scan || iarg->paths != 0) {
|
|
|
|
size_t dirs = iarg->paths;
|
|
|
|
const char * const *dir = (const char * const *)iarg->path;
|
|
|
|
|
|
|
|
if (dirs == 0)
|
|
|
|
dir = zpool_default_search_paths(&dirs);
|
|
|
|
|
|
|
|
if (zpool_find_import_scan(hdl, &lock, &cache,
|
|
|
|
dir, dirs) != 0) {
|
|
|
|
pthread_mutex_destroy(&lock);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (zpool_find_import_blkid(hdl, &lock, &cache) != 0) {
|
|
|
|
pthread_mutex_destroy(&lock);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pools = zpool_find_import_impl(hdl, iarg, &lock, cache);
|
|
|
|
pthread_mutex_destroy(&lock);
|
|
|
|
return (pools);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2018-11-05 19:22:33 +00:00
|
|
|
nvlist_t *
|
2022-09-26 13:40:43 +00:00
|
|
|
zpool_search_import(libpc_handle_t *hdl, importargs_t *import)
|
2018-11-05 19:22:33 +00:00
|
|
|
{
|
|
|
|
nvlist_t *pools = NULL;
|
|
|
|
|
|
|
|
verify(import->poolname == NULL || import->guid == 0);
|
|
|
|
|
|
|
|
if (import->cachefile != NULL)
|
2022-09-26 13:40:43 +00:00
|
|
|
pools = zpool_find_import_cached(hdl, import);
|
2018-11-05 19:22:33 +00:00
|
|
|
else
|
2022-09-26 13:40:43 +00:00
|
|
|
pools = zpool_find_import(hdl, import);
|
2018-11-05 19:22:33 +00:00
|
|
|
|
|
|
|
if ((pools == NULL || nvlist_empty(pools)) &&
|
2022-09-26 13:40:43 +00:00
|
|
|
hdl->lpc_open_access_error && geteuid() != 0) {
|
|
|
|
(void) zutil_error(hdl, LPC_EACCESS, dgettext(TEXT_DOMAIN,
|
2018-11-05 19:22:33 +00:00
|
|
|
"no pools found"));
|
|
|
|
}
|
|
|
|
|
|
|
|
return (pools);
|
|
|
|
}
|
|
|
|
|
|
|
|
static boolean_t
|
2023-03-11 18:39:24 +00:00
|
|
|
pool_match(nvlist_t *cfg, const char *tgt)
|
2018-11-05 19:22:33 +00:00
|
|
|
{
|
|
|
|
uint64_t v, guid = strtoull(tgt, NULL, 0);
|
2023-03-11 18:39:24 +00:00
|
|
|
const char *s;
|
2018-11-05 19:22:33 +00:00
|
|
|
|
|
|
|
if (guid != 0) {
|
|
|
|
if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0)
|
|
|
|
return (v == guid);
|
|
|
|
} else {
|
|
|
|
if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0)
|
|
|
|
return (strcmp(s, tgt) == 0);
|
|
|
|
}
|
|
|
|
return (B_FALSE);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
2022-09-26 13:40:43 +00:00
|
|
|
zpool_find_config(libpc_handle_t *hdl, const char *target, nvlist_t **configp,
|
|
|
|
importargs_t *args)
|
2018-11-05 19:22:33 +00:00
|
|
|
{
|
|
|
|
nvlist_t *pools;
|
|
|
|
nvlist_t *match = NULL;
|
|
|
|
nvlist_t *config = NULL;
|
2020-12-23 17:52:24 +00:00
|
|
|
char *sepp = NULL;
|
2018-11-05 19:22:33 +00:00
|
|
|
int count = 0;
|
|
|
|
char *targetdup = strdup(target);
|
|
|
|
|
Handle possible null pointers from malloc/strdup/strndup()
GCC 12.1.1_p20220625's static analyzer caught these.
Of the two in the btree test, one had previously been caught by Coverity
and Smatch, but GCC flagged it as a false positive. Upon examining how
other test cases handle this, the solution was changed from
`ASSERT3P(node, !=, NULL);` to using `perror()` to be consistent with
the fixes to the other fixes done to the ZTS code.
That approach was also used in ZED since I did not see a better way of
handling this there. Also, upon inspection, additional unchecked
pointers from malloc()/calloc()/strdup() were found in ZED, so those
were handled too.
In other parts of the code, the existing methods to avoid issues from
memory allocators returning NULL were used, such as using
`umem_alloc(size, UMEM_NOFAIL)` or returning `ENOMEM`.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13979
2022-10-07 00:18:40 +00:00
|
|
|
if (targetdup == NULL)
|
|
|
|
return (ENOMEM);
|
|
|
|
|
2018-11-05 19:22:33 +00:00
|
|
|
*configp = NULL;
|
|
|
|
|
2021-06-03 15:10:41 +00:00
|
|
|
if ((sepp = strpbrk(targetdup, "/@")) != NULL)
|
2018-11-05 19:22:33 +00:00
|
|
|
*sepp = '\0';
|
|
|
|
|
2022-09-26 13:40:43 +00:00
|
|
|
pools = zpool_search_import(hdl, args);
|
2018-11-05 19:22:33 +00:00
|
|
|
|
|
|
|
if (pools != NULL) {
|
|
|
|
nvpair_t *elem = NULL;
|
|
|
|
while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
|
|
|
|
VERIFY0(nvpair_value_nvlist(elem, &config));
|
|
|
|
if (pool_match(config, targetdup)) {
|
|
|
|
count++;
|
|
|
|
if (match != NULL) {
|
|
|
|
/* multiple matches found */
|
|
|
|
continue;
|
|
|
|
} else {
|
2020-12-23 17:52:24 +00:00
|
|
|
match = fnvlist_dup(config);
|
2018-11-05 19:22:33 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-12-23 17:52:24 +00:00
|
|
|
fnvlist_free(pools);
|
2018-11-05 19:22:33 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (count == 0) {
|
|
|
|
free(targetdup);
|
|
|
|
return (ENOENT);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (count > 1) {
|
|
|
|
free(targetdup);
|
2020-12-23 17:52:24 +00:00
|
|
|
fnvlist_free(match);
|
2018-11-05 19:22:33 +00:00
|
|
|
return (EINVAL);
|
|
|
|
}
|
|
|
|
|
|
|
|
*configp = match;
|
|
|
|
free(targetdup);
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
2021-10-04 19:32:16 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Internal function for iterating over the vdevs.
|
|
|
|
*
|
|
|
|
* For each vdev, func() will be called and will be passed 'zhp' (which is
|
|
|
|
* typically the zpool_handle_t cast as a void pointer), the vdev's nvlist, and
|
|
|
|
* a user-defined data pointer).
|
|
|
|
*
|
|
|
|
* The return values from all the func() calls will be OR'd together and
|
|
|
|
* returned.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
for_each_vdev_cb(void *zhp, nvlist_t *nv, pool_vdev_iter_f func,
|
|
|
|
void *data)
|
|
|
|
{
|
|
|
|
nvlist_t **child;
|
|
|
|
uint_t c, children;
|
|
|
|
int ret = 0;
|
|
|
|
int i;
|
2023-03-11 18:39:24 +00:00
|
|
|
const char *type;
|
2021-10-04 19:32:16 +00:00
|
|
|
|
|
|
|
const char *list[] = {
|
|
|
|
ZPOOL_CONFIG_SPARES,
|
|
|
|
ZPOOL_CONFIG_L2CACHE,
|
|
|
|
ZPOOL_CONFIG_CHILDREN
|
|
|
|
};
|
|
|
|
|
2021-11-30 14:46:25 +00:00
|
|
|
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
|
|
|
|
return (ret);
|
|
|
|
|
2023-04-20 17:07:56 +00:00
|
|
|
/* Don't run our function on indirect vdevs */
|
|
|
|
if (strcmp(type, VDEV_TYPE_INDIRECT) != 0) {
|
2021-11-30 14:46:25 +00:00
|
|
|
ret |= func(zhp, nv, data);
|
|
|
|
}
|
|
|
|
|
2021-10-04 19:32:16 +00:00
|
|
|
for (i = 0; i < ARRAY_SIZE(list); i++) {
|
|
|
|
if (nvlist_lookup_nvlist_array(nv, list[i], &child,
|
|
|
|
&children) == 0) {
|
|
|
|
for (c = 0; c < children; c++) {
|
|
|
|
uint64_t ishole = 0;
|
|
|
|
|
|
|
|
(void) nvlist_lookup_uint64(child[c],
|
|
|
|
ZPOOL_CONFIG_IS_HOLE, &ishole);
|
|
|
|
|
|
|
|
if (ishole)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
ret |= for_each_vdev_cb(zhp, child[c],
|
|
|
|
func, data);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Given an ZPOOL_CONFIG_VDEV_TREE nvpair, iterate over all the vdevs, calling
|
|
|
|
* func() for each one. func() is passed the vdev's nvlist and an optional
|
|
|
|
* user-defined 'data' pointer.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
for_each_vdev_in_nvlist(nvlist_t *nvroot, pool_vdev_iter_f func, void *data)
|
|
|
|
{
|
|
|
|
return (for_each_vdev_cb(NULL, nvroot, func, data));
|
|
|
|
}
|