937 lines
24 KiB
C
937 lines
24 KiB
C
/*
|
|
* CDDL HEADER START
|
|
*
|
|
* The contents of this file are subject to the terms of the
|
|
* Common Development and Distribution License (the "License").
|
|
* You may not use this file except in compliance with the License.
|
|
*
|
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
* or http://www.opensolaris.org/os/licensing.
|
|
* See the License for the specific language governing permissions
|
|
* and limitations under the License.
|
|
*
|
|
* When distributing Covered Code, include this CDDL HEADER in each
|
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
* If applicable, add the following below this CDDL HEADER, with the
|
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
*
|
|
* CDDL HEADER END
|
|
*/
|
|
/*
|
|
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
|
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
|
|
* Copyright 2015 RackTop Systems.
|
|
* Copyright (c) 2016, Intel Corporation.
|
|
*/
|
|
|
|
/*
|
|
* Pool import support functions.
|
|
*
|
|
* Used by zpool, ztest, zdb, and zhack to locate importable configs. Since
|
|
* these commands are expected to run in the global zone, we can assume
|
|
* that the devices are all readable when called.
|
|
*
|
|
* To import a pool, we rely on reading the configuration information from the
|
|
* ZFS label of each device. If we successfully read the label, then we
|
|
* organize the configuration information in the following hierarchy:
|
|
*
|
|
* pool guid -> toplevel vdev guid -> label txg
|
|
*
|
|
* Duplicate entries matching this same tuple will be discarded. Once we have
|
|
* examined every device, we pick the best label txg config for each toplevel
|
|
* vdev. We then arrange these toplevel vdevs into a complete pool config, and
|
|
* update any paths that have changed. Finally, we attempt to import the pool
|
|
* using our derived config, and record the results.
|
|
*/
|
|
|
|
#include <ctype.h>
|
|
#include <dirent.h>
|
|
#include <errno.h>
|
|
#include <libintl.h>
|
|
#include <libgen.h>
|
|
#include <stddef.h>
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <sys/stat.h>
|
|
#include <unistd.h>
|
|
#include <fcntl.h>
|
|
#include <sys/dktp/fdisk.h>
|
|
#include <sys/vdev_impl.h>
|
|
#include <sys/fs/zfs.h>
|
|
|
|
#include <thread_pool.h>
|
|
#include <libzutil.h>
|
|
#include <libnvpair.h>
|
|
#include <libzfs.h>
|
|
|
|
#include "zutil_import.h"
|
|
|
|
#ifdef HAVE_LIBUDEV
|
|
#include <libudev.h>
|
|
#include <sched.h>
|
|
#endif
|
|
#include <blkid/blkid.h>
|
|
|
|
#define DEFAULT_IMPORT_PATH_SIZE 9
|
|
#define DEV_BYID_PATH "/dev/disk/by-id/"
|
|
|
|
static boolean_t
|
|
is_watchdog_dev(char *dev)
|
|
{
|
|
/* For 'watchdog' dev */
|
|
if (strcmp(dev, "watchdog") == 0)
|
|
return (B_TRUE);
|
|
|
|
/* For 'watchdog<digit><whatever> */
|
|
if (strstr(dev, "watchdog") == dev && isdigit(dev[8]))
|
|
return (B_TRUE);
|
|
|
|
return (B_FALSE);
|
|
}
|
|
|
|
/*
 * Issue the BLKFLSBUF ioctl on the open descriptor 'fd' and hand the
 * ioctl's return value straight back to the caller.
 */
int
zfs_dev_flush(int fd)
{
	int rc;

	rc = ioctl(fd, BLKFLSBUF);

	return (rc);
}
|
|
|
|
void
|
|
zpool_open_func(void *arg)
|
|
{
|
|
rdsk_node_t *rn = arg;
|
|
libpc_handle_t *hdl = rn->rn_hdl;
|
|
struct stat64 statbuf;
|
|
nvlist_t *config;
|
|
char *bname, *dupname;
|
|
uint64_t vdev_guid = 0;
|
|
int error;
|
|
int num_labels = 0;
|
|
int fd;
|
|
|
|
/*
|
|
* Skip devices with well known prefixes there can be side effects
|
|
* when opening devices which need to be avoided.
|
|
*
|
|
* hpet - High Precision Event Timer
|
|
* watchdog - Watchdog must be closed in a special way.
|
|
*/
|
|
dupname = zutil_strdup(hdl, rn->rn_name);
|
|
bname = basename(dupname);
|
|
error = ((strcmp(bname, "hpet") == 0) || is_watchdog_dev(bname));
|
|
free(dupname);
|
|
if (error)
|
|
return;
|
|
|
|
/*
|
|
* Ignore failed stats. We only want regular files and block devices.
|
|
*/
|
|
if (stat64(rn->rn_name, &statbuf) != 0 ||
|
|
(!S_ISREG(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode)))
|
|
return;
|
|
|
|
/*
|
|
* Preferentially open using O_DIRECT to bypass the block device
|
|
* cache which may be stale for multipath devices. An EINVAL errno
|
|
* indicates O_DIRECT is unsupported so fallback to just O_RDONLY.
|
|
*/
|
|
fd = open(rn->rn_name, O_RDONLY | O_DIRECT | O_CLOEXEC);
|
|
if ((fd < 0) && (errno == EINVAL))
|
|
fd = open(rn->rn_name, O_RDONLY | O_CLOEXEC);
|
|
if ((fd < 0) && (errno == EACCES))
|
|
hdl->lpc_open_access_error = B_TRUE;
|
|
if (fd < 0)
|
|
return;
|
|
|
|
/*
|
|
* This file is too small to hold a zpool
|
|
*/
|
|
if (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE) {
|
|
(void) close(fd);
|
|
return;
|
|
}
|
|
|
|
error = zpool_read_label(fd, &config, &num_labels);
|
|
if (error != 0) {
|
|
(void) close(fd);
|
|
return;
|
|
}
|
|
|
|
if (num_labels == 0) {
|
|
(void) close(fd);
|
|
nvlist_free(config);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Check that the vdev is for the expected guid. Additional entries
|
|
* are speculatively added based on the paths stored in the labels.
|
|
* Entries with valid paths but incorrect guids must be removed.
|
|
*/
|
|
error = nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid);
|
|
if (error || (rn->rn_vdev_guid && rn->rn_vdev_guid != vdev_guid)) {
|
|
(void) close(fd);
|
|
nvlist_free(config);
|
|
return;
|
|
}
|
|
|
|
(void) close(fd);
|
|
|
|
rn->rn_config = config;
|
|
rn->rn_num_labels = num_labels;
|
|
|
|
/*
|
|
* Add additional entries for paths described by this label.
|
|
*/
|
|
if (rn->rn_labelpaths) {
|
|
char *path = NULL;
|
|
char *devid = NULL;
|
|
rdsk_node_t *slice;
|
|
avl_index_t where;
|
|
int error;
|
|
|
|
if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid))
|
|
return;
|
|
|
|
/*
|
|
* Allow devlinks to stabilize so all paths are available.
|
|
*/
|
|
zpool_disk_wait(rn->rn_name);
|
|
|
|
if (path != NULL) {
|
|
slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
|
|
slice->rn_name = zutil_strdup(hdl, path);
|
|
slice->rn_vdev_guid = vdev_guid;
|
|
slice->rn_avl = rn->rn_avl;
|
|
slice->rn_hdl = hdl;
|
|
slice->rn_order = IMPORT_ORDER_PREFERRED_1;
|
|
slice->rn_labelpaths = B_FALSE;
|
|
pthread_mutex_lock(rn->rn_lock);
|
|
if (avl_find(rn->rn_avl, slice, &where)) {
|
|
pthread_mutex_unlock(rn->rn_lock);
|
|
free(slice->rn_name);
|
|
free(slice);
|
|
} else {
|
|
avl_insert(rn->rn_avl, slice, where);
|
|
pthread_mutex_unlock(rn->rn_lock);
|
|
zpool_open_func(slice);
|
|
}
|
|
}
|
|
|
|
if (devid != NULL) {
|
|
slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
|
|
error = asprintf(&slice->rn_name, "%s%s",
|
|
DEV_BYID_PATH, devid);
|
|
if (error == -1) {
|
|
free(slice);
|
|
return;
|
|
}
|
|
|
|
slice->rn_vdev_guid = vdev_guid;
|
|
slice->rn_avl = rn->rn_avl;
|
|
slice->rn_hdl = hdl;
|
|
slice->rn_order = IMPORT_ORDER_PREFERRED_2;
|
|
slice->rn_labelpaths = B_FALSE;
|
|
pthread_mutex_lock(rn->rn_lock);
|
|
if (avl_find(rn->rn_avl, slice, &where)) {
|
|
pthread_mutex_unlock(rn->rn_lock);
|
|
free(slice->rn_name);
|
|
free(slice);
|
|
} else {
|
|
avl_insert(rn->rn_avl, slice, where);
|
|
pthread_mutex_unlock(rn->rn_lock);
|
|
zpool_open_func(slice);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static char *
|
|
zpool_default_import_path[DEFAULT_IMPORT_PATH_SIZE] = {
|
|
"/dev/disk/by-vdev", /* Custom rules, use first if they exist */
|
|
"/dev/mapper", /* Use multipath devices before components */
|
|
"/dev/disk/by-partlabel", /* Single unique entry set by user */
|
|
"/dev/disk/by-partuuid", /* Generated partition uuid */
|
|
"/dev/disk/by-label", /* Custom persistent labels */
|
|
"/dev/disk/by-uuid", /* Single unique entry and persistent */
|
|
"/dev/disk/by-id", /* May be multiple entries and persistent */
|
|
"/dev/disk/by-path", /* Encodes physical location and persistent */
|
|
"/dev" /* UNSAFE device names will change */
|
|
};
|
|
|
|
const char * const *
|
|
zpool_default_search_paths(size_t *count)
|
|
{
|
|
*count = DEFAULT_IMPORT_PATH_SIZE;
|
|
return ((const char * const *)zpool_default_import_path);
|
|
}
|
|
|
|
/*
|
|
* Given a full path to a device determine if that device appears in the
|
|
* import search path. If it does return the first match and store the
|
|
* index in the passed 'order' variable, otherwise return an error.
|
|
*/
|
|
static int
|
|
zfs_path_order(char *name, int *order)
|
|
{
|
|
int i = 0, error = ENOENT;
|
|
char *dir, *env, *envdup;
|
|
|
|
env = getenv("ZPOOL_IMPORT_PATH");
|
|
if (env) {
|
|
envdup = strdup(env);
|
|
dir = strtok(envdup, ":");
|
|
while (dir) {
|
|
if (strncmp(name, dir, strlen(dir)) == 0) {
|
|
*order = i;
|
|
error = 0;
|
|
break;
|
|
}
|
|
dir = strtok(NULL, ":");
|
|
i++;
|
|
}
|
|
free(envdup);
|
|
} else {
|
|
for (i = 0; i < DEFAULT_IMPORT_PATH_SIZE; i++) {
|
|
if (strncmp(name, zpool_default_import_path[i],
|
|
strlen(zpool_default_import_path[i])) == 0) {
|
|
*order = i;
|
|
error = 0;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
return (error);
|
|
}
|
|
|
|
/*
|
|
* Use libblkid to quickly enumerate all known zfs devices.
|
|
*/
|
|
int
|
|
zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock,
|
|
avl_tree_t **slice_cache)
|
|
{
|
|
rdsk_node_t *slice;
|
|
blkid_cache cache;
|
|
blkid_dev_iterate iter;
|
|
blkid_dev dev;
|
|
avl_index_t where;
|
|
int error;
|
|
|
|
*slice_cache = NULL;
|
|
|
|
error = blkid_get_cache(&cache, NULL);
|
|
if (error != 0)
|
|
return (error);
|
|
|
|
error = blkid_probe_all_new(cache);
|
|
if (error != 0) {
|
|
blkid_put_cache(cache);
|
|
return (error);
|
|
}
|
|
|
|
iter = blkid_dev_iterate_begin(cache);
|
|
if (iter == NULL) {
|
|
blkid_put_cache(cache);
|
|
return (EINVAL);
|
|
}
|
|
|
|
error = blkid_dev_set_search(iter, "TYPE", "zfs_member");
|
|
if (error != 0) {
|
|
blkid_dev_iterate_end(iter);
|
|
blkid_put_cache(cache);
|
|
return (error);
|
|
}
|
|
|
|
*slice_cache = zutil_alloc(hdl, sizeof (avl_tree_t));
|
|
avl_create(*slice_cache, slice_cache_compare, sizeof (rdsk_node_t),
|
|
offsetof(rdsk_node_t, rn_node));
|
|
|
|
while (blkid_dev_next(iter, &dev) == 0) {
|
|
slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
|
|
slice->rn_name = zutil_strdup(hdl, blkid_dev_devname(dev));
|
|
slice->rn_vdev_guid = 0;
|
|
slice->rn_lock = lock;
|
|
slice->rn_avl = *slice_cache;
|
|
slice->rn_hdl = hdl;
|
|
slice->rn_labelpaths = B_TRUE;
|
|
|
|
error = zfs_path_order(slice->rn_name, &slice->rn_order);
|
|
if (error == 0)
|
|
slice->rn_order += IMPORT_ORDER_SCAN_OFFSET;
|
|
else
|
|
slice->rn_order = IMPORT_ORDER_DEFAULT;
|
|
|
|
pthread_mutex_lock(lock);
|
|
if (avl_find(*slice_cache, slice, &where)) {
|
|
free(slice->rn_name);
|
|
free(slice);
|
|
} else {
|
|
avl_insert(*slice_cache, slice, where);
|
|
}
|
|
pthread_mutex_unlock(lock);
|
|
}
|
|
|
|
blkid_dev_iterate_end(iter);
|
|
blkid_put_cache(cache);
|
|
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* Linux persistent device strings for vdev labels
|
|
*
|
|
* based on libudev for consistency with libudev disk add/remove events
|
|
*/
|
|
|
|
/*
 * Fixed-size buffers filled in by encode_device_strings().
 */
typedef struct vdev_dev_strs {
	/* Device id string; stored under ZPOOL_CONFIG_DEVID. */
	char	vds_devid[128];
	/*
	 * Physical location string; stored under ZPOOL_CONFIG_PHYS_PATH.
	 * An empty string means no physical path is available.
	 */
	char	vds_devphys[128];
} vdev_dev_strs_t;
|
|
|
|
#ifdef HAVE_LIBUDEV
|
|
|
|
/*
|
|
* Obtain the persistent device id string (describes what)
|
|
*
|
|
* used by ZED vdev matching for auto-{online,expand,replace}
|
|
*/
|
|
int
|
|
zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
|
|
{
|
|
struct udev_list_entry *entry;
|
|
const char *bus;
|
|
char devbyid[MAXPATHLEN];
|
|
|
|
/* The bus based by-id path is preferred */
|
|
bus = udev_device_get_property_value(dev, "ID_BUS");
|
|
|
|
if (bus == NULL) {
|
|
const char *dm_uuid;
|
|
|
|
/*
|
|
* For multipath nodes use the persistent uuid based identifier
|
|
*
|
|
* Example: /dev/disk/by-id/dm-uuid-mpath-35000c5006304de3f
|
|
*/
|
|
dm_uuid = udev_device_get_property_value(dev, "DM_UUID");
|
|
if (dm_uuid != NULL) {
|
|
(void) snprintf(bufptr, buflen, "dm-uuid-%s", dm_uuid);
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* For volumes use the persistent /dev/zvol/dataset identifier
|
|
*/
|
|
entry = udev_device_get_devlinks_list_entry(dev);
|
|
while (entry != NULL) {
|
|
const char *name;
|
|
|
|
name = udev_list_entry_get_name(entry);
|
|
if (strncmp(name, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
|
|
(void) strlcpy(bufptr, name, buflen);
|
|
return (0);
|
|
}
|
|
entry = udev_list_entry_get_next(entry);
|
|
}
|
|
|
|
/*
|
|
* NVME 'by-id' symlinks are similar to bus case
|
|
*/
|
|
struct udev_device *parent;
|
|
|
|
parent = udev_device_get_parent_with_subsystem_devtype(dev,
|
|
"nvme", NULL);
|
|
if (parent != NULL)
|
|
bus = "nvme"; /* continue with bus symlink search */
|
|
else
|
|
return (ENODATA);
|
|
}
|
|
|
|
/*
|
|
* locate the bus specific by-id link
|
|
*/
|
|
(void) snprintf(devbyid, sizeof (devbyid), "%s%s-", DEV_BYID_PATH, bus);
|
|
entry = udev_device_get_devlinks_list_entry(dev);
|
|
while (entry != NULL) {
|
|
const char *name;
|
|
|
|
name = udev_list_entry_get_name(entry);
|
|
if (strncmp(name, devbyid, strlen(devbyid)) == 0) {
|
|
name += strlen(DEV_BYID_PATH);
|
|
(void) strlcpy(bufptr, name, buflen);
|
|
return (0);
|
|
}
|
|
entry = udev_list_entry_get_next(entry);
|
|
}
|
|
|
|
return (ENODATA);
|
|
}
|
|
|
|
/*
|
|
* Obtain the persistent physical location string (describes where)
|
|
*
|
|
* used by ZED vdev matching for auto-{online,expand,replace}
|
|
*/
|
|
int
|
|
zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
|
|
{
|
|
const char *physpath = NULL;
|
|
struct udev_list_entry *entry;
|
|
|
|
/*
|
|
* Normal disks use ID_PATH for their physical path.
|
|
*/
|
|
physpath = udev_device_get_property_value(dev, "ID_PATH");
|
|
if (physpath != NULL && strlen(physpath) > 0) {
|
|
(void) strlcpy(bufptr, physpath, buflen);
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* Device mapper devices are virtual and don't have a physical
|
|
* path. For them we use ID_VDEV instead, which is setup via the
|
|
* /etc/vdev_id.conf file. ID_VDEV provides a persistent path
|
|
* to a virtual device. If you don't have vdev_id.conf setup,
|
|
* you cannot use multipath autoreplace with device mapper.
|
|
*/
|
|
physpath = udev_device_get_property_value(dev, "ID_VDEV");
|
|
if (physpath != NULL && strlen(physpath) > 0) {
|
|
(void) strlcpy(bufptr, physpath, buflen);
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* For ZFS volumes use the persistent /dev/zvol/dataset identifier
|
|
*/
|
|
entry = udev_device_get_devlinks_list_entry(dev);
|
|
while (entry != NULL) {
|
|
physpath = udev_list_entry_get_name(entry);
|
|
if (strncmp(physpath, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
|
|
(void) strlcpy(bufptr, physpath, buflen);
|
|
return (0);
|
|
}
|
|
entry = udev_list_entry_get_next(entry);
|
|
}
|
|
|
|
/*
|
|
* For all other devices fallback to using the by-uuid name.
|
|
*/
|
|
entry = udev_device_get_devlinks_list_entry(dev);
|
|
while (entry != NULL) {
|
|
physpath = udev_list_entry_get_name(entry);
|
|
if (strncmp(physpath, "/dev/disk/by-uuid", 17) == 0) {
|
|
(void) strlcpy(bufptr, physpath, buflen);
|
|
return (0);
|
|
}
|
|
entry = udev_list_entry_get_next(entry);
|
|
}
|
|
|
|
return (ENODATA);
|
|
}
|
|
|
|
/*
|
|
* A disk is considered a multipath whole disk when:
|
|
* DEVNAME key value has "dm-"
|
|
* DM_NAME key value has "mpath" prefix
|
|
* DM_UUID key exists
|
|
* ID_PART_TABLE_TYPE key does not exist or is not gpt
|
|
*/
|
|
static boolean_t
|
|
udev_mpath_whole_disk(struct udev_device *dev)
|
|
{
|
|
const char *devname, *type, *uuid;
|
|
|
|
devname = udev_device_get_property_value(dev, "DEVNAME");
|
|
type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE");
|
|
uuid = udev_device_get_property_value(dev, "DM_UUID");
|
|
|
|
if ((devname != NULL && strncmp(devname, "/dev/dm-", 8) == 0) &&
|
|
((type == NULL) || (strcmp(type, "gpt") != 0)) &&
|
|
(uuid != NULL)) {
|
|
return (B_TRUE);
|
|
}
|
|
|
|
return (B_FALSE);
|
|
}
|
|
|
|
/*
 * Return non-zero once udev considers 'dev' set up.  When libudev offers
 * udev_device_get_is_initialized() its answer is returned as is;
 * otherwise the presence of the DEVLINKS property is used instead.
 */
static int
udev_device_is_ready(struct udev_device *dev)
{
	int ready;

#ifdef HAVE_LIBUDEV_UDEV_DEVICE_GET_IS_INITIALIZED
	ready = udev_device_get_is_initialized(dev);
#else
	/* wait for DEVLINKS property to be initialized */
	ready = (udev_device_get_property_value(dev, "DEVLINKS") != NULL);
#endif

	return (ready);
}
|
|
|
|
#else
|
|
|
|
/*
 * Variant built without libudev: no device id can be determined, so
 * ENODATA is always returned and 'bufptr' is left untouched.
 */
/* ARGSUSED */
int
zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
{
	(void) dev;
	(void) bufptr;
	(void) buflen;

	return (ENODATA);
}
|
|
|
|
/*
 * Variant built without libudev: no physical path can be determined, so
 * ENODATA is always returned and 'bufptr' is left untouched.
 */
/* ARGSUSED */
int
zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
{
	(void) dev;
	(void) bufptr;
	(void) buflen;

	return (ENODATA);
}
|
|
|
|
#endif /* HAVE_LIBUDEV */
|
|
|
|
/*
|
|
* Wait up to timeout_ms for udev to set up the device node. The device is
|
|
* considered ready when libudev determines it has been initialized, all of
|
|
* the device links have been verified to exist, and it has been allowed to
|
|
* settle. At this point the device the device can be accessed reliably.
|
|
* Depending on the complexity of the udev rules this process could take
|
|
* several seconds.
|
|
*/
|
|
int
|
|
zpool_label_disk_wait(const char *path, int timeout_ms)
|
|
{
|
|
#ifdef HAVE_LIBUDEV
|
|
struct udev *udev;
|
|
struct udev_device *dev = NULL;
|
|
char nodepath[MAXPATHLEN];
|
|
char *sysname = NULL;
|
|
int ret = ENODEV;
|
|
int settle_ms = 50;
|
|
long sleep_ms = 10;
|
|
hrtime_t start, settle;
|
|
|
|
if ((udev = udev_new()) == NULL)
|
|
return (ENXIO);
|
|
|
|
start = gethrtime();
|
|
settle = 0;
|
|
|
|
do {
|
|
if (sysname == NULL) {
|
|
if (realpath(path, nodepath) != NULL) {
|
|
sysname = strrchr(nodepath, '/') + 1;
|
|
} else {
|
|
(void) usleep(sleep_ms * MILLISEC);
|
|
continue;
|
|
}
|
|
}
|
|
|
|
dev = udev_device_new_from_subsystem_sysname(udev,
|
|
"block", sysname);
|
|
if ((dev != NULL) && udev_device_is_ready(dev)) {
|
|
struct udev_list_entry *links, *link = NULL;
|
|
|
|
ret = 0;
|
|
links = udev_device_get_devlinks_list_entry(dev);
|
|
|
|
udev_list_entry_foreach(link, links) {
|
|
struct stat64 statbuf;
|
|
const char *name;
|
|
|
|
name = udev_list_entry_get_name(link);
|
|
errno = 0;
|
|
if (stat64(name, &statbuf) == 0 && errno == 0)
|
|
continue;
|
|
|
|
settle = 0;
|
|
ret = ENODEV;
|
|
break;
|
|
}
|
|
|
|
if (ret == 0) {
|
|
if (settle == 0) {
|
|
settle = gethrtime();
|
|
} else if (NSEC2MSEC(gethrtime() - settle) >=
|
|
settle_ms) {
|
|
udev_device_unref(dev);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
udev_device_unref(dev);
|
|
(void) usleep(sleep_ms * MILLISEC);
|
|
|
|
} while (NSEC2MSEC(gethrtime() - start) < timeout_ms);
|
|
|
|
udev_unref(udev);
|
|
|
|
return (ret);
|
|
#else
|
|
int settle_ms = 50;
|
|
long sleep_ms = 10;
|
|
hrtime_t start, settle;
|
|
struct stat64 statbuf;
|
|
|
|
start = gethrtime();
|
|
settle = 0;
|
|
|
|
do {
|
|
errno = 0;
|
|
if ((stat64(path, &statbuf) == 0) && (errno == 0)) {
|
|
if (settle == 0)
|
|
settle = gethrtime();
|
|
else if (NSEC2MSEC(gethrtime() - settle) >= settle_ms)
|
|
return (0);
|
|
} else if (errno != ENOENT) {
|
|
return (errno);
|
|
}
|
|
|
|
usleep(sleep_ms * MILLISEC);
|
|
} while (NSEC2MSEC(gethrtime() - start) < timeout_ms);
|
|
|
|
return (ENODEV);
|
|
#endif /* HAVE_LIBUDEV */
|
|
}
|
|
|
|
/*
|
|
* Simplified version of zpool_label_disk_wait() where we wait for a device
|
|
* to appear using the default timeouts.
|
|
*/
|
|
int
|
|
zpool_disk_wait(const char *path)
|
|
{
|
|
int timeout;
|
|
timeout = zpool_getenv_int("ZPOOL_IMPORT_UDEV_TIMEOUT_MS",
|
|
DISK_LABEL_WAIT);
|
|
|
|
return (zpool_label_disk_wait(path, timeout));
|
|
}
|
|
|
|
/*
|
|
* Encode the persistent devices strings
|
|
* used for the vdev disk label
|
|
*/
|
|
static int
|
|
encode_device_strings(const char *path, vdev_dev_strs_t *ds,
|
|
boolean_t wholedisk)
|
|
{
|
|
#ifdef HAVE_LIBUDEV
|
|
struct udev *udev;
|
|
struct udev_device *dev = NULL;
|
|
char nodepath[MAXPATHLEN];
|
|
char *sysname;
|
|
int ret = ENODEV;
|
|
hrtime_t start;
|
|
|
|
if ((udev = udev_new()) == NULL)
|
|
return (ENXIO);
|
|
|
|
/* resolve path to a runtime device node instance */
|
|
if (realpath(path, nodepath) == NULL)
|
|
goto no_dev;
|
|
|
|
sysname = strrchr(nodepath, '/') + 1;
|
|
|
|
/*
|
|
* Wait up to 3 seconds for udev to set up the device node context
|
|
*/
|
|
start = gethrtime();
|
|
do {
|
|
dev = udev_device_new_from_subsystem_sysname(udev, "block",
|
|
sysname);
|
|
if (dev == NULL)
|
|
goto no_dev;
|
|
if (udev_device_is_ready(dev))
|
|
break; /* udev ready */
|
|
|
|
udev_device_unref(dev);
|
|
dev = NULL;
|
|
|
|
if (NSEC2MSEC(gethrtime() - start) < 10)
|
|
(void) sched_yield(); /* yield/busy wait up to 10ms */
|
|
else
|
|
(void) usleep(10 * MILLISEC);
|
|
|
|
} while (NSEC2MSEC(gethrtime() - start) < (3 * MILLISEC));
|
|
|
|
if (dev == NULL)
|
|
goto no_dev;
|
|
|
|
/*
|
|
* Only whole disks require extra device strings
|
|
*/
|
|
if (!wholedisk && !udev_mpath_whole_disk(dev))
|
|
goto no_dev;
|
|
|
|
ret = zfs_device_get_devid(dev, ds->vds_devid, sizeof (ds->vds_devid));
|
|
if (ret != 0)
|
|
goto no_dev_ref;
|
|
|
|
/* physical location string (optional) */
|
|
if (zfs_device_get_physical(dev, ds->vds_devphys,
|
|
sizeof (ds->vds_devphys)) != 0) {
|
|
ds->vds_devphys[0] = '\0'; /* empty string --> not available */
|
|
}
|
|
|
|
no_dev_ref:
|
|
udev_device_unref(dev);
|
|
no_dev:
|
|
udev_unref(udev);
|
|
|
|
return (ret);
|
|
#else
|
|
return (ENOENT);
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* Rescan the enclosure sysfs path for turning on enclosure LEDs and store it
|
|
* in the nvlist * (if applicable). Like:
|
|
* vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
|
|
*
|
|
* If an old path was in the nvlist, and the rescan can not find a new path,
|
|
* then keep the old path, since the disk may have been removed.
|
|
*
|
|
* path: The vdev path (value from ZPOOL_CONFIG_PATH)
|
|
* key: The nvlist_t name (like ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH)
|
|
*/
|
|
void
|
|
update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path,
|
|
const char *key)
|
|
{
|
|
char *upath, *spath;
|
|
char *oldpath = NULL;
|
|
|
|
(void) nvlist_lookup_string(nv, key, &oldpath);
|
|
|
|
/* Add enclosure sysfs path (if disk is in an enclosure). */
|
|
upath = zfs_get_underlying_path(path);
|
|
spath = zfs_get_enclosure_sysfs_path(upath);
|
|
|
|
if (spath) {
|
|
(void) nvlist_add_string(nv, key, spath);
|
|
} else {
|
|
/*
|
|
* We couldn't dynamically scan the disk's enclosure sysfs path.
|
|
* This could be because the disk went away. If there's an old
|
|
* enclosure sysfs path in the nvlist, then keep using it.
|
|
*/
|
|
if (!oldpath) {
|
|
(void) nvlist_remove_all(nv, key);
|
|
}
|
|
}
|
|
|
|
free(upath);
|
|
free(spath);
|
|
}
|
|
|
|
/*
|
|
* This will get called for each leaf vdev.
|
|
*/
|
|
static int
|
|
sysfs_path_pool_vdev_iter_f(void *hdl_data, nvlist_t *nv, void *data)
|
|
{
|
|
char *path = NULL;
|
|
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
|
|
return (1);
|
|
|
|
/* Rescan our enclosure sysfs path for this vdev */
|
|
update_vdev_config_dev_sysfs_path(nv, path,
|
|
ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* Given an nvlist for our pool (with vdev tree), iterate over all the
|
|
* leaf vdevs and update their ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH.
|
|
*/
|
|
void
|
|
update_vdevs_config_dev_sysfs_path(nvlist_t *config)
|
|
{
|
|
nvlist_t *nvroot = NULL;
|
|
verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
|
|
&nvroot) == 0);
|
|
for_each_vdev_in_nvlist(nvroot, sysfs_path_pool_vdev_iter_f, NULL);
|
|
}
|
|
|
|
/*
|
|
* Update a leaf vdev's persistent device strings
|
|
*
|
|
* - only applies for a dedicated leaf vdev (aka whole disk)
|
|
* - updated during pool create|add|attach|import
|
|
* - used for matching device matching during auto-{online,expand,replace}
|
|
* - stored in a leaf disk config label (i.e. alongside 'path' NVP)
|
|
* - these strings are currently not used in kernel (i.e. for vdev_disk_open)
|
|
*
|
|
* single device node example:
|
|
* devid: 'scsi-MG03SCA300_350000494a8cb3d67-part1'
|
|
* phys_path: 'pci-0000:04:00.0-sas-0x50000394a8cb3d67-lun-0'
|
|
*
|
|
* multipath device node example:
|
|
* devid: 'dm-uuid-mpath-35000c5006304de3f'
|
|
*
|
|
* We also store the enclosure sysfs path for turning on enclosure LEDs
|
|
* (if applicable):
|
|
* vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
|
|
*/
|
|
void
|
|
update_vdev_config_dev_strs(nvlist_t *nv)
|
|
{
|
|
vdev_dev_strs_t vds;
|
|
char *env, *type, *path;
|
|
uint64_t wholedisk = 0;
|
|
|
|
/*
|
|
* For the benefit of legacy ZFS implementations, allow
|
|
* for opting out of devid strings in the vdev label.
|
|
*
|
|
* example use:
|
|
* env ZFS_VDEV_DEVID_OPT_OUT=YES zpool import dozer
|
|
*
|
|
* explanation:
|
|
* Older OpenZFS implementations had issues when attempting to
|
|
* display pool config VDEV names if a "devid" NVP value is
|
|
* present in the pool's config.
|
|
*
|
|
* For example, a pool that originated on illumos platform would
|
|
* have a devid value in the config and "zpool status" would fail
|
|
* when listing the config.
|
|
*
|
|
* A pool can be stripped of any "devid" values on import or
|
|
* prevented from adding them on zpool create|add by setting
|
|
* ZFS_VDEV_DEVID_OPT_OUT.
|
|
*/
|
|
env = getenv("ZFS_VDEV_DEVID_OPT_OUT");
|
|
if (env && (strtoul(env, NULL, 0) > 0 ||
|
|
!strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) {
|
|
(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
|
|
(void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
|
|
return;
|
|
}
|
|
|
|
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0 ||
|
|
strcmp(type, VDEV_TYPE_DISK) != 0) {
|
|
return;
|
|
}
|
|
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
|
|
return;
|
|
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
|
|
|
|
/*
|
|
* Update device string values in the config nvlist.
|
|
*/
|
|
if (encode_device_strings(path, &vds, (boolean_t)wholedisk) == 0) {
|
|
(void) nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vds.vds_devid);
|
|
if (vds.vds_devphys[0] != '\0') {
|
|
(void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH,
|
|
vds.vds_devphys);
|
|
}
|
|
update_vdev_config_dev_sysfs_path(nv, path,
|
|
ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
|
|
} else {
|
|
/* Clear out any stale entries. */
|
|
(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
|
|
(void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
|
|
(void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
|
|
}
|
|
}
|