Fix ZED auto-replace for VDEVs using by-id paths
The change is simple -- restore the original code so that the VDEV path is updated when using by-id paths. The more challenging part was to devise a second ZTS test, that would test auto-replace for 'by-id' and help prevent a future regression. With that new test, we can now do an A|B test with, and without, the fix to confirm that auto-replace for by-id paths works. The existing auto-replace test, functional/fault/auto_replace_001_pos, will confirm that we didn't break auto-replace for 'by-vdev' paths. In the original functional/fault/auto_replace_001_pos test, the disk wipe (using dd) was not effective in removing the partitioning since the kernel was never informed of the wipe. Added a call to wipefs(8) so that the kernel is informed and ZED will re-partition the device. Added a validation step that the re-partitioning occurred by confirming that the GPT partition UUID changes. Sponsored-By: OpenDrives Inc. Sponsored-By: Klara Inc. Reviewed-by: Rob Norris <rob.norris@klarasystems.com> Reviewed-by: Tony Hutter <hutter2@llnl.gov> Signed-off-by: Don Brady <don.brady@klarasystems.com> Closes #15363
This commit is contained in:
parent
c0e58995e3
commit
f0f330e121
|
@ -24,6 +24,7 @@
|
||||||
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
|
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2016, 2017, Intel Corporation.
|
* Copyright (c) 2016, 2017, Intel Corporation.
|
||||||
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
|
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
|
||||||
|
* Copyright (c) 2023, Klara Inc.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -204,7 +205,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
|
||||||
uint64_t is_spare = 0;
|
uint64_t is_spare = 0;
|
||||||
const char *physpath = NULL, *new_devid = NULL, *enc_sysfs_path = NULL;
|
const char *physpath = NULL, *new_devid = NULL, *enc_sysfs_path = NULL;
|
||||||
char rawpath[PATH_MAX], fullpath[PATH_MAX];
|
char rawpath[PATH_MAX], fullpath[PATH_MAX];
|
||||||
char devpath[PATH_MAX];
|
char pathbuf[PATH_MAX];
|
||||||
int ret;
|
int ret;
|
||||||
int online_flag = ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE;
|
int online_flag = ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE;
|
||||||
boolean_t is_sd = B_FALSE;
|
boolean_t is_sd = B_FALSE;
|
||||||
|
@ -214,6 +215,11 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
|
||||||
char **lines = NULL;
|
char **lines = NULL;
|
||||||
int lines_cnt = 0;
|
int lines_cnt = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get the persistent path, typically under the '/dev/disk/by-id' or
|
||||||
|
* '/dev/disk/by-vdev' directories. Note that this path can change
|
||||||
|
* when a vdev is replaced with a new disk.
|
||||||
|
*/
|
||||||
if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path) != 0)
|
if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path) != 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
@ -370,15 +376,17 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
|
||||||
(void) snprintf(rawpath, sizeof (rawpath), "%s%s",
|
(void) snprintf(rawpath, sizeof (rawpath), "%s%s",
|
||||||
is_sd ? DEV_BYVDEV_PATH : DEV_BYPATH_PATH, physpath);
|
is_sd ? DEV_BYVDEV_PATH : DEV_BYPATH_PATH, physpath);
|
||||||
|
|
||||||
if (realpath(rawpath, devpath) == NULL && !is_mpath_wholedisk) {
|
if (realpath(rawpath, pathbuf) == NULL && !is_mpath_wholedisk) {
|
||||||
zed_log_msg(LOG_INFO, " realpath: %s failed (%s)",
|
zed_log_msg(LOG_INFO, " realpath: %s failed (%s)",
|
||||||
rawpath, strerror(errno));
|
rawpath, strerror(errno));
|
||||||
|
|
||||||
(void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
|
int err = zpool_vdev_online(zhp, fullpath,
|
||||||
&newstate);
|
ZFS_ONLINE_FORCEFAULT, &newstate);
|
||||||
|
|
||||||
zed_log_msg(LOG_INFO, " zpool_vdev_online: %s FORCEFAULT (%s)",
|
zed_log_msg(LOG_INFO, " zpool_vdev_online: %s FORCEFAULT (%s) "
|
||||||
fullpath, libzfs_error_description(g_zfshdl));
|
"err %d, new state %d",
|
||||||
|
fullpath, libzfs_error_description(g_zfshdl), err,
|
||||||
|
err ? (int)newstate : 0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -428,7 +436,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
|
||||||
* to trigger a ZFS fault for the device (and any hot spare
|
* to trigger a ZFS fault for the device (and any hot spare
|
||||||
* replacement).
|
* replacement).
|
||||||
*/
|
*/
|
||||||
leafname = strrchr(devpath, '/') + 1;
|
leafname = strrchr(pathbuf, '/') + 1;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If this is a request to label a whole disk, then attempt to
|
* If this is a request to label a whole disk, then attempt to
|
||||||
|
@ -436,7 +444,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
|
||||||
*/
|
*/
|
||||||
if (zpool_prepare_and_label_disk(g_zfshdl, zhp, leafname,
|
if (zpool_prepare_and_label_disk(g_zfshdl, zhp, leafname,
|
||||||
vdev, "autoreplace", &lines, &lines_cnt) != 0) {
|
vdev, "autoreplace", &lines, &lines_cnt) != 0) {
|
||||||
zed_log_msg(LOG_INFO,
|
zed_log_msg(LOG_WARNING,
|
||||||
" zpool_prepare_and_label_disk: could not "
|
" zpool_prepare_and_label_disk: could not "
|
||||||
"label '%s' (%s)", leafname,
|
"label '%s' (%s)", leafname,
|
||||||
libzfs_error_description(g_zfshdl));
|
libzfs_error_description(g_zfshdl));
|
||||||
|
@ -468,7 +476,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
|
||||||
sizeof (device->pd_physpath));
|
sizeof (device->pd_physpath));
|
||||||
list_insert_tail(&g_device_list, device);
|
list_insert_tail(&g_device_list, device);
|
||||||
|
|
||||||
zed_log_msg(LOG_INFO, " zpool_label_disk: async '%s' (%llu)",
|
zed_log_msg(LOG_NOTICE, " zpool_label_disk: async '%s' (%llu)",
|
||||||
leafname, (u_longlong_t)guid);
|
leafname, (u_longlong_t)guid);
|
||||||
|
|
||||||
return; /* resumes at EC_DEV_ADD.ESC_DISK for partition */
|
return; /* resumes at EC_DEV_ADD.ESC_DISK for partition */
|
||||||
|
@ -491,8 +499,8 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
|
||||||
}
|
}
|
||||||
if (!found) {
|
if (!found) {
|
||||||
/* unexpected partition slice encountered */
|
/* unexpected partition slice encountered */
|
||||||
zed_log_msg(LOG_INFO, "labeled disk %s unexpected here",
|
zed_log_msg(LOG_WARNING, "labeled disk %s was "
|
||||||
fullpath);
|
"unexpected here", fullpath);
|
||||||
(void) zpool_vdev_online(zhp, fullpath,
|
(void) zpool_vdev_online(zhp, fullpath,
|
||||||
ZFS_ONLINE_FORCEFAULT, &newstate);
|
ZFS_ONLINE_FORCEFAULT, &newstate);
|
||||||
return;
|
return;
|
||||||
|
@ -501,8 +509,17 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
|
||||||
zed_log_msg(LOG_INFO, " zpool_label_disk: resume '%s' (%llu)",
|
zed_log_msg(LOG_INFO, " zpool_label_disk: resume '%s' (%llu)",
|
||||||
physpath, (u_longlong_t)guid);
|
physpath, (u_longlong_t)guid);
|
||||||
|
|
||||||
(void) snprintf(devpath, sizeof (devpath), "%s%s",
|
/*
|
||||||
DEV_BYID_PATH, new_devid);
|
* Paths that begin with '/dev/disk/by-id/' will change and so
|
||||||
|
* they must be updated before calling zpool_vdev_attach().
|
||||||
|
*/
|
||||||
|
if (strncmp(path, DEV_BYID_PATH, strlen(DEV_BYID_PATH)) == 0) {
|
||||||
|
(void) snprintf(pathbuf, sizeof (pathbuf), "%s%s",
|
||||||
|
DEV_BYID_PATH, new_devid);
|
||||||
|
zed_log_msg(LOG_INFO, " zpool_label_disk: path '%s' "
|
||||||
|
"replaced by '%s'", path, pathbuf);
|
||||||
|
path = pathbuf;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
libzfs_free_str_array(lines, lines_cnt);
|
libzfs_free_str_array(lines, lines_cnt);
|
||||||
|
@ -545,9 +562,11 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
|
||||||
* Wait for udev to verify the links exist, then auto-replace
|
* Wait for udev to verify the links exist, then auto-replace
|
||||||
* the leaf disk at same physical location.
|
* the leaf disk at same physical location.
|
||||||
*/
|
*/
|
||||||
if (zpool_label_disk_wait(path, 3000) != 0) {
|
if (zpool_label_disk_wait(path, DISK_LABEL_WAIT) != 0) {
|
||||||
zed_log_msg(LOG_WARNING, "zfs_mod: expected replacement "
|
zed_log_msg(LOG_WARNING, "zfs_mod: pool '%s', after labeling "
|
||||||
"disk %s is missing", path);
|
"replacement disk, the expected disk partition link '%s' "
|
||||||
|
"is missing after waiting %u ms",
|
||||||
|
zpool_get_name(zhp), path, DISK_LABEL_WAIT);
|
||||||
nvlist_free(nvroot);
|
nvlist_free(nvroot);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -562,7 +581,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
|
||||||
B_TRUE, B_FALSE);
|
B_TRUE, B_FALSE);
|
||||||
}
|
}
|
||||||
|
|
||||||
zed_log_msg(LOG_INFO, " zpool_vdev_replace: %s with %s (%s)",
|
zed_log_msg(LOG_WARNING, " zpool_vdev_replace: %s with %s (%s)",
|
||||||
fullpath, path, (ret == 0) ? "no errors" :
|
fullpath, path, (ret == 0) ? "no errors" :
|
||||||
libzfs_error_description(g_zfshdl));
|
libzfs_error_description(g_zfshdl));
|
||||||
|
|
||||||
|
@ -660,7 +679,7 @@ zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
|
||||||
dp->dd_prop, path);
|
dp->dd_prop, path);
|
||||||
dp->dd_found = B_TRUE;
|
dp->dd_found = B_TRUE;
|
||||||
|
|
||||||
/* pass the new devid for use by replacing code */
|
/* pass the new devid for use by auto-replacing code */
|
||||||
if (dp->dd_new_devid != NULL) {
|
if (dp->dd_new_devid != NULL) {
|
||||||
(void) nvlist_add_string(nvl, "new_devid",
|
(void) nvlist_add_string(nvl, "new_devid",
|
||||||
dp->dd_new_devid);
|
dp->dd_new_devid);
|
||||||
|
|
|
@ -34,7 +34,7 @@ extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Default wait time for a device name to be created.
|
* Default wait time in milliseconds for a device name to be created.
|
||||||
*/
|
*/
|
||||||
#define DISK_LABEL_WAIT (30 * 1000) /* 30 seconds */
|
#define DISK_LABEL_WAIT (30 * 1000) /* 30 seconds */
|
||||||
|
|
||||||
|
|
|
@ -582,9 +582,8 @@ zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
|
||||||
* Wait up to timeout_ms for udev to set up the device node. The device is
|
* Wait up to timeout_ms for udev to set up the device node. The device is
|
||||||
* considered ready when libudev determines it has been initialized, all of
|
* considered ready when libudev determines it has been initialized, all of
|
||||||
* the device links have been verified to exist, and it has been allowed to
|
* the device links have been verified to exist, and it has been allowed to
|
||||||
* settle. At this point the device the device can be accessed reliably.
|
* settle. At this point the device can be accessed reliably. Depending on
|
||||||
* Depending on the complexity of the udev rules this process could take
|
* the complexity of the udev rules this process could take several seconds.
|
||||||
* several seconds.
|
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
zpool_label_disk_wait(const char *path, int timeout_ms)
|
zpool_label_disk_wait(const char *path, int timeout_ms)
|
||||||
|
|
|
@ -122,10 +122,10 @@ tags = ['functional', 'fallocate']
|
||||||
|
|
||||||
[tests/functional/fault:Linux]
|
[tests/functional/fault:Linux]
|
||||||
tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_online_002_pos',
|
tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_online_002_pos',
|
||||||
'auto_replace_001_pos', 'auto_spare_001_pos', 'auto_spare_002_pos',
|
'auto_replace_001_pos', 'auto_replace_002_pos', 'auto_spare_001_pos',
|
||||||
'auto_spare_multiple', 'auto_spare_ashift', 'auto_spare_shared',
|
'auto_spare_002_pos', 'auto_spare_multiple', 'auto_spare_ashift',
|
||||||
'decrypt_fault', 'decompress_fault', 'scrub_after_resilver',
|
'auto_spare_shared', 'decrypt_fault', 'decompress_fault',
|
||||||
'zpool_status_-s']
|
'scrub_after_resilver', 'zpool_status_-s']
|
||||||
tags = ['functional', 'fault']
|
tags = ['functional', 'fault']
|
||||||
|
|
||||||
[tests/functional/features/large_dnode:Linux]
|
[tests/functional/features/large_dnode:Linux]
|
||||||
|
|
|
@ -328,6 +328,7 @@ if os.environ.get('CI') == 'true':
|
||||||
'fault/auto_online_001_pos': ['SKIP', ci_reason],
|
'fault/auto_online_001_pos': ['SKIP', ci_reason],
|
||||||
'fault/auto_online_002_pos': ['SKIP', ci_reason],
|
'fault/auto_online_002_pos': ['SKIP', ci_reason],
|
||||||
'fault/auto_replace_001_pos': ['SKIP', ci_reason],
|
'fault/auto_replace_001_pos': ['SKIP', ci_reason],
|
||||||
|
'fault/auto_replace_002_pos': ['SKIP', ci_reason],
|
||||||
'fault/auto_spare_ashift': ['SKIP', ci_reason],
|
'fault/auto_spare_ashift': ['SKIP', ci_reason],
|
||||||
'fault/auto_spare_shared': ['SKIP', ci_reason],
|
'fault/auto_spare_shared': ['SKIP', ci_reason],
|
||||||
'procfs/pool_state': ['SKIP', ci_reason],
|
'procfs/pool_state': ['SKIP', ci_reason],
|
||||||
|
|
|
@ -130,12 +130,14 @@ export SYSTEM_FILES_LINUX='attr
|
||||||
chattr
|
chattr
|
||||||
exportfs
|
exportfs
|
||||||
fallocate
|
fallocate
|
||||||
|
flock
|
||||||
free
|
free
|
||||||
getfattr
|
getfattr
|
||||||
groupadd
|
groupadd
|
||||||
groupdel
|
groupdel
|
||||||
groupmod
|
groupmod
|
||||||
hostid
|
hostid
|
||||||
|
logger
|
||||||
losetup
|
losetup
|
||||||
lsattr
|
lsattr
|
||||||
lsblk
|
lsblk
|
||||||
|
@ -145,21 +147,20 @@ export SYSTEM_FILES_LINUX='attr
|
||||||
md5sum
|
md5sum
|
||||||
mkswap
|
mkswap
|
||||||
modprobe
|
modprobe
|
||||||
|
mountpoint
|
||||||
mpstat
|
mpstat
|
||||||
nsenter
|
nsenter
|
||||||
parted
|
parted
|
||||||
perf
|
perf
|
||||||
setfattr
|
setfattr
|
||||||
|
setpriv
|
||||||
sha256sum
|
sha256sum
|
||||||
udevadm
|
udevadm
|
||||||
unshare
|
unshare
|
||||||
useradd
|
useradd
|
||||||
userdel
|
userdel
|
||||||
usermod
|
usermod
|
||||||
setpriv
|
wipefs'
|
||||||
mountpoint
|
|
||||||
flock
|
|
||||||
logger'
|
|
||||||
|
|
||||||
export ZFS_FILES='zdb
|
export ZFS_FILES='zdb
|
||||||
zfs
|
zfs
|
||||||
|
|
|
@ -1431,6 +1431,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
||||||
functional/fault/auto_online_001_pos.ksh \
|
functional/fault/auto_online_001_pos.ksh \
|
||||||
functional/fault/auto_online_002_pos.ksh \
|
functional/fault/auto_online_002_pos.ksh \
|
||||||
functional/fault/auto_replace_001_pos.ksh \
|
functional/fault/auto_replace_001_pos.ksh \
|
||||||
|
functional/fault/auto_replace_002_pos.ksh \
|
||||||
functional/fault/auto_spare_001_pos.ksh \
|
functional/fault/auto_spare_001_pos.ksh \
|
||||||
functional/fault/auto_spare_002_pos.ksh \
|
functional/fault/auto_spare_002_pos.ksh \
|
||||||
functional/fault/auto_spare_ashift.ksh \
|
functional/fault/auto_spare_ashift.ksh \
|
||||||
|
|
|
@ -34,13 +34,14 @@
|
||||||
# 1. Update /etc/zfs/vdev_id.conf with scsidebug alias for a persistent path.
|
# 1. Update /etc/zfs/vdev_id.conf with scsidebug alias for a persistent path.
|
||||||
# This creates keys ID_VDEV and ID_VDEV_PATH and set phys_path="scsidebug".
|
# This creates keys ID_VDEV and ID_VDEV_PATH and set phys_path="scsidebug".
|
||||||
# 2. Create a pool and set autoreplace=on (auto-replace is opt-in)
|
# 2. Create a pool and set autoreplace=on (auto-replace is opt-in)
|
||||||
# 3. Export a pool
|
# 3. Export the pool
|
||||||
# 4. Wipe and offline the scsi_debug disk
|
# 4. Wipe and offline the scsi_debug disk
|
||||||
# 5. Import pool with missing disk
|
# 5. Import the pool with missing disk
|
||||||
# 6. Re-online the wiped scsi_debug disk
|
# 6. Re-online the wiped scsi_debug disk
|
||||||
# 7. Verify the ZED detects the new unused disk and adds it back to the pool
|
# 7. Verify ZED detects the new blank disk and replaces the missing vdev
|
||||||
|
# 8. Verify that the scsi_debug disk was re-partitioned
|
||||||
#
|
#
|
||||||
# Creates a raidz1 zpool using persistent disk path names
|
# Creates a raidz1 zpool using persistent /dev/disk/by-vdev path names
|
||||||
# (ie not /dev/sdc)
|
# (ie not /dev/sdc)
|
||||||
#
|
#
|
||||||
# Auto-replace is opt in, and matches by phys_path.
|
# Auto-replace is opt in, and matches by phys_path.
|
||||||
|
@ -83,11 +84,27 @@ log_must zpool create -f $TESTPOOL raidz1 $SD_DEVICE $DISK1 $DISK2 $DISK3
|
||||||
log_must zpool set autoreplace=on $TESTPOOL
|
log_must zpool set autoreplace=on $TESTPOOL
|
||||||
|
|
||||||
# Add some data to the pool
|
# Add some data to the pool
|
||||||
log_must mkfile $FSIZE /$TESTPOOL/data
|
log_must zfs create $TESTPOOL/fs
|
||||||
|
log_must fill_fs /$TESTPOOL/fs 4 100 4096 512 Z
|
||||||
log_must zpool export $TESTPOOL
|
log_must zpool export $TESTPOOL
|
||||||
|
|
||||||
|
# Record the partition UUID for later comparison
|
||||||
|
part_uuid=$(udevadm info --query=property --property=ID_PART_TABLE_UUID \
|
||||||
|
--value /dev/disk/by-id/$SD_DEVICE_ID)
|
||||||
|
[[ -z "$part_uuid" ]] || log_note original disk GPT uuid ${part_uuid}
|
||||||
|
|
||||||
|
#
|
||||||
# Wipe and offline the disk
|
# Wipe and offline the disk
|
||||||
|
#
|
||||||
|
# Note that it is not enough to zero the disk to expunge the partitions.
|
||||||
|
# You also need to inform the kernel (e.g., 'hdparm -z' or 'partprobe').
|
||||||
|
#
|
||||||
|
# Using partprobe is overkill and hdparm is not as common as wipefs. So
|
||||||
|
# we use wipefs which lets the kernel know the partition was removed
|
||||||
|
# from the device (i.e., calls BLKRRPART ioctl).
|
||||||
|
#
|
||||||
log_must dd if=/dev/zero of=/dev/disk/by-id/$SD_DEVICE_ID bs=1M count=$SDSIZE
|
log_must dd if=/dev/zero of=/dev/disk/by-id/$SD_DEVICE_ID bs=1M count=$SDSIZE
|
||||||
|
log_must /usr/sbin/wipefs -a /dev/disk/by-id/$SD_DEVICE_ID
|
||||||
remove_disk $SD
|
remove_disk $SD
|
||||||
block_device_wait
|
block_device_wait
|
||||||
|
|
||||||
|
@ -106,4 +123,18 @@ log_must wait_replacing $TESTPOOL 60
|
||||||
# Validate auto-replace was successful
|
# Validate auto-replace was successful
|
||||||
log_must check_state $TESTPOOL "" "ONLINE"
|
log_must check_state $TESTPOOL "" "ONLINE"
|
||||||
|
|
||||||
|
#
|
||||||
|
# Confirm the partition UUID changed so we know the new disk was relabeled
|
||||||
|
#
|
||||||
|
# Note: some older versions of udevadm don't support "--property" option so
|
||||||
|
# we'll skip this test when it is not supported
|
||||||
|
#
|
||||||
|
if [ ! -z "$part_uuid" ]; then
|
||||||
|
new_uuid=$(udevadm info --query=property --property=ID_PART_TABLE_UUID \
|
||||||
|
--value /dev/disk/by-id/$SD_DEVICE_ID)
|
||||||
|
log_note new disk GPT uuid ${new_uuid}
|
||||||
|
[[ "$part_uuid" = "$new_uuid" ]] && \
|
||||||
|
log_fail "The new disk was not relabeled as expected"
|
||||||
|
fi
|
||||||
|
|
||||||
log_pass "Auto-replace test successful"
|
log_pass "Auto-replace test successful"
|
||||||
|
|
|
@ -0,0 +1,192 @@
|
||||||
|
#!/bin/ksh -p
|
||||||
|
#
|
||||||
|
# CDDL HEADER START
|
||||||
|
#
|
||||||
|
# The contents of this file are subject to the terms of the
|
||||||
|
# Common Development and Distribution License (the "License").
|
||||||
|
# You may not use this file except in compliance with the License.
|
||||||
|
#
|
||||||
|
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||||
|
# or https://opensource.org/licenses/CDDL-1.0.
|
||||||
|
# See the License for the specific language governing permissions
|
||||||
|
# and limitations under the License.
|
||||||
|
#
|
||||||
|
# When distributing Covered Code, include this CDDL HEADER in each
|
||||||
|
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||||
|
# If applicable, add the following below this CDDL HEADER, with the
|
||||||
|
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||||
|
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||||
|
#
|
||||||
|
# CDDL HEADER END
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# Copyright (c) 2017 by Intel Corporation. All rights reserved.
|
||||||
|
# Copyright (c) 2023 by Klara, Inc. All rights reserved.
|
||||||
|
#
|
||||||
|
|
||||||
|
. $STF_SUITE/include/libtest.shlib
|
||||||
|
. $STF_SUITE/tests/functional/fault/fault.cfg
|
||||||
|
|
||||||
|
#
|
||||||
|
# DESCRIPTION:
|
||||||
|
# Testing Fault Management Agent ZED Logic - Automated Auto-Replace Test.
|
||||||
|
# Verifies that auto-replace works with by-id paths.
|
||||||
|
#
|
||||||
|
# STRATEGY:
|
||||||
|
# 1. Update /etc/zfs/vdev_id.conf with scsidebug alias for a persistent path.
|
||||||
|
# This creates keys ID_VDEV and ID_VDEV_PATH and set phys_path="scsidebug".
|
||||||
|
# 2. Create a pool and set autoreplace=on (auto-replace is opt-in)
|
||||||
|
# 3. Export the pool
|
||||||
|
# 4. Wipe and offline the scsi_debug disk
|
||||||
|
# 5. Import the pool with missing disk
|
||||||
|
# 6. Re-online the wiped scsi_debug disk with a new serial number
|
||||||
|
# 7. Verify ZED detects the new blank disk and replaces the missing vdev
|
||||||
|
# 8. Verify that the scsi_debug disk was re-partitioned
|
||||||
|
#
|
||||||
|
# Creates a raidz1 zpool using persistent /dev/disk/by-id path names
|
||||||
|
#
|
||||||
|
# Auto-replace is opt in, and matches by phys_path.
|
||||||
|
#
|
||||||
|
|
||||||
|
verify_runnable "both"
|
||||||
|
|
||||||
|
if ! is_physical_device $DISKS; then
|
||||||
|
log_unsupported "Unsupported disks for this test."
|
||||||
|
fi
|
||||||
|
|
||||||
|
function cleanup
|
||||||
|
{
|
||||||
|
zpool status $TESTPOOL
|
||||||
|
destroy_pool $TESTPOOL
|
||||||
|
sed -i '/alias scsidebug/d' $VDEVID_CONF
|
||||||
|
unload_scsi_debug
|
||||||
|
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Wait until a vdev transitions to its replacement vdev
|
||||||
|
#
|
||||||
|
# Return 0 when vdev reaches expected state, 1 on timeout.
|
||||||
|
#
|
||||||
|
# Note: index +2 is to skip over root and raidz-0 vdevs
|
||||||
|
#
|
||||||
|
function wait_vdev_online # pool index oldguid timeout
|
||||||
|
{
|
||||||
|
typeset pool=$1
|
||||||
|
typeset -i index=$2+2
|
||||||
|
typeset guid=$3
|
||||||
|
typeset timeout=${4:-60}
|
||||||
|
typeset -i i=0
|
||||||
|
|
||||||
|
while [[ $i -lt $timeout ]]; do
|
||||||
|
vdev_guids=( $(zpool get -H -o value guid $pool all-vdevs) )
|
||||||
|
|
||||||
|
if [ "${vdev_guids[$index]}" != "${guid}" ]; then
|
||||||
|
log_note "new vdev[$((index-2))]: ${vdev_guids[$index]}, replacing ${guid}"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
i=$((i+1))
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
log_assert "automated auto-replace with by-id paths"
|
||||||
|
log_onexit cleanup
|
||||||
|
|
||||||
|
load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS '512b'
|
||||||
|
SD=$(get_debug_device)
|
||||||
|
SD_DEVICE_ID=$(get_persistent_disk_name $SD)
|
||||||
|
SD_HOST=$(get_scsi_host $SD)
|
||||||
|
|
||||||
|
# Register vdev_id alias for scsi_debug device to create a persistent path
|
||||||
|
echo "alias scsidebug /dev/disk/by-id/$SD_DEVICE_ID" >>$VDEVID_CONF
|
||||||
|
block_device_wait
|
||||||
|
|
||||||
|
SD_DEVICE=$(udevadm info -q all -n $DEV_DSKDIR/$SD | \
|
||||||
|
awk -F'=' '/ID_VDEV=/ {print $2; exit}')
|
||||||
|
[ -z $SD_DEVICE ] && log_fail "vdev rule was not registered properly"
|
||||||
|
|
||||||
|
log_must zpool events -c
|
||||||
|
log_must zpool create -f $TESTPOOL raidz1 $SD_DEVICE_ID $DISK1 $DISK2 $DISK3
|
||||||
|
|
||||||
|
vdev_guid=$(zpool get guid -H -o value $TESTPOOL $SD_DEVICE_ID)
|
||||||
|
log_note original vdev guid ${vdev_guid}
|
||||||
|
|
||||||
|
# Auto-replace is opt-in so need to set property
|
||||||
|
log_must zpool set autoreplace=on $TESTPOOL
|
||||||
|
|
||||||
|
# Add some data to the pool
|
||||||
|
log_must zfs create $TESTPOOL/fs
|
||||||
|
log_must fill_fs /$TESTPOOL/fs 4 100 4096 512 Z
|
||||||
|
log_must zpool export $TESTPOOL
|
||||||
|
|
||||||
|
# Record the partition UUID for later comparison
|
||||||
|
part_uuid=$(udevadm info --query=property --property=ID_PART_TABLE_UUID \
|
||||||
|
--value /dev/disk/by-id/$SD_DEVICE_ID)
|
||||||
|
[[ -z "$part_uuid" ]] || log_note original disk GPT uuid ${part_uuid}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Wipe and offline the disk
|
||||||
|
#
|
||||||
|
# Note that it is not enough to zero the disk to expunge the partitions.
|
||||||
|
# You also need to inform the kernel (e.g., 'hdparm -z' or 'partprobe').
|
||||||
|
#
|
||||||
|
# Using partprobe is overkill and hdparm is not as common as wipefs. So
|
||||||
|
# we use wipefs which lets the kernel know the partition was removed
|
||||||
|
# from the device (i.e., calls BLKRRPART ioctl).
|
||||||
|
#
|
||||||
|
log_must dd if=/dev/zero of=/dev/disk/by-id/$SD_DEVICE_ID bs=1M count=$SDSIZE
|
||||||
|
log_must /usr/sbin/wipefs -a /dev/disk/by-id/$SD_DEVICE_ID
|
||||||
|
remove_disk $SD
|
||||||
|
block_device_wait
|
||||||
|
|
||||||
|
# Re-import pool with drive missing
|
||||||
|
log_must zpool import $TESTPOOL
|
||||||
|
log_must check_state $TESTPOOL "" "DEGRADED"
|
||||||
|
block_device_wait
|
||||||
|
|
||||||
|
#
|
||||||
|
# Online an empty disk in the same physical location, with a different by-id
|
||||||
|
# symlink. We use vpd_use_hostno to make sure the underlying serial number
|
||||||
|
# changes for the new disk which in turn gives us a different by-id path.
|
||||||
|
#
|
||||||
|
# The original names were something like:
|
||||||
|
# /dev/disk/by-id/scsi-SLinux_scsi_debug_16000-part1
|
||||||
|
# /dev/disk/by-id/wwn-0x33333330000007d0-part1
|
||||||
|
#
|
||||||
|
# This newly inserted disk will have different links like:
|
||||||
|
# /dev/disk/by-id/scsi-SLinux_scsi_debug_2000-part1
|
||||||
|
# /dev/disk/by-id/wwn-0x3333333000003e80-part1
|
||||||
|
#
|
||||||
|
echo '0' > /sys/bus/pseudo/drivers/scsi_debug/vpd_use_hostno
|
||||||
|
|
||||||
|
insert_disk $SD $SD_HOST
|
||||||
|
|
||||||
|
# make sure the physical path points to the same scsi-debug device
|
||||||
|
SD_DEVICE_ID=$(get_persistent_disk_name $SD)
|
||||||
|
echo "alias scsidebug /dev/disk/by-id/$SD_DEVICE_ID" >>$VDEVID_CONF
|
||||||
|
block_device_wait
|
||||||
|
|
||||||
|
# Wait for the new disk to be online and replaced
|
||||||
|
log_must wait_vdev_online $TESTPOOL 0 $vdev_guid 45
|
||||||
|
log_must wait_replacing $TESTPOOL 45
|
||||||
|
|
||||||
|
# Validate auto-replace was successful
|
||||||
|
log_must check_state $TESTPOOL "" "ONLINE"
|
||||||
|
|
||||||
|
#
|
||||||
|
# Confirm the partition UUID changed so we know the new disk was relabeled
|
||||||
|
#
|
||||||
|
# Note: some older versions of udevadm don't support "--property" option so
|
||||||
|
# we'll skip this test when it is not supported
|
||||||
|
#
|
||||||
|
if [ ! -z "$part_uuid" ]; then
|
||||||
|
new_uuid=$(udevadm info --query=property --property=ID_PART_TABLE_UUID \
|
||||||
|
--value /dev/disk/by-id/$SD_DEVICE_ID)
|
||||||
|
log_note new disk GPT uuid ${new_uuid}
|
||||||
|
[[ "$part_uuid" = "$new_uuid" ]] && \
|
||||||
|
log_fail "The new disk was not relabeled as expected"
|
||||||
|
fi
|
||||||
|
|
||||||
|
log_pass "automated auto-replace with by-id paths"
|
Loading…
Reference in New Issue