ZED: Match added disk by pool/vdev GUID if found (#12217)
This enables ZED to auto-online vdevs that are not wholedisk managed by ZFS.

Signed-off-by: Ryan Moeller <ryan@iXsystems.com>
Reviewed-by: Don Brady <don.brady@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
commit d6c2b89032 (parent f3969ea78b)
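In outline: ZED's add-event handler previously matched a newly arrived disk against pool vdevs only by devid and then by physical path, which in practice limited auto-online to wholedisk vdevs. This commit adds a third fallback keyed on the pool and vdev GUIDs carried in the event. A simplified sketch of the resulting control flow, condensed from the diff below (the _sketch name is ours; error handling and dev_data_t plumbing are elided):

static int
zfs_deliver_add_sketch(nvlist_t *nvl, boolean_t is_slice)
{
	char *devpath = NULL, *devid;
	uint64_t pool_guid = 0, vdev_guid = 0;

	/* A devid is mandatory; phys path and GUIDs are optional hints. */
	if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid) != 0)
		return (-1);
	(void) nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devpath);
	(void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
	(void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);

	/* 1. Match the unique disk (devid). */
	if (devid_iter(devid, zfs_process_add, is_slice))
		return (0);
	/* 2. Match the physical slot the disk occupies. */
	if (devpath != NULL &&
	    devphys_iter(devpath, devid, zfs_process_add, is_slice))
		return (0);
	/* 3. New: match by pool/vdev GUID, covering partitioned vdevs. */
	if (vdev_guid != 0)
		(void) guid_iter(pool_guid, vdev_guid, devid,
		    zfs_process_add, is_slice);
	return (0);
}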
cmd/zed/agents/zfs_mod.c

@@ -640,6 +640,27 @@ devid_iter(const char *devid, zfs_process_func_t func, boolean_t is_slice)
 	return (data.dd_found);
 }
 
+/*
+ * Given a device guid, find any vdevs with a matching guid.
+ */
+static boolean_t
+guid_iter(uint64_t pool_guid, uint64_t vdev_guid, const char *devid,
+    zfs_process_func_t func, boolean_t is_slice)
+{
+	dev_data_t data = { 0 };
+
+	data.dd_func = func;
+	data.dd_found = B_FALSE;
+	data.dd_pool_guid = pool_guid;
+	data.dd_vdev_guid = vdev_guid;
+	data.dd_islabeled = is_slice;
+	data.dd_new_devid = devid;
+
+	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
+
+	return (data.dd_found);
+}
+
 /*
  * Handle a EC_DEV_ADD.ESC_DISK event.
  *
@@ -663,15 +684,18 @@ static int
 zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
 {
 	char *devpath = NULL, *devid;
+	uint64_t pool_guid = 0, vdev_guid = 0;
 	boolean_t is_slice;
 
 	/*
-	 * Expecting a devid string and an optional physical location
+	 * Expecting a devid string and an optional physical location and guid
 	 */
 	if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid) != 0)
 		return (-1);
 
 	(void) nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devpath);
+	(void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
+	(void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);
 
 	is_slice = (nvlist_lookup_boolean(nvl, DEV_IS_PART) == 0);
 
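With the two extra lookups above, the handler now consumes up to five fields from the event nvlist. For illustration only, a hypothetical EC_DEV_ADD payload exercising the new GUID path could be assembled with the standard libnvpair calls (the values shown are made up; real events are built by ZED's udev monitor in zed_disk_event.c):

	nvlist_t *nvl = NULL;

	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) == 0) {
		/* Mandatory devid; the value here is fabricated. */
		(void) nvlist_add_string(nvl, DEV_IDENTIFIER,
		    "ata-EXAMPLE_SERIAL-part1");
		/* Optional physical location. */
		(void) nvlist_add_string(nvl, DEV_PHYS_PATH,
		    "pci-0000:00:1f.2-ata-1");
		/* Presence of this flag marks the device as a partition. */
		(void) nvlist_add_boolean(nvl, DEV_IS_PART);
		/* Optional pool/vdev GUIDs that enable the new fallback. */
		(void) nvlist_add_uint64(nvl, ZFS_EV_POOL_GUID, 0x1234abcdULL);
		(void) nvlist_add_uint64(nvl, ZFS_EV_VDEV_GUID, 0x5678ef01ULL);
		/* ...would then flow into zfs_deliver_add(nvl, B_FALSE)... */
		nvlist_free(nvl);
	}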
@@ -682,12 +706,16 @@ zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
 	 * Iterate over all vdevs looking for a match in the following order:
 	 * 1. ZPOOL_CONFIG_DEVID (identifies the unique disk)
 	 * 2. ZPOOL_CONFIG_PHYS_PATH (identifies disk physical location).
-	 *
-	 * For disks, we only want to pay attention to vdevs marked as whole
-	 * disks or are a multipath device.
+	 * 3. ZPOOL_CONFIG_GUID (identifies unique vdev).
 	 */
-	if (!devid_iter(devid, zfs_process_add, is_slice) && devpath != NULL)
-		(void) devphys_iter(devpath, devid, zfs_process_add, is_slice);
+	if (devid_iter(devid, zfs_process_add, is_slice))
+		return (0);
+	if (devpath != NULL && devphys_iter(devpath, devid, zfs_process_add,
+	    is_slice))
+		return (0);
+	if (vdev_guid != 0)
+		(void) guid_iter(pool_guid, vdev_guid, devid, zfs_process_add,
+		    is_slice);
 
 	return (0);
 }
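The ordering is deliberate, from most to least specific: a devid names the exact disk or partition, a physical path names the slot a replacement may have been inserted into, and the pool/vdev GUID pair is the broadest match. Because the old code consulted only devid and physical path, and only paid attention to wholedisk or multipath vdevs, a vdev built on a partition could never be auto-onlined; the GUID fallback closes that gap. The early returns keep a stronger match from being shadowed by a weaker one, and guid_iter() runs only when the event actually carried a vdev GUID.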
cmd/zed/zed_disk_event.c

@@ -72,6 +72,8 @@ zed_udev_event(const char *class, const char *subclass, nvlist_t *nvl)
 		zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PATH, strval);
 	if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &strval) == 0)
 		zed_log_msg(LOG_INFO, "\t%s: %s", DEV_IDENTIFIER, strval);
+	if (nvlist_lookup_boolean(nvl, DEV_IS_PART) == B_TRUE)
+		zed_log_msg(LOG_INFO, "\t%s: B_TRUE", DEV_IS_PART);
 	if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &strval) == 0)
 		zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PHYS_PATH, strval);
 	if (nvlist_lookup_uint64(nvl, DEV_SIZE, &numval) == 0)
tests/runfiles/linux.run

@@ -98,10 +98,11 @@ tests = ['fallocate_prealloc', 'fallocate_punch-hole']
 tags = ['functional', 'fallocate']
 
 [tests/functional/fault:Linux]
-tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_replace_001_pos',
-    'auto_spare_001_pos', 'auto_spare_002_pos', 'auto_spare_multiple',
-    'auto_spare_ashift', 'auto_spare_shared', 'decrypt_fault',
-    'decompress_fault', 'scrub_after_resilver', 'zpool_status_-s']
+tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_online_002_pos',
+    'auto_replace_001_pos', 'auto_spare_001_pos', 'auto_spare_002_pos',
+    'auto_spare_multiple', 'auto_spare_ashift', 'auto_spare_shared',
+    'decrypt_fault', 'decompress_fault', 'scrub_after_resilver',
+    'zpool_status_-s']
 tags = ['functional', 'fault']
 
 [tests/functional/features/large_dnode:Linux]
tests/test-runner/bin/zts-report.py

@@ -323,6 +323,7 @@ if os.environ.get('CI') == 'true':
     'cli_root/zpool_split/zpool_split_wholedisk': ['SKIP', ci_reason],
     'fault/auto_offline_001_pos': ['SKIP', ci_reason],
     'fault/auto_online_001_pos': ['SKIP', ci_reason],
+    'fault/auto_online_002_pos': ['SKIP', ci_reason],
     'fault/auto_replace_001_pos': ['SKIP', ci_reason],
     'fault/auto_spare_ashift': ['SKIP', ci_reason],
     'fault/auto_spare_shared': ['SKIP', ci_reason],
tests/zfs-tests/tests/functional/fault/Makefile.am

@@ -4,6 +4,7 @@ dist_pkgdata_SCRIPTS = \
 	cleanup.ksh \
 	auto_offline_001_pos.ksh \
 	auto_online_001_pos.ksh \
+	auto_online_002_pos.ksh \
 	auto_replace_001_pos.ksh \
 	auto_spare_001_pos.ksh \
 	auto_spare_002_pos.ksh \
tests/zfs-tests/tests/functional/fault/auto_online_002_pos.ksh (new file)

@@ -0,0 +1,94 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2016, 2017 by Intel Corporation. All rights reserved.
+# Copyright (c) 2019 by Delphix. All rights reserved.
+# Portions Copyright 2021 iXsystems, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/fault/fault.cfg
+
+#
+# DESCRIPTION:
+# Testing Fault Management Agent ZED Logic - Automated Auto-Online Test.
+# Now with partitioned vdevs.
+#
+# STRATEGY:
+# 1. Partition a scsi_debug device for simulating removal
+# 2. Create a pool
+# 3. Offline disk
+# 4. ZED polls for an event change for online disk to be automatically
+#    added back to the pool.
+#
+verify_runnable "both"
+
+function cleanup
+{
+	poolexists ${TESTPOOL} && destroy_pool ${TESTPOOL}
+	unload_scsi_debug
+}
+
+log_assert "Testing automated auto-online FMA test with partitioned vdev"
+
+log_onexit cleanup
+
+load_scsi_debug ${SDSIZE} ${SDHOSTS} ${SDTGTS} ${SDLUNS} '512b'
+SDDEVICE=$(get_debug_device)
+zpool labelclear -f ${SDDEVICE}
+partition_disk ${SDSIZE} ${SDDEVICE} 1
+part=${SDDEVICE}1
+host=$(get_scsi_host ${SDDEVICE})
+
+block_device_wait /dev/${part}
+log_must zpool create -f ${TESTPOOL} raidz1 ${part} ${DISKS}
+
+# Add some data to the pool
+log_must mkfile ${FSIZE} /${TESTPOOL}/data
+
+remove_disk ${SDDEVICE}
+check_state ${TESTPOOL} "" "degraded" || \
+	log_fail "${TESTPOOL} is not degraded"
+
+# Clear zpool events
+log_must zpool events -c
+
+# Online disk
+insert_disk ${SDDEVICE} ${host}
+
+log_note "Delay for ZED auto-online"
+typeset -i timeout=0
+until is_pool_resilvered ${TESTPOOL}; do
+	if ((timeout++ == MAXTIMEOUT)); then
+		log_fail "Timeout occurred"
+	fi
+	sleep 1
+done
+log_note "Auto-online of ${SDDEVICE} is complete"
+
+# Validate auto-online was successful
+sleep 1
+check_state ${TESTPOOL} "" "online" || \
+	log_fail "${TESTPOOL} is not back online"
+
+log_must zpool destroy ${TESTPOOL}
+
+log_pass "Auto-online with partitioned vdev test successful"
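On a typical checkout, the new case can be run on its own with the test-suite wrapper, e.g. scripts/zfs-tests.sh -t functional/fault/auto_online_002_pos (exact path syntax may vary by version), or as part of the fault group registered in linux.run above. Like its neighboring scsi_debug-based fault tests, it is skipped under CI via the zts-report.py entry, for the same ci_reason as the rest of the group.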