ZTS: Fix auto_replace_001_pos test
The root cause of these failures is that udev can notify the ZED of a newly created partition before its links are created. Handle this by allowing an auto-replace to briefly wait until udev confirms the links exist. Distill this test case down to its essentials so it can be run reliably. What we need to check is that: 1) A new disk, in the same physical location, is automatically brought online when added to the system, 2) It completes the replacement process, and 3) The pool is now ONLINE and healthy. There is no need to remove the scsi_debug module. After exporting the pool the disk can be zeroed, removed, and then re-added to the system as a new disk. Reviewed by: loli10K <ezomori.nozomu@gmail.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #8051
This commit is contained in:
parent
b74f48fe1b
commit
bea7578356
|
@ -427,8 +427,16 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
|
||||||
nvlist_free(newvd);
|
nvlist_free(newvd);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* auto replace a leaf disk at same physical location
|
* Wait for udev to verify the links exist, then auto-replace
|
||||||
|
* the leaf disk at the same physical location.
|
||||||
*/
|
*/
|
||||||
|
if (zpool_label_disk_wait(path, 3000) != 0) {
|
||||||
|
zed_log_msg(LOG_WARNING, "zfs_mod: expected replacement "
|
||||||
|
"disk %s is missing", path);
|
||||||
|
nvlist_free(nvroot);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
ret = zpool_vdev_attach(zhp, fullpath, path, nvroot, B_TRUE);
|
ret = zpool_vdev_attach(zhp, fullpath, path, nvroot, B_TRUE);
|
||||||
|
|
||||||
zed_log_msg(LOG_INFO, " zpool_vdev_replace: %s with %s (%s)",
|
zed_log_msg(LOG_INFO, " zpool_vdev_replace: %s with %s (%s)",
|
||||||
|
|
|
@ -31,17 +31,14 @@
|
||||||
# Testing Fault Management Agent ZED Logic - Automated Auto-Replace Test.
|
# Testing Fault Management Agent ZED Logic - Automated Auto-Replace Test.
|
||||||
#
|
#
|
||||||
# STRATEGY:
|
# STRATEGY:
|
||||||
# 1. Update /etc/zfs/vdev_id.conf with scsidebug alias rule for a persistent
|
# 1. Update /etc/zfs/vdev_id.conf with scsidebug alias for a persistent path.
|
||||||
# path. This creates keys ID_VDEV and ID_VDEV_PATH and sets
|
# This creates keys ID_VDEV and ID_VDEV_PATH and sets phys_path="scsidebug".
|
||||||
# phys_path="scsidebug".
|
# 2. Create a pool and set autoreplace=on (auto-replace is opt-in)
|
||||||
# 2. Create a pool & set autoreplace=on (auto-replace is opt-in)
|
# 3. Export the pool
|
||||||
# 2. Export a pool
|
# 4. Wipe and offline the scsi_debug disk
|
||||||
# 3. Offline disk by removing scsi_debug module
|
# 5. Import pool with missing disk
|
||||||
# 4. Import pool with missing disk
|
# 6. Re-online the wiped scsi_debug disk
|
||||||
# 5. Online disk by loading scsi_debug module again and re-registering vdev_id
|
# 7. Verify the ZED detects the new unused disk and adds it back to the pool
|
||||||
# rule.
|
|
||||||
# 6. ZED polls for an event change for new disk to be automatically
|
|
||||||
# added back to the pool
|
|
||||||
#
|
#
|
||||||
# Creates a raidz1 zpool using persistent disk path names
|
# Creates a raidz1 zpool using persistent disk path names
|
||||||
# (ie not /dev/sdc)
|
# (ie not /dev/sdc)
|
||||||
|
@ -55,98 +52,57 @@ if ! is_physical_device $DISKS; then
|
||||||
log_unsupported "Unsupported disks for this test."
|
log_unsupported "Unsupported disks for this test."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
function setup
|
|
||||||
{
|
|
||||||
load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS '512b'
|
|
||||||
SD=$(get_debug_device)
|
|
||||||
SDDEVICE_ID=$(get_persistent_disk_name $SD)
|
|
||||||
# Register vdev_id alias rule for scsi_debug device to create a
|
|
||||||
# persistent path
|
|
||||||
log_must eval "echo "alias scsidebug /dev/disk/by-id/$SDDEVICE_ID" \
|
|
||||||
>> $VDEVID_CONF"
|
|
||||||
block_device_wait
|
|
||||||
SDDEVICE=$(udevadm info -q all -n $DEV_DSKDIR/$SD \
|
|
||||||
| awk -F'=' '/ID_VDEV=/{print $2; exit}')
|
|
||||||
[[ -z $SDDEVICE ]] && log_fail "vdev rule was not registered properly"
|
|
||||||
}
|
|
||||||
|
|
||||||
function cleanup
|
function cleanup
|
||||||
{
|
{
|
||||||
destroy_pool $TESTPOOL
|
destroy_pool $TESTPOOL
|
||||||
|
sed -i '/alias scsidebug/d' $VDEVID_CONF
|
||||||
unload_scsi_debug
|
unload_scsi_debug
|
||||||
}
|
}
|
||||||
|
|
||||||
log_assert "Testing automated auto-replace FMA test"
|
log_assert "Testing automated auto-replace FMA test"
|
||||||
|
|
||||||
log_onexit cleanup
|
log_onexit cleanup
|
||||||
|
|
||||||
# Clear disk labels
|
load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS '512b'
|
||||||
for i in {0..2}
|
SD=$(get_debug_device)
|
||||||
do
|
SD_DEVICE_ID=$(get_persistent_disk_name $SD)
|
||||||
zpool labelclear -f /dev/disk/by-id/"${devs_id[i]}"
|
SD_HOST=$(get_scsi_host $SD)
|
||||||
done
|
|
||||||
|
|
||||||
setup
|
# Register vdev_id alias for scsi_debug device to create a persistent path
|
||||||
if is_loop_device $DISK1; then
|
echo "alias scsidebug /dev/disk/by-id/$SD_DEVICE_ID" >>$VDEVID_CONF
|
||||||
log_must zpool create -f $TESTPOOL raidz1 $SDDEVICE $DISK1 $DISK2 \
|
block_device_wait
|
||||||
$DISK3
|
|
||||||
elif ( is_real_device $DISK1 || is_mpath_device $DISK1 ); then
|
SD_DEVICE=$(udevadm info -q all -n $DEV_DSKDIR/$SD | \
|
||||||
log_must zpool create -f $TESTPOOL raidz1 $SDDEVICE ${devs_id[0]} \
|
awk -F'=' '/ID_VDEV=/{print $2; exit}')
|
||||||
${devs_id[1]} ${devs_id[2]}
|
[[ -z $SD_DEVICE ]] && log_fail "vdev rule was not registered properly"
|
||||||
else
|
|
||||||
log_fail "Disks are not supported for this test"
|
log_must zpool events -c
|
||||||
fi
|
log_must zpool create -f $TESTPOOL raidz1 $SD_DEVICE $DISK1 $DISK2 $DISK3
|
||||||
|
|
||||||
# Auto-replace is opt-in so need to set property
|
# Auto-replace is opt-in so need to set property
|
||||||
log_must zpool set autoreplace=on $TESTPOOL
|
log_must zpool set autoreplace=on $TESTPOOL
|
||||||
|
|
||||||
# Add some data to the pool
|
# Add some data to the pool
|
||||||
log_must mkfile $FSIZE /$TESTPOOL/data
|
log_must mkfile $FSIZE /$TESTPOOL/data
|
||||||
|
log_must zpool export $TESTPOOL
|
||||||
|
|
||||||
log_must zpool export -F $TESTPOOL
|
# Wipe and offline the disk
|
||||||
|
log_must dd if=/dev/zero of=/dev/disk/by-id/$SD_DEVICE_ID bs=1M count=$SDSIZE
|
||||||
# Offline disk
|
|
||||||
remove_disk $SD
|
remove_disk $SD
|
||||||
block_device_wait
|
block_device_wait
|
||||||
unload_scsi_debug
|
|
||||||
|
|
||||||
# Reimport pool with drive missing
|
# Re-import pool with drive missing
|
||||||
log_must zpool import $TESTPOOL
|
log_must zpool import $TESTPOOL
|
||||||
check_state $TESTPOOL "" "degraded"
|
log_must check_state $TESTPOOL "" "DEGRADED"
|
||||||
if (($? != 0)); then
|
block_device_wait
|
||||||
log_fail "$TESTPOOL is not degraded"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Clear zpool events
|
# Online an empty disk in the same physical location
|
||||||
log_must zpool events -c
|
insert_disk $SD $SD_HOST
|
||||||
|
|
||||||
# Create another scsi_debug device
|
# Wait for the new disk to be online and replaced
|
||||||
setup
|
log_must wait_vdev_state $TESTPOOL "scsidebug" "ONLINE" $MAXTIMEOUT
|
||||||
|
log_must wait_replacing $TESTPOOL
|
||||||
log_note "Delay for ZED auto-replace"
|
|
||||||
typeset -i timeout=0
|
|
||||||
while true; do
|
|
||||||
if ((timeout == $MAXTIMEOUT)); then
|
|
||||||
log_fail "Timeout occured"
|
|
||||||
fi
|
|
||||||
((timeout++))
|
|
||||||
sleep 1
|
|
||||||
zpool events $TESTPOOL | egrep sysevent.fs.zfs.resilver_finish \
|
|
||||||
> /dev/null
|
|
||||||
if (($? == 0)); then
|
|
||||||
log_note "Auto-replace should be complete"
|
|
||||||
sleep 1
|
|
||||||
break
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
# Validate auto-replace was successful
|
# Validate auto-replace was successful
|
||||||
check_state $TESTPOOL "" "online"
|
log_must check_state $TESTPOOL "" "ONLINE"
|
||||||
if (($? != 0)); then
|
|
||||||
log_fail "$TESTPOOL is not back online"
|
|
||||||
fi
|
|
||||||
sleep 2
|
|
||||||
|
|
||||||
log_must zpool destroy $TESTPOOL
|
|
||||||
|
|
||||||
log_pass "Auto-replace test successful"
|
log_pass "Auto-replace test successful"
|
||||||
|
|
|
@ -29,7 +29,7 @@ verify_runnable "global"
|
||||||
export DISK_ARRAY_NUM=$(echo ${DISKS} | nawk '{print NF}')
|
export DISK_ARRAY_NUM=$(echo ${DISKS} | nawk '{print NF}')
|
||||||
export DISKSARRAY=$DISKS
|
export DISKSARRAY=$DISKS
|
||||||
export FSIZE=10M
|
export FSIZE=10M
|
||||||
export MAXTIMEOUT=20
|
export MAXTIMEOUT=30
|
||||||
|
|
||||||
export SDSIZE=256
|
export SDSIZE=256
|
||||||
export SDHOSTS=1
|
export SDHOSTS=1
|
||||||
|
|
Loading…
Reference in New Issue