ZTS: Add auto-spare tests
The ZED is expected to automatically kick in a hot spare device when there's one available in the pool and a sufficient number of read errors have been encountered. Use zinject to simulate the failure condition and verify the hot spare is used. auto_spare_001_pos.ksh: read IO errors, the vdev is FAULTED auto_spare_002_pos.ksh: read CHECKSUM errors, the vdev is DEGRADED Reviewed-by: Richard Elling <Richard.Elling@RichardElling.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: David Quigley <david.quigley@intel.com> Closes #6280
This commit is contained in:
parent
f8cd871a01
commit
d9daa7abcf
|
@ -381,7 +381,8 @@ tests = ['events_001_pos', 'events_002_pos']
|
|||
tests = ['exec_001_pos', 'exec_002_neg']
|
||||
|
||||
[tests/functional/fault]
|
||||
tests = ['auto_online_001_pos', 'auto_replace_001_pos']
|
||||
tests = ['auto_online_001_pos', 'auto_replace_001_pos', 'auto_spare_001_pos',
|
||||
'auto_spare_002_pos']
|
||||
|
||||
[tests/functional/features/async_destroy]
|
||||
tests = ['async_destroy_001_pos']
|
||||
|
|
|
@ -2029,6 +2029,31 @@ function check_hotspare_state # pool disk state{inuse,avail}
|
|||
return 0
|
||||
}
|
||||
|
||||
#
# Wait until a hotspare transitions to a given state or times out.
#
# $1 pool    - pool name, handed unchanged to check_hotspare_state
# $2 disk    - spare device; a leading "$DEV_DSKDIR/" is removed first
# $3 state   - state to wait for, handed unchanged to check_hotspare_state
# $4 timeout - maximum number of one-second polls (default: 60)
#
# Return 0 when pool/disk matches expected state, 1 on timeout.
#
function wait_hotspare_state # pool disk state timeout
{
	typeset pool=$1
	# Drop a leading "$DEV_DSKDIR/" so a full device path and a bare
	# device name are treated alike.
	typeset disk=${2#$DEV_DSKDIR/}
	typeset state=$3
	typeset timeout=${4:-60}
	typeset -i i=0

	# Poll once per second; succeed as soon as the check passes.
	while [[ $i -lt $timeout ]]; do
		if check_hotspare_state "$pool" "$disk" "$state"; then
			return 0
		fi

		i=$((i+1))
		sleep 1
	done

	# The state never matched within the allotted number of polls.
	return 1
}
|
||||
|
||||
#
|
||||
# Verify a given slog disk is inuse or avail
|
||||
#
|
||||
|
@ -2067,6 +2092,31 @@ function check_vdev_state # pool disk state{online,offline,unavail}
|
|||
return 0
|
||||
}
|
||||
|
||||
#
# Wait until a vdev transitions to a given state or times out.
#
# $1 pool    - pool name, handed unchanged to check_vdev_state
# $2 disk    - vdev device; a leading "$DEV_DSKDIR/" is removed first
# $3 state   - state to wait for, handed unchanged to check_vdev_state
# $4 timeout - maximum number of one-second polls (default: 60)
#
# Return 0 when pool/disk matches expected state, 1 on timeout.
#
function wait_vdev_state # pool disk state timeout
{
	typeset pool=$1
	# Drop a leading "$DEV_DSKDIR/" so a full device path and a bare
	# device name are treated alike.
	typeset disk=${2#$DEV_DSKDIR/}
	typeset state=$3
	typeset timeout=${4:-60}
	typeset -i i=0

	# Poll once per second; succeed as soon as the check passes.
	while [[ $i -lt $timeout ]]; do
		if check_vdev_state "$pool" "$disk" "$state"; then
			return 0
		fi

		i=$((i+1))
		sleep 1
	done

	# The state never matched within the allotted number of polls.
	return 1
}
|
||||
|
||||
#
|
||||
# Check the output of 'zpool status -v <pool>',
|
||||
# and to see if the content of <token> contain the <keyword> specified.
|
||||
|
|
|
@ -4,4 +4,6 @@ dist_pkgdata_SCRIPTS = \
|
|||
setup.ksh \
|
||||
cleanup.ksh \
|
||||
auto_online_001_pos.ksh \
|
||||
auto_replace_001_pos.ksh
|
||||
auto_replace_001_pos.ksh \
|
||||
auto_spare_001_pos.ksh \
|
||||
auto_spare_002_pos.ksh
|
||||
|
|
|
@ -0,0 +1,91 @@
|
|||
#!/bin/ksh -p
|
||||
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# This file and its contents are supplied under the terms of the
|
||||
# Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
# You may only use this file in accordance with the terms of version
|
||||
# 1.0 of the CDDL.
|
||||
#
|
||||
# A full copy of the text of the CDDL should have accompanied this
|
||||
# source. A copy of the CDDL is also available via the Internet at
|
||||
# http://www.illumos.org/license/CDDL.
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2017 by Intel Corporation. All rights reserved.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/fault/fault.cfg
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Testing Fault Management Agent ZED Logic - Automated Auto-Spare Test when
|
||||
# drive is faulted due to IO ERRORS.
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create a pool with hot spares
|
||||
# 2. Create a filesystem with the primary cache disabled to force reads
|
||||
# 3. Write a file to the pool to be read back
|
||||
# 4. Inject IO ERRORS on read with a zinject error handler
|
||||
# 5. Verify the ZED kicks in a hot spare and expected pool/device status
|
||||
# 6. Clear the fault
|
||||
# 7. Verify the hot spare is available and expected pool/device status
|
||||
#
|
||||
|
||||
verify_runnable "both"

#
# Tear down whatever the test left behind: remove every zinject handler,
# destroy the test pool if it still exists, and delete the backing files.
# Registered with log_onexit and also called at the end of each loop pass.
#
function cleanup
{
	log_must zinject -c all

	if poolexists $TESTPOOL; then
		destroy_pool $TESTPOOL
	fi

	# $VDEV_FILES is left unquoted on purpose: it holds several paths.
	rm -f $VDEV_FILES $SPARE_FILE
}
|
||||
|
||||
log_assert "Testing automated auto-spare FMA test"

log_onexit cleanup

# File that is written once and then read back to trigger the injected errors.
TESTFILE="/$TESTPOOL/$TESTFS/testfile"

# Repeat the whole scenario for each redundant vdev layout.
for type in "mirror" "raidz" "raidz2"; do
	# 1. Create a pool with hot spares
	# Fail right here if the backing files cannot be created, instead of
	# letting 'zpool create' report a less obvious error afterwards.
	log_must truncate -s $SPA_MINDEVSIZE $VDEV_FILES $SPARE_FILE
	log_must zpool create -f $TESTPOOL $type $VDEV_FILES spare $SPARE_FILE

	# 2. Create a filesystem with the primary cache disabled to force reads
	log_must zfs create -o primarycache=none $TESTPOOL/$TESTFS
	log_must zfs set recordsize=16k $TESTPOOL/$TESTFS

	# 3. Write a file to the pool to be read back
	log_must dd if=/dev/urandom of=$TESTFILE bs=1M count=16

	# 4. Inject IO ERRORS on read with a zinject error handler
	log_must zinject -d $FAULT_FILE -e io -T read $TESTPOOL
	log_must cp $TESTFILE /dev/null

	# 5. Verify the ZED kicks in a hot spare and expected pool/device status
	log_note "Wait for ZED to auto-spare"
	log_must wait_vdev_state $TESTPOOL $FAULT_FILE "FAULTED" 60
	log_must wait_vdev_state $TESTPOOL $SPARE_FILE "ONLINE" 60
	log_must wait_hotspare_state $TESTPOOL $SPARE_FILE "INUSE"
	log_must check_state $TESTPOOL "" "DEGRADED"

	# 6. Clear the fault
	log_must zinject -c all
	log_must zpool clear $TESTPOOL $FAULT_FILE

	# 7. Verify the hot spare is available and expected pool/device status
	log_must wait_vdev_state $TESTPOOL $FAULT_FILE "ONLINE" 60
	log_must wait_hotspare_state $TESTPOOL $SPARE_FILE "AVAIL"
	log_must is_pool_resilvered $TESTPOOL
	log_must check_state $TESTPOOL "" "ONLINE"

	# Start the next layout from a clean slate.
	cleanup
done

log_pass "Auto-spare test successful"
|
|
@ -0,0 +1,90 @@
|
|||
#!/bin/ksh -p
|
||||
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# This file and its contents are supplied under the terms of the
|
||||
# Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
# You may only use this file in accordance with the terms of version
|
||||
# 1.0 of the CDDL.
|
||||
#
|
||||
# A full copy of the text of the CDDL should have accompanied this
|
||||
# source. A copy of the CDDL is also available via the Internet at
|
||||
# http://www.illumos.org/license/CDDL.
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2017 by Intel Corporation. All rights reserved.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/fault/fault.cfg
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Testing Fault Management Agent ZED Logic - Automated Auto-Spare Test when
|
||||
# drive is faulted due to CHECKSUM ERRORS.
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create a pool with hot spares
|
||||
# 2. Create a filesystem with the primary cache disabled to force reads
|
||||
# 3. Write a file to the pool to be read back
|
||||
# 4. Inject CHECKSUM ERRORS on read with a zinject error handler
|
||||
# 5. Verify the ZED kicks in a hot spare and expected pool/device status
|
||||
# 6. Clear the fault
|
||||
# 7. Verify the hot spare is available and expected pool/device status
|
||||
#
|
||||
|
||||
verify_runnable "both"

#
# Return the system to its pre-test state: cancel all zinject handlers,
# destroy the test pool when present, and remove the file-backed devices.
# Registered with log_onexit and also called at the end of each loop pass.
#
function cleanup
{
	log_must zinject -c all

	if poolexists $TESTPOOL; then
		destroy_pool $TESTPOOL
	fi

	# $VDEV_FILES is left unquoted on purpose: it holds several paths.
	rm -f $VDEV_FILES $SPARE_FILE
}
|
||||
|
||||
log_assert "Testing automated auto-spare FMA test"

log_onexit cleanup

# File that is written once and then read back to trigger the injected errors.
TESTFILE="/$TESTPOOL/$TESTFS/testfile"

# Repeat the whole scenario for each redundant vdev layout.
for type in "mirror" "raidz" "raidz2"; do
	# 1. Create a pool with hot spares
	# Fail right here if the backing files cannot be created, instead of
	# letting 'zpool create' report a less obvious error afterwards.
	log_must truncate -s $SPA_MINDEVSIZE $VDEV_FILES $SPARE_FILE
	log_must zpool create -f $TESTPOOL $type $VDEV_FILES spare $SPARE_FILE

	# 2. Create a filesystem with the primary cache disabled to force reads
	log_must zfs create -o primarycache=none $TESTPOOL/$TESTFS
	log_must zfs set recordsize=16k $TESTPOOL/$TESTFS

	# 3. Write a file to the pool to be read back
	log_must dd if=/dev/urandom of=$TESTFILE bs=1M count=16

	# 4. Inject CHECKSUM ERRORS on read with a zinject error handler
	log_must zinject -d $FAULT_FILE -e corrupt -f 50 -T read $TESTPOOL
	log_must cp $TESTFILE /dev/null

	# 5. Verify the ZED kicks in a hot spare and expected pool/device status
	log_note "Wait for ZED to auto-spare"
	log_must wait_vdev_state $TESTPOOL $FAULT_FILE "DEGRADED" 60
	log_must wait_vdev_state $TESTPOOL $SPARE_FILE "ONLINE" 60
	log_must wait_hotspare_state $TESTPOOL $SPARE_FILE "INUSE"
	log_must check_state $TESTPOOL "" "DEGRADED"

	# 6. Clear the fault
	log_must zinject -c all
	log_must zpool clear $TESTPOOL $FAULT_FILE

	# 7. Verify the hot spare is available and expected pool/device status
	log_must wait_vdev_state $TESTPOOL $FAULT_FILE "ONLINE" 60
	log_must wait_hotspare_state $TESTPOOL $SPARE_FILE "AVAIL"
	log_must check_state $TESTPOOL "" "ONLINE"

	# Start the next layout from a clean slate.
	cleanup
done

log_pass "Auto-spare test successful"
|
|
@ -51,3 +51,8 @@ if is_linux; then
|
|||
else
|
||||
DEV_DSKDIR="/dev"
|
||||
fi
|
||||
|
||||
# File-backed devices used as the data vdevs of the auto-spare test pool.
VDEV_FILES="$TEST_BASE_DIR/file-1 $TEST_BASE_DIR/file-2"
VDEV_FILES="$VDEV_FILES $TEST_BASE_DIR/file-3 $TEST_BASE_DIR/file-4"
export VDEV_FILES

# File-backed device added to the pool as its hot spare.
SPARE_FILE="$TEST_BASE_DIR/spare-1"
export SPARE_FILE

# Device targeted by zinject; same path as the first entry of VDEV_FILES.
FAULT_FILE="$TEST_BASE_DIR/file-1"
export FAULT_FILE
|
||||
|
|
Loading…
Reference in New Issue