ZTS: Add auto-spare tests

The ZED is expected to automatically kick in a hot spare device
when there's one available in the pool and a sufficient number of
read errors have been encountered.  Use zinject to simulate the
failure condition and verify the hot spare is used.

auto_spare_001_pos.ksh: read IO errors, the vdev is FAULTED
auto_spare_002_pos.ksh: read CHECKSUM errors, the vdev is DEGRADED

Reviewed-by: Richard Elling <Richard.Elling@RichardElling.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: David Quigley <david.quigley@intel.com>
Closes #6280
This commit is contained in:
David Quigley 2017-10-23 12:42:37 -06:00 committed by Brian Behlendorf
parent f8cd871a01
commit d9daa7abcf
6 changed files with 241 additions and 2 deletions

View File

@ -381,7 +381,8 @@ tests = ['events_001_pos', 'events_002_pos']
tests = ['exec_001_pos', 'exec_002_neg']
[tests/functional/fault]
tests = ['auto_online_001_pos', 'auto_replace_001_pos']
tests = ['auto_online_001_pos', 'auto_replace_001_pos', 'auto_spare_001_pos',
'auto_spare_002_pos']
[tests/functional/features/async_destroy]
tests = ['async_destroy_001_pos']

View File

@ -2029,6 +2029,31 @@ function check_hotspare_state # pool disk state{inuse,avail}
return 0
}
#
# Wait until a hotspare transitions to a given state or times out.
#
# $1 pool    - name of the pool containing the hot spare
# $2 disk    - spare device; a leading "$DEV_DSKDIR/" is stripped
# $3 state   - state to wait for, passed through to check_hotspare_state
# $4 timeout - maximum number of one-second polls (default: 60)
#
# Return 0 when pool/disk matches expected state, 1 on timeout.
#
function wait_hotspare_state # pool disk state timeout
{
	typeset pool=$1
	# Strip the device directory prefix.  The pattern must expand
	# $DEV_DSKDIR; a literal "$/DEV_DSKDIR/" would never match anything.
	typeset disk=${2#$DEV_DSKDIR/}
	typeset state=$3
	typeset timeout=${4:-60}
	typeset -i i=0

	# Poll once per second until the state matches or the budget is spent.
	while [[ $i -lt $timeout ]]; do
		if check_hotspare_state "$pool" "$disk" "$state"; then
			return 0
		fi

		i=$((i+1))
		sleep 1
	done

	return 1
}
#
# Verify a given slog disk is inuse or avail
#
@ -2067,6 +2092,31 @@ function check_vdev_state # pool disk state{online,offline,unavail}
return 0
}
#
# Wait until a vdev transitions to a given state or times out.
#
# $1 pool    - name of the pool containing the vdev
# $2 disk    - vdev device; a leading "$DEV_DSKDIR/" is stripped
# $3 state   - state to wait for, passed through to check_vdev_state
# $4 timeout - maximum number of one-second polls (default: 60)
#
# Return 0 when pool/disk matches expected state, 1 on timeout.
#
function wait_vdev_state # pool disk state timeout
{
	typeset pool=$1
	# Strip the device directory prefix.  The pattern must expand
	# $DEV_DSKDIR; a literal "$/DEV_DSKDIR/" would never match anything.
	typeset disk=${2#$DEV_DSKDIR/}
	typeset state=$3
	typeset timeout=${4:-60}
	typeset -i i=0

	# Poll once per second until the state matches or the budget is spent.
	while [[ $i -lt $timeout ]]; do
		if check_vdev_state "$pool" "$disk" "$state"; then
			return 0
		fi

		i=$((i+1))
		sleep 1
	done

	return 1
}
#
# Check the output of 'zpool status -v <pool>',
# and to see if the content of <token> contain the <keyword> specified.

View File

@ -4,4 +4,6 @@ dist_pkgdata_SCRIPTS = \
setup.ksh \
cleanup.ksh \
auto_online_001_pos.ksh \
auto_replace_001_pos.ksh
auto_replace_001_pos.ksh \
auto_spare_001_pos.ksh \
auto_spare_002_pos.ksh

View File

@ -0,0 +1,91 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#
#
# Copyright (c) 2017 by Intel Corporation. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/fault/fault.cfg
#
# DESCRIPTION:
# Testing Fault Management Agent ZED Logic - Automated Auto-Spare Test when
# drive is faulted due to IO ERRORS.
#
# STRATEGY:
# 1. Create a pool with hot spares
# 2. Create a filesystem with the primary cache disabled to force reads
# 3. Write a file to the pool to be read back
# 4. Inject IO ERRORS on read with a zinject error handler
# 5. Verify the ZED kicks in a hot spare and expected pool/device status
# 6. Clear the fault
# 7. Verify the hot spare is available and expected pool/device status
#
verify_runnable "both"
# Undo everything the test set up: cancel all zinject handlers, destroy the
# test pool if it still exists, and delete the file-backed vdevs and spare.
function cleanup
{
	log_must zinject -c all

	if poolexists $TESTPOOL; then
		destroy_pool $TESTPOOL
	fi

	# VDEV_FILES is a space-separated list and is split on purpose.
	rm -f $VDEV_FILES $SPARE_FILE
}
log_assert "Testing automated auto-spare FMA test"

log_onexit cleanup

# File written in step 3 and read back in step 4 to trigger the injected errors.
TESTFILE="/$TESTPOOL/$TESTFS/testfile"

# Run the whole scenario once per redundant vdev layout.
for type in "mirror" "raidz" "raidz2"; do
	# 1. Create a pool with hot spares
	# Run truncate under log_must like every other setup step so a
	# failure to create the backing files is reported at its source.
	log_must truncate -s $SPA_MINDEVSIZE $VDEV_FILES $SPARE_FILE
	log_must zpool create -f $TESTPOOL $type $VDEV_FILES spare $SPARE_FILE

	# 2. Create a filesystem with the primary cache disabled to force reads
	log_must zfs create -o primarycache=none $TESTPOOL/$TESTFS
	log_must zfs set recordsize=16k $TESTPOOL/$TESTFS

	# 3. Write a file to the pool to be read back
	log_must dd if=/dev/urandom of=$TESTFILE bs=1M count=16

	# 4. Inject IO ERRORS on read with a zinject error handler
	log_must zinject -d $FAULT_FILE -e io -T read $TESTPOOL
	log_must cp $TESTFILE /dev/null

	# 5. Verify the ZED kicks in a hot spare and expected pool/device status
	log_note "Wait for ZED to auto-spare"
	log_must wait_vdev_state $TESTPOOL $FAULT_FILE "FAULTED" 60
	log_must wait_vdev_state $TESTPOOL $SPARE_FILE "ONLINE" 60
	log_must wait_hotspare_state $TESTPOOL $SPARE_FILE "INUSE"
	log_must check_state $TESTPOOL "" "DEGRADED"

	# 6. Clear the fault
	log_must zinject -c all
	log_must zpool clear $TESTPOOL $FAULT_FILE

	# 7. Verify the hot spare is available and expected pool/device status
	log_must wait_vdev_state $TESTPOOL $FAULT_FILE "ONLINE" 60
	log_must wait_hotspare_state $TESTPOOL $SPARE_FILE "AVAIL"
	log_must is_pool_resilvered $TESTPOOL
	log_must check_state $TESTPOOL "" "ONLINE"

	# Tear the pool down so the next layout starts from scratch.
	cleanup
done

log_pass "Auto-spare test successful"

View File

@ -0,0 +1,90 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#
#
# Copyright (c) 2017 by Intel Corporation. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/fault/fault.cfg
#
# DESCRIPTION:
# Testing Fault Management Agent ZED Logic - Automated Auto-Spare Test when
# drive is degraded due to CHECKSUM ERRORS.
#
# STRATEGY:
# 1. Create a pool with hot spares
# 2. Create a filesystem with the primary cache disabled to force reads
# 3. Write a file to the pool to be read back
# 4. Inject CHECKSUM ERRORS on read with a zinject error handler
# 5. Verify the ZED kicks in a hot spare and expected pool/device status
# 6. Clear the fault
# 7. Verify the hot spare is available and expected pool/device status
#
verify_runnable "both"
# Undo everything the test set up: cancel all zinject handlers, destroy the
# test pool if it still exists, and delete the file-backed vdevs and spare.
function cleanup
{
	log_must zinject -c all

	if poolexists $TESTPOOL; then
		destroy_pool $TESTPOOL
	fi

	# VDEV_FILES is a space-separated list and is split on purpose.
	rm -f $VDEV_FILES $SPARE_FILE
}
log_assert "Testing automated auto-spare FMA test"

log_onexit cleanup

# File written in step 3 and read back in step 4 to trigger the injected errors.
TESTFILE="/$TESTPOOL/$TESTFS/testfile"

# Run the whole scenario once per redundant vdev layout.
for type in "mirror" "raidz" "raidz2"; do
	# 1. Create a pool with hot spares
	# Run truncate under log_must like every other setup step so a
	# failure to create the backing files is reported at its source.
	log_must truncate -s $SPA_MINDEVSIZE $VDEV_FILES $SPARE_FILE
	log_must zpool create -f $TESTPOOL $type $VDEV_FILES spare $SPARE_FILE

	# 2. Create a filesystem with the primary cache disabled to force reads
	log_must zfs create -o primarycache=none $TESTPOOL/$TESTFS
	log_must zfs set recordsize=16k $TESTPOOL/$TESTFS

	# 3. Write a file to the pool to be read back
	log_must dd if=/dev/urandom of=$TESTFILE bs=1M count=16

	# 4. Inject CHECKSUM ERRORS on read with a zinject error handler
	log_must zinject -d $FAULT_FILE -e corrupt -f 50 -T read $TESTPOOL
	log_must cp $TESTFILE /dev/null

	# 5. Verify the ZED kicks in a hot spare and expected pool/device status
	log_note "Wait for ZED to auto-spare"
	log_must wait_vdev_state $TESTPOOL $FAULT_FILE "DEGRADED" 60
	log_must wait_vdev_state $TESTPOOL $SPARE_FILE "ONLINE" 60
	log_must wait_hotspare_state $TESTPOOL $SPARE_FILE "INUSE"
	log_must check_state $TESTPOOL "" "DEGRADED"

	# 6. Clear the fault
	log_must zinject -c all
	log_must zpool clear $TESTPOOL $FAULT_FILE

	# 7. Verify the hot spare is available and expected pool/device status
	log_must wait_vdev_state $TESTPOOL $FAULT_FILE "ONLINE" 60
	log_must wait_hotspare_state $TESTPOOL $SPARE_FILE "AVAIL"
	log_must check_state $TESTPOOL "" "ONLINE"

	# Tear the pool down so the next layout starts from scratch.
	cleanup
done

log_pass "Auto-spare test successful"

View File

@ -51,3 +51,8 @@ if is_linux; then
else
DEV_DSKDIR="/dev"
fi
# File-backed devices used by the auto-spare tests:
#   VDEV_FILES - space-separated list of the pool's data vdevs
#   SPARE_FILE - the hot spare
#   FAULT_FILE - the data vdev that zinject errors are targeted at
VDEV_FILES="$TEST_BASE_DIR/file-1 $TEST_BASE_DIR/file-2"
VDEV_FILES="$VDEV_FILES $TEST_BASE_DIR/file-3 $TEST_BASE_DIR/file-4"
SPARE_FILE="$TEST_BASE_DIR/spare-1"
FAULT_FILE="$TEST_BASE_DIR/file-1"
export VDEV_FILES SPARE_FILE FAULT_FILE