2010-08-26 18:22:58 +00:00
|
|
|
#!/bin/bash
|
|
|
|
#
|
2010-09-04 20:26:23 +00:00
|
|
|
# Common support functions for testing scripts. If a script-config
|
2010-08-26 18:22:58 +00:00
|
|
|
# file is available it will be sourced so in-tree kernel modules and
|
2010-09-04 20:26:23 +00:00
|
|
|
# utilities will be used. If no script-config can be found then the
|
2010-08-26 18:22:58 +00:00
|
|
|
# installed kernel modules and utilities will be used.
|
|
|
|
|
|
|
|
# Directory the script was invoked from.  $0 is quoted so an install
# path containing whitespace survives the dirname call.
basedir="$(dirname "$0")"

# When a script-config file exists one directory above the script it is
# sourced; otherwise the default module lists below are used.
SCRIPT_CONFIG=zfs-script-config.sh
if [ -f "${basedir}/../${SCRIPT_CONFIG}" ]; then
	. "${basedir}/../${SCRIPT_CONFIG}"
else
	KERNEL_MODULES=(zlib_deflate zlib_inflate)
	MODULES=(spl splat zavl znvpair zunicode zcommon zfs)
fi
|
|
|
|
|
|
|
|
# Shared script state.  PROG is a placeholder that is printed as the
# message prefix by die(); the remaining values start out empty.
PROG="<define PROG>"
CLEANUP=
VERBOSE=
VERBOSE_FLAG=
FORCE=
FORCE_FLAG=
DUMP_LOG=
ERROR=
RAID0S=()
RAID10S=()
RAIDZS=()
RAIDZ2S=()

# Test selection; both may be preset in the environment.
TESTS_RUN=${TESTS_RUN:-'*'}
TESTS_SKIP=${TESTS_SKIP:-}
|
2010-08-26 18:22:58 +00:00
|
|
|
|
|
|
|
# Installation directories.  The @...@ tokens are template placeholders
# and are kept verbatim here.
prefix=@prefix@
exec_prefix=@exec_prefix@
pkgdatadir=@datarootdir@/@PACKAGE@
bindir=@bindir@
sbindir=@sbindir@
udevdir=@udevdir@
udevruledir=@udevruledir@
mounthelperdir=@mounthelperdir@
sysconfdir=@sysconfdir@
localstatedir=@localstatedir@
|
2010-08-26 18:22:58 +00:00
|
|
|
|
|
|
|
# Data directories; each keeps a value already present in the
# environment and otherwise falls back to the default shown.
ETCDIR=${ETCDIR:-/etc}
DEVDIR=${DEVDIR:-/dev/disk/by-vdev}
ZPOOLDIR=${ZPOOLDIR:-${pkgdatadir}/zpool-config}
ZPIOSDIR=${ZPIOSDIR:-${pkgdatadir}/zpios-test}
ZPIOSPROFILEDIR=${ZPIOSPROFILEDIR:-${pkgdatadir}/zpios-profile}
TESTSDIR=${TESTSDIR:-${pkgdatadir}/zfs-tests}
RUNFILEDIR=${RUNFILEDIR:-${pkgdatadir}/runfiles}

# ZFS command line utilities.
ZDB=${ZDB:-${sbindir}/zdb}
ZFS=${ZFS:-${sbindir}/zfs}
ZINJECT=${ZINJECT:-${sbindir}/zinject}
ZHACK=${ZHACK:-${sbindir}/zhack}
ZPOOL=${ZPOOL:-${sbindir}/zpool}
ZTEST=${ZTEST:-${sbindir}/ztest}
ZPIOS=${ZPIOS:-${sbindir}/zpios}

# Helper scripts and test-suite locations.
COMMON_SH=${COMMON_SH:-${pkgdatadir}/common.sh}
ZFS_SH=${ZFS_SH:-${pkgdatadir}/zfs.sh}
ZPOOL_CREATE_SH=${ZPOOL_CREATE_SH:-${pkgdatadir}/zpool-create.sh}
ZPIOS_SH=${ZPIOS_SH:-${pkgdatadir}/zpios.sh}
ZPIOS_SURVEY_SH=${ZPIOS_SURVEY_SH:-${pkgdatadir}/zpios-survey.sh}
TEST_RUNNER=${TEST_RUNNER:-${pkgdatadir}/test-runner/bin/test-runner.py}
STF_TOOLS=${STF_TOOLS:-${pkgdatadir}/test-runner}
STF_SUITE=${STF_SUITE:-${pkgdatadir}/zfs-tests}

# System utilities.
LDMOD=${LDMOD:-/sbin/modprobe}
LSMOD=${LSMOD:-/sbin/lsmod}
RMMOD=${RMMOD:-/sbin/rmmod}
INFOMOD=${INFOMOD:-/sbin/modinfo}
LOSETUP=${LOSETUP:-/sbin/losetup}
|
Add zfault zpool configurations and tests
Eleven new zpool configurations were added to allow testing of various
failure cases. The first 5 zpool configurations leverage the 'faulty'
md device type which allows us to simulate IO errors at the block layer.
The last 6 zpool configurations leverage the scsi_debug module provided
by modern kernels. This device allows you to create virtual scsi
devices which are backed by a ram disk. With this setup we can verify
the full IO stack by injecting faults at the lowest layer. Both methods
of fault injection are important to verifying the IO stack.
The zfs code itself also provides a mechanism for error injection
via the zinject command line tool. While we should also take advantage
of this approach to validate the code it does not address any of the
Linux integration issues which are the most concerning. For the
moment we're trusting that the upstream Solaris guys are running
zinject and would have caught internal zfs logic errors.
Currently, there are 6 r/w test cases layered on top of the 'faulty'
md devices. They include 3 write tests for soft/transient errors,
hard/permanent errors, and all writes error to the device. There
are 3 matching read tests for soft/transient errors, hard/permanent
errors, and fixable read error with a write. Although for this last
case zfs doesn't do anything special.
The seventh test case verifies zfs detects and corrects checksum
errors. In this case one of the drives is extensively damaged
by dd'ing over large sections of it. We then ensure zfs logs the
issue and correctly rebuilds the damage.
The next test cases use the scsi_debug configuration to inject errors
at the bottom of the scsi stack. This ensures we find any flaws in the
scsi midlayer or our usage of it. Plus it stresses the device specific
retry, timeout, and error handling outside of zfs's control.
The eighth test case is to verify that the system correctly handles an
intermittent device timeout. Here the scsi_debug device drops 1 in N
requests resulting in a retry at the block level. The ZFS code
does specify the FAILFAST option but it turns out that for this case
the Linux IO stack will still retry the command. The FAILFAST logic
located in scsi_noretry_cmd() does not seem to apply to the simple
timeout case. It appears to be more targeted to specific device or
transport errors from the lower layers.
The ninth test case handles a persistent failure in which the device
is removed from the system by Linux. The test verifies that the failure
is detected, the device is made unavailable, and then can be successfully
re-added when brought back online. Additionally, it ensures that errors
and events are logged to the correct places and that no data corruption
has occurred due to the failure.
2010-09-28 23:32:12 +00:00
|
|
|
MDADM=${MDADM:-/sbin/mdadm}
DMSETUP=${DMSETUP:-/sbin/dmsetup}
|
Add zfault zpool configurations and tests
Eleven new zpool configurations were added to allow testing of various
failure cases. The first 5 zpool configurations leverage the 'faulty'
md device type which allow us to simuluate IO errors at the block layer.
The last 6 zpool configurations leverage the scsi_debug module provided
by modern kernels. This device allows you to create virtual scsi
devices which are backed by a ram disk. With this setup we can verify
the full IO stack by injecting faults at the lowest layer. Both methods
of fault injection are important to verifying the IO stack.
The zfs code itself also provides a mechanism for error injection
via the zinject command line tool. While we should also take advantage
of this appraoch to validate the code it does not address any of the
Linux integration issues which are the most concerning. For the
moment we're trusting that the upstream Solaris guys are running
zinject and would have caught internal zfs logic errors.
Currently, there are 6 r/w test cases layered on top of the 'faulty'
md devices. They include 3 writes tests for soft/transient errors,
hard/permenant errors, and all writes error to the device. There
are 3 matching read tests for soft/transient errors, hard/permenant
errors, and fixable read error with a write. Although for this last
case zfs doesn't do anything special.
The seventh test case verifies zfs detects and corrects checksum
errors. In this case one of the drives is extensively damaged and
by dd'ing over large sections of it. We then ensure zfs logs the
issue and correctly rebuilds the damage.
The next test cases use the scsi_debug configuration to injects error
at the bottom of the scsi stack. This ensures we find any flaws in the
scsi midlayer or our usage of it. Plus it stresses the device specific
retry, timeout, and error handling outside of zfs's control.
The eighth test case is to verify that the system correctly handles an
intermittent device timeout. Here the scsi_debug device drops 1 in N
requests resulting in a retry either at the block level. The ZFS code
does specify the FAILFAST option but it turns out that for this case
the Linux IO stack with still retry the command. The FAILFAST logic
located in scsi_noretry_cmd() does no seem to apply to the simply
timeout case. It appears to be more targeted to specific device or
transport errors from the lower layers.
The ninth test case handles a persistent failure in which the device
is removed from the system by Linux. The test verifies that the failure
is detected, the device is made unavailable, and then can be successfully
re-add when brought back online. Additionally, it ensures that errors
and events are logged to the correct places and the no data corruption
has occured due to the failure.
2010-09-28 23:32:12 +00:00
|
|
|
PARTED=${PARTED:-/sbin/parted}
BLOCKDEV=${BLOCKDEV:-/sbin/blockdev}
LSSCSI=${LSSCSI:-/usr/bin/lsscsi}
SCSIRESCAN=${SCSIRESCAN:-/usr/bin/scsi-rescan}
SYSCTL=${SYSCTL:-/sbin/sysctl}
UDEVADM=${UDEVADM:-/sbin/udevadm}
AWK=${AWK:-/usr/bin/awk}
GDB=${GDB:-/usr/bin/gdb}

ZED_PIDFILE=${ZED_PIDFILE:-${localstatedir}/run/zed.pid}
|
|
|
|
|
2010-08-26 18:44:39 +00:00
|
|
|
# Terminal colour escape sequences.  They are stored with a literal
# backslash and only become escapes when printed with 'echo -e'.
COLOR_BLACK="\033[0;30m"
COLOR_DK_GRAY="\033[1;30m"
COLOR_BLUE="\033[0;34m"
COLOR_LT_BLUE="\033[1;34m"
COLOR_GREEN="\033[0;32m"
COLOR_LT_GREEN="\033[1;32m"
COLOR_CYAN="\033[0;36m"
COLOR_LT_CYAN="\033[1;36m"
COLOR_RED="\033[0;31m"
COLOR_LT_RED="\033[1;31m"
COLOR_PURPLE="\033[0;35m"
COLOR_LT_PURPLE="\033[1;35m"
COLOR_BROWN="\033[0;33m"
COLOR_YELLOW="\033[1;33m"
COLOR_LT_GRAY="\033[0;37m"
COLOR_WHITE="\033[1;37m"
COLOR_RESET="\033[0m"
|
|
|
|
|
2010-08-26 18:22:58 +00:00
|
|
|
#
# Print "$1" on stderr, prefixed with ${PROG}, and exit with status 1.
# Backslash escapes in the message (e.g. "\n") are interpreted.
#
die() {
	echo -e "${PROG}: $1" >&2
	exit 1
}
|
|
|
|
|
|
|
|
#
# Echo all arguments, but only when VERBOSE is non-empty.
#
msg() {
	# Quoted test: an unquoted ${VERBOSE} holding several words would
	# make '[' fail with a syntax error instead of printing.
	if [ -n "${VERBOSE}" ]; then
		echo "$@"
	fi
}
|
|
|
|
|
|
|
|
#
# Print "Pass" wrapped in COLOR_GREEN / COLOR_RESET.
#
pass() {
	echo -e "${COLOR_GREEN}Pass${COLOR_RESET}"
}
|
|
|
|
|
|
|
|
#
# Print "Fail (<status>)" wrapped in COLOR_RED / COLOR_RESET and exit
# with that status.
#   $1 - exit status; defaults to 1 so that a bare 'fail' can never
#        terminate the script with a success status.
#
fail() {
	local rc=${1:-1}

	echo -e "${COLOR_RED}Fail${COLOR_RESET} (${rc})"
	exit "${rc}"
}
|
|
|
|
|
2010-08-26 18:44:39 +00:00
|
|
|
#
# Print "Skip" wrapped in COLOR_BROWN / COLOR_RESET.
#
skip() {
	echo -e "${COLOR_BROWN}Skip${COLOR_RESET}"
}
|
|
|
|
|
2013-01-28 21:39:40 +00:00
|
|
|
#
# Fill a directory tree with randomly sized files of random data.
#   $1 - root directory; {a..g}/{h,i} sub-directories are created in it
#   $2 - exclusive upper bound on the number of files per directory
#   $3 - exclusive upper bound on each file's size, in 1k blocks
# Always returns 0.
#
populate() {
	local ROOT=$1
	local MAX_DIR_SIZE=$2
	local MAX_FILE_SIZE=$3
	local dir file count size i

	mkdir -p "${ROOT}"/{a,b,c,d,e,f,g}/{h,i}

	# NUL-delimited so that directory names containing whitespace are
	# not split; restricted to directories because mktemp -p needs one.
	while IFS= read -r -d '' dir; do
		count=$((RANDOM % MAX_DIR_SIZE))

		for ((i = 0; i < count; i++)); do
			file=$(mktemp -p "${dir}") || continue
			size=$((RANDOM % MAX_FILE_SIZE))
			dd if=/dev/urandom of="${file}" bs=1k count="${size}" &>/dev/null
		done
	done < <(find "${ROOT}" -type d -print0)

	return 0
}
|
|
|
|
|
2011-10-11 21:36:42 +00:00
|
|
|
#
# Create a random directory tree of files and sub-directories to act
# as a copy source for the various regression tests.  The tree's path
# is stored in the global SRC_DIR and the tree is removed on INT, TERM
# or EXIT.
#
init() {
	# Abort if the directory cannot be created: with an empty SRC_DIR
	# populate would build its tree directly under "/".
	SRC_DIR=$(mktemp -d -p /var/tmp/ zfs.src.XXXXXXXX) ||
		die "Error: Unable to create a source directory in /var/tmp"

	# %q keeps the path a single word when the trap string is re-parsed.
	trap "rm -Rf -- $(printf '%q' "${SRC_DIR}")" INT TERM EXIT
	populate "${SRC_DIR}" 10 100
}
|
|
|
|
|
2010-08-26 18:22:58 +00:00
|
|
|
#
# Write 1 to the kernel.spl.debug.dump sysctl, take the fifth
# space-separated field of the last dmesg line as a file name, run
# ${SPLBUILD}/cmd/spl on that file with its output stored in
# "<name>.log", then print where the log was written followed by the
# log's last line.  Always returns 0.
#
spl_dump_log() {
	local NAME

	${SYSCTL} -w kernel.spl.debug.dump=1 &>/dev/null
	NAME=$(dmesg | tail -n 1 | cut -f5 -d' ')
	"${SPLBUILD}/cmd/spl" "${NAME}" >"${NAME}.log"

	echo
	echo "Dumped debug log: ${NAME}.log"
	tail -n1 "${NAME}.log"
	echo

	return 0
}
|
|
|
|
|
|
|
|
#
# Verify that none of the modules in MODULES is currently loaded and
# that each of them is known to ${INFOMOD}.
# Returns 0 when both hold.  Otherwise returns 1 with a description of
# the problem stored in the global ERROR.
#
check_modules() {
	local LOADED_MODULES=()
	local MISSING_MODULES=()
	local MOD NAME

	for MOD in "${MODULES[@]}"; do
		NAME=$(basename "${MOD}" .ko)

		# Anchor on the column separator so that e.g. "spl" is not
		# reported as loaded merely because "splat" is.
		if ${LSMOD} | grep -q "^${NAME} "; then
			LOADED_MODULES=("${NAME}" "${LOADED_MODULES[@]}")
		fi

		# Run the tool and act on its exit status; the former
		# '[ tool module ]' form was a malformed test that never
		# executed it, so nothing was ever reported missing.
		if ! ${INFOMOD} "${MOD}" &>/dev/null; then
			MISSING_MODULES=("\t${MOD}\n" "${MISSING_MODULES[@]}")
		fi
	done

	if [ ${#LOADED_MODULES[@]} -gt 0 ]; then
		ERROR="Unload these modules with '${PROG} -u':\n"
		ERROR="${ERROR}${LOADED_MODULES[*]}"
		return 1
	fi

	if [ ${#MISSING_MODULES[@]} -gt 0 ]; then
		ERROR="The following modules can not be found,"
		ERROR="${ERROR} ensure your source trees are built:\n"
		ERROR="${ERROR}${MISSING_MODULES[*]}"
		return 1
	fi

	return 0
}
|
|
|
|
|
|
|
|
#
# Load one kernel module with ${LDMOD}.
#   $1   - module name or path to a .ko file
#   $2.. - module options
# Prints a progress line when VERBOSE is non-empty.  On failure prints
# "Failed to load ..." on stdout and returns 1, otherwise returns 0.
#
load_module() {
	local NAME
	NAME=$(basename "$1" .ko)

	if [ -n "${VERBOSE}" ]; then
		echo "Loading ${NAME} ($*)"
	fi

	# $* is deliberately unquoted: an argument such as "a=1 b=2" is
	# split into separate options for the loader.
	# shellcheck disable=SC2048,SC2086
	if ! ${LDMOD} $* &>/dev/null; then
		echo "Failed to load ${NAME} ($*)"
		return 1
	fi

	return 0
}
|
|
|
|
|
|
|
|
#
# Load every module in KERNEL_MODULES and then every module in MODULES.
#   $@ - per-module options in the form "<module>=<options>"; the text
#        after the first '=' is handed to the matching module.
# Failures while loading KERNEL_MODULES are ignored and their output is
# discarded.  Returns 1 as soon as a module from MODULES fails to load,
# otherwise 0.
#
load_modules() {
	local MOD NAME VALUE OPT OPT_NAME

	mkdir -p /etc/zfs

	for MOD in "${KERNEL_MODULES[@]}"; do
		load_module "${MOD}" >/dev/null
	done

	for MOD in "${MODULES[@]}"; do
		NAME=$(basename "${MOD}" .ko)
		VALUE=

		# When several arguments name the same module the last wins.
		for OPT in "$@"; do
			OPT_NAME=${OPT%%=*}

			if [ "${NAME}" = "${OPT_NAME}" ]; then
				VALUE=${OPT#*=}
			fi
		done

		# VALUE is deliberately unquoted: it may hold several
		# space-separated options, or nothing at all.
		# shellcheck disable=SC2086
		load_module "${MOD}" ${VALUE} || return 1
	done

	if [ -n "${VERBOSE}" ]; then
		echo "Successfully loaded ZFS module stack"
	fi

	return 0
}
|
|
|
|
|
|
|
|
#
# Unload one kernel module with ${RMMOD}.
#   $1 - module name or path to a .ko file
# Prints a progress line when VERBOSE is non-empty.  On failure stores
# a message in the global ERROR and returns 1, otherwise returns 0.
#
unload_module() {
	local NAME
	NAME=$(basename "$1" .ko)

	if [ -n "${VERBOSE}" ]; then
		echo "Unloading ${NAME} ($*)"
	fi

	# Written as a plain assignment: in the old 'ERROR=... return 1'
	# form the assignment was only temporary for the builtin, so ERROR
	# was not left set for the caller.
	if ! ${RMMOD} "${NAME}"; then
		ERROR="Failed to unload ${NAME}"
		return 1
	fi

	return 0
}
|
|
|
|
|
|
|
|
#
# Unload the modules listed in MODULES, in reverse order.  Only modules
# that ${LSMOD} lists with a use count of 0 are unloaded; modules that
# are not loaded or still in use are skipped.  When DUMP_LOG is
# non-empty spl_dump_log is called just before "spl" is unloaded.
# Returns 1 as soon as an unload fails, otherwise 0.
#
unload_modules() {
	local MODULES_REVERSE=()
	local MOD NAME USE_COUNT
	local LS_NAME LS_COUNT

	for MOD in "${MODULES[@]}"; do
		MODULES_REVERSE=("${MOD}" "${MODULES_REVERSE[@]}")
	done

	for MOD in "${MODULES_REVERSE[@]}"; do
		NAME=$(basename "${MOD}" .ko)

		# The third column of the module listing is the use count;
		# USE_COUNT stays empty when the module is not listed.
		USE_COUNT=
		while read -r LS_NAME _ LS_COUNT _; do
			if [ "${LS_NAME}" = "${NAME}" ]; then
				USE_COUNT=${LS_COUNT}
				break
			fi
		done < <(${LSMOD})

		if [ "${USE_COUNT}" = 0 ]; then
			if [ -n "${DUMP_LOG}" ] && [ "${NAME}" = "spl" ]; then
				spl_dump_log
			fi

			unload_module "${MOD}" || return 1
		fi
	done

	if [ -n "${VERBOSE}" ]; then
		echo "Successfully unloaded ZFS module stack"
	fi

	return 0
}
|
|
|
|
|
Add zfault zpool configurations and tests
Eleven new zpool configurations were added to allow testing of various
failure cases. The first 5 zpool configurations leverage the 'faulty'
md device type which allow us to simuluate IO errors at the block layer.
The last 6 zpool configurations leverage the scsi_debug module provided
by modern kernels. This device allows you to create virtual scsi
devices which are backed by a ram disk. With this setup we can verify
the full IO stack by injecting faults at the lowest layer. Both methods
of fault injection are important to verifying the IO stack.
The zfs code itself also provides a mechanism for error injection
via the zinject command line tool. While we should also take advantage
of this appraoch to validate the code it does not address any of the
Linux integration issues which are the most concerning. For the
moment we're trusting that the upstream Solaris guys are running
zinject and would have caught internal zfs logic errors.
Currently, there are 6 r/w test cases layered on top of the 'faulty'
md devices. They include 3 writes tests for soft/transient errors,
hard/permenant errors, and all writes error to the device. There
are 3 matching read tests for soft/transient errors, hard/permenant
errors, and fixable read error with a write. Although for this last
case zfs doesn't do anything special.
The seventh test case verifies zfs detects and corrects checksum
errors. In this case one of the drives is extensively damaged and
by dd'ing over large sections of it. We then ensure zfs logs the
issue and correctly rebuilds the damage.
The next test cases use the scsi_debug configuration to injects error
at the bottom of the scsi stack. This ensures we find any flaws in the
scsi midlayer or our usage of it. Plus it stresses the device specific
retry, timeout, and error handling outside of zfs's control.
The eighth test case is to verify that the system correctly handles an
intermittent device timeout. Here the scsi_debug device drops 1 in N
requests resulting in a retry either at the block level. The ZFS code
does specify the FAILFAST option but it turns out that for this case
the Linux IO stack with still retry the command. The FAILFAST logic
located in scsi_noretry_cmd() does no seem to apply to the simply
timeout case. It appears to be more targeted to specific device or
transport errors from the lower layers.
The ninth test case handles a persistent failure in which the device
is removed from the system by Linux. The test verifies that the failure
is detected, the device is made unavailable, and then can be successfully
re-add when brought back online. Additionally, it ensures that errors
and events are logged to the correct places and the no data corruption
has occured due to the failure.
2010-09-28 23:32:12 +00:00
|
|
|
#
# Check that the losetup utility is installed.
#
check_loop_utils() {
	test -f "${LOSETUP}" || die "${LOSETUP} utility must be installed"
}
|
|
|
|
|
|
|
|
|
|
|
|
#
|
2014-04-08 23:46:13 +00:00
|
|
|
# Find and return an unused loop device. A new /dev/loopN node will be
|
|
|
|
# created if required. The kernel loop driver will automatically register
|
|
|
|
# the minor as long as it's less than /sys/module/loop/parameters/max_loop.
|
Add zfault zpool configurations and tests
Eleven new zpool configurations were added to allow testing of various
failure cases. The first 5 zpool configurations leverage the 'faulty'
md device type which allow us to simuluate IO errors at the block layer.
The last 6 zpool configurations leverage the scsi_debug module provided
by modern kernels. This device allows you to create virtual scsi
devices which are backed by a ram disk. With this setup we can verify
the full IO stack by injecting faults at the lowest layer. Both methods
of fault injection are important to verifying the IO stack.
The zfs code itself also provides a mechanism for error injection
via the zinject command line tool. While we should also take advantage
of this appraoch to validate the code it does not address any of the
Linux integration issues which are the most concerning. For the
moment we're trusting that the upstream Solaris guys are running
zinject and would have caught internal zfs logic errors.
Currently, there are 6 r/w test cases layered on top of the 'faulty'
md devices. They include 3 writes tests for soft/transient errors,
hard/permenant errors, and all writes error to the device. There
are 3 matching read tests for soft/transient errors, hard/permenant
errors, and fixable read error with a write. Although for this last
case zfs doesn't do anything special.
The seventh test case verifies zfs detects and corrects checksum
errors. In this case one of the drives is extensively damaged and
by dd'ing over large sections of it. We then ensure zfs logs the
issue and correctly rebuilds the damage.
The next test cases use the scsi_debug configuration to injects error
at the bottom of the scsi stack. This ensures we find any flaws in the
scsi midlayer or our usage of it. Plus it stresses the device specific
retry, timeout, and error handling outside of zfs's control.
The eighth test case is to verify that the system correctly handles an
intermittent device timeout. Here the scsi_debug device drops 1 in N
requests resulting in a retry either at the block level. The ZFS code
does specify the FAILFAST option but it turns out that for this case
the Linux IO stack with still retry the command. The FAILFAST logic
located in scsi_noretry_cmd() does no seem to apply to the simply
timeout case. It appears to be more targeted to specific device or
transport errors from the lower layers.
The ninth test case handles a persistent failure in which the device
is removed from the system by Linux. The test verifies that the failure
is detected, the device is made unavailable, and then can be successfully
re-add when brought back online. Additionally, it ensures that errors
and events are logged to the correct places and the no data corruption
has occured due to the failure.
2010-09-28 23:32:12 +00:00
|
|
|
#
|
2010-08-26 18:22:58 +00:00
|
|
|
unused_loop_device() {
	local DEVICE
	local MAX_LOOP_PATH="/sys/module/loop/parameters/max_loop"
	local MAX_LOOP
	local i

	# Prints the chosen /dev/loopN path on stdout and returns 0, or
	# calls die when no device is free and none could be created.
	DEVICE=$(${LOSETUP} -f)

	# An existing /dev/loopN device was available.
	if [ -n "${DEVICE}" ]; then
		echo "${DEVICE}"
		return 0
	fi

	# Create a new /dev/loopN provided we are not at MAX_LOOP.
	if [ -f "${MAX_LOOP_PATH}" ]; then
		MAX_LOOP=$(cat "${MAX_LOOP_PATH}")
		if [ "${MAX_LOOP}" -eq 0 ]; then
			MAX_LOOP=255
		fi

		for ((i = 0; i <= MAX_LOOP; i++)); do
			DEVICE="/dev/loop${i}"

			if [ -b "${DEVICE}" ]; then
				continue
			fi

			# Give up when the node cannot be created instead of
			# reporting a path that does not exist.
			mknod -m660 "${DEVICE}" b 7 "${i}" || break
			chown root:disk "${DEVICE}"
			chmod 666 "${DEVICE}"

			echo "${DEVICE}"
			return 0
		done
	fi

	die "Error: Unable to create new loopback device"
}
|
|
|
|
|
|
|
|
#
|
|
|
|
# This can be slightly dangerous because the loop devices we are
|
Add zfault zpool configurations and tests
Eleven new zpool configurations were added to allow testing of various
failure cases. The first 5 zpool configurations leverage the 'faulty'
md device type which allow us to simuluate IO errors at the block layer.
The last 6 zpool configurations leverage the scsi_debug module provided
by modern kernels. This device allows you to create virtual scsi
devices which are backed by a ram disk. With this setup we can verify
the full IO stack by injecting faults at the lowest layer. Both methods
of fault injection are important to verifying the IO stack.
The zfs code itself also provides a mechanism for error injection
via the zinject command line tool. While we should also take advantage
of this appraoch to validate the code it does not address any of the
Linux integration issues which are the most concerning. For the
moment we're trusting that the upstream Solaris guys are running
zinject and would have caught internal zfs logic errors.
Currently, there are 6 r/w test cases layered on top of the 'faulty'
md devices. They include 3 writes tests for soft/transient errors,
hard/permenant errors, and all writes error to the device. There
are 3 matching read tests for soft/transient errors, hard/permenant
errors, and fixable read error with a write. Although for this last
case zfs doesn't do anything special.
The seventh test case verifies zfs detects and corrects checksum
errors. In this case one of the drives is extensively damaged and
by dd'ing over large sections of it. We then ensure zfs logs the
issue and correctly rebuilds the damage.
The next test cases use the scsi_debug configuration to injects error
at the bottom of the scsi stack. This ensures we find any flaws in the
scsi midlayer or our usage of it. Plus it stresses the device specific
retry, timeout, and error handling outside of zfs's control.
The eighth test case is to verify that the system correctly handles an
intermittent device timeout. Here the scsi_debug device drops 1 in N
requests resulting in a retry either at the block level. The ZFS code
does specify the FAILFAST option but it turns out that for this case
the Linux IO stack with still retry the command. The FAILFAST logic
located in scsi_noretry_cmd() does no seem to apply to the simply
timeout case. It appears to be more targeted to specific device or
transport errors from the lower layers.
The ninth test case handles a persistent failure in which the device
is removed from the system by Linux. The test verifies that the failure
is detected, the device is made unavailable, and then can be successfully
re-add when brought back online. Additionally, it ensures that errors
and events are logged to the correct places and the no data corruption
has occured due to the failure.
2010-09-28 23:32:12 +00:00
|
|
|
# cleaning up may not be ours. However, if the devices are currently
|
2010-08-26 18:22:58 +00:00
|
|
|
# in use we will not be able to remove them, and we only remove
|
2014-04-08 23:46:13 +00:00
|
|
|
# devices which include 'zpool' or 'deleted' in the name. So any
|
|
|
|
# damage we might do should be limited to other zfs related testing.
|
2010-08-26 18:22:58 +00:00
|
|
|
#
|
|
|
|
cleanup_loop_devices() {
	local listing line dev file

	# Snapshot the listing first so it is not affected by the
	# detaches performed below.
	listing=$(${LOSETUP} -a | tr -d '()')

	while IFS= read -r line; do
		case "${line}" in
		*zpool* | *deleted*) ;;
		*) continue ;;
		esac

		# The device is the text before the first ':' and the
		# backing file is the third whitespace-separated field.
		read -r dev _ file _ <<<"${line}"
		dev=${dev%%:*}

		# Detach through ${LOSETUP} so an overridden utility path
		# is honoured rather than whatever 'losetup' is in PATH.
		${LOSETUP} -d "${dev}"

		if [ -n "${file}" ]; then
			rm -f "${file}"
		fi
	done <<<"${listing}"
}
|
|
|
|
|
Add zfault zpool configurations and tests
Eleven new zpool configurations were added to allow testing of various
failure cases. The first 5 zpool configurations leverage the 'faulty'
md device type which allow us to simuluate IO errors at the block layer.
The last 6 zpool configurations leverage the scsi_debug module provided
by modern kernels. This device allows you to create virtual scsi
devices which are backed by a ram disk. With this setup we can verify
the full IO stack by injecting faults at the lowest layer. Both methods
of fault injection are important to verifying the IO stack.
The zfs code itself also provides a mechanism for error injection
via the zinject command line tool. While we should also take advantage
of this appraoch to validate the code it does not address any of the
Linux integration issues which are the most concerning. For the
moment we're trusting that the upstream Solaris guys are running
zinject and would have caught internal zfs logic errors.
Currently, there are 6 r/w test cases layered on top of the 'faulty'
md devices. They include 3 writes tests for soft/transient errors,
hard/permenant errors, and all writes error to the device. There
are 3 matching read tests for soft/transient errors, hard/permenant
errors, and fixable read error with a write. Although for this last
case zfs doesn't do anything special.
The seventh test case verifies zfs detects and corrects checksum
errors. In this case one of the drives is extensively damaged and
by dd'ing over large sections of it. We then ensure zfs logs the
issue and correctly rebuilds the damage.
The next test cases use the scsi_debug configuration to injects error
at the bottom of the scsi stack. This ensures we find any flaws in the
scsi midlayer or our usage of it. Plus it stresses the device specific
retry, timeout, and error handling outside of zfs's control.
The eighth test case is to verify that the system correctly handles an
intermittent device timeout. Here the scsi_debug device drops 1 in N
requests resulting in a retry either at the block level. The ZFS code
does specify the FAILFAST option but it turns out that for this case
the Linux IO stack with still retry the command. The FAILFAST logic
located in scsi_noretry_cmd() does no seem to apply to the simply
timeout case. It appears to be more targeted to specific device or
transport errors from the lower layers.
The ninth test case handles a persistent failure in which the device
is removed from the system by Linux. The test verifies that the failure
is detected, the device is made unavailable, and then can be successfully
re-add when brought back online. Additionally, it ensures that errors
and events are logged to the correct places and the no data corruption
has occured due to the failure.
2010-09-28 23:32:12 +00:00
|
|
|
#
# Destroy the passed loopback devices, this is used when you know
# the names of the loopback devices.
#   $1 - whitespace-separated list of loop devices to detach
# Calls die when the detach fails.  Afterwards removes the files named
# in the global FILES, which this function only reads.
# NOTE(review): FILES is presumably set by the caller to the backing
# files of these devices - confirm.
#
destroy_loop_devices() {
	local LODEVICES="$1"

	msg "Destroying ${LODEVICES}"

	# LODEVICES and FILES are deliberately unquoted: each holds a
	# whitespace-separated list.
	# shellcheck disable=SC2086
	${LOSETUP} -d ${LODEVICES} ||
		die "Error $? destroying ${LODEVICES} loopback"

	# shellcheck disable=SC2086
	rm -f ${FILES}
	return 0
}
|
|
|
|
|
2012-02-09 18:38:03 +00:00
|
|
|
#
# Create a device label taking care to briefly wait if udev needs to settle.
#
# Arguments: $1 - block device path
#            $2 - label type handed to 'parted mklabel'
# Globals:   PARTED (read) - parted command
# Returns:   0 on success, 1 if wait_udev (called with a delay of 30)
#            reports failure, 2 if ${PARTED} fails
#
label() {
	local DEVICE=$1
	local LABEL=$2

	# The operands are quoted so that a path containing whitespace or
	# glob characters is handed on as a single word.
	wait_udev "${DEVICE}" 30 || return 1
	${PARTED} "${DEVICE}" --script -- mklabel "${LABEL}" || return 2

	return 0
}
|
|
|
|
|
|
|
|
#
# Make a partition on a block device with parted, then poke udev.
#
# Arguments: $1 - block device
#            $2 - partition type, $3 - start, $4 - end; all three are
#                 handed to 'parted mkpart' unchanged
# Globals:   PARTED (read) - parted command
# Returns:   1 when ${PARTED} fails, otherwise 0
#
partition() {
	local DEV=$1
	local PART_TYPE=$2
	local FIRST=$3
	local LAST=$4

	if ! ${PARTED} --align optimal ${DEV} --script -- \
	    mkpart ${PART_TYPE} ${FIRST} ${LAST}; then
		return 1
	fi

	# Let udev pick up the new partition before returning.
	udev_trigger
	return 0
}
|
|
|
|
|
|
|
|
#
# Build a filesystem of the requested type on a block device.
#
# Arguments: $1 - block device
#            $2 - filesystem type; selects /sbin/mkfs.<type>
# Returns:   0 if mkfs succeeded, 1 otherwise
#
format() {
	local DEV=$1
	local FS=$2

	# A 4K block size is forced because mkfs.ext2 otherwise tries to
	# use 8K, which won't mount.
	if ! /sbin/mkfs.${FS} -b 4096 -F -q ${DEV} >/dev/null; then
		return 1
	fi

	return 0
}
|
|
|
|
|
Add zfault zpool configurations and tests
Eleven new zpool configurations were added to allow testing of various
failure cases. The first 5 zpool configurations leverage the 'faulty'
md device type which allows us to simulate IO errors at the block layer.
The last 6 zpool configurations leverage the scsi_debug module provided
by modern kernels. This device allows you to create virtual scsi
devices which are backed by a ram disk. With this setup we can verify
the full IO stack by injecting faults at the lowest layer. Both methods
of fault injection are important to verifying the IO stack.
The zfs code itself also provides a mechanism for error injection
via the zinject command line tool. While we should also take advantage
of this approach to validate the code, it does not address any of the
Linux integration issues which are the most concerning. For the
moment we're trusting that the upstream Solaris guys are running
zinject and would have caught internal zfs logic errors.
Currently, there are 6 r/w test cases layered on top of the 'faulty'
md devices. They include 3 write tests for soft/transient errors,
hard/permanent errors, and all writes to the device failing. There
are 3 matching read tests for soft/transient errors, hard/permanent
errors, and fixable read error with a write. Although for this last
case zfs doesn't do anything special.
The seventh test case verifies zfs detects and corrects checksum
errors. In this case one of the drives is extensively damaged
by dd'ing over large sections of it. We then ensure zfs logs the
issue and correctly rebuilds the damage.
The next test cases use the scsi_debug configuration to inject errors
at the bottom of the scsi stack. This ensures we find any flaws in the
scsi midlayer or our usage of it. Plus it stresses the device specific
retry, timeout, and error handling outside of zfs's control.
The eighth test case is to verify that the system correctly handles an
intermittent device timeout. Here the scsi_debug device drops 1 in N
requests resulting in a retry at the block level. The ZFS code
does specify the FAILFAST option but it turns out that for this case
the Linux IO stack will still retry the command. The FAILFAST logic
located in scsi_noretry_cmd() does not seem to apply to the simple
timeout case. It appears to be more targeted to specific device or
transport errors from the lower layers.
The ninth test case handles a persistent failure in which the device
is removed from the system by Linux. The test verifies that the failure
is detected, the device is made unavailable, and then can be successfully
re-added when brought back online. Additionally, it ensures that errors
and events are logged to the correct places and that no data corruption
has occurred due to the failure.
2010-09-28 23:32:12 +00:00
|
|
|
#
# Check that the mdadm utilities are installed.
#
# Globals: MDADM, PARTED (read) - paths of the required tools
# Calls die with a message when either path is not a regular file.
#
check_md_utils() {
	# The operands are quoted on purpose: with an empty or unset
	# variable an unquoted 'test -f' collapses to the one-argument
	# form, which is always true, and the missing tool goes unnoticed.
	test -f "${MDADM}" || die "${MDADM} utility must be installed"
	test -f "${PARTED}" || die "${PARTED} utility must be installed"
}
|
|
|
|
|
|
|
|
#
# Build a throw-away 'faulty' md device on top of a loopback file and
# report whether its partition table can be re-read.
#
# Globals: LOSETUP, MDADM, BLOCKDEV (read) - tool commands
# Returns: the exit status of '${BLOCKDEV} --rereadpt' on the md device,
#          or 1 when the backing file, loop device or md device could
#          not be set up.  Everything created along the way is torn
#          down again before returning.
#
check_md_partitionable() {
	local LOFILE=$(mktemp -p /tmp zpool-lo.XXXXXXXX)
	local LODEVICE=$(unused_loop_device)
	local MDDEVICE=$(unused_md_device)
	local RESULT=1

	check_md_utils

	# Swap the mktemp placeholder for a sparse 16M backing file.
	rm -f ${LOFILE}
	if ! dd if=/dev/zero of=${LOFILE} bs=1M count=0 seek=16 \
	    &>/dev/null; then
		return ${RESULT}
	fi

	msg "Creating ${LODEVICE} using ${LOFILE}"
	if ! ${LOSETUP} ${LODEVICE} ${LOFILE}; then
		rm -f ${LOFILE}
		return ${RESULT}
	fi

	msg "Creating ${MDDEVICE} using ${LODEVICE}"
	if ! ${MDADM} --build ${MDDEVICE} --level=faulty \
	    --raid-devices=1 ${LODEVICE} &>/dev/null; then
		destroy_loop_devices ${LODEVICE}
		rm -f ${LOFILE}
		return ${RESULT}
	fi
	wait_udev ${MDDEVICE} 30

	# The verdict is the status of re-reading the partition table.
	${BLOCKDEV} --rereadpt ${MDDEVICE} 2>/dev/null
	RESULT=$?

	destroy_md_devices ${MDDEVICE}
	destroy_loop_devices ${LODEVICE}
	rm -f ${LOFILE}

	return ${RESULT}
}
|
|
|
|
|
|
|
|
#
# Find and return an unused md device.
#
# Globals: MDADM (read) - mdadm command
# Outputs: the /dev/mdN path of the first free device, N in 0..31
# Calls die when none of the 32 candidates is free.
#
unused_md_device() {
	local MDDEVICE
	local i

	for (( i=0; i<32; i++ )); do
		MDDEVICE=md${i}

		# Skip active devices in /proc/mdstat.
		grep -q "${MDDEVICE} " /proc/mdstat && continue

		# Device doesn't exist, use it.
		if [ ! -e "/dev/${MDDEVICE}" ]; then
			echo "/dev/${MDDEVICE}"
			return
		fi

		# Device exists but may not be in use; 'mdadm --detail'
		# exiting with status 1 is treated as "free".
		if [ -b "/dev/${MDDEVICE}" ]; then
			${MDADM} --detail "/dev/${MDDEVICE}" &>/dev/null
			if [ $? -eq 1 ]; then
				echo "/dev/${MDDEVICE}"
				return
			fi
		fi
	done

	die "Error: Unable to find unused md device"
}
|
|
|
|
|
|
|
|
#
# Tear down every md device listed under /dev, then re-trigger udev.
#
# This is somewhat risky since the devices found may not be ours.
# Devices that are currently in use cannot be removed, however, and the
# super block is never zeroed, so a device removed by mistake should be
# reconstructable.
#
cleanup_md_devices() {
	local MD_LIST

	# Any name containing a 'p' is filtered out of the listing.
	MD_LIST=$(ls /dev/md* 2>/dev/null | grep -v p)

	destroy_md_devices "${MD_LIST}"
	udev_trigger
}
|
|
|
|
|
|
|
|
#
# Destroy the passed md devices, this is used when you know
# the names of the md devices.
#
# Arguments: $1 - whitespace separated list of md device paths
# Globals:   MDADM (read) - mdadm command
# Returns:   always 0; failures of the individual mdadm calls are ignored
#
destroy_md_devices() {
	local MDDEVICES="$1"
	# Keep the loop variable local so a caller's own MDDEVICE variable
	# is not overwritten.
	local MDDEVICE

	msg "Destroying ${MDDEVICES}"
	# ${MDDEVICES} is deliberately unquoted: it is a list to be split.
	for MDDEVICE in ${MDDEVICES}; do
		${MDADM} --stop "${MDDEVICE}" &>/dev/null
		${MDADM} --remove "${MDDEVICE}" &>/dev/null
		# Output and status of this query are discarded.
		${MDADM} --detail "${MDDEVICE}" &>/dev/null
	done

	return 0
}
|
|
|
|
|
|
|
|
#
# Check that the scsi utilities are installed.
#
# Globals: INFOMOD (read) - command used to query the scsi_debug module
#          LSSCSI (read)  - path of the lsscsi tool
# Calls die with a message when the module query fails or ${LSSCSI} is
# not a regular file.
#
check_sd_utils() {
	${INFOMOD} scsi_debug &>/dev/null || die "scsi_debug module required"
	# Quoted so that an empty or unset LSSCSI fails the check instead of
	# degrading to the always-true one-argument 'test -f'.
	test -f "${LSSCSI}" || die "${LSSCSI} utility must be installed"
}
|
|
|
|
|
|
|
|
#
# Rescan the scsi bus for scsi_debug devices.  It is preferable to use the
# scsi-rescan tool if it is installed, but if it's not we can fall back to
# removing and re-adding the device manually.  This rescan will only affect
# the first scsi_debug device if scsi-rescan is missing.
#
# Globals: SCSIRESCAN, LSSCSI, AWK (read) - tool commands
#
scsi_rescan() {
	local AWK_SCRIPT="/scsi_debug/ { print \$1; exit }"
	local SCSIID
	local SCSIHOST

	# Quoted so that an empty or unset SCSIRESCAN selects the manual
	# fallback; unquoted, '[ -f ]' would be true.
	if [ -f "${SCSIRESCAN}" ]; then
		${SCSIRESCAN} --forcerescan --remove &>/dev/null
	else
		# First lsscsi line mentioning scsi_debug; its first field is
		# the bracketed id, e.g. [H:C:T:L].
		SCSIID=$(${LSSCSI} | ${AWK} "${AWK_SCRIPT}" | tr -d '[]')
		# The host number is everything before the first ':'.
		SCSIHOST=${SCSIID%%:*}

		echo 1 >"/sys/class/scsi_device/${SCSIID}/device/delete"
		udev_trigger
		echo "- - -" >"/sys/class/scsi_host/host${SCSIHOST}/scan"
		udev_trigger
	fi
}
|
|
|
|
|
|
|
|
#
# Trigger udev and wait for it to settle.
#
# Globals: UDEVADM (read) - path of udevadm; when it is not a regular
#          file the /sbin/udevtrigger and /sbin/udevsettle commands are
#          run instead.
#
udev_trigger() {
	# Quoted so that an empty or unset UDEVADM falls through to the
	# else branch; unquoted, '[ -f ]' would be true.
	if [ -f "${UDEVADM}" ]; then
		${UDEVADM} trigger --action=change --subsystem-match=block
		${UDEVADM} settle
	else
		/sbin/udevtrigger
		/sbin/udevsettle
	fi
}
|
|
|
|
|
2010-08-26 18:22:58 +00:00
|
|
|
#
|
|
|
|
# The following udev helper functions assume that the provided
|
2013-01-29 18:53:19 +00:00
|
|
|
# udev rules file will create a /dev/disk/by-vdev/<CHANNEL><RANK>
|
2010-08-26 18:22:58 +00:00
|
|
|
# disk mapping. In this mapping each CHANNEL is represented by
|
|
|
|
# the letters a-z, and the RANK is represented by the numbers
|
|
|
|
# 1-n. A CHANNEL should identify a group of RANKS which are all
|
|
|
|
# attached to a single controller, each RANK represents a disk.
|
|
|
|
# This provides a simple mechanism to locate a specific drive
|
|
|
|
# given a known hardware configuration.
|
|
|
|
#
|
|
|
|
#
# Install the disk mapping described by a config file.
#
# Arguments: $1 - path of the mapping config file
# Globals:   INTREE, DEVDIR, AWK (read)
#            DST_FILE, DST_PATH (written when not running in tree)
# Returns:   0 on success, 1 if ${DEVDIR} cannot be created or entered.
#            Calls die if the installed config file already exists.
#
udev_setup() {
	local SRC_PATH=$1

	# When running in tree manually construct symlinks in tree to
	# the proper devices.  Symlinks are installed for all entries
	# in the config file regardless of if that device actually
	# exists.  When installed as a package udev can be relied on for
	# this and it will only create links for devices which exist.
	if [ -n "${INTREE}" ]; then
		mkdir -p "${DEVDIR}/" || return 1

		# Work in a subshell so the caller's working directory is
		# left untouched.  Saving it in PWD does not work because
		# cd itself rewrites PWD, turning 'cd ${PWD}' into a no-op.
		(
			cd "${DEVDIR}/" || exit 1
			${AWK} '!/^#/ && /./ { system( \
				"ln -f -s /dev/disk/by-path/"$2" "$1";" \
				"ln -f -s /dev/disk/by-path/"$2"-part1 "$1"p1;" \
				"ln -f -s /dev/disk/by-path/"$2"-part9 "$1"p9;" \
				) }' "${SRC_PATH}"
			exit 0
		) || return 1
	else
		# Config name is the file name up to its second '.' field.
		DST_FILE=$(basename "${SRC_PATH}" | cut -f1-2 -d'.')
		DST_PATH=/etc/zfs/${DST_FILE}

		if [ -e "${DST_PATH}" ]; then
			die "Error: Config ${DST_PATH} already exists"
		fi

		cp "${SRC_PATH}" "${DST_PATH}"
		udev_trigger
	fi

	return 0
}
|
|
|
|
|
|
|
|
#
# When running in tree, remove from ${DEVDIR} the <name>, <name>p1 and
# <name>p9 entries for every non-comment, non-blank line of the config
# file, where <name> is the first field of the line.
#
# Arguments: $1 - path of the mapping config file
# Globals:   INTREE, DEVDIR, AWK (read)
# Returns:   always 0
#
udev_cleanup() {
	local SRC_PATH=$1

	if [ -n "${INTREE}" ]; then
		# A subshell keeps the caller's working directory intact
		# (cd rewrites PWD, so it cannot be used to remember the old
		# directory).  If ${DEVDIR} cannot be entered nothing is
		# removed, rather than deleting files from wherever we are.
		(
			cd "${DEVDIR}/" || exit 0
			${AWK} '!/^#/ && /./ { system( \
				"rm -f "$1" "$1"p1 "$1"p9") }' "${SRC_PATH}"
		)
	fi

	return 0
}
|
|
|
|
|
|
|
|
#
# Convert a channel/rank pair into a disk name.
#
# Arguments: $1 - channel number; mapped to the character whose code is
#                 the channel plus 96, so 1 -> a, 2 -> b, ...
#            $2 - rank, appended as is
# Outputs:   the disk name, without a trailing newline
#
udev_cr2d() {
	local CHANNEL
	local RANK=$2

	# Hex character code of the channel letter, computed with shell
	# arithmetic instead of an external bc process.
	printf -v CHANNEL '%X' $(( $1 + 96 ))

	# The rank is passed as an argument so it is never interpreted as
	# part of the format string.
	printf "\x${CHANNEL}%s" "${RANK}"
}
|
|
|
|
|
|
|
|
#
# Fill the global RAID0S array with one ${DEVDIR}/<disk> entry per
# channel/rank pair, iterating ranks in the outer loop and channels in
# the inner loop.
#
# Arguments: $1 - number of ranks, $2 - number of channels
# Globals:   DEVDIR (read), RAID0S (reset and written)
# Returns:   always 0
#
udev_raid0_setup() {
	local RANKS=$1
	local CHANNELS=$2
	# Loop state is local so callers' variables are not overwritten.
	local RANK CHANNEL DISK

	RAID0S=()
	for (( RANK=1; RANK<=RANKS; RANK++ )); do
		for (( CHANNEL=1; CHANNEL<=CHANNELS; CHANNEL++ )); do
			DISK=$(udev_cr2d "${CHANNEL}" "${RANK}")
			RAID0S+=("${DEVDIR}/${DISK}")
		done
	done

	return 0
}
|
|
|
|
|
|
|
|
#
# Fill the global RAID10S array with "mirror <disk1> <disk2>" entries.
# For every rank the channels are paired up as (1,2), (3,4), ...; each
# pair yields one entry.
#
# Arguments: $1 - number of ranks, $2 - number of channels
# Globals:   DEVDIR (read), RAID10S (reset and written)
# Returns:   always 0
#
udev_raid10_setup() {
	local RANKS=$1
	local CHANNELS=$2
	# Loop state is local so callers' variables are not overwritten.
	local RANK CHANNEL1 CHANNEL2 DISK1 DISK2 GROUP

	RAID10S=()
	for (( RANK=1; RANK<=RANKS; RANK++ )); do
		for (( CHANNEL1=1; CHANNEL1<=CHANNELS; CHANNEL1+=2 )); do
			CHANNEL2=$(( CHANNEL1 + 1 ))
			DISK1=$(udev_cr2d "${CHANNEL1}" "${RANK}")
			DISK2=$(udev_cr2d "${CHANNEL2}" "${RANK}")
			GROUP="${DEVDIR}/${DISK1} ${DEVDIR}/${DISK2}"
			RAID10S+=("mirror ${GROUP}")
		done
	done

	return 0
}
|
|
|
|
|
|
|
|
#
# Fill the global RAIDZS array with one "raidz <disk> <disk> ..." string
# per rank, listing the ${DEVDIR} path of every channel in that rank.
# Entries are stored at index <rank>, so the first one is RAIDZS[1].
#
# Arguments: $1 - number of ranks, $2 - number of channels
# Globals:   DEVDIR (read), RAIDZS (reset and written)
# Returns:   always 0
#
udev_raidz_setup() {
	local RANKS=$1
	local CHANNELS=$2
	# Loop state is local so callers' variables are not overwritten.
	local RANK CHANNEL DISK
	local -a RAIDZ

	RAIDZS=()
	for (( RANK=1; RANK<=RANKS; RANK++ )); do
		RAIDZ=("raidz")

		for (( CHANNEL=1; CHANNEL<=CHANNELS; CHANNEL++ )); do
			DISK=$(udev_cr2d "${CHANNEL}" "${RANK}")
			RAIDZ[${CHANNEL}]="${DEVDIR}/${DISK}"
		done

		RAIDZS[${RANK}]="${RAIDZ[*]}"
	done

	return 0
}
|
|
|
|
|
|
|
|
#
# Fill the global RAIDZ2S array with one "raidz2 <disk> <disk> ..."
# string per rank, listing the ${DEVDIR} path of every channel in that
# rank.  Entries are stored at index <rank>, so the first one is
# RAIDZ2S[1].
#
# Arguments: $1 - number of ranks, $2 - number of channels
# Globals:   DEVDIR (read), RAIDZ2S (reset and written)
# Returns:   always 0
#
udev_raidz2_setup() {
	local RANKS=$1
	local CHANNELS=$2
	# Loop state is local so callers' variables are not overwritten.
	local RANK CHANNEL DISK
	local -a RAIDZ2

	RAIDZ2S=()
	for (( RANK=1; RANK<=RANKS; RANK++ )); do
		RAIDZ2=("raidz2")

		for (( CHANNEL=1; CHANNEL<=CHANNELS; CHANNEL++ )); do
			DISK=$(udev_cr2d "${CHANNEL}" "${RANK}")
			RAIDZ2[${CHANNEL}]="${DEVDIR}/${DISK}"
		done

		RAIDZ2S[${RANK}]="${RAIDZ2[*]}"
	done

	return 0
}
|
2010-08-26 18:44:39 +00:00
|
|
|
|
|
|
|
#
# Print the test number and name as two left-aligned columns (no
# trailing newline), then invoke the function test_<number>.
#
# Arguments: $1 - test number, $2 - test name
# Returns:   the status of test_<number>
#
run_one_test() {
	local TEST_NUM=$1 TEST_NAME=$2

	printf '%-4d %-34s ' ${TEST_NUM} "${TEST_NAME}"

	# The test body is looked up by number.
	test_${TEST_NUM}
}
|
|
|
|
|
|
|
|
#
# Print the test number and name as two left-aligned columns (no
# trailing newline), then call skip instead of running the test.
#
# Arguments: $1 - test number, $2 - test name
# Returns:   the status of skip
#
skip_one_test() {
	local TEST_NUM=$1 TEST_NAME=$2

	printf '%-4d %-34s ' ${TEST_NUM} "${TEST_NAME}"

	skip
}
|
|
|
|
|
|
|
|
#
# Run or skip a single test according to TESTS_SKIP and TESTS_RUN.
#
# A test listed in TESTS_SKIP is skipped.  Otherwise it is run when the
# first element of TESTS_RUN is "*" or when TESTS_RUN lists the test;
# any other test is skipped.
#
# Arguments: $1 - test number, $2 - test name
# Globals:   TESTS_SKIP, TESTS_RUN (read)
#
run_test() {
	local TEST_NUM=$1
	local TEST_NAME=$2
	# Local so the loops do not overwrite a caller's 'i'.
	local i

	# The lists are expanded unquoted on purpose so that a single
	# whitespace separated string is split into its entries.
	for i in ${TESTS_SKIP[@]}; do
		if [[ $i == ${TEST_NUM} ]] ; then
			skip_one_test ${TEST_NUM} "${TEST_NAME}"
			return 0
		fi
	done

	if [ "${TESTS_RUN[0]}" = "*" ]; then
		run_one_test ${TEST_NUM} "${TEST_NAME}"
	else
		for i in ${TESTS_RUN[@]}; do
			if [[ $i == ${TEST_NUM} ]] ; then
				run_one_test ${TEST_NUM} "${TEST_NAME}"
				return 0
			fi
		done

		skip_one_test ${TEST_NUM} "${TEST_NAME}"
	fi
}
|
2010-09-11 04:44:17 +00:00
|
|
|
|
|
|
|
#
# Trigger udev, then poll once a second until a device node exists.
#
# Arguments: $1 - path expected to appear
#            $2 - delay; polling gives up once the number of elapsed
#                 one-second sleeps exceeds this value
# Returns:   0 once the path exists, 1 on timeout
#
wait_udev() {
	local DEVICE=$1
	local DELAY=$2
	local COUNT=0

	udev_trigger
	# Quoted so an empty device name is "missing" and ends in a timeout;
	# unquoted, '[ ! -e ]' is false and success would be reported.
	while [ ! -e "${DEVICE}" ]; do
		if [ "${COUNT}" -gt "${DELAY}" ]; then
			return 1
		fi

		COUNT=$(( COUNT + 1 ))
		sleep 1
	done

	return 0
}
|
2011-06-12 05:48:49 +00:00
|
|
|
|
|
|
|
#
# If the kernel exposes the stack_max_size tracing file, write 1 to
# stack_tracer_enabled and 0 to stack_max_size.  Does nothing when the
# file is absent.
#
stack_clear() {
	local STACK_MAX_SIZE=/sys/kernel/debug/tracing/stack_max_size
	local STACK_TRACER_ENABLED=/proc/sys/kernel/stack_tracer_enabled

	# Nothing to do without the tracing file.
	[ -e $STACK_MAX_SIZE ] || return 0

	echo 1 >$STACK_TRACER_ENABLED
	echo 0 >$STACK_MAX_SIZE
}
|
|
|
|
|
|
|
|
#
# Read the value in the stack_max_size tracing file and, when it is at
# least 7000, print a warning followed by the contents of the
# stack_trace file.  Does nothing when stack_max_size is absent.
#
# Globals: STACK_SIZE (written) - the value read from stack_max_size
#
stack_check() {
	local STACK_MAX_SIZE=/sys/kernel/debug/tracing/stack_max_size
	local STACK_TRACE=/sys/kernel/debug/tracing/stack_trace
	local STACK_LIMIT=7000

	# Nothing to check without the tracing file.
	[ -e $STACK_MAX_SIZE ] || return 0

	STACK_SIZE=$(cat $STACK_MAX_SIZE)

	# Below the limit there is nothing to report.
	[ $STACK_SIZE -ge $STACK_LIMIT ] || return 0

	echo
	echo "Warning: max stack size $STACK_SIZE bytes"
	cat $STACK_TRACE
}
|
2014-01-21 21:30:03 +00:00
|
|
|
|
|
|
|
#
# Send the default kill signal to the process id(s) stored in
# ${ZED_PIDFILE}, if that file exists.
#
# Globals: ZED_PIDFILE (read) - path of the pid file
#
kill_zed() {
	# Quoted so an empty or unset ZED_PIDFILE means "no pid file";
	# unquoted, '[ -f ]' is true and cat would then read stdin.
	if [ -f "${ZED_PIDFILE}" ]; then
		kill $(cat "${ZED_PIDFILE}")
	fi
}
|