Extend zloop.sh for automated testing

In order to debug issues encountered by ztest during automated
testing it's important that as much debugging information as
possible be dumped at the time of the failure.  The following
changes extend the zloop.sh script in order to make it easier
to integrate with buildbot.

* Add the `-m <maximum cores>` option to zloop.sh to place a
  limit on the number of core dumps generated.  By default, the
  existing behavior is maintained and no limit is set.

* Add the `-l` option to create a 'ztest.core.N' symlink in the
  current directory to the core directory. This functionality
  is provided primarily for buildbot which expects log files to
  have well known names.

* Rename 'ztest.ddt' to 'ztest.zdb' and extend it to dump
  additional basic information on failure for later analysis.

Reviewed-by: Tim Chase <tim@chase2k.com>
Reviewed-by: Thomas Caputi <tcaputi@datto.com>
Reviewed-by: Giuseppe Di Natale <dinatale2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #6999

Conflicts:
	scripts/zloop.sh
This commit is contained in:
Brian Behlendorf 2018-01-22 12:48:39 -08:00 committed by Tony Hutter
parent d1630dda58
commit 137b3e6cff
1 changed files with 32 additions and 10 deletions

View File

@ -52,6 +52,8 @@ function usage
" -s Size of vdev devices.\n" \ " -s Size of vdev devices.\n" \
" -f Specify working directory for ztest vdev files.\n" \ " -f Specify working directory for ztest vdev files.\n" \
" -c Specify a core dump directory to use.\n" \ " -c Specify a core dump directory to use.\n" \
" -m Max number of core dumps to allow before exiting.\n" \
" -l Create 'ztest.core.N' symlink to core directory.\n" \
" -h Print this help message.\n" \ " -h Print this help message.\n" \
"" >&2 "" >&2
} }
@ -105,14 +107,24 @@ function store_core
coreid=$(date "+zloop-%y%m%d-%H%M%S") coreid=$(date "+zloop-%y%m%d-%H%M%S")
foundcrashes=$((foundcrashes + 1)) foundcrashes=$((foundcrashes + 1))
# zdb debugging
zdbcmd="$ZDB -U "$workdir/zpool.cache" -dddMmDDG ztest"
zdbdebug=$($zdbcmd 2>&1)
echo -e "$zdbcmd\n" >>ztest.zdb
echo "$zdbdebug" >>ztest.zdb
dest=$coredir/$coreid dest=$coredir/$coreid
or_die mkdir -p "$dest" or_die mkdir -p "$dest"
or_die mkdir -p "$dest/vdev" or_die mkdir -p "$dest/vdev"
if [[ $symlink -ne 0 ]]; then
or_die ln -sf "$dest" ztest.core.$foundcrashes
fi
echo "*** ztest crash found - moving logs to $dest" echo "*** ztest crash found - moving logs to $dest"
or_die mv ztest.history "$dest/" or_die mv ztest.history "$dest/"
or_die mv ztest.ddt "$dest/" or_die mv ztest.zdb "$dest/"
or_die mv ztest.out "$dest/" or_die mv ztest.out "$dest/"
or_die mv "$workdir/ztest*" "$dest/vdev/" or_die mv "$workdir/ztest*" "$dest/vdev/"
or_die mv "$workdir/zpool.cache" "$dest/vdev/" or_die mv "$workdir/zpool.cache" "$dest/vdev/"
@ -120,7 +132,7 @@ function store_core
# check for core # check for core
if [[ -f "$core" ]]; then if [[ -f "$core" ]]; then
coreprog=$(core_prog "$core") coreprog=$(core_prog "$core")
corestatus=$($GDB --batch --quiet \ coredebug=$($GDB --batch --quiet \
-ex "set print thread-events off" \ -ex "set print thread-events off" \
-ex "printf \"*\n* Backtrace \n*\n\"" \ -ex "printf \"*\n* Backtrace \n*\n\"" \
-ex "bt" \ -ex "bt" \
@ -132,20 +144,26 @@ function store_core
-ex "thread apply all bt" \ -ex "thread apply all bt" \
-ex "printf \"*\n* Backtraces (full) \n*\n\"" \ -ex "printf \"*\n* Backtraces (full) \n*\n\"" \
-ex "thread apply all bt full" \ -ex "thread apply all bt full" \
-ex "quit" "$coreprog" "$core" | grep -v "New LWP") -ex "quit" "$coreprog" "$core" 2>&1 | \
grep -v "New LWP")
# Dump core + logs to stored directory # Dump core + logs to stored directory
echo "$corestatus" >>"$dest/status" echo "$coredebug" >>"$dest/ztest.gdb"
or_die mv "$core" "$dest/" or_die mv "$core" "$dest/"
# Record info in cores logfile # Record info in cores logfile
echo "*** core @ $coredir/$coreid/$core:" | \ echo "*** core @ $coredir/$coreid/$core:" | \
tee -a ztest.cores tee -a ztest.cores
echo "$corestatus" | tee -a ztest.cores
echo "" | tee -a ztest.cores
fi fi
if [[ $coremax -gt 0 ]] &&
[[ $foundcrashes -ge $coremax ]]; then
echo "exiting... max $coremax allowed cores"
exit 1
else
echo "continuing..." echo "continuing..."
fi fi
fi
} }
# parse arguments # parse arguments
@ -155,12 +173,16 @@ basedir=$DEFAULTWORKDIR
rundir="zloop-run" rundir="zloop-run"
timeout=0 timeout=0
size="512m" size="512m"
while getopts ":ht:s:c:f:" opt; do coremax=0
symlink=0
while getopts ":ht:m:s:c:f:l" opt; do
case $opt in case $opt in
t ) [[ $OPTARG -gt 0 ]] && timeout=$OPTARG ;; t ) [[ $OPTARG -gt 0 ]] && timeout=$OPTARG ;;
m ) [[ $OPTARG -gt 0 ]] && coremax=$OPTARG ;;
s ) [[ $OPTARG ]] && size=$OPTARG ;; s ) [[ $OPTARG ]] && size=$OPTARG ;;
c ) [[ $OPTARG ]] && coredir=$OPTARG ;; c ) [[ $OPTARG ]] && coredir=$OPTARG ;;
f ) [[ $OPTARG ]] && basedir=$(readlink -f "$OPTARG") ;; f ) [[ $OPTARG ]] && basedir=$(readlink -f "$OPTARG") ;;
l ) symlink=1 ;;
h ) usage h ) usage
exit 2 exit 2
;; ;;
@ -178,6 +200,7 @@ ulimit -c unlimited
if [[ -f "$(core_file)" ]]; then if [[ -f "$(core_file)" ]]; then
echo -n "There's a core dump here you might want to look at first... " echo -n "There's a core dump here you might want to look at first... "
core_file core_file
echo
exit 1 exit 1
fi fi
@ -192,7 +215,7 @@ if [[ ! -w $coredir ]]; then
fi fi
or_die rm -f ztest.history or_die rm -f ztest.history
or_die rm -f ztest.ddt or_die rm -f ztest.zdb
or_die rm -f ztest.cores or_die rm -f ztest.cores
ztrc=0 # ztest return value ztrc=0 # ztest return value
@ -243,7 +266,6 @@ while [[ $timeout -eq 0 ]] || [[ $curtime -le $((starttime + timeout)) ]]; do
$cmd >>ztest.out 2>&1 $cmd >>ztest.out 2>&1
ztrc=$? ztrc=$?
grep -E '===|WARNING' ztest.out >>ztest.history grep -E '===|WARNING' ztest.out >>ztest.history
$ZDB -U "$workdir/zpool.cache" -DD ztest >>ztest.ddt
store_core store_core