ZTS: Fixes for spurious failures of resilver_restart_001 test

The resilver restart test was reported as failing about 2% of the
time. Two issues were found:

- The event log wasn't large enough, so resilver events were missing
- One 'zpool sync' wasn't enough for resilver to start after zinject

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: Kjeld Schouten <kjeld@schouten-lebbing.nl>
Signed-off-by: John Poduska <jpoduska@datto.com>
Issue #9588
Closes #9677
Closes #9703
This commit is contained in:
John Poduska 2019-12-10 12:10:36 -05:00 committed by Tony Hutter
parent 1be3cba381
commit 504aae708e
1 changed file with 18 additions and 12 deletions

View File

@ -45,14 +45,16 @@
function cleanup function cleanup
{ {
echo $ORIG_RESILVER_MIN_TIME > $ZFS_PARAMS/zfs_resilver_min_time_ms log_must set_tunable32 zfs_resilver_min_time_ms $ORIG_RESILVER_MIN_TIME
echo $ORIG_SCAN_SUSPEND_PROGRESS > $ZFS_PARAMS/zfs_scan_suspend_progress log_must set_tunable32 zfs_scan_suspend_progress \
$ORIG_SCAN_SUSPEND_PROGRESS
log_must set_tunable32 zfs_zevent_len_max $ORIG_ZFS_ZEVENT_LEN_MAX
log_must zinject -c all log_must zinject -c all
destroy_pool $TESTPOOL destroy_pool $TESTPOOL
rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE
} }
# Count resilver events in zpool and number of deferred resilvers on vdevs # count resilver events in zpool and number of deferred resilvers on vdevs
function verify_restarts # <msg> <cnt> <defer> function verify_restarts # <msg> <cnt> <defer>
{ {
msg=$1 msg=$1
@ -85,9 +87,9 @@ function verify_restarts # <msg> <cnt> <defer>
log_assert "Check for unnecessary resilver restarts" log_assert "Check for unnecessary resilver restarts"
ZFS_PARAMS=/sys/module/zfs/parameters ORIG_RESILVER_MIN_TIME=$(get_tunable zfs_resilver_min_time_ms)
ORIG_RESILVER_MIN_TIME=$(cat $ZFS_PARAMS/zfs_resilver_min_time_ms) ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable zfs_scan_suspend_progress)
ORIG_SCAN_SUSPEND_PROGRESS=$(cat $ZFS_PARAMS/zfs_scan_suspend_progress) ORIG_ZFS_ZEVENT_LEN_MAX=$(get_tunable zfs_zevent_len_max)
set -A RESTARTS -- '1' '2' '2' '2' set -A RESTARTS -- '1' '2' '2' '2'
set -A VDEVS -- '' '' '' '' set -A VDEVS -- '' '' '' ''
@ -98,12 +100,15 @@ VDEV_REPLACE="${VDEV_FILES[1]} $SPARE_VDEV_FILE"
log_onexit cleanup log_onexit cleanup
# ensure that enough events will be saved
log_must set_tunable32 zfs_zevent_len_max 512
log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} $SPARE_VDEV_FILE log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} $SPARE_VDEV_FILE
log_must zpool create -f -o feature@resilver_defer=disabled $TESTPOOL \ log_must zpool create -f -o feature@resilver_defer=disabled $TESTPOOL \
raidz ${VDEV_FILES[@]} raidz ${VDEV_FILES[@]}
# Create 4 filesystems # create 4 filesystems
for fs in fs{0..3} for fs in fs{0..3}
do do
log_must zfs create -o primarycache=none -o recordsize=1k $TESTPOOL/$fs log_must zfs create -o primarycache=none -o recordsize=1k $TESTPOOL/$fs
@ -118,7 +123,7 @@ do
done done
wait wait
# Test without and with deferred resilver feature enabled # test without and with deferred resilver feature enabled
for test in "without" "with" for test in "without" "with"
do do
log_note "Testing $test deferred resilvers" log_note "Testing $test deferred resilvers"
@ -135,11 +140,11 @@ do
log_must zpool events -c log_must zpool events -c
# limit scanning time # limit scanning time
echo 50 > $ZFS_PARAMS/zfs_resilver_min_time_ms log_must set_tunable32 zfs_resilver_min_time_ms 50
# initiate a resilver and suspend the scan as soon as possible # initiate a resilver and suspend the scan as soon as possible
log_must zpool replace $TESTPOOL $VDEV_REPLACE log_must zpool replace $TESTPOOL $VDEV_REPLACE
echo 1 > $ZFS_PARAMS/zfs_scan_suspend_progress log_must set_tunable32 zfs_scan_suspend_progress 1
# there should only be 1 resilver start # there should only be 1 resilver start
verify_restarts '' "${RESTARTS[0]}" "${VDEVS[0]}" verify_restarts '' "${RESTARTS[0]}" "${VDEVS[0]}"
@ -163,8 +168,8 @@ do
verify_restarts ' after zinject' "${RESTARTS[2]}" "${VDEVS[2]}" verify_restarts ' after zinject' "${RESTARTS[2]}" "${VDEVS[2]}"
# unsuspend resilver # unsuspend resilver
echo 0 > $ZFS_PARAMS/zfs_scan_suspend_progress log_must set_tunable32 zfs_scan_suspend_progress 0
echo 3000 > $ZFS_PARAMS/zfs_resilver_min_time_ms log_must set_tunable32 zfs_resilver_min_time_ms 3000
# wait for resilver to finish # wait for resilver to finish
for iter in {0..59} for iter in {0..59}
@ -177,6 +182,7 @@ do
# wait for a few txg's to see if a resilver happens # wait for a few txg's to see if a resilver happens
log_must zpool sync $TESTPOOL log_must zpool sync $TESTPOOL
log_must zpool sync $TESTPOOL
# there should now be 2 resilver starts # there should now be 2 resilver starts
verify_restarts ' after resilver' "${RESTARTS[3]}" "${VDEVS[3]}" verify_restarts ' after resilver' "${RESTARTS[3]}" "${VDEVS[3]}"