Ratelimit deadman zevents as with delay zevents
Just as delay zevents can flood the zevent pipe when a vdev becomes unresponsive, so do the deadman zevents. Ratelimit deadman zevents according to the same tunable as for delay zevents. Enable deadman tests on FreeBSD and add a test for deadman event ratelimiting. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Don Brady <don.brady@delphix.com> Signed-off-by: Ryan Moeller <ryan@iXsystems.com> Closes #11786
This commit is contained in:
parent
96e15d29fa
commit
7822c01eb6
|
@ -458,10 +458,11 @@ struct vdev {
|
|||
kmutex_t vdev_probe_lock; /* protects vdev_probe_zio */
|
||||
|
||||
/*
|
||||
* We rate limit ZIO delay and ZIO checksum events, since they
|
||||
* We rate limit ZIO delay, deadman, and checksum events, since they
|
||||
* can flood ZED with tons of events when a drive is acting up.
|
||||
*/
|
||||
zfs_ratelimit_t vdev_delay_rl;
|
||||
zfs_ratelimit_t vdev_deadman_rl;
|
||||
zfs_ratelimit_t vdev_checksum_rl;
|
||||
};
|
||||
|
||||
|
|
|
@ -1695,7 +1695,8 @@ Default value: \fB64\fR.
|
|||
\fBzfs_slow_io_events_per_second\fR (int)
|
||||
.ad
|
||||
.RS 12n
|
||||
Rate limit delay zevents (which report slow I/Os) to this many per second.
|
||||
Rate limit delay and deadman zevents (which report slow I/Os) to this many per
|
||||
second.
|
||||
.sp
|
||||
Default value: 20
|
||||
.RE
|
||||
|
|
|
@ -625,6 +625,8 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
|
|||
*/
|
||||
zfs_ratelimit_init(&vd->vdev_delay_rl, &zfs_slow_io_events_per_second,
|
||||
1);
|
||||
zfs_ratelimit_init(&vd->vdev_deadman_rl, &zfs_slow_io_events_per_second,
|
||||
1);
|
||||
zfs_ratelimit_init(&vd->vdev_checksum_rl,
|
||||
&zfs_checksum_events_per_second, 1);
|
||||
|
||||
|
@ -1106,6 +1108,7 @@ vdev_free(vdev_t *vd)
|
|||
cv_destroy(&vd->vdev_rebuild_cv);
|
||||
|
||||
zfs_ratelimit_fini(&vd->vdev_delay_rl);
|
||||
zfs_ratelimit_fini(&vd->vdev_deadman_rl);
|
||||
zfs_ratelimit_fini(&vd->vdev_checksum_rl);
|
||||
|
||||
if (vd == spa->spa_root_vdev)
|
||||
|
|
|
@ -395,8 +395,8 @@ zfs_zevent_post_cb(nvlist_t *nvl, nvlist_t *detector)
|
|||
}
|
||||
|
||||
/*
|
||||
* We want to rate limit ZIO delay and checksum events so as to not
|
||||
* flood ZED when a disk is acting up.
|
||||
* We want to rate limit ZIO delay, deadman, and checksum events so as to not
|
||||
* flood zevent consumers when a disk is acting up.
|
||||
*
|
||||
* Returns 1 if we're ratelimiting, 0 if not.
|
||||
*/
|
||||
|
@ -405,11 +405,13 @@ zfs_is_ratelimiting_event(const char *subclass, vdev_t *vd)
|
|||
{
|
||||
int rc = 0;
|
||||
/*
|
||||
* __ratelimit() returns 1 if we're *not* ratelimiting and 0 if we
|
||||
* zfs_ratelimit() returns 1 if we're *not* ratelimiting and 0 if we
|
||||
* are. Invert it to get our return value.
|
||||
*/
|
||||
if (strcmp(subclass, FM_EREPORT_ZFS_DELAY) == 0) {
|
||||
rc = !zfs_ratelimit(&vd->vdev_delay_rl);
|
||||
} else if (strcmp(subclass, FM_EREPORT_ZFS_DEADMAN) == 0) {
|
||||
rc = !zfs_ratelimit(&vd->vdev_deadman_rl);
|
||||
} else if (strcmp(subclass, FM_EREPORT_ZFS_CHECKSUM) == 0) {
|
||||
rc = !zfs_ratelimit(&vd->vdev_checksum_rl);
|
||||
}
|
||||
|
|
|
@ -577,6 +577,12 @@ tags = ['functional', 'cp_files']
|
|||
tests = ['ctime_001_pos' ]
|
||||
tags = ['functional', 'ctime']
|
||||
|
||||
[tests/functional/deadman]
|
||||
tests = ['deadman_ratelimit', 'deadman_sync', 'deadman_zio']
|
||||
pre =
|
||||
post =
|
||||
tags = ['functional', 'deadman']
|
||||
|
||||
[tests/functional/delegate]
|
||||
tests = ['zfs_allow_001_pos', 'zfs_allow_002_pos', 'zfs_allow_003_pos',
|
||||
'zfs_allow_004_pos', 'zfs_allow_005_pos', 'zfs_allow_006_pos',
|
||||
|
|
|
@ -85,12 +85,6 @@ tags = ['functional', 'cli_root', 'zpool_split']
|
|||
tests = ['compress_004_pos']
|
||||
tags = ['functional', 'compression']
|
||||
|
||||
[tests/functional/deadman:Linux]
|
||||
tests = ['deadman_sync', 'deadman_zio']
|
||||
pre =
|
||||
post =
|
||||
tags = ['functional', 'deadman']
|
||||
|
||||
[tests/functional/devices:Linux]
|
||||
tests = ['devices_001_pos', 'devices_002_neg', 'devices_003_pos']
|
||||
tags = ['functional', 'devices']
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/deadman
|
||||
dist_pkgdata_SCRIPTS = \
|
||||
deadman_ratelimit.ksh \
|
||||
deadman_sync.ksh \
|
||||
deadman_zio.ksh
|
||||
|
||||
|
|
|
@ -0,0 +1,78 @@
|
|||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or http://www.opensolaris.org/os/licensing.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Portions Copyright 2021 iXsystems, Inc.
|
||||
#
|
||||
|
||||
# DESCRIPTION:
|
||||
# Verify spa deadman events are rate limited
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Reduce the zfs_slow_io_events_per_second to 1.
|
||||
# 2. Reduce the zfs_deadman_ziotime_ms to 1ms.
|
||||
# 3. Write data to a pool and read it back.
|
||||
# 4. Verify deadman events have been produced at a reasonable rate.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/deadman/deadman.cfg
|
||||
|
||||
verify_runnable "both"
|
||||
|
||||
function cleanup
|
||||
{
|
||||
zinject -c all
|
||||
default_cleanup_noexit
|
||||
|
||||
set_tunable64 SLOW_IO_EVENTS_PER_SECOND $OLD_SLOW_IO_EVENTS
|
||||
set_tunable64 DEADMAN_ZIOTIME_MS $ZIOTIME_DEFAULT
|
||||
}
|
||||
|
||||
log_assert "Verify spa deadman events are rate limited"
|
||||
log_onexit cleanup
|
||||
|
||||
OLD_SLOW_IO_EVENTS=$(get_tunable SLOW_IO_EVENTS_PER_SECOND)
|
||||
log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND 1
|
||||
log_must set_tunable64 DEADMAN_ZIOTIME_MS 1
|
||||
|
||||
# Create a new pool in order to use the updated deadman settings.
|
||||
default_setup_noexit $DISK1
|
||||
log_must zpool events -c
|
||||
|
||||
mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS)
|
||||
log_must file_write -b 1048576 -c 8 -o create -d 0 -f $mntpnt/file
|
||||
log_must zpool export $TESTPOOL
|
||||
log_must zpool import $TESTPOOL
|
||||
log_must zinject -d $DISK1 -D 5:1 $TESTPOOL
|
||||
log_must dd if=$mntpnt/file of=$TEST_BASE_DIR/devnull oflag=sync
|
||||
|
||||
events=$(zpool events $TESTPOOL | grep -c ereport.fs.zfs.deadman)
|
||||
log_note "events=$events"
|
||||
if [ "$events" -lt 1 ]; then
|
||||
log_fail "Expect >= 1 deadman events, $events found"
|
||||
fi
|
||||
if [ "$events" -gt 10 ]; then
|
||||
log_fail "Expect <= 10 deadman events, $events found"
|
||||
fi
|
||||
|
||||
log_pass "Verify spa deadman events are rate limited"
|
|
@ -73,7 +73,11 @@ log_must zinject -c all
|
|||
log_must zpool sync
|
||||
|
||||
# Log txg sync times for reference and the zpool event summary.
|
||||
log_must cat /proc/spl/kstat/zfs/$TESTPOOL/txgs
|
||||
if is_freebsd; then
|
||||
log_must sysctl -n kstat.zfs.$TESTPOOL.txgs
|
||||
else
|
||||
log_must cat /proc/spl/kstat/zfs/$TESTPOOL/txgs
|
||||
fi
|
||||
log_must zpool events
|
||||
|
||||
# Verify at least 5 deadman events were logged. The first after 5 seconds,
|
||||
|
|
Loading…
Reference in New Issue