Ratelimit deadman zevents as with delay zevents
Just as delay zevents can flood the zevent pipe when a vdev becomes unresponsive, so do the deadman zevents. Ratelimit deadman zevents according to the same tunable as for delay zevents. Enable deadman tests on FreeBSD and add a test for deadman event ratelimiting. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Don Brady <don.brady@delphix.com> Signed-off-by: Ryan Moeller <ryan@iXsystems.com> Closes #11786
This commit is contained in:
parent
932d471bec
commit
214196e9f5
|
@ -441,10 +441,11 @@ struct vdev {
|
||||||
kmutex_t vdev_probe_lock; /* protects vdev_probe_zio */
|
kmutex_t vdev_probe_lock; /* protects vdev_probe_zio */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We rate limit ZIO delay and ZIO checksum events, since they
|
* We rate limit ZIO delay, deadman, and checksum events, since they
|
||||||
* can flood ZED with tons of events when a drive is acting up.
|
* can flood ZED with tons of events when a drive is acting up.
|
||||||
*/
|
*/
|
||||||
zfs_ratelimit_t vdev_delay_rl;
|
zfs_ratelimit_t vdev_delay_rl;
|
||||||
|
zfs_ratelimit_t vdev_deadman_rl;
|
||||||
zfs_ratelimit_t vdev_checksum_rl;
|
zfs_ratelimit_t vdev_checksum_rl;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -1620,7 +1620,8 @@ Default value: \fB64\fR.
|
||||||
\fBzfs_slow_io_events_per_second\fR (int)
|
\fBzfs_slow_io_events_per_second\fR (int)
|
||||||
.ad
|
.ad
|
||||||
.RS 12n
|
.RS 12n
|
||||||
Rate limit delay zevents (which report slow I/Os) to this many per second.
|
Rate limit delay and deadman zevents (which report slow I/Os) to this many per
|
||||||
|
second.
|
||||||
.sp
|
.sp
|
||||||
Default value: 20
|
Default value: 20
|
||||||
.RE
|
.RE
|
||||||
|
|
|
@ -543,6 +543,8 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
|
||||||
*/
|
*/
|
||||||
zfs_ratelimit_init(&vd->vdev_delay_rl, &zfs_slow_io_events_per_second,
|
zfs_ratelimit_init(&vd->vdev_delay_rl, &zfs_slow_io_events_per_second,
|
||||||
1);
|
1);
|
||||||
|
zfs_ratelimit_init(&vd->vdev_deadman_rl, &zfs_slow_io_events_per_second,
|
||||||
|
1);
|
||||||
zfs_ratelimit_init(&vd->vdev_checksum_rl,
|
zfs_ratelimit_init(&vd->vdev_checksum_rl,
|
||||||
&zfs_checksum_events_per_second, 1);
|
&zfs_checksum_events_per_second, 1);
|
||||||
|
|
||||||
|
@ -1033,6 +1035,7 @@ vdev_free(vdev_t *vd)
|
||||||
cv_destroy(&vd->vdev_rebuild_io_cv);
|
cv_destroy(&vd->vdev_rebuild_io_cv);
|
||||||
|
|
||||||
zfs_ratelimit_fini(&vd->vdev_delay_rl);
|
zfs_ratelimit_fini(&vd->vdev_delay_rl);
|
||||||
|
zfs_ratelimit_fini(&vd->vdev_deadman_rl);
|
||||||
zfs_ratelimit_fini(&vd->vdev_checksum_rl);
|
zfs_ratelimit_fini(&vd->vdev_checksum_rl);
|
||||||
|
|
||||||
if (vd == spa->spa_root_vdev)
|
if (vd == spa->spa_root_vdev)
|
||||||
|
|
|
@ -357,8 +357,8 @@ zfs_zevent_post_cb(nvlist_t *nvl, nvlist_t *detector)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We want to rate limit ZIO delay and checksum events so as to not
|
* We want to rate limit ZIO delay, deadman, and checksum events so as to not
|
||||||
* flood ZED when a disk is acting up.
|
* flood zevent consumers when a disk is acting up.
|
||||||
*
|
*
|
||||||
* Returns 1 if we're ratelimiting, 0 if not.
|
* Returns 1 if we're ratelimiting, 0 if not.
|
||||||
*/
|
*/
|
||||||
|
@ -367,11 +367,13 @@ zfs_is_ratelimiting_event(const char *subclass, vdev_t *vd)
|
||||||
{
|
{
|
||||||
int rc = 0;
|
int rc = 0;
|
||||||
/*
|
/*
|
||||||
* __ratelimit() returns 1 if we're *not* ratelimiting and 0 if we
|
* zfs_ratelimit() returns 1 if we're *not* ratelimiting and 0 if we
|
||||||
* are. Invert it to get our return value.
|
* are. Invert it to get our return value.
|
||||||
*/
|
*/
|
||||||
if (strcmp(subclass, FM_EREPORT_ZFS_DELAY) == 0) {
|
if (strcmp(subclass, FM_EREPORT_ZFS_DELAY) == 0) {
|
||||||
rc = !zfs_ratelimit(&vd->vdev_delay_rl);
|
rc = !zfs_ratelimit(&vd->vdev_delay_rl);
|
||||||
|
} else if (strcmp(subclass, FM_EREPORT_ZFS_DEADMAN) == 0) {
|
||||||
|
rc = !zfs_ratelimit(&vd->vdev_deadman_rl);
|
||||||
} else if (strcmp(subclass, FM_EREPORT_ZFS_CHECKSUM) == 0) {
|
} else if (strcmp(subclass, FM_EREPORT_ZFS_CHECKSUM) == 0) {
|
||||||
rc = !zfs_ratelimit(&vd->vdev_checksum_rl);
|
rc = !zfs_ratelimit(&vd->vdev_checksum_rl);
|
||||||
}
|
}
|
||||||
|
|
|
@ -571,6 +571,12 @@ tags = ['functional', 'cp_files']
|
||||||
tests = ['ctime_001_pos' ]
|
tests = ['ctime_001_pos' ]
|
||||||
tags = ['functional', 'ctime']
|
tags = ['functional', 'ctime']
|
||||||
|
|
||||||
|
[tests/functional/deadman]
|
||||||
|
tests = ['deadman_ratelimit', 'deadman_sync', 'deadman_zio']
|
||||||
|
pre =
|
||||||
|
post =
|
||||||
|
tags = ['functional', 'deadman']
|
||||||
|
|
||||||
[tests/functional/delegate]
|
[tests/functional/delegate]
|
||||||
tests = ['zfs_allow_001_pos', 'zfs_allow_002_pos', 'zfs_allow_003_pos',
|
tests = ['zfs_allow_001_pos', 'zfs_allow_002_pos', 'zfs_allow_003_pos',
|
||||||
'zfs_allow_004_pos', 'zfs_allow_005_pos', 'zfs_allow_006_pos',
|
'zfs_allow_004_pos', 'zfs_allow_005_pos', 'zfs_allow_006_pos',
|
||||||
|
|
|
@ -85,12 +85,6 @@ tags = ['functional', 'cli_root', 'zpool_split']
|
||||||
tests = ['compress_004_pos']
|
tests = ['compress_004_pos']
|
||||||
tags = ['functional', 'compression']
|
tags = ['functional', 'compression']
|
||||||
|
|
||||||
[tests/functional/deadman:Linux]
|
|
||||||
tests = ['deadman_sync', 'deadman_zio']
|
|
||||||
pre =
|
|
||||||
post =
|
|
||||||
tags = ['functional', 'deadman']
|
|
||||||
|
|
||||||
[tests/functional/devices:Linux]
|
[tests/functional/devices:Linux]
|
||||||
tests = ['devices_001_pos', 'devices_002_neg', 'devices_003_pos']
|
tests = ['devices_001_pos', 'devices_002_neg', 'devices_003_pos']
|
||||||
tags = ['functional', 'devices']
|
tags = ['functional', 'devices']
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/deadman
|
pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/deadman
|
||||||
dist_pkgdata_SCRIPTS = \
|
dist_pkgdata_SCRIPTS = \
|
||||||
|
deadman_ratelimit.ksh \
|
||||||
deadman_sync.ksh \
|
deadman_sync.ksh \
|
||||||
deadman_zio.ksh
|
deadman_zio.ksh
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,78 @@
|
||||||
|
#!/bin/ksh -p
|
||||||
|
#
|
||||||
|
# CDDL HEADER START
|
||||||
|
#
|
||||||
|
# The contents of this file are subject to the terms of the
|
||||||
|
# Common Development and Distribution License (the "License").
|
||||||
|
# You may not use this file except in compliance with the License.
|
||||||
|
#
|
||||||
|
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||||
|
# or http://www.opensolaris.org/os/licensing.
|
||||||
|
# See the License for the specific language governing permissions
|
||||||
|
# and limitations under the License.
|
||||||
|
#
|
||||||
|
# When distributing Covered Code, include this CDDL HEADER in each
|
||||||
|
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||||
|
# If applicable, add the following below this CDDL HEADER, with the
|
||||||
|
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||||
|
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||||
|
#
|
||||||
|
# CDDL HEADER END
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# Portions Copyright 2021 iXsystems, Inc.
|
||||||
|
#
|
||||||
|
|
||||||
|
# DESCRIPTION:
|
||||||
|
# Verify spa deadman events are rate limited
|
||||||
|
#
|
||||||
|
# STRATEGY:
|
||||||
|
# 1. Reduce the zfs_slow_io_events_per_second to 1.
|
||||||
|
# 2. Reduce the zfs_deadman_ziotime_ms to 1ms.
|
||||||
|
# 3. Write data to a pool and read it back.
|
||||||
|
# 4. Verify deadman events have been produced at a reasonable rate.
|
||||||
|
#
|
||||||
|
|
||||||
|
. $STF_SUITE/include/libtest.shlib
|
||||||
|
. $STF_SUITE/tests/functional/deadman/deadman.cfg
|
||||||
|
|
||||||
|
verify_runnable "both"
|
||||||
|
|
||||||
|
function cleanup
|
||||||
|
{
|
||||||
|
zinject -c all
|
||||||
|
default_cleanup_noexit
|
||||||
|
|
||||||
|
set_tunable64 SLOW_IO_EVENTS_PER_SECOND $OLD_SLOW_IO_EVENTS
|
||||||
|
set_tunable64 DEADMAN_ZIOTIME_MS $ZIOTIME_DEFAULT
|
||||||
|
}
|
||||||
|
|
||||||
|
log_assert "Verify spa deadman events are rate limited"
|
||||||
|
log_onexit cleanup
|
||||||
|
|
||||||
|
OLD_SLOW_IO_EVENTS=$(get_tunable SLOW_IO_EVENTS_PER_SECOND)
|
||||||
|
log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND 1
|
||||||
|
log_must set_tunable64 DEADMAN_ZIOTIME_MS 1
|
||||||
|
|
||||||
|
# Create a new pool in order to use the updated deadman settings.
|
||||||
|
default_setup_noexit $DISK1
|
||||||
|
log_must zpool events -c
|
||||||
|
|
||||||
|
mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS)
|
||||||
|
log_must file_write -b 1048576 -c 8 -o create -d 0 -f $mntpnt/file
|
||||||
|
log_must zpool export $TESTPOOL
|
||||||
|
log_must zpool import $TESTPOOL
|
||||||
|
log_must zinject -d $DISK1 -D 5:1 $TESTPOOL
|
||||||
|
log_must dd if=$mntpnt/file of=$TEST_BASE_DIR/devnull oflag=sync
|
||||||
|
|
||||||
|
events=$(zpool events $TESTPOOL | grep -c ereport.fs.zfs.deadman)
|
||||||
|
log_note "events=$events"
|
||||||
|
if [ "$events" -lt 1 ]; then
|
||||||
|
log_fail "Expect >= 1 deadman events, $events found"
|
||||||
|
fi
|
||||||
|
if [ "$events" -gt 10 ]; then
|
||||||
|
log_fail "Expect <= 10 deadman events, $events found"
|
||||||
|
fi
|
||||||
|
|
||||||
|
log_pass "Verify spa deadman events are rate limited"
|
|
@ -73,7 +73,11 @@ log_must zinject -c all
|
||||||
log_must zpool sync
|
log_must zpool sync
|
||||||
|
|
||||||
# Log txg sync times for reference and the zpool event summary.
|
# Log txg sync times for reference and the zpool event summary.
|
||||||
log_must cat /proc/spl/kstat/zfs/$TESTPOOL/txgs
|
if is_freebsd; then
|
||||||
|
log_must sysctl -n kstat.zfs.$TESTPOOL.txgs
|
||||||
|
else
|
||||||
|
log_must cat /proc/spl/kstat/zfs/$TESTPOOL/txgs
|
||||||
|
fi
|
||||||
log_must zpool events
|
log_must zpool events
|
||||||
|
|
||||||
# Verify at least 5 deadman events were logged. The first after 5 seconds,
|
# Verify at least 5 deadman events were logged. The first after 5 seconds,
|
||||||
|
|
Loading…
Reference in New Issue