From 214196e9f5351dd637ecf42816ce1496cd667793 Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Wed, 7 Apr 2021 19:23:57 -0400 Subject: [PATCH] Ratelimit deadman zevents as with delay zevents Just as delay zevents can flood the zevent pipe when a vdev becomes unresponsive, so do the deadman zevents. Ratelimit deadman zevents according to the same tunable as for delay zevents. Enable deadman tests on FreeBSD and add a test for deadman event ratelimiting. Reviewed-by: Brian Behlendorf Reviewed-by: Don Brady Signed-off-by: Ryan Moeller Closes #11786 --- include/sys/vdev_impl.h | 3 +- man/man5/zfs-module-parameters.5 | 3 +- module/zfs/vdev.c | 3 + module/zfs/zfs_fm.c | 8 +- tests/runfiles/common.run | 6 ++ tests/runfiles/linux.run | 6 -- .../tests/functional/deadman/Makefile.am | 1 + .../functional/deadman/deadman_ratelimit.ksh | 78 +++++++++++++++++++ .../tests/functional/deadman/deadman_sync.ksh | 6 +- 9 files changed, 102 insertions(+), 12 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/deadman/deadman_ratelimit.ksh diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index 48c1e7838f..6c76293a94 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -441,10 +441,11 @@ struct vdev { kmutex_t vdev_probe_lock; /* protects vdev_probe_zio */ /* - * We rate limit ZIO delay and ZIO checksum events, since they + * We rate limit ZIO delay, deadman, and checksum events, since they * can flood ZED with tons of events when a drive is acting up. */ zfs_ratelimit_t vdev_delay_rl; + zfs_ratelimit_t vdev_deadman_rl; zfs_ratelimit_t vdev_checksum_rl; }; diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index 3838bd03a8..0a14e62f38 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -1620,7 +1620,8 @@ Default value: \fB64\fR. \fBzfs_slow_io_events_per_second\fR (int) .ad .RS 12n -Rate limit delay zevents (which report slow I/Os) to this many per second. +Rate limit delay and deadman zevents (which report slow I/Os) to this many per +second. .sp Default value: 20 .RE diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 95fe9ec633..f7d582ffde 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -543,6 +543,8 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) */ zfs_ratelimit_init(&vd->vdev_delay_rl, &zfs_slow_io_events_per_second, 1); + zfs_ratelimit_init(&vd->vdev_deadman_rl, &zfs_slow_io_events_per_second, + 1); zfs_ratelimit_init(&vd->vdev_checksum_rl, &zfs_checksum_events_per_second, 1); @@ -1033,6 +1035,7 @@ vdev_free(vdev_t *vd) cv_destroy(&vd->vdev_rebuild_io_cv); zfs_ratelimit_fini(&vd->vdev_delay_rl); + zfs_ratelimit_fini(&vd->vdev_deadman_rl); zfs_ratelimit_fini(&vd->vdev_checksum_rl); if (vd == spa->spa_root_vdev) diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c index a8341f50ba..9e93bee338 100644 --- a/module/zfs/zfs_fm.c +++ b/module/zfs/zfs_fm.c @@ -357,8 +357,8 @@ zfs_zevent_post_cb(nvlist_t *nvl, nvlist_t *detector) } /* - * We want to rate limit ZIO delay and checksum events so as to not - * flood ZED when a disk is acting up. + * We want to rate limit ZIO delay, deadman, and checksum events so as to not + * flood zevent consumers when a disk is acting up. * * Returns 1 if we're ratelimiting, 0 if not. */ @@ -367,11 +367,13 @@ zfs_is_ratelimiting_event(const char *subclass, vdev_t *vd) { int rc = 0; /* - * __ratelimit() returns 1 if we're *not* ratelimiting and 0 if we + * zfs_ratelimit() returns 1 if we're *not* ratelimiting and 0 if we * are. Invert it to get our return value. */ if (strcmp(subclass, FM_EREPORT_ZFS_DELAY) == 0) { rc = !zfs_ratelimit(&vd->vdev_delay_rl); + } else if (strcmp(subclass, FM_EREPORT_ZFS_DEADMAN) == 0) { + rc = !zfs_ratelimit(&vd->vdev_deadman_rl); } else if (strcmp(subclass, FM_EREPORT_ZFS_CHECKSUM) == 0) { rc = !zfs_ratelimit(&vd->vdev_checksum_rl); } diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 7316bb55a2..22688fdc1e 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -571,6 +571,12 @@ tags = ['functional', 'cp_files'] tests = ['ctime_001_pos' ] tags = ['functional', 'ctime'] +[tests/functional/deadman] +tests = ['deadman_ratelimit', 'deadman_sync', 'deadman_zio'] +pre = +post = +tags = ['functional', 'deadman'] + [tests/functional/delegate] tests = ['zfs_allow_001_pos', 'zfs_allow_002_pos', 'zfs_allow_003_pos', 'zfs_allow_004_pos', 'zfs_allow_005_pos', 'zfs_allow_006_pos', diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index d8312afd38..3f4d2eb5ce 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -85,12 +85,6 @@ tags = ['functional', 'cli_root', 'zpool_split'] tests = ['compress_004_pos'] tags = ['functional', 'compression'] -[tests/functional/deadman:Linux] -tests = ['deadman_sync', 'deadman_zio'] -pre = -post = -tags = ['functional', 'deadman'] - [tests/functional/devices:Linux] tests = ['devices_001_pos', 'devices_002_neg', 'devices_003_pos'] tags = ['functional', 'devices'] diff --git a/tests/zfs-tests/tests/functional/deadman/Makefile.am b/tests/zfs-tests/tests/functional/deadman/Makefile.am index 7b70ca09df..097f23e884 100644 --- a/tests/zfs-tests/tests/functional/deadman/Makefile.am +++ b/tests/zfs-tests/tests/functional/deadman/Makefile.am @@ -1,5 +1,6 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/deadman dist_pkgdata_SCRIPTS = \ + deadman_ratelimit.ksh \ deadman_sync.ksh \ deadman_zio.ksh diff --git a/tests/zfs-tests/tests/functional/deadman/deadman_ratelimit.ksh b/tests/zfs-tests/tests/functional/deadman/deadman_ratelimit.ksh new file mode 100755 index 0000000000..469117a56c --- /dev/null +++ b/tests/zfs-tests/tests/functional/deadman/deadman_ratelimit.ksh @@ -0,0 +1,78 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Portions Copyright 2021 iXsystems, Inc. +# + +# DESCRIPTION: +# Verify spa deadman events are rate limited +# +# STRATEGY: +# 1. Reduce the zfs_slow_io_events_per_second to 1. +# 2. Reduce the zfs_deadman_ziotime_ms to 1ms. +# 3. Write data to a pool and read it back. +# 4. Verify deadman events have been produced at a reasonable rate. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/deadman/deadman.cfg + +verify_runnable "both" + +function cleanup +{ + zinject -c all + default_cleanup_noexit + + set_tunable64 SLOW_IO_EVENTS_PER_SECOND $OLD_SLOW_IO_EVENTS + set_tunable64 DEADMAN_ZIOTIME_MS $ZIOTIME_DEFAULT +} + +log_assert "Verify spa deadman events are rate limited" +log_onexit cleanup + +OLD_SLOW_IO_EVENTS=$(get_tunable SLOW_IO_EVENTS_PER_SECOND) +log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND 1 +log_must set_tunable64 DEADMAN_ZIOTIME_MS 1 + +# Create a new pool in order to use the updated deadman settings. +default_setup_noexit $DISK1 +log_must zpool events -c + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) +log_must file_write -b 1048576 -c 8 -o create -d 0 -f $mntpnt/file +log_must zpool export $TESTPOOL +log_must zpool import $TESTPOOL +log_must zinject -d $DISK1 -D 5:1 $TESTPOOL +log_must dd if=$mntpnt/file of=$TEST_BASE_DIR/devnull oflag=sync + +events=$(zpool events $TESTPOOL | grep -c ereport.fs.zfs.deadman) +log_note "events=$events" +if [ "$events" -lt 1 ]; then + log_fail "Expect >= 1 deadman events, $events found" +fi +if [ "$events" -gt 10 ]; then + log_fail "Expect <= 10 deadman events, $events found" +fi + +log_pass "Verify spa deadman events are rate limited" diff --git a/tests/zfs-tests/tests/functional/deadman/deadman_sync.ksh b/tests/zfs-tests/tests/functional/deadman/deadman_sync.ksh index 5d803af85b..b0b03f5d52 100755 --- a/tests/zfs-tests/tests/functional/deadman/deadman_sync.ksh +++ b/tests/zfs-tests/tests/functional/deadman/deadman_sync.ksh @@ -73,7 +73,11 @@ log_must zinject -c all log_must zpool sync # Log txg sync times for reference and the zpool event summary. -log_must cat /proc/spl/kstat/zfs/$TESTPOOL/txgs +if is_freebsd; then + log_must sysctl -n kstat.zfs.$TESTPOOL.txgs +else + log_must cat /proc/spl/kstat/zfs/$TESTPOOL/txgs +fi log_must zpool events # Verify at least 5 deadman events were logged. The first after 5 seconds,