Change checksum & IO delay ratelimit values

Change checksum & IO delay ratelimit thresholds from 5/sec to 20/sec.
This allows zed to actually trigger if a bunch of these events arrive in
a short period of time (zed has a threshold of 10 events in 10 sec).
Previously, if you had, say, 100 checksum errors in 1 sec, it would get
ratelimited to 5/sec which wouldn't trigger zed to fault the drive.

Also, convert the checksum and IO delay thresholds to module params for
easy testing.

Reviewed-by: loli10K <ezomori.nozomu@gmail.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Giuseppe Di Natale <dinatale2@llnl.gov>
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Closes #7252
This commit is contained in:
Tony Hutter 2018-03-04 17:34:51 -08:00 committed by Brian Behlendorf
parent 5666a994f2
commit 80d52c3919
6 changed files with 56 additions and 15 deletions

View File

@ -262,8 +262,6 @@ struct vdev {
* We rate limit ZIO delay and ZIO checksum events, since they * We rate limit ZIO delay and ZIO checksum events, since they
* can flood ZED with tons of events when a drive is acting up. * can flood ZED with tons of events when a drive is acting up.
*/ */
#define DELAYS_PER_SECOND 5
#define CHECKSUMS_PER_SECOND 5
zfs_ratelimit_t vdev_delay_rl; zfs_ratelimit_t vdev_delay_rl;
zfs_ratelimit_t vdev_checksum_rl; zfs_ratelimit_t vdev_checksum_rl;
}; };

View File

@ -25,13 +25,19 @@
typedef struct { typedef struct {
hrtime_t start; hrtime_t start;
unsigned int count; unsigned int count;
unsigned int burst; /* Number to allow per interval */
unsigned int interval; /* Interval length in seconds */ /*
* Pointer to number of events per interval. We do this to
* allow the burst to be a (changeable) module parameter.
*/
unsigned int *burst;
unsigned int interval; /* Interval length in seconds */
kmutex_t lock; kmutex_t lock;
} zfs_ratelimit_t; } zfs_ratelimit_t;
int zfs_ratelimit(zfs_ratelimit_t *rl); int zfs_ratelimit(zfs_ratelimit_t *rl);
void zfs_ratelimit_init(zfs_ratelimit_t *rl, unsigned int burst, void zfs_ratelimit_init(zfs_ratelimit_t *rl, unsigned int *burst,
unsigned int interval); unsigned int interval);
void zfs_ratelimit_fini(zfs_ratelimit_t *rl); void zfs_ratelimit_fini(zfs_ratelimit_t *rl);

View File

@ -753,6 +753,19 @@ Disable pool import at module load by ignoring the cache file (typically \fB/etc
Use \fB1\fR for yes (default) and \fB0\fR for no. Use \fB1\fR for yes (default) and \fB0\fR for no.
.RE .RE
.sp
.ne 2
.na
\fBzfs_checksums_per_second\fR (uint)
.ad
.RS 12n
Rate limit checksum events to this many per second. Note that this should
not be set below the zed thresholds (currently 10 checksums over 10 sec)
or else zed may not trigger any action.
.sp
Default value: \fB20\fR.
.RE
.sp .sp
.ne 2 .ne 2
.na .na
@ -929,6 +942,17 @@ Note: \fBzfs_delay_scale\fR * \fBzfs_dirty_data_max\fR must be < 2^64.
Default value: \fB500,000\fR. Default value: \fB500,000\fR.
.RE .RE
.sp
.ne 2
.na
\fBzfs_delays_per_second\fR (uint)
.ad
.RS 12n
Rate limit IO delay events to this many per second.
.sp
Default value: \fB20\fR.
.RE
.sp .sp
.ne 2 .ne 2
.na .na

View File

@ -56,6 +56,16 @@
*/ */
int metaslabs_per_vdev = 200; int metaslabs_per_vdev = 200;
/*
 * Cap on the number of IO delay events let through per second.  Each vdev's
 * delay rate limiter is handed the address of this variable, so a new value
 * is picked up the next time the limiter is consulted.
 */
unsigned int zfs_delays_per_second = 20;
/*
 * Cap on the number of checksum events let through per second.  Each vdev's
 * checksum rate limiter is handed the address of this variable, so a new
 * value is picked up the next time the limiter is consulted.
 */
unsigned int zfs_checksums_per_second = 20;
/* /*
* Virtual device management. * Virtual device management.
*/ */
@ -351,8 +361,8 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
* and checksum events so that we don't overwhelm ZED with thousands * and checksum events so that we don't overwhelm ZED with thousands
* of events when a disk is acting up. * of events when a disk is acting up.
*/ */
zfs_ratelimit_init(&vd->vdev_delay_rl, DELAYS_PER_SECOND, 1); zfs_ratelimit_init(&vd->vdev_delay_rl, &zfs_delays_per_second, 1);
zfs_ratelimit_init(&vd->vdev_checksum_rl, CHECKSUMS_PER_SECOND, 1); zfs_ratelimit_init(&vd->vdev_checksum_rl, &zfs_checksums_per_second, 1);
list_link_init(&vd->vdev_config_dirty_node); list_link_init(&vd->vdev_config_dirty_node);
list_link_init(&vd->vdev_state_dirty_node); list_link_init(&vd->vdev_state_dirty_node);
@ -3752,5 +3762,14 @@ module_param(metaslabs_per_vdev, int, 0644);
MODULE_PARM_DESC(metaslabs_per_vdev, MODULE_PARM_DESC(metaslabs_per_vdev,
"Divide added vdev into approximately (but no more than) this number " "Divide added vdev into approximately (but no more than) this number "
"of metaslabs"); "of metaslabs");
/*
 * Expose the per-second event rate limits as module parameters (mode 0644)
 * so they can be adjusted without rebuilding.
 */
module_param(zfs_delays_per_second, uint, 0644);
MODULE_PARM_DESC(zfs_delays_per_second, "Rate limit delay events to this many "
	"IO delays per second");
module_param(zfs_checksums_per_second, uint, 0644);
/* Each literal ends in a space so the concatenated text keeps its word gaps. */
MODULE_PARM_DESC(zfs_checksums_per_second, "Rate limit checksum events "
	"to this many checksum errors per second (do not set below zed "
	"threshold).");
/* END CSTYLED */ /* END CSTYLED */
#endif #endif

View File

@ -33,7 +33,7 @@
* interval: Interval time in seconds * interval: Interval time in seconds
*/ */
void void
zfs_ratelimit_init(zfs_ratelimit_t *rl, unsigned int burst, zfs_ratelimit_init(zfs_ratelimit_t *rl, unsigned int *burst,
unsigned int interval) unsigned int interval)
{ {
rl->count = 0; rl->count = 0;
@ -89,7 +89,7 @@ zfs_ratelimit(zfs_ratelimit_t *rl)
rl->start = now; rl->start = now;
rl->count = 0; rl->count = 0;
} else { } else {
if (rl->count >= rl->burst) { if (rl->count >= *rl->burst) {
error = 0; /* We're ratelimiting */ error = 0; /* We're ratelimiting */
} }
} }

View File

@ -68,14 +68,8 @@ for type in "mirror" "raidz" "raidz2"; do
log_must dd if=/dev/urandom of=$TESTFILE bs=1M count=16 log_must dd if=/dev/urandom of=$TESTFILE bs=1M count=16
# 4. Inject CHECKSUM ERRORS on read with a zinject error handler # 4. Inject CHECKSUM ERRORS on read with a zinject error handler
# NOTE: checksum events are ratelimited to max 5 per second, ZED needs
# 10 to kick in a spare
log_must zinject -d $FAULT_FILE -e corrupt -f 50 -T read $TESTPOOL log_must zinject -d $FAULT_FILE -e corrupt -f 50 -T read $TESTPOOL
log_must cp $TESTFILE /dev/null log_must cp $TESTFILE /dev/null
log_must sleep 1
log_must cp $TESTFILE /dev/null
log_must sleep 1
log_must cp $TESTFILE /dev/null
# 5. Verify the ZED kicks in a hot spare and expected pool/device status # 5. Verify the ZED kicks in a hot spare and expected pool/device status
log_note "Wait for ZED to auto-spare" log_note "Wait for ZED to auto-spare"