Change checksum & IO delay ratelimit values
Change checksum & IO delay ratelimit thresholds from 5/sec to 20/sec. This allows zed to actually trigger if a bunch of these events arrive in a short period of time (zed has a threshold of 10 events in 10 sec). Previously, if you had, say, 100 checksum errors in 1 sec, it would get ratelimited to 5/sec which wouldn't trigger zed to fault the drive. Also, convert the checksum and IO delay thresholds to module params for easy testing. Reviewed-by: loli10K <ezomori.nozomu@gmail.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Giuseppe Di Natale <dinatale2@llnl.gov> Signed-off-by: Tony Hutter <hutter2@llnl.gov> Closes #7252
This commit is contained in:
parent
5666a994f2
commit
80d52c3919
|
@ -262,8 +262,6 @@ struct vdev {
|
||||||
* We rate limit ZIO delay and ZIO checksum events, since they
|
* We rate limit ZIO delay and ZIO checksum events, since they
|
||||||
* can flood ZED with tons of events when a drive is acting up.
|
* can flood ZED with tons of events when a drive is acting up.
|
||||||
*/
|
*/
|
||||||
#define DELAYS_PER_SECOND 5
|
|
||||||
#define CHECKSUMS_PER_SECOND 5
|
|
||||||
zfs_ratelimit_t vdev_delay_rl;
|
zfs_ratelimit_t vdev_delay_rl;
|
||||||
zfs_ratelimit_t vdev_checksum_rl;
|
zfs_ratelimit_t vdev_checksum_rl;
|
||||||
};
|
};
|
||||||
|
|
|
@ -25,13 +25,19 @@
|
||||||
typedef struct {
|
typedef struct {
|
||||||
hrtime_t start;
|
hrtime_t start;
|
||||||
unsigned int count;
|
unsigned int count;
|
||||||
unsigned int burst; /* Number to allow per interval */
|
|
||||||
|
/*
|
||||||
|
* Pointer to number of events per interval. We do this to
|
||||||
|
* allow the burst to be a (changeable) module parameter.
|
||||||
|
*/
|
||||||
|
unsigned int *burst;
|
||||||
|
|
||||||
unsigned int interval; /* Interval length in seconds */
|
unsigned int interval; /* Interval length in seconds */
|
||||||
kmutex_t lock;
|
kmutex_t lock;
|
||||||
} zfs_ratelimit_t;
|
} zfs_ratelimit_t;
|
||||||
|
|
||||||
int zfs_ratelimit(zfs_ratelimit_t *rl);
|
int zfs_ratelimit(zfs_ratelimit_t *rl);
|
||||||
void zfs_ratelimit_init(zfs_ratelimit_t *rl, unsigned int burst,
|
void zfs_ratelimit_init(zfs_ratelimit_t *rl, unsigned int *burst,
|
||||||
unsigned int interval);
|
unsigned int interval);
|
||||||
void zfs_ratelimit_fini(zfs_ratelimit_t *rl);
|
void zfs_ratelimit_fini(zfs_ratelimit_t *rl);
|
||||||
|
|
||||||
|
|
|
@ -753,6 +753,19 @@ Disable pool import at module load by ignoring the cache file (typically \fB/etc
|
||||||
Use \fB1\fR for yes (default) and \fB0\fR for no.
|
Use \fB1\fR for yes (default) and \fB0\fR for no.
|
||||||
.RE
|
.RE
|
||||||
|
|
||||||
|
.sp
|
||||||
|
.ne 2
|
||||||
|
.na
|
||||||
|
\fBzfs_checksums_per_second\fR (uint)
|
||||||
|
.ad
|
||||||
|
.RS 12n
|
||||||
|
Rate limit checksum events to this many per second. Note that this should
|
||||||
|
not be set below the zed thresholds (currently 10 checksums over 10 sec)
|
||||||
|
or else zed may not trigger any action.
|
||||||
|
.sp
|
||||||
|
Default value: \fB20\fR.
|
||||||
|
.RE
|
||||||
|
|
||||||
.sp
|
.sp
|
||||||
.ne 2
|
.ne 2
|
||||||
.na
|
.na
|
||||||
|
@ -929,6 +942,17 @@ Note: \fBzfs_delay_scale\fR * \fBzfs_dirty_data_max\fR must be < 2^64.
|
||||||
Default value: \fB500,000\fR.
|
Default value: \fB500,000\fR.
|
||||||
.RE
|
.RE
|
||||||
|
|
||||||
|
.sp
|
||||||
|
.ne 2
|
||||||
|
.na
|
||||||
|
\fBzfs_delays_per_second\fR (uint)
|
||||||
|
.ad
|
||||||
|
.RS 12n
|
||||||
|
Rate limit IO delay events to this many per second.
|
||||||
|
.sp
|
||||||
|
Default value: \fB20\fR.
|
||||||
|
.RE
|
||||||
|
|
||||||
.sp
|
.sp
|
||||||
.ne 2
|
.ne 2
|
||||||
.na
|
.na
|
||||||
|
|
|
@ -56,6 +56,16 @@
|
||||||
*/
|
*/
|
||||||
int metaslabs_per_vdev = 200;
|
int metaslabs_per_vdev = 200;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Rate limit delay events to this many IO delays per second.
|
||||||
|
*/
|
||||||
|
unsigned int zfs_delays_per_second = 20;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Rate limit checksum events to this many checksum errors per second.
|
||||||
|
*/
|
||||||
|
unsigned int zfs_checksums_per_second = 20;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Virtual device management.
|
* Virtual device management.
|
||||||
*/
|
*/
|
||||||
|
@ -351,8 +361,8 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
|
||||||
* and checksum events so that we don't overwhelm ZED with thousands
|
* and checksum events so that we don't overwhelm ZED with thousands
|
||||||
* of events when a disk is acting up.
|
* of events when a disk is acting up.
|
||||||
*/
|
*/
|
||||||
zfs_ratelimit_init(&vd->vdev_delay_rl, DELAYS_PER_SECOND, 1);
|
zfs_ratelimit_init(&vd->vdev_delay_rl, &zfs_delays_per_second, 1);
|
||||||
zfs_ratelimit_init(&vd->vdev_checksum_rl, CHECKSUMS_PER_SECOND, 1);
|
zfs_ratelimit_init(&vd->vdev_checksum_rl, &zfs_checksums_per_second, 1);
|
||||||
|
|
||||||
list_link_init(&vd->vdev_config_dirty_node);
|
list_link_init(&vd->vdev_config_dirty_node);
|
||||||
list_link_init(&vd->vdev_state_dirty_node);
|
list_link_init(&vd->vdev_state_dirty_node);
|
||||||
|
@ -3752,5 +3762,14 @@ module_param(metaslabs_per_vdev, int, 0644);
|
||||||
MODULE_PARM_DESC(metaslabs_per_vdev,
|
MODULE_PARM_DESC(metaslabs_per_vdev,
|
||||||
"Divide added vdev into approximately (but no more than) this number "
|
"Divide added vdev into approximately (but no more than) this number "
|
||||||
"of metaslabs");
|
"of metaslabs");
|
||||||
|
|
||||||
|
module_param(zfs_delays_per_second, uint, 0644);
|
||||||
|
MODULE_PARM_DESC(zfs_delays_per_second, "Rate limit delay events to this many "
|
||||||
|
"IO delays per second");
|
||||||
|
|
||||||
|
module_param(zfs_checksums_per_second, uint, 0644);
|
||||||
|
MODULE_PARM_DESC(zfs_checksums_per_second, "Rate limit checksum events "
|
||||||
|
"to this many checksum errors per second (do not set below zed "
|
||||||
|
"threshold).");
|
||||||
/* END CSTYLED */
|
/* END CSTYLED */
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -33,7 +33,7 @@
|
||||||
* interval: Interval time in seconds
|
* interval: Interval time in seconds
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
zfs_ratelimit_init(zfs_ratelimit_t *rl, unsigned int burst,
|
zfs_ratelimit_init(zfs_ratelimit_t *rl, unsigned int *burst,
|
||||||
unsigned int interval)
|
unsigned int interval)
|
||||||
{
|
{
|
||||||
rl->count = 0;
|
rl->count = 0;
|
||||||
|
@ -89,7 +89,7 @@ zfs_ratelimit(zfs_ratelimit_t *rl)
|
||||||
rl->start = now;
|
rl->start = now;
|
||||||
rl->count = 0;
|
rl->count = 0;
|
||||||
} else {
|
} else {
|
||||||
if (rl->count >= rl->burst) {
|
if (rl->count >= *rl->burst) {
|
||||||
error = 0; /* We're ratelimiting */
|
error = 0; /* We're ratelimiting */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -68,14 +68,8 @@ for type in "mirror" "raidz" "raidz2"; do
|
||||||
log_must dd if=/dev/urandom of=$TESTFILE bs=1M count=16
|
log_must dd if=/dev/urandom of=$TESTFILE bs=1M count=16
|
||||||
|
|
||||||
# 4. Inject CHECKSUM ERRORS on read with a zinject error handler
|
# 4. Inject CHECKSUM ERRORS on read with a zinject error handler
|
||||||
# NOTE: checksum events are ratelimited to max 5 per second, ZED needs
|
|
||||||
# 10 to kick in a spare
|
|
||||||
log_must zinject -d $FAULT_FILE -e corrupt -f 50 -T read $TESTPOOL
|
log_must zinject -d $FAULT_FILE -e corrupt -f 50 -T read $TESTPOOL
|
||||||
log_must cp $TESTFILE /dev/null
|
log_must cp $TESTFILE /dev/null
|
||||||
log_must sleep 1
|
|
||||||
log_must cp $TESTFILE /dev/null
|
|
||||||
log_must sleep 1
|
|
||||||
log_must cp $TESTFILE /dev/null
|
|
||||||
|
|
||||||
# 5. Verify the ZED kicks in a hot spare and expected pool/device status
|
# 5. Verify the ZED kicks in a hot spare and expected pool/device status
|
||||||
log_note "Wait for ZED to auto-spare"
|
log_note "Wait for ZED to auto-spare"
|
||||||
|
|
Loading…
Reference in New Issue