Change checksum & IO delay ratelimit values

Change checksum & IO delay ratelimit thresholds from 5/sec to 20/sec.
This allows zed to actually trigger if a bunch of these events arrive in
a short period of time (zed has a threshold of 10 events in 10 sec).
Previously, if you had, say, 100 checksum errors in 1 sec, it would get
ratelimited to 5/sec which wouldn't trigger zed to fault the drive.

Also, convert the checksum and IO delay thresholds to module params for
easy testing.

Reviewed-by: loli10K <ezomori.nozomu@gmail.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Giuseppe Di Natale <dinatale2@llnl.gov>
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Closes #7252
This commit is contained in:
Tony Hutter 2018-03-04 17:34:51 -08:00
parent 792f88131c
commit 6dc40e2ada
5 changed files with 71 additions and 9 deletions

View File

@ -255,8 +255,6 @@ struct vdev {
* We rate limit ZIO delay and ZIO checksum events, since they * We rate limit ZIO delay and ZIO checksum events, since they
* can flood ZED with tons of events when a drive is acting up. * can flood ZED with tons of events when a drive is acting up.
*/ */
#define DELAYS_PER_SECOND 5
#define CHECKSUMS_PER_SECOND 5
zfs_ratelimit_t vdev_delay_rl; zfs_ratelimit_t vdev_delay_rl;
zfs_ratelimit_t vdev_checksum_rl; zfs_ratelimit_t vdev_checksum_rl;
}; };

View File

@ -25,13 +25,19 @@
typedef struct { typedef struct {
hrtime_t start; hrtime_t start;
unsigned int count; unsigned int count;
unsigned int burst; /* Number to allow per interval */
/*
* Pointer to number of events per interval. We do this to
* allow the burst to be a (changeable) module parameter.
*/
unsigned int *burst;
unsigned int interval; /* Interval length in seconds */ unsigned int interval; /* Interval length in seconds */
kmutex_t lock; kmutex_t lock;
} zfs_ratelimit_t; } zfs_ratelimit_t;
int zfs_ratelimit(zfs_ratelimit_t *rl); int zfs_ratelimit(zfs_ratelimit_t *rl);
void zfs_ratelimit_init(zfs_ratelimit_t *rl, unsigned int burst, void zfs_ratelimit_init(zfs_ratelimit_t *rl, unsigned int *burst,
unsigned int interval); unsigned int interval);
void zfs_ratelimit_fini(zfs_ratelimit_t *rl); void zfs_ratelimit_fini(zfs_ratelimit_t *rl);

View File

@ -739,6 +739,34 @@ Disable pool import at module load by ignoring the cache file (typically \fB/etc
Use \fB1\fR for yes (default) and \fB0\fR for no. Use \fB1\fR for yes (default) and \fB0\fR for no.
.RE .RE
.sp
.ne 2
.na
\fBzfs_checksums_per_second\fR (int)
.ad
.RS 12n
Rate limit checksum events to this many per second. Note that this should
not be set below the zed thresholds (currently 10 checksums over 10 sec)
or else zed may not trigger any action.
.sp
Default value: \fB20\fR.
.RE
.sp
.ne 2
.na
\fBzfs_commit_timeout_pct\fR (int)
.ad
.RS 12n
This controls the amount of time that a ZIL block (lwb) will remain "open"
when it isn't "full", and it has a thread waiting for it to be committed to
stable storage. The timeout is scaled based on a percentage of the last lwb
latency to avoid significantly impacting the latency of each individual
transaction record (itx).
.sp
Default value: \fB5\fR%.
.RE
.sp .sp
.ne 2 .ne 2
.na .na
@ -866,6 +894,17 @@ Note: \fBzfs_delay_scale\fR * \fBzfs_dirty_data_max\fR must be < 2^64.
Default value: \fB500,000\fR. Default value: \fB500,000\fR.
.RE .RE
.sp
.ne 2
.na
\fBzfs_delays_per_second\fR (int)
.ad
.RS 12n
Rate limit IO delay events to this many per second.
.sp
Default value: \fB20\fR.
.RE
.sp .sp
.ne 2 .ne 2
.na .na

View File

@ -215,7 +215,7 @@ const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS] = {
* interval: Interval time in seconds * interval: Interval time in seconds
*/ */
void void
zfs_ratelimit_init(zfs_ratelimit_t *rl, unsigned int burst, zfs_ratelimit_init(zfs_ratelimit_t *rl, unsigned int *burst,
unsigned int interval) unsigned int interval)
{ {
rl->count = 0; rl->count = 0;
@ -270,7 +270,7 @@ zfs_ratelimit(zfs_ratelimit_t *rl)
rl->start = now; rl->start = now;
rl->count = 0; rl->count = 0;
} else { } else {
if (rl->count >= rl->burst) { if (rl->count >= *rl->burst) {
rc = 0; /* We're ratelimiting */ rc = 0; /* We're ratelimiting */
} }
} }

View File

@ -56,6 +56,16 @@
*/ */
int metaslabs_per_vdev = 200; int metaslabs_per_vdev = 200;
/*
 * Rate limit delay events to this many IO delays per second.
 *
 * Exposed as a writable module parameter. The address of this variable is
 * handed to zfs_ratelimit_init() and dereferenced on each rate limit check,
 * so a new value takes effect without re-creating the vdev.
 */
unsigned int zfs_delays_per_second = 20;
/*
 * Rate limit checksum events after this many checksum errors per second.
 *
 * Exposed as a writable module parameter and read through a pointer in the
 * same way as zfs_delays_per_second. This should not be set below the zed
 * threshold (10 events in 10 seconds at the time of this change), or zed
 * may never see enough events to act on a failing drive.
 */
unsigned int zfs_checksums_per_second = 20;
/* /*
* Virtual device management. * Virtual device management.
*/ */
@ -357,8 +367,8 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
* and checksum events so that we don't overwhelm ZED with thousands * and checksum events so that we don't overwhelm ZED with thousands
* of events when a disk is acting up. * of events when a disk is acting up.
*/ */
zfs_ratelimit_init(&vd->vdev_delay_rl, DELAYS_PER_SECOND, 1); zfs_ratelimit_init(&vd->vdev_delay_rl, &zfs_delays_per_second, 1);
zfs_ratelimit_init(&vd->vdev_checksum_rl, CHECKSUMS_PER_SECOND, 1); zfs_ratelimit_init(&vd->vdev_checksum_rl, &zfs_checksums_per_second, 1);
list_link_init(&vd->vdev_config_dirty_node); list_link_init(&vd->vdev_config_dirty_node);
list_link_init(&vd->vdev_state_dirty_node); list_link_init(&vd->vdev_state_dirty_node);
@ -3776,5 +3786,14 @@ module_param(metaslabs_per_vdev, int, 0644);
MODULE_PARM_DESC(metaslabs_per_vdev, MODULE_PARM_DESC(metaslabs_per_vdev,
"Divide added vdev into approximately (but no more than) this number " "Divide added vdev into approximately (but no more than) this number "
"of metaslabs"); "of metaslabs");
/* Writable (0644) cap on the number of IO delay events posted per second. */
module_param(zfs_delays_per_second, uint, 0644);
MODULE_PARM_DESC(zfs_delays_per_second, "Rate limit delay events to this many "
	"IO delays per second");

/*
 * Writable (0644) cap on the number of checksum events posted per second.
 * Adjacent string literals are concatenated verbatim, so each fragment must
 * carry its own trailing space; without it the description reads
 * "zedthreshold".
 */
module_param(zfs_checksums_per_second, uint, 0644);
MODULE_PARM_DESC(zfs_checksums_per_second, "Rate limit checksum events "
	"to this many checksum errors per second (do not set below zed "
	"threshold).");
/* END CSTYLED */ /* END CSTYLED */
#endif #endif