Inject zinject(8) a percentage amount of dev errs
In the original form of device error injection, it was an all or nothing situation. To help simulate intermittent error conditions, you can now specify a real number percentage value. This is also very useful for our ZFS fault diagnosis testing and for injecting intermittent errors during load testing. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Don Brady <don.brady@intel.com> Closes #6227
This commit is contained in:
parent
05a5357a6c
commit
0241e491a0
|
@ -21,6 +21,7 @@
|
|||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2017, Intel Corporation.
|
||||
*/
|
||||
|
||||
/*
|
||||
|
@ -124,7 +125,7 @@
|
|||
* cache.
|
||||
*
|
||||
* The '-f' flag controls the frequency of errors injected, expressed as a
|
||||
* integer percentage between 1 and 100. The default is 100.
|
||||
* real number percentage between 0.0001 and 100. The default is 100.
|
||||
*
|
||||
* This form is responsible for actually injecting the handler into the
|
||||
* framework. It takes the arguments described above, translates them to the
|
||||
|
@ -230,11 +231,13 @@ usage(void)
|
|||
"\t\tspa_vdev_exit() will trigger a panic.\n"
|
||||
"\n"
|
||||
"\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
|
||||
"\t [-T <read|write|free|claim|all> pool\n"
|
||||
"\t [-T <read|write|free|claim|all>] [-f frequency] pool\n"
|
||||
"\t\tInject a fault into a particular device or the device's\n"
|
||||
"\t\tlabel. Label injection can either be 'nvlist', 'uber',\n "
|
||||
"\t\t'pad1', or 'pad2'.\n"
|
||||
"\t\t'errno' can be 'nxio' (the default), 'io', or 'dtl'.\n"
|
||||
"\t\t'frequency' is a value between 0.0001 and 100.0 that limits\n"
|
||||
"\t\tdevice error injection to a percentage of the IOs.\n"
|
||||
"\n"
|
||||
"\tzinject -d device -A <degrade|fault> -D <delay secs> pool\n"
|
||||
"\t\tPerform a specific action on a particular device.\n"
|
||||
|
@ -305,7 +308,7 @@ usage(void)
|
|||
"\t\t-u\tUnload the associated pool. Can be specified with only\n"
|
||||
"\t\t\ta pool object.\n"
|
||||
"\t\t-f\tOnly inject errors a fraction of the time. Expressed as\n"
|
||||
"\t\t\ta percentage between 1 and 100.\n"
|
||||
"\t\t\ta percentage between 0.0001 and 100.\n"
|
||||
"\n"
|
||||
"\t-t data\t\tInject an error into the plain file contents of a\n"
|
||||
"\t\t\tfile. The object must be specified as a complete path\n"
|
||||
|
@ -645,6 +648,27 @@ parse_delay(char *str, uint64_t *delay, uint64_t *nlanes)
|
|||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
parse_frequency(const char *str, uint32_t *percent)
|
||||
{
|
||||
double val;
|
||||
char *post;
|
||||
|
||||
val = strtod(str, &post);
|
||||
if (post == NULL || *post != '\0')
|
||||
return (EINVAL);
|
||||
|
||||
/* valid range is [0.0001, 100.0] */
|
||||
val /= 100.0f;
|
||||
if (val < 0.000001f || val > 1.0f)
|
||||
return (ERANGE);
|
||||
|
||||
/* convert to an integer for use by kernel */
|
||||
*percent = ((uint32_t)(val * ZI_PERCENTAGE_MAX));
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
|
@ -760,10 +784,12 @@ main(int argc, char **argv)
|
|||
}
|
||||
break;
|
||||
case 'f':
|
||||
record.zi_freq = atoi(optarg);
|
||||
if (record.zi_freq < 1 || record.zi_freq > 100) {
|
||||
(void) fprintf(stderr, "frequency range must "
|
||||
"be in the range (0, 100]\n");
|
||||
ret = parse_frequency(optarg, &record.zi_freq);
|
||||
if (ret != 0) {
|
||||
(void) fprintf(stderr, "%sfrequency value must "
|
||||
"be in the range [0.0001, 100.0]\n",
|
||||
ret == EINVAL ? "invalid value: " :
|
||||
ret == ERANGE ? "out of range: " : "");
|
||||
libzfs_fini(g_zfs);
|
||||
return (1);
|
||||
}
|
||||
|
@ -898,7 +924,8 @@ main(int argc, char **argv)
|
|||
* '-c' is invalid with any other options.
|
||||
*/
|
||||
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
|
||||
level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED) {
|
||||
level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED ||
|
||||
record.zi_freq > 0) {
|
||||
(void) fprintf(stderr, "cancel (-c) incompatible with "
|
||||
"any other options\n");
|
||||
usage();
|
||||
|
@ -972,7 +999,8 @@ main(int argc, char **argv)
|
|||
|
||||
} else if (raw != NULL) {
|
||||
if (range != NULL || type != TYPE_INVAL || level != 0 ||
|
||||
record.zi_cmd != ZINJECT_UNINITIALIZED) {
|
||||
record.zi_cmd != ZINJECT_UNINITIALIZED ||
|
||||
record.zi_freq > 0) {
|
||||
(void) fprintf(stderr, "raw (-b) format with "
|
||||
"any other options\n");
|
||||
usage();
|
||||
|
@ -1007,7 +1035,7 @@ main(int argc, char **argv)
|
|||
error = EIO;
|
||||
} else if (record.zi_cmd == ZINJECT_PANIC) {
|
||||
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
|
||||
level != 0 || device != NULL) {
|
||||
level != 0 || device != NULL || record.zi_freq > 0) {
|
||||
(void) fprintf(stderr, "panic (-p) incompatible with "
|
||||
"other options\n");
|
||||
usage();
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
|
||||
* Copyright 2016 RackTop Systems.
|
||||
* Copyright (c) 2017, Intel Corporation.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZFS_IOCTL_H
|
||||
|
@ -338,6 +339,10 @@ typedef struct zinject_record {
|
|||
#define ZEVENT_SEEK_START 0
|
||||
#define ZEVENT_SEEK_END UINT64_MAX
|
||||
|
||||
/* scaled frequency ranges: MAX represents 100%, MIN roughly 0.0001% */
|
||||
#define ZI_PERCENTAGE_MIN 4294UL
|
||||
#define ZI_PERCENTAGE_MAX UINT32_MAX
|
||||
|
||||
typedef enum zinject_type {
|
||||
ZINJECT_UNINITIALIZED,
|
||||
ZINJECT_DATA_FAULT,
|
||||
|
|
|
@ -76,7 +76,7 @@ create 3 lanes on the device; one lane with a latency
|
|||
of 10 ms and two lanes with a 25 ms latency.
|
||||
|
||||
.TP
|
||||
.B "zinject \-d \fIvdev\fB [\-e \fIdevice_error\fB] [\-L \fIlabel_error\fB] [\-T \fIfailure\fB] [\-F] \fIpool\fB"
|
||||
.B "zinject \-d \fIvdev\fB [\-e \fIdevice_error\fB] [\-L \fIlabel_error\fB] [\-T \fIfailure\fB] [\-f \fIfrequency\fB] [\-F] \fIpool\fB"
|
||||
Force a vdev error.
|
||||
.TP
|
||||
.B "zinject \-I [\-s \fIseconds\fB | \-g \fItxgs\fB] \fIpool\fB"
|
||||
|
@ -113,8 +113,8 @@ Specify
|
|||
.BR "nxio" " for an ENXIO error where reopening the device will fail."
|
||||
.TP
|
||||
.BI "\-f" " frequency"
|
||||
Only inject errors a fraction of the time. Expressed as an integer
|
||||
percentage between 1 and 100.
|
||||
Only inject errors a fraction of the time. Expressed as a real number
|
||||
percentage between 0.0001 and 100.
|
||||
.TP
|
||||
.BI "\-F"
|
||||
Fail faster. Do fewer checks.
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2017, Intel Corporation.
|
||||
*/
|
||||
|
||||
/*
|
||||
|
@ -98,6 +99,26 @@ static kmutex_t inject_delay_mtx;
|
|||
*/
|
||||
static int inject_next_id = 1;
|
||||
|
||||
/*
|
||||
* Test if the requested frequency was triggered
|
||||
*/
|
||||
static boolean_t
|
||||
freq_triggered(uint32_t frequency)
|
||||
{
|
||||
/*
|
||||
* zero implies always (100%)
|
||||
*/
|
||||
if (frequency == 0)
|
||||
return (B_TRUE);
|
||||
|
||||
/*
|
||||
* Note: we still handle legacy (unscaled) frequecy values
|
||||
*/
|
||||
uint32_t maximum = (frequency <= 100) ? 100 : ZI_PERCENTAGE_MAX;
|
||||
|
||||
return (spa_get_random(maximum) < frequency);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if the given record matches the I/O in progress.
|
||||
*/
|
||||
|
@ -113,8 +134,7 @@ zio_match_handler(zbookmark_phys_t *zb, uint64_t type,
|
|||
record->zi_object == DMU_META_DNODE_OBJECT) {
|
||||
if (record->zi_type == DMU_OT_NONE ||
|
||||
type == record->zi_type)
|
||||
return (record->zi_freq == 0 ||
|
||||
spa_get_random(100) < record->zi_freq);
|
||||
return (freq_triggered(record->zi_freq));
|
||||
else
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
@ -128,8 +148,7 @@ zio_match_handler(zbookmark_phys_t *zb, uint64_t type,
|
|||
zb->zb_blkid >= record->zi_start &&
|
||||
zb->zb_blkid <= record->zi_end &&
|
||||
error == record->zi_error)
|
||||
return (record->zi_freq == 0 ||
|
||||
spa_get_random(100) < record->zi_freq);
|
||||
return (freq_triggered(record->zi_freq));
|
||||
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
@ -293,6 +312,12 @@ zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error)
|
|||
continue;
|
||||
|
||||
if (handler->zi_record.zi_error == error) {
|
||||
/*
|
||||
* limit error injection if requested
|
||||
*/
|
||||
if (!freq_triggered(handler->zi_record.zi_freq))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* For a failed open, pretend like the device
|
||||
* has gone away.
|
||||
|
@ -466,10 +491,8 @@ zio_handle_io_delay(zio_t *zio)
|
|||
if (handler->zi_record.zi_cmd != ZINJECT_DELAY_IO)
|
||||
continue;
|
||||
|
||||
if (handler->zi_record.zi_freq != 0 &&
|
||||
spa_get_random(100) >= handler->zi_record.zi_freq) {
|
||||
if (!freq_triggered(handler->zi_record.zi_freq))
|
||||
continue;
|
||||
}
|
||||
|
||||
if (vd->vdev_guid != handler->zi_record.zi_guid)
|
||||
continue;
|
||||
|
|
Loading…
Reference in New Issue