Inject zinject(8) a percentage amount of dev errs
In the original form of device error injection, it was an all or nothing situation. To help simulate intermittent error conditions, you can now specify a real number percentage value. This is also very useful for our ZFS fault diagnosis testing and for injecting intermittent errors during load testing. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Don Brady <don.brady@intel.com> Closes #6227
This commit is contained in:
parent
05a5357a6c
commit
0241e491a0
|
@ -21,6 +21,7 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
|
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
|
||||||
|
* Copyright (c) 2017, Intel Corporation.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -124,7 +125,7 @@
|
||||||
* cache.
|
* cache.
|
||||||
*
|
*
|
||||||
* The '-f' flag controls the frequency of errors injected, expressed as a
|
* The '-f' flag controls the frequency of errors injected, expressed as a
|
||||||
* integer percentage between 1 and 100. The default is 100.
|
* real number percentage between 0.0001 and 100. The default is 100.
|
||||||
*
|
*
|
||||||
* This form is responsible for actually injecting the handler into the
|
* This form is responsible for actually injecting the handler into the
|
||||||
* framework. It takes the arguments described above, translates them to the
|
* framework. It takes the arguments described above, translates them to the
|
||||||
|
@ -230,11 +231,13 @@ usage(void)
|
||||||
"\t\tspa_vdev_exit() will trigger a panic.\n"
|
"\t\tspa_vdev_exit() will trigger a panic.\n"
|
||||||
"\n"
|
"\n"
|
||||||
"\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
|
"\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
|
||||||
"\t [-T <read|write|free|claim|all> pool\n"
|
"\t [-T <read|write|free|claim|all>] [-f frequency] pool\n"
|
||||||
"\t\tInject a fault into a particular device or the device's\n"
|
"\t\tInject a fault into a particular device or the device's\n"
|
||||||
"\t\tlabel. Label injection can either be 'nvlist', 'uber',\n "
|
"\t\tlabel. Label injection can either be 'nvlist', 'uber',\n "
|
||||||
"\t\t'pad1', or 'pad2'.\n"
|
"\t\t'pad1', or 'pad2'.\n"
|
||||||
"\t\t'errno' can be 'nxio' (the default), 'io', or 'dtl'.\n"
|
"\t\t'errno' can be 'nxio' (the default), 'io', or 'dtl'.\n"
|
||||||
|
"\t\t'frequency' is a value between 0.0001 and 100.0 that limits\n"
|
||||||
|
"\t\tdevice error injection to a percentage of the IOs.\n"
|
||||||
"\n"
|
"\n"
|
||||||
"\tzinject -d device -A <degrade|fault> -D <delay secs> pool\n"
|
"\tzinject -d device -A <degrade|fault> -D <delay secs> pool\n"
|
||||||
"\t\tPerform a specific action on a particular device.\n"
|
"\t\tPerform a specific action on a particular device.\n"
|
||||||
|
@ -305,7 +308,7 @@ usage(void)
|
||||||
"\t\t-u\tUnload the associated pool. Can be specified with only\n"
|
"\t\t-u\tUnload the associated pool. Can be specified with only\n"
|
||||||
"\t\t\ta pool object.\n"
|
"\t\t\ta pool object.\n"
|
||||||
"\t\t-f\tOnly inject errors a fraction of the time. Expressed as\n"
|
"\t\t-f\tOnly inject errors a fraction of the time. Expressed as\n"
|
||||||
"\t\t\ta percentage between 1 and 100.\n"
|
"\t\t\ta percentage between 0.0001 and 100.\n"
|
||||||
"\n"
|
"\n"
|
||||||
"\t-t data\t\tInject an error into the plain file contents of a\n"
|
"\t-t data\t\tInject an error into the plain file contents of a\n"
|
||||||
"\t\t\tfile. The object must be specified as a complete path\n"
|
"\t\t\tfile. The object must be specified as a complete path\n"
|
||||||
|
@ -645,6 +648,27 @@ parse_delay(char *str, uint64_t *delay, uint64_t *nlanes)
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
parse_frequency(const char *str, uint32_t *percent)
|
||||||
|
{
|
||||||
|
double val;
|
||||||
|
char *post;
|
||||||
|
|
||||||
|
val = strtod(str, &post);
|
||||||
|
if (post == NULL || *post != '\0')
|
||||||
|
return (EINVAL);
|
||||||
|
|
||||||
|
/* valid range is [0.0001, 100.0] */
|
||||||
|
val /= 100.0f;
|
||||||
|
if (val < 0.000001f || val > 1.0f)
|
||||||
|
return (ERANGE);
|
||||||
|
|
||||||
|
/* convert to an integer for use by kernel */
|
||||||
|
*percent = ((uint32_t)(val * ZI_PERCENTAGE_MAX));
|
||||||
|
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
main(int argc, char **argv)
|
main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
|
@ -760,10 +784,12 @@ main(int argc, char **argv)
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 'f':
|
case 'f':
|
||||||
record.zi_freq = atoi(optarg);
|
ret = parse_frequency(optarg, &record.zi_freq);
|
||||||
if (record.zi_freq < 1 || record.zi_freq > 100) {
|
if (ret != 0) {
|
||||||
(void) fprintf(stderr, "frequency range must "
|
(void) fprintf(stderr, "%sfrequency value must "
|
||||||
"be in the range (0, 100]\n");
|
"be in the range [0.0001, 100.0]\n",
|
||||||
|
ret == EINVAL ? "invalid value: " :
|
||||||
|
ret == ERANGE ? "out of range: " : "");
|
||||||
libzfs_fini(g_zfs);
|
libzfs_fini(g_zfs);
|
||||||
return (1);
|
return (1);
|
||||||
}
|
}
|
||||||
|
@ -898,7 +924,8 @@ main(int argc, char **argv)
|
||||||
* '-c' is invalid with any other options.
|
* '-c' is invalid with any other options.
|
||||||
*/
|
*/
|
||||||
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
|
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
|
||||||
level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED) {
|
level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED ||
|
||||||
|
record.zi_freq > 0) {
|
||||||
(void) fprintf(stderr, "cancel (-c) incompatible with "
|
(void) fprintf(stderr, "cancel (-c) incompatible with "
|
||||||
"any other options\n");
|
"any other options\n");
|
||||||
usage();
|
usage();
|
||||||
|
@ -972,7 +999,8 @@ main(int argc, char **argv)
|
||||||
|
|
||||||
} else if (raw != NULL) {
|
} else if (raw != NULL) {
|
||||||
if (range != NULL || type != TYPE_INVAL || level != 0 ||
|
if (range != NULL || type != TYPE_INVAL || level != 0 ||
|
||||||
record.zi_cmd != ZINJECT_UNINITIALIZED) {
|
record.zi_cmd != ZINJECT_UNINITIALIZED ||
|
||||||
|
record.zi_freq > 0) {
|
||||||
(void) fprintf(stderr, "raw (-b) format with "
|
(void) fprintf(stderr, "raw (-b) format with "
|
||||||
"any other options\n");
|
"any other options\n");
|
||||||
usage();
|
usage();
|
||||||
|
@ -1007,7 +1035,7 @@ main(int argc, char **argv)
|
||||||
error = EIO;
|
error = EIO;
|
||||||
} else if (record.zi_cmd == ZINJECT_PANIC) {
|
} else if (record.zi_cmd == ZINJECT_PANIC) {
|
||||||
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
|
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
|
||||||
level != 0 || device != NULL) {
|
level != 0 || device != NULL || record.zi_freq > 0) {
|
||||||
(void) fprintf(stderr, "panic (-p) incompatible with "
|
(void) fprintf(stderr, "panic (-p) incompatible with "
|
||||||
"other options\n");
|
"other options\n");
|
||||||
usage();
|
usage();
|
||||||
|
|
|
@ -22,6 +22,7 @@
|
||||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
|
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
|
||||||
* Copyright 2016 RackTop Systems.
|
* Copyright 2016 RackTop Systems.
|
||||||
|
* Copyright (c) 2017, Intel Corporation.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef _SYS_ZFS_IOCTL_H
|
#ifndef _SYS_ZFS_IOCTL_H
|
||||||
|
@ -338,6 +339,10 @@ typedef struct zinject_record {
|
||||||
#define ZEVENT_SEEK_START 0
|
#define ZEVENT_SEEK_START 0
|
||||||
#define ZEVENT_SEEK_END UINT64_MAX
|
#define ZEVENT_SEEK_END UINT64_MAX
|
||||||
|
|
||||||
|
/* scaled frequency ranges */
|
||||||
|
#define ZI_PERCENTAGE_MIN 4294UL
|
||||||
|
#define ZI_PERCENTAGE_MAX UINT32_MAX
|
||||||
|
|
||||||
typedef enum zinject_type {
|
typedef enum zinject_type {
|
||||||
ZINJECT_UNINITIALIZED,
|
ZINJECT_UNINITIALIZED,
|
||||||
ZINJECT_DATA_FAULT,
|
ZINJECT_DATA_FAULT,
|
||||||
|
|
|
@ -76,7 +76,7 @@ create 3 lanes on the device; one lane with a latency
|
||||||
of 10 ms and two lanes with a 25 ms latency.
|
of 10 ms and two lanes with a 25 ms latency.
|
||||||
|
|
||||||
.TP
|
.TP
|
||||||
.B "zinject \-d \fIvdev\fB [\-e \fIdevice_error\fB] [\-L \fIlabel_error\fB] [\-T \fIfailure\fB] [\-F] \fIpool\fB"
|
.B "zinject \-d \fIvdev\fB [\-e \fIdevice_error\fB] [\-L \fIlabel_error\fB] [\-T \fIfailure\fB] [\-f \fIfrequency\fB] [\-F] \fIpool\fB"
|
||||||
Force a vdev error.
|
Force a vdev error.
|
||||||
.TP
|
.TP
|
||||||
.B "zinject \-I [\-s \fIseconds\fB | \-g \fItxgs\fB] \fIpool\fB"
|
.B "zinject \-I [\-s \fIseconds\fB | \-g \fItxgs\fB] \fIpool\fB"
|
||||||
|
@ -113,8 +113,8 @@ Specify
|
||||||
.BR "nxio" " for an ENXIO error where reopening the device will fail."
|
.BR "nxio" " for an ENXIO error where reopening the device will fail."
|
||||||
.TP
|
.TP
|
||||||
.BI "\-f" " frequency"
|
.BI "\-f" " frequency"
|
||||||
Only inject errors a fraction of the time. Expressed as an integer
|
Only inject errors a fraction of the time. Expressed as a real number
|
||||||
percentage between 1 and 100.
|
percentage between 0.0001 and 100.
|
||||||
.TP
|
.TP
|
||||||
.BI "\-F"
|
.BI "\-F"
|
||||||
Fail faster. Do fewer checks.
|
Fail faster. Do fewer checks.
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
|
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
|
||||||
|
* Copyright (c) 2017, Intel Corporation.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -98,6 +99,26 @@ static kmutex_t inject_delay_mtx;
|
||||||
*/
|
*/
|
||||||
static int inject_next_id = 1;
|
static int inject_next_id = 1;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test if the requested frequency was triggered
|
||||||
|
*/
|
||||||
|
static boolean_t
|
||||||
|
freq_triggered(uint32_t frequency)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* zero implies always (100%)
|
||||||
|
*/
|
||||||
|
if (frequency == 0)
|
||||||
|
return (B_TRUE);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note: we still handle legacy (unscaled) frequecy values
|
||||||
|
*/
|
||||||
|
uint32_t maximum = (frequency <= 100) ? 100 : ZI_PERCENTAGE_MAX;
|
||||||
|
|
||||||
|
return (spa_get_random(maximum) < frequency);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Returns true if the given record matches the I/O in progress.
|
* Returns true if the given record matches the I/O in progress.
|
||||||
*/
|
*/
|
||||||
|
@ -113,8 +134,7 @@ zio_match_handler(zbookmark_phys_t *zb, uint64_t type,
|
||||||
record->zi_object == DMU_META_DNODE_OBJECT) {
|
record->zi_object == DMU_META_DNODE_OBJECT) {
|
||||||
if (record->zi_type == DMU_OT_NONE ||
|
if (record->zi_type == DMU_OT_NONE ||
|
||||||
type == record->zi_type)
|
type == record->zi_type)
|
||||||
return (record->zi_freq == 0 ||
|
return (freq_triggered(record->zi_freq));
|
||||||
spa_get_random(100) < record->zi_freq);
|
|
||||||
else
|
else
|
||||||
return (B_FALSE);
|
return (B_FALSE);
|
||||||
}
|
}
|
||||||
|
@ -128,8 +148,7 @@ zio_match_handler(zbookmark_phys_t *zb, uint64_t type,
|
||||||
zb->zb_blkid >= record->zi_start &&
|
zb->zb_blkid >= record->zi_start &&
|
||||||
zb->zb_blkid <= record->zi_end &&
|
zb->zb_blkid <= record->zi_end &&
|
||||||
error == record->zi_error)
|
error == record->zi_error)
|
||||||
return (record->zi_freq == 0 ||
|
return (freq_triggered(record->zi_freq));
|
||||||
spa_get_random(100) < record->zi_freq);
|
|
||||||
|
|
||||||
return (B_FALSE);
|
return (B_FALSE);
|
||||||
}
|
}
|
||||||
|
@ -293,6 +312,12 @@ zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (handler->zi_record.zi_error == error) {
|
if (handler->zi_record.zi_error == error) {
|
||||||
|
/*
|
||||||
|
* limit error injection if requested
|
||||||
|
*/
|
||||||
|
if (!freq_triggered(handler->zi_record.zi_freq))
|
||||||
|
continue;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For a failed open, pretend like the device
|
* For a failed open, pretend like the device
|
||||||
* has gone away.
|
* has gone away.
|
||||||
|
@ -466,10 +491,8 @@ zio_handle_io_delay(zio_t *zio)
|
||||||
if (handler->zi_record.zi_cmd != ZINJECT_DELAY_IO)
|
if (handler->zi_record.zi_cmd != ZINJECT_DELAY_IO)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (handler->zi_record.zi_freq != 0 &&
|
if (!freq_triggered(handler->zi_record.zi_freq))
|
||||||
spa_get_random(100) >= handler->zi_record.zi_freq) {
|
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
|
|
||||||
if (vd->vdev_guid != handler->zi_record.zi_guid)
|
if (vd->vdev_guid != handler->zi_record.zi_guid)
|
||||||
continue;
|
continue;
|
||||||
|
|
Loading…
Reference in New Issue