Add corruption failure option to zinject(8)
Added a 'corrupt' error option that will flip a bit in the data after a read operation. This is useful for generating checksum errors at the device layer (in a mirror config for example). It is also used to validate the diagnosis of checksum errors from the zfs diagnosis engine. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Don Brady <don.brady@intel.com> Closes #6345
This commit is contained in:
parent
42a76fc8d7
commit
d977122da9
|
@ -36,12 +36,15 @@
|
||||||
*
|
*
|
||||||
* Errors can be injected into a particular vdev using the '-d' option. This
|
* Errors can be injected into a particular vdev using the '-d' option. This
|
||||||
* option takes a path or vdev GUID to uniquely identify the device within a
|
* option takes a path or vdev GUID to uniquely identify the device within a
|
||||||
* pool. There are two types of errors that can be injected, EIO and ENXIO,
|
* pool. There are four types of errors that can be injected, EIO, ENXIO,
|
||||||
* that can be controlled through the '-e' option. The default is ENXIO. For
|
* ECHILD, and EILSEQ. These can be controlled through the '-e' option and the
|
||||||
* EIO failures, any attempt to read data from the device will return EIO, but
|
* default is ENXIO. For EIO failures, any attempt to read data from the device
|
||||||
* subsequent attempt to reopen the device will succeed. For ENXIO failures,
|
* will return EIO, but a subsequent attempt to reopen the device will succeed.
|
||||||
* any attempt to read from the device will return EIO, but any attempt to
|
* For ENXIO failures, any attempt to read from the device will return EIO, but
|
||||||
* reopen the device will also return ENXIO.
|
* any attempt to reopen the device will return ENXIO. The EILSEQ failures
|
||||||
|
* only apply to read operations (-T read) and will flip a bit after the device
|
||||||
|
* has read the original data.
|
||||||
|
*
|
||||||
* For label faults, the -L option must be specified. This allows faults
|
* For label faults, the -L option must be specified. This allows faults
|
||||||
* to be injected into either the nvlist, uberblock, pad1, or pad2 region
|
* to be injected into either the nvlist, uberblock, pad1, or pad2 region
|
||||||
* of all the labels for the specified device.
|
* of all the labels for the specified device.
|
||||||
|
@ -231,11 +234,12 @@ usage(void)
|
||||||
"\t\tspa_vdev_exit() will trigger a panic.\n"
|
"\t\tspa_vdev_exit() will trigger a panic.\n"
|
||||||
"\n"
|
"\n"
|
||||||
"\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
|
"\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
|
||||||
"\t [-T <read|write|free|claim|all>] [-f frequency] pool\n"
|
"\t\t[-T <read|write|free|claim|all>] [-f frequency] pool\n\n"
|
||||||
"\t\tInject a fault into a particular device or the device's\n"
|
"\t\tInject a fault into a particular device or the device's\n"
|
||||||
"\t\tlabel. Label injection can either be 'nvlist', 'uber',\n "
|
"\t\tlabel. Label injection can either be 'nvlist', 'uber',\n "
|
||||||
"\t\t'pad1', or 'pad2'.\n"
|
"\t\t'pad1', or 'pad2'.\n"
|
||||||
"\t\t'errno' can be 'nxio' (the default), 'io', or 'dtl'.\n"
|
"\t\t'errno' can be 'nxio' (the default), 'io', 'dtl', or\n"
|
||||||
|
"\t\t'corrupt' (bit flip).\n"
|
||||||
"\t\t'frequency' is a value between 0.0001 and 100.0 that limits\n"
|
"\t\t'frequency' is a value between 0.0001 and 100.0 that limits\n"
|
||||||
"\t\tdevice error injection to a percentage of the IOs.\n"
|
"\t\tdevice error injection to a percentage of the IOs.\n"
|
||||||
"\n"
|
"\n"
|
||||||
|
@ -774,6 +778,8 @@ main(int argc, char **argv)
|
||||||
error = ENXIO;
|
error = ENXIO;
|
||||||
} else if (strcasecmp(optarg, "dtl") == 0) {
|
} else if (strcasecmp(optarg, "dtl") == 0) {
|
||||||
error = ECHILD;
|
error = ECHILD;
|
||||||
|
} else if (strcasecmp(optarg, "corrupt") == 0) {
|
||||||
|
error = EILSEQ;
|
||||||
} else {
|
} else {
|
||||||
(void) fprintf(stderr, "invalid error type "
|
(void) fprintf(stderr, "invalid error type "
|
||||||
"'%s': must be 'io', 'checksum' or "
|
"'%s': must be 'io', 'checksum' or "
|
||||||
|
@ -981,7 +987,15 @@ main(int argc, char **argv)
|
||||||
|
|
||||||
if (error == ECKSUM) {
|
if (error == ECKSUM) {
|
||||||
(void) fprintf(stderr, "device error type must be "
|
(void) fprintf(stderr, "device error type must be "
|
||||||
"'io' or 'nxio'\n");
|
"'io', 'nxio' or 'corrupt'\n");
|
||||||
|
libzfs_fini(g_zfs);
|
||||||
|
return (1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (error == EILSEQ &&
|
||||||
|
(record.zi_freq == 0 || io_type != ZIO_TYPE_READ)) {
|
||||||
|
(void) fprintf(stderr, "device corrupt errors require "
|
||||||
|
"io type read and a frequency value\n");
|
||||||
libzfs_fini(g_zfs);
|
libzfs_fini(g_zfs);
|
||||||
return (1);
|
return (1);
|
||||||
}
|
}
|
||||||
|
@ -1109,7 +1123,7 @@ main(int argc, char **argv)
|
||||||
return (2);
|
return (2);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (error == ENXIO) {
|
if (error == ENXIO || error == EILSEQ) {
|
||||||
(void) fprintf(stderr, "data error type must be "
|
(void) fprintf(stderr, "data error type must be "
|
||||||
"'checksum' or 'io'\n");
|
"'checksum' or 'io'\n");
|
||||||
libzfs_fini(g_zfs);
|
libzfs_fini(g_zfs);
|
||||||
|
|
|
@ -619,6 +619,8 @@ extern int zio_clear_fault(int id);
|
||||||
extern void zio_handle_panic_injection(spa_t *spa, char *tag, uint64_t type);
|
extern void zio_handle_panic_injection(spa_t *spa, char *tag, uint64_t type);
|
||||||
extern int zio_handle_fault_injection(zio_t *zio, int error);
|
extern int zio_handle_fault_injection(zio_t *zio, int error);
|
||||||
extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error);
|
extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error);
|
||||||
|
extern int zio_handle_device_injections(vdev_t *vd, zio_t *zio, int err1,
|
||||||
|
int err2);
|
||||||
extern int zio_handle_label_injection(zio_t *zio, int error);
|
extern int zio_handle_label_injection(zio_t *zio, int error);
|
||||||
extern void zio_handle_ignored_writes(zio_t *zio);
|
extern void zio_handle_ignored_writes(zio_t *zio);
|
||||||
extern hrtime_t zio_handle_io_delay(zio_t *zio);
|
extern hrtime_t zio_handle_io_delay(zio_t *zio);
|
||||||
|
|
|
@ -108,6 +108,7 @@ A vdev specified by path or GUID.
|
||||||
.BI "\-e" " device_error"
|
.BI "\-e" " device_error"
|
||||||
Specify
|
Specify
|
||||||
.BR "checksum" " for an ECKSUM error,"
|
.BR "checksum" " for an ECKSUM error,"
|
||||||
|
.BR "corrupt" " to flip a bit in the data after a read,"
|
||||||
.BR "dtl" " for an ECHILD error,"
|
.BR "dtl" " for an ECHILD error,"
|
||||||
.BR "io" " for an EIO error where reopening the device will succeed, or"
|
.BR "io" " for an EIO error where reopening the device will succeed, or"
|
||||||
.BR "nxio" " for an ENXIO error where reopening the device will fail."
|
.BR "nxio" " for an ENXIO error where reopening the device will fail."
|
||||||
|
|
|
@ -3472,8 +3472,8 @@ zio_vdev_io_done(zio_t *zio)
|
||||||
vdev_cache_write(zio);
|
vdev_cache_write(zio);
|
||||||
|
|
||||||
if (zio_injection_enabled && zio->io_error == 0)
|
if (zio_injection_enabled && zio->io_error == 0)
|
||||||
zio->io_error = zio_handle_device_injection(vd,
|
zio->io_error = zio_handle_device_injections(vd, zio,
|
||||||
zio, EIO);
|
EIO, EILSEQ);
|
||||||
|
|
||||||
if (zio_injection_enabled && zio->io_error == 0)
|
if (zio_injection_enabled && zio->io_error == 0)
|
||||||
zio->io_error = zio_handle_label_injection(zio, EIO);
|
zio->io_error = zio_handle_label_injection(zio, EIO);
|
||||||
|
|
|
@ -271,9 +271,24 @@ zio_handle_label_injection(zio_t *zio, int error)
|
||||||
return (ret);
|
return (ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*ARGSUSED*/
|
||||||
|
static int
|
||||||
|
zio_inject_bitflip_cb(void *data, size_t len, void *private)
|
||||||
|
{
|
||||||
|
ASSERTV(zio_t *zio = private);
|
||||||
|
uint8_t *buffer = data;
|
||||||
|
uint_t byte = spa_get_random(len);
|
||||||
|
|
||||||
int
|
ASSERT(zio->io_type == ZIO_TYPE_READ);
|
||||||
zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error)
|
|
||||||
|
/* flip a single random bit in an abd data buffer */
|
||||||
|
buffer[byte] ^= 1 << spa_get_random(8);
|
||||||
|
|
||||||
|
return (1); /* stop after first flip */
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
zio_handle_device_injection_impl(vdev_t *vd, zio_t *zio, int err1, int err2)
|
||||||
{
|
{
|
||||||
inject_handler_t *handler;
|
inject_handler_t *handler;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
@ -311,7 +326,8 @@ zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error)
|
||||||
handler->zi_record.zi_iotype != zio->io_type)
|
handler->zi_record.zi_iotype != zio->io_type)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (handler->zi_record.zi_error == error) {
|
if (handler->zi_record.zi_error == err1 ||
|
||||||
|
handler->zi_record.zi_error == err2) {
|
||||||
/*
|
/*
|
||||||
* limit error injection if requested
|
* limit error injection if requested
|
||||||
*/
|
*/
|
||||||
|
@ -322,7 +338,7 @@ zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error)
|
||||||
* For a failed open, pretend like the device
|
* For a failed open, pretend like the device
|
||||||
* has gone away.
|
* has gone away.
|
||||||
*/
|
*/
|
||||||
if (error == ENXIO)
|
if (err1 == ENXIO)
|
||||||
vd->vdev_stat.vs_aux =
|
vd->vdev_stat.vs_aux =
|
||||||
VDEV_AUX_OPEN_FAILED;
|
VDEV_AUX_OPEN_FAILED;
|
||||||
|
|
||||||
|
@ -335,7 +351,21 @@ zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error)
|
||||||
zio != NULL)
|
zio != NULL)
|
||||||
zio->io_flags |= ZIO_FLAG_IO_RETRY;
|
zio->io_flags |= ZIO_FLAG_IO_RETRY;
|
||||||
|
|
||||||
ret = error;
|
/*
|
||||||
|
* EILSEQ means flip a bit after a read
|
||||||
|
*/
|
||||||
|
if (handler->zi_record.zi_error == EILSEQ) {
|
||||||
|
if (zio == NULL)
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* locate buffer data and flip a bit */
|
||||||
|
(void) abd_iterate_func(zio->io_abd, 0,
|
||||||
|
zio->io_size, zio_inject_bitflip_cb,
|
||||||
|
zio);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = handler->zi_record.zi_error;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (handler->zi_record.zi_error == ENXIO) {
|
if (handler->zi_record.zi_error == ENXIO) {
|
||||||
|
@ -350,6 +380,18 @@ zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error)
|
||||||
return (ret);
|
return (ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error)
|
||||||
|
{
|
||||||
|
return (zio_handle_device_injection_impl(vd, zio, error, INT_MAX));
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
zio_handle_device_injections(vdev_t *vd, zio_t *zio, int err1, int err2)
|
||||||
|
{
|
||||||
|
return (zio_handle_device_injection_impl(vd, zio, err1, err2));
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Simulate hardware that ignores cache flushes. For requested number
|
* Simulate hardware that ignores cache flushes. For requested number
|
||||||
* of seconds nix the actual writing to disk.
|
* of seconds nix the actual writing to disk.
|
||||||
|
|
Loading…
Reference in New Issue