Force fault a vdev with 'zpool offline -f'

This patch adds a '-f' option to 'zpool offline' to fault a vdev
instead of bringing it offline.  Unlike the OFFLINE state, the
FAULTED state will trigger the FMA code, allowing for things like
autoreplace and lighting the slot fault LED.  Faults set with -f
persist across imports, unless they were set with the temporary
(-t) flag.  Both persistent and temporary faults can be cleared
with 'zpool clear'.
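
A minimal usage sketch (pool name 'tank' and device 'sdb' are
hypothetical; the status output is abridged):

    # Force fault a device; it is reported as FAULTED rather than OFFLINE
    $ sudo zpool offline -f tank sdb
    $ zpool status tank | grep sdb
        sdb      FAULTED      0     0     0  external device fault

    # Both fault flavors are cleared the same way
    $ sudo zpool clear tank sdb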

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Closes #6094
Authored by Tony Hutter on 2017-05-19 12:30:16 -07:00; committed by Brian Behlendorf
parent a32df59e18
commit 4a283c7f77
13 changed files with 266 additions and 29 deletions

cmd/zpool/zpool_main.c

@@ -326,7 +326,7 @@ get_usage(zpool_help_t idx)
 		return (gettext("\tlist [-gHLpPv] [-o property[,...]] "
 		    "[-T d|u] [pool] ... [interval [count]]\n"));
 	case HELP_OFFLINE:
-		return (gettext("\toffline [-t] <pool> <device> ...\n"));
+		return (gettext("\toffline [-f] [-t] <pool> <device> ...\n"));
 	case HELP_ONLINE:
 		return (gettext("\tonline <pool> <device> ...\n"));
 	case HELP_REPLACE:
@@ -5437,11 +5437,9 @@ zpool_do_online(int argc, char **argv)
 /*
  * zpool offline [-ft] <pool> <device> ...
  *
- *	-f	Force the device into the offline state, even if doing
- *		so would appear to compromise pool availability.
- *		(not supported yet)
+ *	-f	Force the device into a faulted state.
  *
- *	-t	Only take the device off-line temporarily.  The offline
+ *	-t	Only take the device off-line temporarily.  The offline/faulted
  *		state will not be persistent across reboots.
  */
 /* ARGSUSED */
@@ -5453,14 +5451,17 @@ zpool_do_offline(int argc, char **argv)
 	zpool_handle_t *zhp;
 	int ret = 0;
 	boolean_t istmp = B_FALSE;
+	boolean_t fault = B_FALSE;
 
 	/* check options */
 	while ((c = getopt(argc, argv, "ft")) != -1) {
 		switch (c) {
+		case 'f':
+			fault = B_TRUE;
+			break;
 		case 't':
 			istmp = B_TRUE;
 			break;
-		case 'f':
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
@@ -5487,8 +5488,22 @@ zpool_do_offline(int argc, char **argv)
 		return (1);
 
 	for (i = 1; i < argc; i++) {
-		if (zpool_vdev_offline(zhp, argv[i], istmp) != 0)
-			ret = 1;
+		if (fault) {
+			uint64_t guid = zpool_vdev_path_to_guid(zhp, argv[i]);
+			vdev_aux_t aux;
+			if (istmp == B_FALSE) {
+				/* Force the fault to persist across imports */
+				aux = VDEV_AUX_EXTERNAL_PERSIST;
+			} else {
+				aux = VDEV_AUX_EXTERNAL;
+			}
+
+			if (guid == 0 || zpool_vdev_fault(zhp, guid, aux) != 0)
+				ret = 1;
+		} else {
+			if (zpool_vdev_offline(zhp, argv[i], istmp) != 0)
+				ret = 1;
+		}
 	}
 
 	zpool_close(zhp);

include/libzfs.h

@@ -284,6 +284,7 @@ extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *,
     boolean_t *, boolean_t *, boolean_t *);
 extern int zpool_label_disk_wait(char *, int);
 extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, char *);
+extern uint64_t zpool_vdev_path_to_guid(zpool_handle_t *zhp, const char *path);
 
 int zfs_dev_is_dm(char *dev_name);
 int zfs_dev_is_whole_disk(char *dev_name);

include/sys/fs/zfs.h

@@ -731,9 +731,10 @@ typedef enum vdev_aux {
 	VDEV_AUX_ERR_EXCEEDED,	/* too many errors */
 	VDEV_AUX_IO_FAILURE,	/* experienced I/O failure */
 	VDEV_AUX_BAD_LOG,	/* cannot read log chain(s) */
-	VDEV_AUX_EXTERNAL,	/* external diagnosis */
+	VDEV_AUX_EXTERNAL,	/* external diagnosis or forced fault */
 	VDEV_AUX_SPLIT_POOL,	/* vdev was split off into another pool */
-	VDEV_AUX_BAD_ASHIFT	/* vdev ashift is invalid */
+	VDEV_AUX_BAD_ASHIFT,	/* vdev ashift is invalid */
+	VDEV_AUX_EXTERNAL_PERSIST	/* persistent forced fault */
 } vdev_aux_t;
 
 /*

lib/libzfs/libzfs_pool.c

@@ -2370,6 +2370,43 @@ zpool_relabel_disk(libzfs_handle_t *hdl, const char *path, const char *msg)
 	return (0);
 }
 
+/*
+ * Convert a vdev path to a GUID.  Returns GUID or 0 on error.
+ *
+ * If is_spare, is_l2cache, or is_log is non-NULL, then store within it
+ * if the VDEV is a spare, l2cache, or log device.  If they're NULL then
+ * ignore them.
+ */
+static uint64_t
+zpool_vdev_path_to_guid_impl(zpool_handle_t *zhp, const char *path,
+    boolean_t *is_spare, boolean_t *is_l2cache, boolean_t *is_log)
+{
+	uint64_t guid;
+	boolean_t spare = B_FALSE, l2cache = B_FALSE, log = B_FALSE;
+	nvlist_t *tgt;
+
+	if ((tgt = zpool_find_vdev(zhp, path, &spare, &l2cache,
+	    &log)) == NULL)
+		return (0);
+
+	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &guid) == 0);
+	if (is_spare != NULL)
+		*is_spare = spare;
+	if (is_l2cache != NULL)
+		*is_l2cache = l2cache;
+	if (is_log != NULL)
+		*is_log = log;
+
+	return (guid);
+}
+
+/* Convert a vdev path to a GUID.  Returns GUID or 0 on error. */
+uint64_t
+zpool_vdev_path_to_guid(zpool_handle_t *zhp, const char *path)
+{
+	return (zpool_vdev_path_to_guid_impl(zhp, path, NULL, NULL, NULL));
+}
+
 /*
  * Bring the specified vdev online.  The 'flags' parameter is a set of the
  * ZFS_ONLINE_* flags.
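
As a rough illustration of the mapping the new zpool_vdev_path_to_guid()
helper performs, the existing -g flag already reports vdev GUIDs in place
of device names (pool name and GUID below are hypothetical):

    # 'zpool offline -f' resolves a path like 'sdb' to this GUID
    # internally before calling zpool_vdev_fault()
    $ zpool status -g tank
        7684561766328927304      ONLINE       0     0     0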

man/man8/zpool.8

@@ -114,7 +114,7 @@ zpool \- configures ZFS storage pools
 .LP
 .nf
-\fBzpool offline\fR [\fB-t\fR] \fIpool\fR \fIdevice\fR ...
+\fBzpool offline\fR [\fB-f\fR] [\fB-t\fR] \fIpool\fR \fIdevice\fR ...
 .fi
 
 .LP
@@ -1908,13 +1908,21 @@ Verbose statistics. Reports usage statistics for individual \fIvdevs\fR within t
 .sp
 .ne 2
 .na
-\fB\fBzpool offline\fR [\fB-t\fR] \fIpool\fR \fIdevice\fR ...\fR
+\fB\fBzpool offline\fR [\fB-f\fR] [\fB-t\fR] \fIpool\fR \fIdevice\fR ...\fR
 .ad
 .sp .6
 .RS 4n
 Takes the specified physical device offline. While the \fIdevice\fR is offline, no attempt is made to read or write to the device.
 .sp
 This command is not applicable to spares or cache devices.
+.sp
+.ne 2
+.na
+\fB\fB-f\fR\fR
+.ad
+.RS 6n
+Force fault. Instead of offlining the disk, put it into a faulted state. The
+fault will persist across imports unless the \fB-t\fR flag was specified.
+.RE
 .sp
 .ne 2
 .na
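
The persistence rule documented for -f above can be checked from the
shell; a sketch with a hypothetical pool 'tank' and device 'sdb':

    # Persistent: the fault survives an export/import cycle
    $ sudo zpool offline -f tank sdb
    $ sudo zpool export tank && sudo zpool import tank
    $ zpool status tank | grep sdb      # still FAULTED

    # Temporary: a -t fault is dropped at import
    $ sudo zpool offline -tf tank sdb
    $ sudo zpool export tank && sudo zpool import tank
    $ zpool status tank | grep sdb      # back ONLINE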

module/zfs/spa_misc.c

@@ -1181,13 +1181,21 @@ int
 spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error)
 {
 	boolean_t config_changed = B_FALSE;
+	vdev_t *vdev_top;
+
+	if (vd == NULL || vd == spa->spa_root_vdev) {
+		vdev_top = spa->spa_root_vdev;
+	} else {
+		vdev_top = vd->vdev_top;
+	}
 
 	if (vd != NULL || error == 0)
-		vdev_dtl_reassess(vd ? vd->vdev_top : spa->spa_root_vdev,
-		    0, 0, B_FALSE);
+		vdev_dtl_reassess(vdev_top, 0, 0, B_FALSE);
 
 	if (vd != NULL) {
-		vdev_state_dirty(vd->vdev_top);
+		if (vd != spa->spa_root_vdev)
+			vdev_state_dirty(vdev_top);
+
 		config_changed = B_TRUE;
 		spa->spa_config_generation++;
 	}

module/zfs/vdev.c

@@ -394,6 +394,8 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
 	char *type;
 	uint64_t guid = 0, islog, nparity;
 	vdev_t *vd;
+	char *tmp = NULL;
+	int rc;
 
 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
@@ -487,6 +489,19 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
 	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &vd->vdev_path) == 0)
 		vd->vdev_path = spa_strdup(vd->vdev_path);
 
+	/*
+	 * ZPOOL_CONFIG_AUX_STATE = "external" means we previously forced a
+	 * fault on a vdev and want it to persist across imports (like with
+	 * zpool offline -f).
+	 */
+	rc = nvlist_lookup_string(nv, ZPOOL_CONFIG_AUX_STATE, &tmp);
+	if (rc == 0 && tmp != NULL && strcmp(tmp, "external") == 0) {
+		vd->vdev_stat.vs_aux = VDEV_AUX_EXTERNAL;
+		vd->vdev_faulted = 1;
+		vd->vdev_label_aux = VDEV_AUX_EXTERNAL;
+	}
+
 	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &vd->vdev_devid) == 0)
 		vd->vdev_devid = spa_strdup(vd->vdev_devid);
 	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PHYS_PATH,
@@ -591,12 +606,17 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
 		    &vd->vdev_resilver_txg);
 
 		/*
-		 * When importing a pool, we want to ignore the persistent fault
-		 * state, as the diagnosis made on another system may not be
-		 * valid in the current context.  Local vdevs will
-		 * remain in the faulted state.
+		 * In general, when importing a pool we want to ignore the
+		 * persistent fault state, as the diagnosis made on another
+		 * system may not be valid in the current context.  The only
+		 * exception is if we forced a vdev to a persistently faulted
+		 * state with 'zpool offline -f'.  The persistent fault will
+		 * remain across imports until cleared.
+		 *
+		 * Local vdevs will remain in the faulted state.
 		 */
-		if (spa_load_state(spa) == SPA_LOAD_OPEN) {
+		if (spa_load_state(spa) == SPA_LOAD_OPEN ||
+		    spa_load_state(spa) == SPA_LOAD_IMPORT) {
 			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED,
 			    &vd->vdev_faulted);
 			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DEGRADED,
@@ -2478,6 +2498,32 @@ vdev_fault(spa_t *spa, uint64_t guid, vdev_aux_t aux)
 
 	tvd = vd->vdev_top;
 
+	/*
+	 * If user did a 'zpool offline -f' then make the fault persist across
+	 * reboots.
+	 */
+	if (aux == VDEV_AUX_EXTERNAL_PERSIST) {
+		/*
+		 * There are two kinds of forced faults: temporary and
+		 * persistent.  Temporary faults go away at pool import, while
+		 * persistent faults stay set.  Both types of faults can be
+		 * cleared with a zpool clear.
+		 *
+		 * We tell if a vdev is persistently faulted by looking at the
+		 * ZPOOL_CONFIG_AUX_STATE nvpair.  If it's set to "external" at
+		 * import then it's a persistent fault.  Otherwise, it's
+		 * temporary.  We get ZPOOL_CONFIG_AUX_STATE set to "external"
+		 * by setting vd.vdev_stat.vs_aux to VDEV_AUX_EXTERNAL.  This
+		 * tells vdev_config_generate() (which gets run later) to set
+		 * ZPOOL_CONFIG_AUX_STATE to "external" in the nvlist.
+		 */
+		vd->vdev_stat.vs_aux = VDEV_AUX_EXTERNAL;
+		vd->vdev_tmpoffline = B_FALSE;
+		aux = VDEV_AUX_EXTERNAL;
+	} else {
+		vd->vdev_tmpoffline = B_TRUE;
+	}
+
 	/*
 	 * We don't directly use the aux state here, but if we do a
 	 * vdev_reopen(), we need this value to be present to remember why we
@@ -2753,7 +2799,6 @@ vdev_clear(spa_t *spa, vdev_t *vd)
 	 */
 	if (vd->vdev_faulted || vd->vdev_degraded ||
 	    !vdev_readable(vd) || !vdev_writeable(vd)) {
-
 		/*
 		 * When reopening in response to a clear event, it may be due to
 		 * a fmadm repair request.  In this case, if the device is
@@ -2764,6 +2809,7 @@ vdev_clear(spa_t *spa, vdev_t *vd)
 		vd->vdev_faulted = vd->vdev_degraded = 0ULL;
 		vd->vdev_cant_read = B_FALSE;
 		vd->vdev_cant_write = B_FALSE;
+		vd->vdev_stat.vs_aux = 0;
 
 		vdev_reopen(vd == rvd ? rvd : vd->vdev_top);
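
Assuming the mechanism described in the comments above, the persistent
marker should be observable in the on-disk configuration. A hypothetical
check with zdb (device path and output abridged; the aux_state pair is
part of the pool's vdev tree, so labels of healthy pool members carry it
too):

    # After a persistent 'zpool offline -f tank sdb'
    $ sudo zdb -l /dev/sda1 | grep aux_state
        aux_state: 'external'

    # After 'zpool clear tank sdb', vdev_config_generate() strips the
    # pair, so the same grep should return nothing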

module/zfs/vdev_label.c

@@ -519,6 +519,7 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
 	if (vd->vdev_ishole)
 		fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_HOLE, B_TRUE);
 
+	/* Set the reason why we're FAULTED/DEGRADED. */
 	switch (vd->vdev_stat.vs_aux) {
 	case VDEV_AUX_ERR_EXCEEDED:
 		aux = "err_exceeded";
@@ -529,8 +530,15 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
 		break;
 	}
 
-	if (aux != NULL)
+	if (aux != NULL && !vd->vdev_tmpoffline) {
 		fnvlist_add_string(nv, ZPOOL_CONFIG_AUX_STATE, aux);
+	} else {
+		/*
+		 * We're healthy - clear any previous AUX_STATE values.
+		 */
+		if (nvlist_exists(nv, ZPOOL_CONFIG_AUX_STATE))
+			nvlist_remove_all(nv, ZPOOL_CONFIG_AUX_STATE);
+	}
 
 	if (vd->vdev_splitting && vd->vdev_orig_guid != 0LL) {
 		fnvlist_add_uint64(nv, ZPOOL_CONFIG_ORIG_GUID,

module/zfs/zfs_ioctl.c

@@ -156,6 +156,7 @@
 #include <sys/spa.h>
 #include <sys/spa_impl.h>
 #include <sys/vdev.h>
+#include <sys/vdev_impl.h>
 #include <sys/priv_impl.h>
 #include <sys/dmu.h>
 #include <sys/dsl_dir.h>
@@ -1896,7 +1897,8 @@ zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
 	case VDEV_STATE_FAULTED:
 		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
-		    zc->zc_obj != VDEV_AUX_EXTERNAL)
+		    zc->zc_obj != VDEV_AUX_EXTERNAL &&
+		    zc->zc_obj != VDEV_AUX_EXTERNAL_PERSIST)
 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
 
 		error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
@@ -4919,7 +4921,7 @@ zfs_ioc_clear(zfs_cmd_t *zc)
 		vdev_clear(spa, vd);
 
-	(void) spa_vdev_state_exit(spa, NULL, 0);
+	(void) spa_vdev_state_exit(spa, spa->spa_root_vdev, 0);
 
 	/*
 	 * Resume any suspended I/Os.
tests/runfiles/linux.run

@@ -287,7 +287,7 @@ pre =
 post =
 
 [tests/functional/cli_root/zpool_offline]
-tests = ['zpool_offline_001_pos', 'zpool_offline_002_neg']
+tests = ['zpool_offline_001_pos', 'zpool_offline_002_neg', 'zpool_offline_003_pos']
 
 [tests/functional/cli_root/zpool_online]
 tests = ['zpool_online_001_pos', 'zpool_online_002_neg']

tests/zfs-tests/tests/functional/cli_root/zpool_offline/Makefile.am

@@ -3,4 +3,5 @@ dist_pkgdata_SCRIPTS = \
 	setup.ksh \
 	cleanup.ksh \
 	zpool_offline_001_pos.ksh \
-	zpool_offline_002_neg.ksh
+	zpool_offline_002_neg.ksh \
+	zpool_offline_003_pos.ksh

tests/zfs-tests/tests/functional/cli_root/zpool_offline/zpool_offline_001_pos.ksh

@@ -39,7 +39,6 @@
 # 1. Create an array of correctly formed 'zpool offline' options
 # 2. Execute each element of the array.
 # 3. Verify use of each option is successful.
-#
 
 verify_runnable "global"
@@ -122,4 +121,4 @@ for disk in $DISKLIST; do
 	fi
 done
 
-log_pass "'zpool offline' with correct options succeeded"
+log_pass "'zpool offline -f' succeeded"

tests/zfs-tests/tests/functional/cli_root/zpool_offline/zpool_offline_003_pos.ksh

@@ -0,0 +1,111 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2016 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Test force faulting a VDEV with 'zpool offline -f'
+#
+# STRATEGY:
+# For both temporary and persistent faults, do the following:
+# 1. Force fault a vdev, and clear the fault.
+# 2. Offline a vdev, force fault it, clear the fault, and online it.
+# 3. Force fault a vdev, export it, then import it.
+#
+
+verify_runnable "global"
+
+DISKLIST=$(get_disklist $TESTPOOL)
+set -A disks $DISKLIST
+typeset -i num=${#disks[*]}
+
+set -A args "" "-t"
+
+function cleanup
+{
+	# Ensure we don't leave disks in the offline state
+	for disk in $DISKLIST; do
+		log_must zpool online $TESTPOOL $disk
+		check_state $TESTPOOL $disk "online"
+		if [[ $? != 0 ]]; then
+			log_fail "Unable to online $disk"
+		fi
+	done
+}
+
+log_assert "Executing 'zpool offline -f' with correct options succeeds"
+
+log_onexit cleanup
+
+if [[ -z $DISKLIST ]]; then
+	log_fail "DISKLIST is empty."
+fi
+
+typeset -i i=0
+typeset -i j=1
+
+# Get name of the first disk in the pool
+disk=${DISKLIST%% *}
+
+# Test temporary and persistent faults
+for arg in f tf ; do
+	# Force fault disk, and clear the fault
+	log_must zpool offline -$arg $TESTPOOL $disk
+	check_state $TESTPOOL $disk "faulted"
+	log_must zpool clear $TESTPOOL $disk
+	check_state $TESTPOOL $disk "online"
+
+	# Offline a disk, force fault it, clear the fault, and online it
+	log_must zpool offline $TESTPOOL $disk
+	check_state $TESTPOOL $disk "offline"
+	log_must zpool offline -$arg $TESTPOOL $disk
+	check_state $TESTPOOL $disk "faulted"
+	log_must zpool clear $TESTPOOL $disk
+	check_state $TESTPOOL $disk "offline"
+	log_must zpool online $TESTPOOL $disk
+	check_state $TESTPOOL $disk "online"
+
+	# Test faults across imports
+	log_must zpool offline -$arg $TESTPOOL $disk
+	check_state $TESTPOOL $disk "faulted"
+	log_must zpool export $TESTPOOL
+	log_must zpool import $TESTPOOL
+	log_note "-$arg now imported"
+	if [[ "$arg" = "f" ]] ; then
+		# Persistent fault
+		check_state $TESTPOOL $disk "faulted"
+		log_must zpool clear $TESTPOOL $disk
+	else
+		# Temporary faults get cleared by imports
+		check_state $TESTPOOL $disk "online"
+	fi
+done
+log_pass "'zpool offline -f' with correct options succeeded"