From 4a283c7f77eb5065e9f03b122bf8ead4f4a1e2be Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Fri, 19 May 2017 12:30:16 -0700 Subject: [PATCH] Force fault a vdev with 'zpool offline -f' This patch adds a '-f' option to 'zpool offline' to fault a vdev instead of bringing it offline. Unlike the OFFLINE state, the FAULTED state will trigger the FMA code, allowing for things like autoreplace and triggering the slot fault LED. The -f faults persist across imports, unless they were set with the temporary (-t) flag. Both persistent and temporary faults can be cleared with zpool clear. Reviewed-by: Brian Behlendorf Signed-off-by: Tony Hutter Closes #6094 --- cmd/zpool/zpool_main.c | 31 +++-- include/libzfs.h | 1 + include/sys/fs/zfs.h | 5 +- lib/libzfs/libzfs_pool.c | 37 ++++++ man/man8/zpool.8 | 14 ++- module/zfs/spa_misc.c | 14 ++- module/zfs/vdev.c | 58 ++++++++- module/zfs/vdev_label.c | 10 +- module/zfs/zfs_ioctl.c | 6 +- tests/runfiles/linux.run | 2 +- .../cli_root/zpool_offline/Makefile.am | 3 +- .../zpool_offline/zpool_offline_001_pos.ksh | 3 +- .../zpool_offline/zpool_offline_003_pos.ksh | 111 ++++++++++++++++++ 13 files changed, 266 insertions(+), 29 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/cli_root/zpool_offline/zpool_offline_003_pos.ksh diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 6a54c92c30..0d06dcc05d 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -326,7 +326,7 @@ get_usage(zpool_help_t idx) return (gettext("\tlist [-gHLpPv] [-o property[,...]] " "[-T d|u] [pool] ... [interval [count]]\n")); case HELP_OFFLINE: - return (gettext("\toffline [-t] ...\n")); + return (gettext("\toffline [-f] [-t] ...\n")); case HELP_ONLINE: return (gettext("\tonline ...\n")); case HELP_REPLACE: @@ -5437,11 +5437,9 @@ zpool_do_online(int argc, char **argv) /* * zpool offline [-ft] ... * - * -f Force the device into the offline state, even if doing - * so would appear to compromise pool availability. - * (not supported yet) + * -f Force the device into a faulted state. * - * -t Only take the device off-line temporarily. The offline + * -t Only take the device off-line temporarily. The offline/faulted * state will not be persistent across reboots. */ /* ARGSUSED */ @@ -5453,14 +5451,17 @@ zpool_do_offline(int argc, char **argv) zpool_handle_t *zhp; int ret = 0; boolean_t istmp = B_FALSE; + boolean_t fault = B_FALSE; /* check options */ while ((c = getopt(argc, argv, "ft")) != -1) { switch (c) { + case 'f': + fault = B_TRUE; + break; case 't': istmp = B_TRUE; break; - case 'f': case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); @@ -5487,8 +5488,22 @@ zpool_do_offline(int argc, char **argv) return (1); for (i = 1; i < argc; i++) { - if (zpool_vdev_offline(zhp, argv[i], istmp) != 0) - ret = 1; + if (fault) { + uint64_t guid = zpool_vdev_path_to_guid(zhp, argv[i]); + vdev_aux_t aux; + if (istmp == B_FALSE) { + /* Force the fault to persist across imports */ + aux = VDEV_AUX_EXTERNAL_PERSIST; + } else { + aux = VDEV_AUX_EXTERNAL; + } + + if (guid == 0 || zpool_vdev_fault(zhp, guid, aux) != 0) + ret = 1; + } else { + if (zpool_vdev_offline(zhp, argv[i], istmp) != 0) + ret = 1; + } } zpool_close(zhp); diff --git a/include/libzfs.h b/include/libzfs.h index 1240c23d2f..08c7813ede 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -284,6 +284,7 @@ extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *, boolean_t *, boolean_t *, boolean_t *); extern int zpool_label_disk_wait(char *, int); extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, char *); +extern uint64_t zpool_vdev_path_to_guid(zpool_handle_t *zhp, const char *path); int zfs_dev_is_dm(char *dev_name); int zfs_dev_is_whole_disk(char *dev_name); diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 5b7bbb6898..d59489de67 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -731,9 +731,10 @@ typedef enum vdev_aux { VDEV_AUX_ERR_EXCEEDED, /* too many errors */ VDEV_AUX_IO_FAILURE, /* experienced I/O failure */ VDEV_AUX_BAD_LOG, /* cannot read log chain(s) */ - VDEV_AUX_EXTERNAL, /* external diagnosis */ + VDEV_AUX_EXTERNAL, /* external diagnosis or forced fault */ VDEV_AUX_SPLIT_POOL, /* vdev was split off into another pool */ - VDEV_AUX_BAD_ASHIFT /* vdev ashift is invalid */ + VDEV_AUX_BAD_ASHIFT, /* vdev ashift is invalid */ + VDEV_AUX_EXTERNAL_PERSIST /* persistent forced fault */ } vdev_aux_t; /* diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index e64d0365a2..28ccf8f4d6 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -2370,6 +2370,43 @@ zpool_relabel_disk(libzfs_handle_t *hdl, const char *path, const char *msg) return (0); } +/* + * Convert a vdev path to a GUID. Returns GUID or 0 on error. + * + * If is_spare, is_l2cache, or is_log is non-NULL, then store within it + * if the VDEV is a spare, l2cache, or log device. If they're NULL then + * ignore them. + */ +static uint64_t +zpool_vdev_path_to_guid_impl(zpool_handle_t *zhp, const char *path, + boolean_t *is_spare, boolean_t *is_l2cache, boolean_t *is_log) +{ + uint64_t guid; + boolean_t spare = B_FALSE, l2cache = B_FALSE, log = B_FALSE; + nvlist_t *tgt; + + if ((tgt = zpool_find_vdev(zhp, path, &spare, &l2cache, + &log)) == NULL) + return (0); + + verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &guid) == 0); + if (is_spare != NULL) + *is_spare = spare; + if (is_l2cache != NULL) + *is_l2cache = l2cache; + if (is_log != NULL) + *is_log = log; + + return (guid); +} + +/* Convert a vdev path to a GUID. Returns GUID or 0 on error. */ +uint64_t +zpool_vdev_path_to_guid(zpool_handle_t *zhp, const char *path) +{ + return (zpool_vdev_path_to_guid_impl(zhp, path, NULL, NULL, NULL)); +} + /* * Bring the specified vdev online. The 'flags' parameter is a set of the * ZFS_ONLINE_* flags. diff --git a/man/man8/zpool.8 b/man/man8/zpool.8 index 89ef1bc7be..8b793623e1 100644 --- a/man/man8/zpool.8 +++ b/man/man8/zpool.8 @@ -114,7 +114,7 @@ zpool \- configures ZFS storage pools .LP .nf -\fBzpool offline\fR [\fB-t\fR] \fIpool\fR \fIdevice\fR ... +\fBzpool offline\fR [\fB-f\fR] [\fB-t\fR] \fIpool\fR \fIdevice\fR ... .fi .LP @@ -1908,13 +1908,21 @@ Verbose statistics. Reports usage statistics for individual \fIvdevs\fR within t .sp .ne 2 .na -\fB\fBzpool offline\fR [\fB-t\fR] \fIpool\fR \fIdevice\fR ...\fR +\fB\fBzpool offline\fR [\fB-f\fR] [\fB-t\fR] \fIpool\fR \fIdevice\fR ...\fR .ad .sp .6 .RS 4n Takes the specified physical device offline. While the \fIdevice\fR is offline, no attempt is made to read or write to the device. .sp -This command is not applicable to spares or cache devices. +.ne 2 +.na +\fB\fB-f\fR\fR +.ad +.RS 6n +Force fault. Instead of offlining the disk, put it into a faulted state. The +fault will persist across imports unless the \fB-t\fR flag was specified. +.RE + .sp .ne 2 .na diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 831f83b339..fb425e121c 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -1181,13 +1181,21 @@ int spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error) { boolean_t config_changed = B_FALSE; + vdev_t *vdev_top; + + if (vd == NULL || vd == spa->spa_root_vdev) { + vdev_top = spa->spa_root_vdev; + } else { + vdev_top = vd->vdev_top; + } if (vd != NULL || error == 0) - vdev_dtl_reassess(vd ? vd->vdev_top : spa->spa_root_vdev, - 0, 0, B_FALSE); + vdev_dtl_reassess(vdev_top, 0, 0, B_FALSE); if (vd != NULL) { - vdev_state_dirty(vd->vdev_top); + if (vd != spa->spa_root_vdev) + vdev_state_dirty(vdev_top); + config_changed = B_TRUE; spa->spa_config_generation++; } diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index f44d338ef8..1bca227dbb 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -394,6 +394,8 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, char *type; uint64_t guid = 0, islog, nparity; vdev_t *vd; + char *tmp = NULL; + int rc; ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); @@ -487,6 +489,19 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &vd->vdev_path) == 0) vd->vdev_path = spa_strdup(vd->vdev_path); + + /* + * ZPOOL_CONFIG_AUX_STATE = "external" means we previously forced a + * fault on a vdev and want it to persist across imports (like with + * zpool offline -f). + */ + rc = nvlist_lookup_string(nv, ZPOOL_CONFIG_AUX_STATE, &tmp); + if (rc == 0 && tmp != NULL && strcmp(tmp, "external") == 0) { + vd->vdev_stat.vs_aux = VDEV_AUX_EXTERNAL; + vd->vdev_faulted = 1; + vd->vdev_label_aux = VDEV_AUX_EXTERNAL; + } + if (nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &vd->vdev_devid) == 0) vd->vdev_devid = spa_strdup(vd->vdev_devid); if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PHYS_PATH, @@ -591,12 +606,17 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, &vd->vdev_resilver_txg); /* - * When importing a pool, we want to ignore the persistent fault - * state, as the diagnosis made on another system may not be - * valid in the current context. Local vdevs will - * remain in the faulted state. + * In general, when importing a pool we want to ignore the + * persistent fault state, as the diagnosis made on another + * system may not be valid in the current context. The only + * exception is if we forced a vdev to a persistently faulted + * state with 'zpool offline -f'. The persistent fault will + * remain across imports until cleared. + * + * Local vdevs will remain in the faulted state. */ - if (spa_load_state(spa) == SPA_LOAD_OPEN) { + if (spa_load_state(spa) == SPA_LOAD_OPEN || + spa_load_state(spa) == SPA_LOAD_IMPORT) { (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &vd->vdev_faulted); (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DEGRADED, @@ -2478,6 +2498,32 @@ vdev_fault(spa_t *spa, uint64_t guid, vdev_aux_t aux) tvd = vd->vdev_top; + /* + * If user did a 'zpool offline -f' then make the fault persist across + * reboots. + */ + if (aux == VDEV_AUX_EXTERNAL_PERSIST) { + /* + * There are two kinds of forced faults: temporary and + * persistent. Temporary faults go away at pool import, while + * persistent faults stay set. Both types of faults can be + * cleared with a zpool clear. + * + * We tell if a vdev is persistently faulted by looking at the + * ZPOOL_CONFIG_AUX_STATE nvpair. If it's set to "external" at + * import then it's a persistent fault. Otherwise, it's + * temporary. We get ZPOOL_CONFIG_AUX_STATE set to "external" + * by setting vd.vdev_stat.vs_aux to VDEV_AUX_EXTERNAL. This + * tells vdev_config_generate() (which gets run later) to set + * ZPOOL_CONFIG_AUX_STATE to "external" in the nvlist. + */ + vd->vdev_stat.vs_aux = VDEV_AUX_EXTERNAL; + vd->vdev_tmpoffline = B_FALSE; + aux = VDEV_AUX_EXTERNAL; + } else { + vd->vdev_tmpoffline = B_TRUE; + } + /* * We don't directly use the aux state here, but if we do a * vdev_reopen(), we need this value to be present to remember why we @@ -2753,7 +2799,6 @@ vdev_clear(spa_t *spa, vdev_t *vd) */ if (vd->vdev_faulted || vd->vdev_degraded || !vdev_readable(vd) || !vdev_writeable(vd)) { - /* * When reopening in response to a clear event, it may be due to * a fmadm repair request. In this case, if the device is @@ -2764,6 +2809,7 @@ vdev_clear(spa_t *spa, vdev_t *vd) vd->vdev_faulted = vd->vdev_degraded = 0ULL; vd->vdev_cant_read = B_FALSE; vd->vdev_cant_write = B_FALSE; + vd->vdev_stat.vs_aux = 0; vdev_reopen(vd == rvd ? rvd : vd->vdev_top); diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index 20c0ac86a0..021f4774b5 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -519,6 +519,7 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, if (vd->vdev_ishole) fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_HOLE, B_TRUE); + /* Set the reason why we're FAULTED/DEGRADED. */ switch (vd->vdev_stat.vs_aux) { case VDEV_AUX_ERR_EXCEEDED: aux = "err_exceeded"; @@ -529,8 +530,15 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, break; } - if (aux != NULL) + if (aux != NULL && !vd->vdev_tmpoffline) { fnvlist_add_string(nv, ZPOOL_CONFIG_AUX_STATE, aux); + } else { + /* + * We're healthy - clear any previous AUX_STATE values. + */ + if (nvlist_exists(nv, ZPOOL_CONFIG_AUX_STATE)) + nvlist_remove_all(nv, ZPOOL_CONFIG_AUX_STATE); + } if (vd->vdev_splitting && vd->vdev_orig_guid != 0LL) { fnvlist_add_uint64(nv, ZPOOL_CONFIG_ORIG_GUID, diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index f94bf3b1ca..268e797146 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -156,6 +156,7 @@ #include #include #include +#include #include #include #include @@ -1896,7 +1897,8 @@ zfs_ioc_vdev_set_state(zfs_cmd_t *zc) case VDEV_STATE_FAULTED: if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED && - zc->zc_obj != VDEV_AUX_EXTERNAL) + zc->zc_obj != VDEV_AUX_EXTERNAL && + zc->zc_obj != VDEV_AUX_EXTERNAL_PERSIST) zc->zc_obj = VDEV_AUX_ERR_EXCEEDED; error = vdev_fault(spa, zc->zc_guid, zc->zc_obj); @@ -4919,7 +4921,7 @@ zfs_ioc_clear(zfs_cmd_t *zc) vdev_clear(spa, vd); - (void) spa_vdev_state_exit(spa, NULL, 0); + (void) spa_vdev_state_exit(spa, spa->spa_root_vdev, 0); /* * Resume any suspended I/Os. diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 232fd234fd..0e46220234 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -287,7 +287,7 @@ pre = post = [tests/functional/cli_root/zpool_offline] -tests = ['zpool_offline_001_pos', 'zpool_offline_002_neg'] +tests = ['zpool_offline_001_pos', 'zpool_offline_002_neg', 'zpool_offline_003_pos'] [tests/functional/cli_root/zpool_online] tests = ['zpool_online_001_pos', 'zpool_online_002_neg'] diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_offline/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_offline/Makefile.am index 6bb5fbf6aa..33fbb18d66 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_offline/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_offline/Makefile.am @@ -3,4 +3,5 @@ dist_pkgdata_SCRIPTS = \ setup.ksh \ cleanup.ksh \ zpool_offline_001_pos.ksh \ - zpool_offline_002_neg.ksh + zpool_offline_002_neg.ksh \ + zpool_offline_003_pos.ksh diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_offline/zpool_offline_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_offline/zpool_offline_001_pos.ksh index c0ccf0bb86..6f4c2e3182 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_offline/zpool_offline_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_offline/zpool_offline_001_pos.ksh @@ -39,7 +39,6 @@ # 1. Create an array of correctly formed 'zpool offline' options # 2. Execute each element of the array. # 3. Verify use of each option is successful. -# verify_runnable "global" @@ -122,4 +121,4 @@ for disk in $DISKLIST; do fi done -log_pass "'zpool offline' with correct options succeeded" +log_pass "'zpool offline -f' succeeded" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_offline/zpool_offline_003_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_offline/zpool_offline_003_pos.ksh new file mode 100755 index 0000000000..7b5d21cba2 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_offline/zpool_offline_003_pos.ksh @@ -0,0 +1,111 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2016 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Test force faulting a VDEV with 'zpool offline -f' +# +# STRATEGY: +# For both temporary and persistent faults, do the following: +# 1. Force fault a vdev, and clear the fault. +# 2. Offline a vdev, force fault it, clear the fault, and online it. +# 3. Force fault a vdev, export it, then import it. + +verify_runnable "global" + +DISKLIST=$(get_disklist $TESTPOOL) +set -A disks $DISKLIST +typeset -i num=${#disks[*]} + +set -A args "" "-t" + +function cleanup +{ + # Ensure we don't leave disks in the offline state + for disk in $DISKLIST; do + log_must zpool online $TESTPOOL $disk + check_state $TESTPOOL $disk "online" + if [[ $? != 0 ]]; then + log_fail "Unable to online $disk" + fi + done +} + +log_assert "Executing 'zpool offline -f' with correct options succeeds" + +log_onexit cleanup + +if [[ -z $DISKLIST ]]; then + log_fail "DISKLIST is empty." +fi + +typeset -i i=0 +typeset -i j=1 + +# Get name of the first disk in the pool +disk=${DISKLIST%% *} + +# Test temporary and persistent faults +for arg in f tf ; do + # Force fault disk, and clear the fault + log_must zpool offline -$arg $TESTPOOL $disk + check_state $TESTPOOL $disk "faulted" + log_must zpool clear $TESTPOOL $disk + check_state $TESTPOOL $disk "online" + + # Offline a disk, force fault it, clear the fault, and online it + log_must zpool offline $TESTPOOL $disk + check_state $TESTPOOL $disk "offline" + log_must zpool offline -$arg $TESTPOOL $disk + check_state $TESTPOOL $disk "faulted" + log_must zpool clear $TESTPOOL $disk + check_state $TESTPOOL $disk "offline" + log_must zpool online $TESTPOOL $disk + check_state $TESTPOOL $disk "online" + + # Test faults across imports + log_must zpool offline -tf $TESTPOOL $disk + check_state $TESTPOOL $disk "faulted" + log_must zpool export $TESTPOOL + log_must zpool import $TESTPOOL + log_note "-$arg now imported" + if [[ "$arg" = "f" ]] ; then + # Persistent fault + check_state $TESTPOOL $disk "faulted" + log_must zpool clear $TESTPOOL $disk + else + # Temporary faults get cleared by imports + check_state $TESTPOOL $disk "online" + fi +done +log_pass "'zpool offline -f' with correct options succeeded"