From f5ead99f347695b53eac423e4fd3fe3779ea4876 Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Sun, 2 Apr 2023 12:02:51 +1000 Subject: [PATCH] zpool create: warn on suboptimal pool layout Its possible to create pools that are perfectly valid but are perhaps not the "best" choice for a given set of devices. An example is a raidz1 of two devices. I have seen inexperienced users create this because it looks on the surface like a traditional RAID-1, that is, a mirror. It even appears to work, but presents problems later when they want to upgrade the drives, and of course does not perform as well as a mirror. This changes `zpool create` to reject such "suboptimal" pool layouts, and suggest a possible better alternative. It checks for raidz and draid where the number of devices are parity+1, and could be extended in the future. It adds a switch, --force=layout, to disable the check and the warning and return the old behaviour, for those who know what they're doing. Included is a utility function to work with option flags. The existing -f switch to `zpool create` is now an alias for `--force=vdevs`. Signed-off-by: Rob Norris --- cmd/zpool/zpool_main.c | 83 +++++++++++++++- cmd/zpool/zpool_util.c | 66 +++++++++++++ cmd/zpool/zpool_util.h | 6 ++ man/man8/zpool-create.8 | 24 +++-- tests/runfiles/common.run | 3 +- tests/runfiles/sanity.run | 2 +- tests/zfs-tests/tests/Makefile.am | 1 + .../zpool_create_force_layout.ksh | 94 +++++++++++++++++++ 8 files changed, 265 insertions(+), 14 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_force_layout.ksh diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 10a3b5b14f..6d12547d6b 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -801,6 +801,50 @@ print_spare_list(nvlist_t *nv, int indent) } } +static boolean_t +check_layout(nvlist_t *nv) +{ + boolean_t success = B_TRUE; + + nvlist_t **child; + uint_t c, children = 0; + + if (!nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) == 0) + return (B_TRUE); + + for (c = 0; c < children; c++) { + const char *type = NULL; + (void) nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type); + if (type == NULL) + continue; + + if (strcmp(type, VDEV_TYPE_RAIDZ) != 0 && + strcmp(type, VDEV_TYPE_DRAID) != 0) + continue; + + uint64_t nparity = 0; + (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_NPARITY, + &nparity); + + nvlist_t **dev; + uint_t ndevs = 0; + (void) nvlist_lookup_nvlist_array(child[c], + ZPOOL_CONFIG_CHILDREN, &dev, &ndevs); + + if (ndevs == nparity + 1) { + (void) fprintf(stderr, gettext("suboptimal vdev " + "specification: a '%s%lu' vdev with %u devices " + "is inefficient; consider 'mirror' instead.\n"), + type, nparity, ndevs); + success = B_FALSE; + continue; + } + } + + return (success); +} + static boolean_t prop_list_contains_feature(nvlist_t *proplist) { @@ -1486,10 +1530,12 @@ errout: int zpool_do_create(int argc, char **argv) { - boolean_t force = B_FALSE; boolean_t dryrun = B_FALSE; boolean_t enable_pool_features = B_TRUE; + int force_vdevs = 0; + int force_layout = 0; + int c; nvlist_t *nvroot = NULL; char *poolname; @@ -1502,11 +1548,32 @@ zpool_do_create(int argc, char **argv) nvlist_t *props = NULL; char *propval; + struct option long_options[] = { + {"force", required_argument, NULL, 'f'}, + {0, 0, 0, 0} + }; + + struct zpool_option_flag force_flags[] = { + {"vdevs", &force_vdevs}, + {"layout", &force_layout}, + {0, 0}, + }; + /* check options */ - while ((c = getopt(argc, argv, ":fndR:m:o:O:t:")) != -1) { + while ((c = getopt_long(argc, argv, ":fndR:m:o:O:t:", long_options, + NULL)) != -1) { switch (c) { case 'f': - force = B_TRUE; + if (optarg) { + char *unk = zpool_option_flag_apply( + optarg, force_flags); + if (unk) { + fprintf(stderr, gettext("unknown force " + "flag: %s\n"), unk); + goto errout; + } + } else + force_vdevs = 1; break; case 'n': dryrun = B_TRUE; @@ -1641,11 +1708,17 @@ zpool_do_create(int argc, char **argv) } /* pass off to make_root_vdev for bulk processing */ - nvroot = make_root_vdev(NULL, props, force, !force, B_FALSE, dryrun, - argc - 1, argv + 1); + nvroot = make_root_vdev(NULL, props, force_vdevs, !force_vdevs, + B_FALSE, dryrun, argc - 1, argv + 1); if (nvroot == NULL) goto errout; + if (!force_layout && !check_layout(nvroot)) { + (void) fprintf(stderr, "use '--force=layout' to ignore " + "this.\n"); + goto errout; + } + /* make_root_vdev() allows 0 toplevel children if there are spares */ if (!zfs_allocatable_devs(nvroot)) { (void) fprintf(stderr, gettext("invalid vdev " diff --git a/cmd/zpool/zpool_util.c b/cmd/zpool/zpool_util.c index e7ff739e5b..3e2018d862 100644 --- a/cmd/zpool/zpool_util.c +++ b/cmd/zpool/zpool_util.c @@ -139,3 +139,69 @@ lowbit64(uint64_t i) return (__builtin_ffsll(i)); } + +/* + * Given a string of comma-separated flag names, set or clear the corresponding + * flag variables, as defined in flagspec. If an unknown flag name is offered, + * returns it. On succes, returns NULL. + * + * This is most useful when combined with getopt, to allow related options to + * be set at once. + * + * Example: + * + * int main(int arvc, char **argv) { + * struct option long_options[] = { + * {"frobnicate", required_argument, NULL, 1}, + * {0, 0, 0, 0}, + * }; + * struct zpool_option_flag frobnicate_flags[] = { + * {"gizmo", &frobnicate_gizmo}, + * {"widget", &frobnicate_widget}, + * {"thingy", &frobnicate_thingy}, + * }; + * int do_gizmo = 0, do_widget = 1, do_thingy = 0; + * + * while ((c = getopt_long(argc, argv, "", long_options, NULL)) != -1) { + * switch (c) { + * case 1: + * char *unknown = zpool_option_flag_apply(optarg, frobnicate_flags); + * if (unknown) + * printf("unknown flag to --frobnicate: %s\n", unknown); + * } + * + * printf("gizmo %d widget %d thingy %d\n", do_gizmo, do_widget, do_thingy); + * return (0); + * } + * + * $ myprogram --frobnicate='!widget,thingy' + * gizmo 0 widget 0 thingy 1 + * + * + */ +char * +zpool_option_flag_apply(char *argstr, struct zpool_option_flag *flagspec) +{ + char *name, *tmp = NULL; + while ((name = strtok_r(argstr, ",", &tmp)) != NULL) { + argstr = NULL; + int newval = 1; + if (*name == '!') { + newval = 0; + name++; + } + int found = 0; + for (struct zpool_option_flag *f = flagspec; f->name; f++) { + if (strcmp(name, f->name) == 0) { + if (f->flag) + *f->flag = newval; + found = 1; + break; + } + } + if (!found) + return (name); + } + + return (NULL); +} diff --git a/cmd/zpool/zpool_util.h b/cmd/zpool/zpool_util.h index b35dea0cd4..56d6350640 100644 --- a/cmd/zpool/zpool_util.h +++ b/cmd/zpool/zpool_util.h @@ -52,6 +52,12 @@ int lowbit64(uint64_t i); */ char *zpool_get_cmd_search_path(void); +struct zpool_option_flag { + const char *name; + int *flag; +}; +char *zpool_option_flag_apply(char *, struct zpool_option_flag *); + /* * Virtual device functions */ diff --git a/man/man8/zpool-create.8 b/man/man8/zpool-create.8 index 8449520944..4aa273551a 100644 --- a/man/man8/zpool-create.8 +++ b/man/man8/zpool-create.8 @@ -27,7 +27,7 @@ .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" Copyright (c) 2021, Colm Buckley .\" -.Dd March 16, 2022 +.Dd July 15, 2023 .Dt ZPOOL-CREATE 8 .Os . @@ -37,7 +37,7 @@ .Sh SYNOPSIS .Nm zpool .Cm create -.Op Fl dfn +.Op Fl dn .Op Fl m Ar mountpoint .Oo Fl o Ar property Ns = Ns Ar value Oc Ns … .Oo Fl o Sy feature@ Ns Ar feature Ns = Ns Ar value Oc @@ -45,6 +45,7 @@ .Oo Fl O Ar file-system-property Ns = Ns Ar value Oc Ns … .Op Fl R Ar root .Op Fl t Ar tname +.Op Fl -force= Ns Ar flag Ns Oo , Ns Ar flag Oc Ns … .Ar pool .Ar vdev Ns … . @@ -142,11 +143,6 @@ with See .Xr zpool-features 7 for details about feature properties. -.It Fl f -Forces use of -.Ar vdev Ns s , -even if they appear in use or specify a conflicting replication level. -Not all devices can be overridden in this manner. .It Fl m Ar mountpoint Sets the mount point for the root dataset. The default mount point is @@ -204,6 +200,20 @@ This is intended to handle name space collisions when creating pools for other systems, such as virtual machines or physical machines whose pools live on network block devices. +.It Fl -force= Ns Ar flag Ns Oo , Ns Ar flag Oc Ns … +.Bl -tag -width Ds +.It Sy vdevs +Forces use of +.Ar vdev Ns s , +even if they appear in use or specify a conflicting replication level. +Not all devices can be overridden in this manner. +.It Sy layout +Allows certain kinds of suboptimal pool layouts to be created. +.El +.It Fl f +Alias for +.Fl -force=vdevs +. .El . .Sh EXAMPLES diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 342f56d50d..ff111c1895 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -368,7 +368,8 @@ tests = ['zpool_create_001_pos', 'zpool_create_002_pos', 'zpool_create_features_005_pos', 'zpool_create_features_006_pos', 'zpool_create_features_007_pos', 'zpool_create_features_008_pos', 'zpool_create_features_009_pos', 'create-o_ashift', - 'zpool_create_tempname', 'zpool_create_dryrun_output'] + 'zpool_create_tempname', 'zpool_create_dryrun_output', + 'zpool_create_force_layout'] tags = ['functional', 'cli_root', 'zpool_create'] [tests/functional/cli_root/zpool_destroy] diff --git a/tests/runfiles/sanity.run b/tests/runfiles/sanity.run index 449bf1c0f5..f4829f123d 100644 --- a/tests/runfiles/sanity.run +++ b/tests/runfiles/sanity.run @@ -267,7 +267,7 @@ tests = ['zpool_create_001_pos', 'zpool_create_002_pos', 'zpool_create_encrypted', 'zpool_create_features_001_pos', 'zpool_create_features_002_pos', 'zpool_create_features_003_pos', 'zpool_create_features_004_neg', - 'zpool_create_features_005_pos'] + 'zpool_create_features_005_pos', 'zpool_create_force_layout'] tags = ['functional', 'cli_root', 'zpool_create'] [tests/functional/cli_root/zpool_destroy] diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index ff65dc1ac2..477cdd14ce 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -1009,6 +1009,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zpool_create/zpool_create_features_007_pos.ksh \ functional/cli_root/zpool_create/zpool_create_features_008_pos.ksh \ functional/cli_root/zpool_create/zpool_create_features_009_pos.ksh \ + functional/cli_root/zpool_create/zpool_create_force_layout.ksh \ functional/cli_root/zpool_create/zpool_create_tempname.ksh \ functional/cli_root/zpool_destroy/zpool_destroy_001_pos.ksh \ functional/cli_root/zpool_destroy/zpool_destroy_002_pos.ksh \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_force_layout.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_force_layout.ksh new file mode 100755 index 0000000000..3fa4da90ec --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_force_layout.ksh @@ -0,0 +1,94 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023, Rob Norris +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_create/zpool_create.shlib + +# +# DESCRIPTION: +# 'zpool create ...' should reject suboptimal pool layouts by +# default, but allow them if the --force=layout switch is offered. +# +# STRATEGY: +# 1. Try to create pools with suboptimal layouts, verify fail +# 2. Try again to create them with --force=layout, verify success +# + +verify_runnable "global" + +TMPDIR=${TMPDIR:-$TEST_BASE_DIR} + +DISK1="$TMPDIR/dsk1" +DISK2="$TMPDIR/dsk2" +DISK3="$TMPDIR/dsk3" +DISK4="$TMPDIR/dsk4" +DISKS="$DISK1 $DISK2 $DISK3 $DISK4" + +log_must mkfile $(($MINVDEVSIZE * 2)) $DISK1 +log_must mkfile $(($MINVDEVSIZE * 2)) $DISK2 +log_must mkfile $(($MINVDEVSIZE * 2)) $DISK3 +log_must mkfile $(($MINVDEVSIZE * 2)) $DISK4 + +function cleanup +{ + default_cleanup_noexit + log_must rm -f $DISKS +} + +log_assert "'zpool create ...' rejects suboptimal layouts," \ + " unless the --force=layout switch is provided." + +log_onexit cleanup + +function check_suboptimal +{ + typeset spec=$1 + log_mustnot zpool create $TESTPOOL $spec + log_mustnot poolexists $TESTPOOL + log_must zpool create --force=layout $TESTPOOL $spec + log_must poolexists $TESTPOOL + log_must destroy_pool $TESTPOOL +} + +log_note "raidz1 with two drives is suboptimal" +log_must check_suboptimal "raidz1 $DISK1 $DISK2" + +log_note "raidz2 with three drives is suboptimal" +log_must check_suboptimal "raidz2 $DISK1 $DISK2 $DISK3" + +log_note "raidz3 with four drives is suboptimal" +log_must check_suboptimal "raidz3 $DISK1 $DISK2 $DISK3 $DISK4" + +log_note "draid1 with two drives is suboptimal" +log_must check_suboptimal "draid1 $DISK1 $DISK2" + +log_note "draid2 with three drives is suboptimal" +log_must check_suboptimal "draid2 $DISK1 $DISK2 $DISK3" + +log_note "draid3 with four drives is suboptimal" +log_must check_suboptimal "draid3 $DISK1 $DISK2 $DISK3 $DISK4" + +log_pass "'zpool create --force=layout ...' success."