zpool create: warn on suboptimal pool layout

Its possible to create pools that are perfectly valid but are perhaps
not the "best" choice for a given set of devices.

An example is a raidz1 of two devices. I have seen inexperienced users
create this because it looks on the surface like a traditional RAID-1,
that is, a mirror. It even appears to work, but presents problems later
when they want to upgrade the drives, and of course does not perform as
well as a mirror.

This changes `zpool create` to reject such "suboptimal" pool layouts,
and suggest a possible better alternative. It checks for raidz and draid
where the number of devices are parity+1, and could be extended in the
future.

It adds a switch, --force=layout, to disable the check and the warning
and return the old behaviour, for those who know what they're doing.

Included is a utility function to work with option flags. The existing
-f switch to `zpool create` is now an alias for `--force=vdevs`.

Signed-off-by: Rob Norris <robn@despairlabs.com>
This commit is contained in:
Rob Norris 2023-04-02 12:02:51 +10:00
parent d3d63cac4d
commit f5ead99f34
8 changed files with 265 additions and 14 deletions

View File

@ -801,6 +801,50 @@ print_spare_list(nvlist_t *nv, int indent)
}
}
static boolean_t
check_layout(nvlist_t *nv)
{
boolean_t success = B_TRUE;
nvlist_t **child;
uint_t c, children = 0;
if (!nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
&child, &children) == 0)
return (B_TRUE);
for (c = 0; c < children; c++) {
const char *type = NULL;
(void) nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type);
if (type == NULL)
continue;
if (strcmp(type, VDEV_TYPE_RAIDZ) != 0 &&
strcmp(type, VDEV_TYPE_DRAID) != 0)
continue;
uint64_t nparity = 0;
(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_NPARITY,
&nparity);
nvlist_t **dev;
uint_t ndevs = 0;
(void) nvlist_lookup_nvlist_array(child[c],
ZPOOL_CONFIG_CHILDREN, &dev, &ndevs);
if (ndevs == nparity + 1) {
(void) fprintf(stderr, gettext("suboptimal vdev "
"specification: a '%s%lu' vdev with %u devices "
"is inefficient; consider 'mirror' instead.\n"),
type, nparity, ndevs);
success = B_FALSE;
continue;
}
}
return (success);
}
static boolean_t
prop_list_contains_feature(nvlist_t *proplist)
{
@ -1486,10 +1530,12 @@ errout:
int
zpool_do_create(int argc, char **argv)
{
boolean_t force = B_FALSE;
boolean_t dryrun = B_FALSE;
boolean_t enable_pool_features = B_TRUE;
int force_vdevs = 0;
int force_layout = 0;
int c;
nvlist_t *nvroot = NULL;
char *poolname;
@ -1502,11 +1548,32 @@ zpool_do_create(int argc, char **argv)
nvlist_t *props = NULL;
char *propval;
struct option long_options[] = {
{"force", required_argument, NULL, 'f'},
{0, 0, 0, 0}
};
struct zpool_option_flag force_flags[] = {
{"vdevs", &force_vdevs},
{"layout", &force_layout},
{0, 0},
};
/* check options */
while ((c = getopt(argc, argv, ":fndR:m:o:O:t:")) != -1) {
while ((c = getopt_long(argc, argv, ":fndR:m:o:O:t:", long_options,
NULL)) != -1) {
switch (c) {
case 'f':
force = B_TRUE;
if (optarg) {
char *unk = zpool_option_flag_apply(
optarg, force_flags);
if (unk) {
fprintf(stderr, gettext("unknown force "
"flag: %s\n"), unk);
goto errout;
}
} else
force_vdevs = 1;
break;
case 'n':
dryrun = B_TRUE;
@ -1641,11 +1708,17 @@ zpool_do_create(int argc, char **argv)
}
/* pass off to make_root_vdev for bulk processing */
nvroot = make_root_vdev(NULL, props, force, !force, B_FALSE, dryrun,
argc - 1, argv + 1);
nvroot = make_root_vdev(NULL, props, force_vdevs, !force_vdevs,
B_FALSE, dryrun, argc - 1, argv + 1);
if (nvroot == NULL)
goto errout;
if (!force_layout && !check_layout(nvroot)) {
(void) fprintf(stderr, "use '--force=layout' to ignore "
"this.\n");
goto errout;
}
/* make_root_vdev() allows 0 toplevel children if there are spares */
if (!zfs_allocatable_devs(nvroot)) {
(void) fprintf(stderr, gettext("invalid vdev "

View File

@ -139,3 +139,69 @@ lowbit64(uint64_t i)
return (__builtin_ffsll(i));
}
/*
* Given a string of comma-separated flag names, set or clear the corresponding
* flag variables, as defined in flagspec. If an unknown flag name is offered,
* returns it. On succes, returns NULL.
*
* This is most useful when combined with getopt, to allow related options to
* be set at once.
*
* Example:
*
* int main(int arvc, char **argv) {
* struct option long_options[] = {
* {"frobnicate", required_argument, NULL, 1},
* {0, 0, 0, 0},
* };
* struct zpool_option_flag frobnicate_flags[] = {
* {"gizmo", &frobnicate_gizmo},
* {"widget", &frobnicate_widget},
* {"thingy", &frobnicate_thingy},
* };
* int do_gizmo = 0, do_widget = 1, do_thingy = 0;
*
* while ((c = getopt_long(argc, argv, "", long_options, NULL)) != -1) {
* switch (c) {
* case 1:
* char *unknown = zpool_option_flag_apply(optarg, frobnicate_flags);
* if (unknown)
* printf("unknown flag to --frobnicate: %s\n", unknown);
* }
*
* printf("gizmo %d widget %d thingy %d\n", do_gizmo, do_widget, do_thingy);
* return (0);
* }
*
* $ myprogram --frobnicate='!widget,thingy'
* gizmo 0 widget 0 thingy 1
*
*
*/
char *
zpool_option_flag_apply(char *argstr, struct zpool_option_flag *flagspec)
{
char *name, *tmp = NULL;
while ((name = strtok_r(argstr, ",", &tmp)) != NULL) {
argstr = NULL;
int newval = 1;
if (*name == '!') {
newval = 0;
name++;
}
int found = 0;
for (struct zpool_option_flag *f = flagspec; f->name; f++) {
if (strcmp(name, f->name) == 0) {
if (f->flag)
*f->flag = newval;
found = 1;
break;
}
}
if (!found)
return (name);
}
return (NULL);
}

View File

@ -52,6 +52,12 @@ int lowbit64(uint64_t i);
*/
char *zpool_get_cmd_search_path(void);
struct zpool_option_flag {
const char *name;
int *flag;
};
char *zpool_option_flag_apply(char *, struct zpool_option_flag *);
/*
* Virtual device functions
*/

View File

@ -27,7 +27,7 @@
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\" Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
.\"
.Dd March 16, 2022
.Dd July 15, 2023
.Dt ZPOOL-CREATE 8
.Os
.
@ -37,7 +37,7 @@
.Sh SYNOPSIS
.Nm zpool
.Cm create
.Op Fl dfn
.Op Fl dn
.Op Fl m Ar mountpoint
.Oo Fl o Ar property Ns = Ns Ar value Oc Ns
.Oo Fl o Sy feature@ Ns Ar feature Ns = Ns Ar value Oc
@ -45,6 +45,7 @@
.Oo Fl O Ar file-system-property Ns = Ns Ar value Oc Ns
.Op Fl R Ar root
.Op Fl t Ar tname
.Op Fl -force= Ns Ar flag Ns Oo , Ns Ar flag Oc Ns
.Ar pool
.Ar vdev Ns
.
@ -142,11 +143,6 @@ with
See
.Xr zpool-features 7
for details about feature properties.
.It Fl f
Forces use of
.Ar vdev Ns s ,
even if they appear in use or specify a conflicting replication level.
Not all devices can be overridden in this manner.
.It Fl m Ar mountpoint
Sets the mount point for the root dataset.
The default mount point is
@ -204,6 +200,20 @@ This is intended
to handle name space collisions when creating pools for other systems,
such as virtual machines or physical machines whose pools live on network
block devices.
.It Fl -force= Ns Ar flag Ns Oo , Ns Ar flag Oc Ns
.Bl -tag -width Ds
.It Sy vdevs
Forces use of
.Ar vdev Ns s ,
even if they appear in use or specify a conflicting replication level.
Not all devices can be overridden in this manner.
.It Sy layout
Allows certain kinds of suboptimal pool layouts to be created.
.El
.It Fl f
Alias for
.Fl -force=vdevs
.
.El
.
.Sh EXAMPLES

View File

@ -368,7 +368,8 @@ tests = ['zpool_create_001_pos', 'zpool_create_002_pos',
'zpool_create_features_005_pos', 'zpool_create_features_006_pos',
'zpool_create_features_007_pos', 'zpool_create_features_008_pos',
'zpool_create_features_009_pos', 'create-o_ashift',
'zpool_create_tempname', 'zpool_create_dryrun_output']
'zpool_create_tempname', 'zpool_create_dryrun_output',
'zpool_create_force_layout']
tags = ['functional', 'cli_root', 'zpool_create']
[tests/functional/cli_root/zpool_destroy]

View File

@ -267,7 +267,7 @@ tests = ['zpool_create_001_pos', 'zpool_create_002_pos',
'zpool_create_encrypted',
'zpool_create_features_001_pos', 'zpool_create_features_002_pos',
'zpool_create_features_003_pos', 'zpool_create_features_004_neg',
'zpool_create_features_005_pos']
'zpool_create_features_005_pos', 'zpool_create_force_layout']
tags = ['functional', 'cli_root', 'zpool_create']
[tests/functional/cli_root/zpool_destroy]

View File

@ -1009,6 +1009,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zpool_create/zpool_create_features_007_pos.ksh \
functional/cli_root/zpool_create/zpool_create_features_008_pos.ksh \
functional/cli_root/zpool_create/zpool_create_features_009_pos.ksh \
functional/cli_root/zpool_create/zpool_create_force_layout.ksh \
functional/cli_root/zpool_create/zpool_create_tempname.ksh \
functional/cli_root/zpool_destroy/zpool_destroy_001_pos.ksh \
functional/cli_root/zpool_destroy/zpool_destroy_002_pos.ksh \

View File

@ -0,0 +1,94 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023, Rob Norris <robn@despairlabs.com>
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zpool_create/zpool_create.shlib
#
# DESCRIPTION:
# 'zpool create <pool> <vspec> ...' should reject suboptimal pool layouts by
# default, but allow them if the --force=layout switch is offered.
#
# STRATEGY:
# 1. Try to create pools with suboptimal layouts, verify fail
# 2. Try again to create them with --force=layout, verify success
#
verify_runnable "global"
TMPDIR=${TMPDIR:-$TEST_BASE_DIR}
DISK1="$TMPDIR/dsk1"
DISK2="$TMPDIR/dsk2"
DISK3="$TMPDIR/dsk3"
DISK4="$TMPDIR/dsk4"
DISKS="$DISK1 $DISK2 $DISK3 $DISK4"
log_must mkfile $(($MINVDEVSIZE * 2)) $DISK1
log_must mkfile $(($MINVDEVSIZE * 2)) $DISK2
log_must mkfile $(($MINVDEVSIZE * 2)) $DISK3
log_must mkfile $(($MINVDEVSIZE * 2)) $DISK4
function cleanup
{
default_cleanup_noexit
log_must rm -f $DISKS
}
log_assert "'zpool create <pool> <vspec> ...' rejects suboptimal layouts," \
" unless the --force=layout switch is provided."
log_onexit cleanup
function check_suboptimal
{
typeset spec=$1
log_mustnot zpool create $TESTPOOL $spec
log_mustnot poolexists $TESTPOOL
log_must zpool create --force=layout $TESTPOOL $spec
log_must poolexists $TESTPOOL
log_must destroy_pool $TESTPOOL
}
log_note "raidz1 with two drives is suboptimal"
log_must check_suboptimal "raidz1 $DISK1 $DISK2"
log_note "raidz2 with three drives is suboptimal"
log_must check_suboptimal "raidz2 $DISK1 $DISK2 $DISK3"
log_note "raidz3 with four drives is suboptimal"
log_must check_suboptimal "raidz3 $DISK1 $DISK2 $DISK3 $DISK4"
log_note "draid1 with two drives is suboptimal"
log_must check_suboptimal "draid1 $DISK1 $DISK2"
log_note "draid2 with three drives is suboptimal"
log_must check_suboptimal "draid2 $DISK1 $DISK2 $DISK3"
log_note "draid3 with four drives is suboptimal"
log_must check_suboptimal "draid3 $DISK1 $DISK2 $DISK3 $DISK4"
log_pass "'zpool create --force=layout ...' success."