From 2a673e76a928cca4df7794cdcaa02e0be149c4da Mon Sep 17 00:00:00 2001 From: Allan Jude Date: Tue, 30 Nov 2021 09:46:25 -0500 Subject: [PATCH] Vdev Properties Feature Add properties, similar to pool properties, to each vdev. This makes use of the existing per-vdev ZAP that was added as part of device evacuation/removal. A large number of read-only properties are exposed, many of the members of struct vdev_t, that provide useful statistics. Adds support for read-only "removing" vdev property. Adds the "allocating" property that defaults to "on" and can be set to "off" to prevent future allocations from that top-level vdev. Supports user-defined vdev properties. Includes support for properties.vdev in SYSFS. Co-authored-by: Allan Jude Co-authored-by: Mark Maybee Reviewed-by: Matthew Ahrens Reviewed-by: Mark Maybee Signed-off-by: Allan Jude Closes #11711 --- cmd/zpool/zpool_iter.c | 17 +- cmd/zpool/zpool_main.c | 453 ++++++++++++----- cmd/zpool/zpool_util.h | 5 +- contrib/pyzfs/libzfs_core/_constants.py | 2 + include/libzfs.h | 32 +- include/libzfs_core.h | 4 + include/sys/fs/zfs.h | 90 +++- include/sys/spa.h | 3 +- include/sys/spa_impl.h | 1 + include/sys/vdev.h | 3 + include/sys/vdev_impl.h | 1 + include/sys/zfs_sysfs.h | 1 + include/zfs_prop.h | 11 +- lib/libzfs/libzfs.abi | 199 +++++++- lib/libzfs/libzfs_pool.c | 459 ++++++++++++++++- lib/libzfs/libzfs_util.c | 40 +- lib/libzfs/os/freebsd/libzfs_compat.c | 4 + lib/libzfs_core/libzfs_core.abi | 14 + lib/libzfs_core/libzfs_core.c | 12 + lib/libzutil/zutil_import.c | 17 +- man/man7/vdevprops.7 | 172 +++++++ man/man8/zpool-get.8 | 79 +++ module/os/linux/zfs/zfs_sysfs.c | 36 +- module/zcommon/zfs_prop.c | 14 +- module/zcommon/zpool_prop.c | 250 ++++++++++ module/zcommon/zprop_common.c | 22 +- module/zfs/spa.c | 2 +- module/zfs/spa_misc.c | 38 +- module/zfs/vdev.c | 626 +++++++++++++++++++++++- module/zfs/vdev_label.c | 4 + module/zfs/vdev_removal.c | 274 +++++++++-- module/zfs/zfs_ioctl.c | 102 +++- module/zfs/zio.c | 
2 +- 33 files changed, 2746 insertions(+), 243 deletions(-) create mode 100644 man/man7/vdevprops.7 diff --git a/cmd/zpool/zpool_iter.c b/cmd/zpool/zpool_iter.c index abfa2b7f6b..8cf6bab42f 100644 --- a/cmd/zpool/zpool_iter.c +++ b/cmd/zpool/zpool_iter.c @@ -60,6 +60,7 @@ struct zpool_list { uu_avl_t *zl_avl; uu_avl_pool_t *zl_pool; zprop_list_t **zl_proplist; + zfs_type_t zl_type; }; /* ARGSUSED */ @@ -90,8 +91,7 @@ add_pool(zpool_handle_t *zhp, void *data) if (uu_avl_find(zlp->zl_avl, node, NULL, &idx) == NULL) { if (zlp->zl_proplist && zpool_expand_proplist(zhp, zlp->zl_proplist, - zlp->zl_literal) - != 0) { + zlp->zl_type, zlp->zl_literal) != 0) { zpool_close(zhp); free(node); return (-1); @@ -113,7 +113,7 @@ add_pool(zpool_handle_t *zhp, void *data) * line. */ zpool_list_t * -pool_list_get(int argc, char **argv, zprop_list_t **proplist, +pool_list_get(int argc, char **argv, zprop_list_t **proplist, zfs_type_t type, boolean_t literal, int *err) { zpool_list_t *zlp; @@ -131,6 +131,7 @@ pool_list_get(int argc, char **argv, zprop_list_t **proplist, zpool_no_memory(); zlp->zl_proplist = proplist; + zlp->zl_type = type; zlp->zl_literal = literal; @@ -248,12 +249,14 @@ pool_list_count(zpool_list_t *zlp) */ int for_each_pool(int argc, char **argv, boolean_t unavail, - zprop_list_t **proplist, boolean_t literal, zpool_iter_f func, void *data) + zprop_list_t **proplist, zfs_type_t type, boolean_t literal, + zpool_iter_f func, void *data) { zpool_list_t *list; int ret = 0; - if ((list = pool_list_get(argc, argv, proplist, literal, &ret)) == NULL) + if ((list = pool_list_get(argc, argv, proplist, type, literal, + &ret)) == NULL) return (1); if (pool_list_iter(list, unavail, func, data) != 0) @@ -678,8 +681,8 @@ all_pools_for_each_vdev_run(int argc, char **argv, char *cmd, vcdl->g_zfs = g_zfs; /* Gather our list of all vdevs in all pools */ - for_each_pool(argc, argv, B_TRUE, NULL, B_FALSE, - all_pools_for_each_vdev_gather_cb, vcdl); + for_each_pool(argc, argv, B_TRUE, 
NULL, ZFS_TYPE_POOL, + B_FALSE, all_pools_for_each_vdev_gather_cb, vcdl); /* Run command on all vdevs in all pools */ all_pools_for_each_vdev_run_vcdl(vcdl); diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 3a2caa9a81..4e2f828cb2 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -32,6 +32,7 @@ * Copyright (c) 2017, Intel Corporation. * Copyright (c) 2019, loli10K * Copyright (c) 2021, Colm Buckley + * Copyright (c) 2021, Klara Inc. * Copyright [2021] Hewlett Packard Enterprise Development LP */ @@ -335,6 +336,7 @@ static zpool_command_t command_table[] = { #define VDEV_ALLOC_CLASS_LOGS "logs" static zpool_command_t *current_command; +static zfs_type_t current_prop_type = (ZFS_TYPE_POOL | ZFS_TYPE_VDEV); static char history_str[HIS_MAX_RECORD_LEN]; static boolean_t log_history = B_TRUE; static uint_t timestamp_fmt = NODATE; @@ -470,7 +472,7 @@ zpool_collect_leaves(zpool_handle_t *zhp, nvlist_t *nvroot, nvlist_t *res) * Callback routine that will print out a pool property value. */ static int -print_prop_cb(int prop, void *cb) +print_pool_prop_cb(int prop, void *cb) { FILE *fp = cb; @@ -489,6 +491,29 @@ print_prop_cb(int prop, void *cb) return (ZPROP_CONT); } +/* + * Callback routine that will print out a vdev property value. + */ +static int +print_vdev_prop_cb(int prop, void *cb) +{ + FILE *fp = cb; + + (void) fprintf(fp, "\t%-19s ", vdev_prop_to_name(prop)); + + if (vdev_prop_readonly(prop)) + (void) fprintf(fp, " NO "); + else + (void) fprintf(fp, " YES "); + + if (vdev_prop_values(prop) == NULL) + (void) fprintf(fp, "-\n"); + else + (void) fprintf(fp, "%s\n", vdev_prop_values(prop)); + + return (ZPROP_CONT); +} + /* * Display usage message. If we're inside a command, display only the usage for * that command. 
Otherwise, iterate over the entire command table and display @@ -519,6 +544,7 @@ usage(boolean_t requested) } if (current_command != NULL && + current_prop_type != (ZFS_TYPE_POOL | ZFS_TYPE_VDEV) && ((strcmp(current_command->name, "set") == 0) || (strcmp(current_command->name, "get") == 0) || (strcmp(current_command->name, "list") == 0))) { @@ -530,14 +556,21 @@ usage(boolean_t requested) "PROPERTY", "EDIT", "VALUES"); /* Iterate over all properties */ - (void) zprop_iter(print_prop_cb, fp, B_FALSE, B_TRUE, - ZFS_TYPE_POOL); + if (current_prop_type == ZFS_TYPE_POOL) { + (void) zprop_iter(print_pool_prop_cb, fp, B_FALSE, + B_TRUE, current_prop_type); - (void) fprintf(fp, "\t%-19s ", "feature@..."); - (void) fprintf(fp, "YES disabled | enabled | active\n"); + (void) fprintf(fp, "\t%-19s ", "feature@..."); + (void) fprintf(fp, "YES " + "disabled | enabled | active\n"); - (void) fprintf(fp, gettext("\nThe feature@ properties must be " - "appended with a feature name.\nSee zpool-features(7).\n")); + (void) fprintf(fp, gettext("\nThe feature@ properties " + "must be appended with a feature name.\n" + "See zpool-features(7).\n")); + } else if (current_prop_type == ZFS_TYPE_VDEV) { + (void) zprop_iter(print_vdev_prop_cb, fp, B_FALSE, + B_TRUE, current_prop_type); + } } /* @@ -795,9 +828,10 @@ add_prop_list(const char *propname, char *propval, nvlist_t **props, zpool_prop_to_name(ZPOOL_PROP_COMPATIBILITY); if ((prop = zpool_name_to_prop(propname)) == ZPOOL_PROP_INVAL && - !zpool_prop_feature(propname)) { + (!zpool_prop_feature(propname) && + !zpool_prop_vdev(propname))) { (void) fprintf(stderr, gettext("property '%s' is " - "not a valid pool property\n"), propname); + "not a valid pool or vdev property\n"), propname); return (2); } @@ -832,7 +866,7 @@ add_prop_list(const char *propname, char *propval, nvlist_t **props, return (2); } - if (zpool_prop_feature(propname)) + if (zpool_prop_feature(propname) || zpool_prop_vdev(propname)) normnm = propname; else normnm = 
zpool_prop_to_name(prop); @@ -1930,7 +1964,7 @@ zpool_do_export(int argc, char **argv) } return (for_each_pool(argc, argv, B_TRUE, NULL, - B_FALSE, zpool_export_one, &cb)); + ZFS_TYPE_POOL, B_FALSE, zpool_export_one, &cb)); } /* check arguments */ @@ -1939,8 +1973,8 @@ zpool_do_export(int argc, char **argv) usage(B_FALSE); } - ret = for_each_pool(argc, argv, B_TRUE, NULL, B_FALSE, zpool_export_one, - &cb); + ret = for_each_pool(argc, argv, B_TRUE, NULL, ZFS_TYPE_POOL, + B_FALSE, zpool_export_one, &cb); return (ret); } @@ -2436,6 +2470,12 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name, 1 << vs->vs_configured_ashift, 1 << vs->vs_physical_ashift); } + if (vs->vs_scan_removing != 0) { + (void) printf(gettext(" (removing)")); + } else if (vs->vs_noalloc != 0) { + (void) printf(gettext(" (non-allocating)")); + } + /* The root vdev has the scrub/resilver stats */ root = fnvlist_lookup_nvlist(zpool_get_config(zhp, NULL), ZPOOL_CONFIG_VDEV_TREE); @@ -3857,24 +3897,22 @@ zpool_do_sync(int argc, char **argv) argv += optind; /* if argc == 0 we will execute zpool_sync_one on all pools */ - ret = for_each_pool(argc, argv, B_FALSE, NULL, B_FALSE, zpool_sync_one, - &force); + ret = for_each_pool(argc, argv, B_FALSE, NULL, ZFS_TYPE_POOL, + B_FALSE, zpool_sync_one, &force); return (ret); } typedef struct iostat_cbdata { uint64_t cb_flags; - int cb_name_flags; int cb_namewidth; int cb_iteration; - char **cb_vdev_names; /* Only show these vdevs */ - unsigned int cb_vdev_names_count; boolean_t cb_verbose; boolean_t cb_literal; boolean_t cb_scripted; zpool_list_t *cb_list; vdev_cmd_data_list_t *vcdl; + vdev_cbdata_t cb_vdevs; } iostat_cbdata_t; /* iostat labels */ @@ -4128,7 +4166,7 @@ print_iostat_dashes(iostat_cbdata_t *cb, unsigned int force_column_width, if (cb->cb_flags & IOS_ANYHISTO_M) { title = histo_to_title[IOS_HISTO_IDX(cb->cb_flags)]; - } else if (cb->cb_vdev_names_count) { + } else if (cb->cb_vdevs.cb_names_count) { title = "vdev"; } else 
{ title = "pool"; @@ -4188,7 +4226,7 @@ print_iostat_header_impl(iostat_cbdata_t *cb, unsigned int force_column_width, if (cb->cb_flags & IOS_ANYHISTO_M) { title = histo_to_title[IOS_HISTO_IDX(cb->cb_flags)]; - } else if (cb->cb_vdev_names_count) { + } else if (cb->cb_vdevs.cb_names_count) { title = "vdev"; } else { title = "pool"; @@ -4696,9 +4734,9 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv, } /* Do we only want to see a specific vdev? */ - for (i = 0; i < cb->cb_vdev_names_count; i++) { + for (i = 0; i < cb->cb_vdevs.cb_names_count; i++) { /* Yes we do. Is this the vdev? */ - if (strcmp(name, cb->cb_vdev_names[i]) == 0) { + if (strcmp(name, cb->cb_vdevs.cb_names[i]) == 0) { /* * This is our vdev. Since it is the only vdev we * will be displaying, make depth = 0 so that it @@ -4709,7 +4747,7 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv, } } - if (cb->cb_vdev_names_count && (i == cb->cb_vdev_names_count)) { + if (cb->cb_vdevs.cb_names_count && (i == cb->cb_vdevs.cb_names_count)) { /* Couldn't match the name */ goto children; } @@ -4816,7 +4854,7 @@ children: continue; vname = zpool_vdev_name(g_zfs, zhp, newchild[c], - cb->cb_name_flags); + cb->cb_vdevs.cb_name_flags); ret += print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL, newchild[c], cb, depth + 2); free(vname); @@ -4850,7 +4888,8 @@ children: if (!printed) { if ((!(cb->cb_flags & IOS_ANYHISTO_M)) && - !cb->cb_scripted && !cb->cb_vdev_names) { + !cb->cb_scripted && + !cb->cb_vdevs.cb_names) { print_iostat_dashes(cb, 0, class_name[n]); } @@ -4859,7 +4898,7 @@ children: } vname = zpool_vdev_name(g_zfs, zhp, newchild[c], - cb->cb_name_flags); + cb->cb_vdevs.cb_name_flags); ret += print_vdev_stats(zhp, vname, oldnv ? 
oldchild[c] : NULL, newchild[c], cb, depth + 2); free(vname); @@ -4883,14 +4922,14 @@ children: if (children > 0) { if ((!(cb->cb_flags & IOS_ANYHISTO_M)) && !cb->cb_scripted && - !cb->cb_vdev_names) { + !cb->cb_vdevs.cb_names) { print_iostat_dashes(cb, 0, "cache"); } printf("\n"); for (c = 0; c < children; c++) { vname = zpool_vdev_name(g_zfs, zhp, newchild[c], - cb->cb_name_flags); + cb->cb_vdevs.cb_name_flags); ret += print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL, newchild[c], cb, depth + 2); free(vname); @@ -4946,7 +4985,8 @@ print_iostat(zpool_handle_t *zhp, void *data) ret = print_vdev_stats(zhp, zpool_get_name(zhp), oldnvroot, newnvroot, cb, 0); if ((ret != 0) && !(cb->cb_flags & IOS_ANYHISTO_M) && - !cb->cb_scripted && cb->cb_verbose && !cb->cb_vdev_names_count) { + !cb->cb_scripted && cb->cb_verbose && + !cb->cb_vdevs.cb_names_count) { print_iostat_separator(cb); if (cb->vcdl != NULL) { print_cmd_columns(cb->vcdl, 1); @@ -5153,27 +5193,30 @@ get_stat_flags(zpool_list_t *list) } /* - * Return 1 if cb_data->cb_vdev_names[0] is this vdev's name, 0 otherwise. + * Return 1 if cb_data->cb_names[0] is this vdev's name, 0 otherwise. */ static int is_vdev_cb(void *zhp_data, nvlist_t *nv, void *cb_data) { - iostat_cbdata_t *cb = cb_data; + vdev_cbdata_t *cb = cb_data; char *name = NULL; - int ret = 0; + int ret = 1; /* assume match */ zpool_handle_t *zhp = zhp_data; name = zpool_vdev_name(g_zfs, zhp, nv, cb->cb_name_flags); - if (strcmp(name, cb->cb_vdev_names[0]) == 0) - ret = 1; /* match */ + if (strcmp(name, cb->cb_names[0])) { + free(name); + name = zpool_vdev_name(g_zfs, zhp, nv, VDEV_NAME_GUID); + ret = (strcmp(name, cb->cb_names[0]) == 0); + } free(name); return (ret); } /* - * Returns 1 if cb_data->cb_vdev_names[0] is a vdev name, 0 otherwise. + * Returns 1 if cb_data->cb_names[0] is a vdev name, 0 otherwise. 
*/ static int is_vdev(zpool_handle_t *zhp, void *cb_data) @@ -5189,7 +5232,7 @@ is_vdev(zpool_handle_t *zhp, void *cb_data) */ static int are_vdevs_in_pool(int argc, char **argv, char *pool_name, - iostat_cbdata_t *cb) + vdev_cbdata_t *cb) { char **tmp_name; int ret = 0; @@ -5202,23 +5245,23 @@ are_vdevs_in_pool(int argc, char **argv, char *pool_name, if (pool_name) pool_count = 1; - /* Temporarily hijack cb_vdev_names for a second... */ - tmp_name = cb->cb_vdev_names; + /* Temporarily hijack cb_names for a second... */ + tmp_name = cb->cb_names; /* Go though our list of prospective vdev names */ for (i = 0; i < argc; i++) { - cb->cb_vdev_names = argv + i; + cb->cb_names = argv + i; /* Is this name a vdev in our pools? */ ret = for_each_pool(pool_count, &pool_name, B_TRUE, NULL, - B_FALSE, is_vdev, cb); + ZFS_TYPE_POOL, B_FALSE, is_vdev, cb); if (!ret) { /* No match */ break; } } - cb->cb_vdev_names = tmp_name; + cb->cb_names = tmp_name; return (ret); } @@ -5239,8 +5282,8 @@ is_pool_cb(zpool_handle_t *zhp, void *data) static int is_pool(char *name) { - return (for_each_pool(0, NULL, B_TRUE, NULL, B_FALSE, is_pool_cb, - name)); + return (for_each_pool(0, NULL, B_TRUE, NULL, ZFS_TYPE_POOL, B_FALSE, + is_pool_cb, name)); } /* Are all our argv[] strings pool names? If so return 1, 0 otherwise. */ @@ -5263,7 +5306,7 @@ are_all_pools(int argc, char **argv) */ static void error_list_unresolved_vdevs(int argc, char **argv, char *pool_name, - iostat_cbdata_t *cb) + vdev_cbdata_t *cb) { int i; char *name; @@ -5287,7 +5330,7 @@ error_list_unresolved_vdevs(int argc, char **argv, char *pool_name, /* * Same as get_interval_count(), but with additional checks to not misinterpret * guids as interval/count values. Assumes VDEV_NAME_GUID is set in - * cb.cb_name_flags. + * cb.cb_vdevs.cb_name_flags. 
*/ static void get_interval_count_filter_guids(int *argc, char **argv, float *interval, @@ -5297,7 +5340,8 @@ get_interval_count_filter_guids(int *argc, char **argv, float *interval, int argc_for_interval = 0; /* Is the last arg an interval value? Or a guid? */ - if (*argc >= 1 && !are_vdevs_in_pool(1, &argv[*argc - 1], NULL, cb)) { + if (*argc >= 1 && !are_vdevs_in_pool(1, &argv[*argc - 1], NULL, + &cb->cb_vdevs)) { /* * The last arg is not a guid, so it's probably an * interval value. @@ -5305,7 +5349,8 @@ get_interval_count_filter_guids(int *argc, char **argv, float *interval, argc_for_interval++; if (*argc >= 2 && - !are_vdevs_in_pool(1, &argv[*argc - 2], NULL, cb)) { + !are_vdevs_in_pool(1, &argv[*argc - 2], NULL, + &cb->cb_vdevs)) { /* * The 2nd to last arg is not a guid, so it's probably * an interval value. @@ -5448,7 +5493,7 @@ get_namewidth_iostat(zpool_handle_t *zhp, void *data) * get_namewidth() returns the maximum width of any name in that column * for any pool/vdev/device line that will be output. */ - width = get_namewidth(zhp, cb->cb_namewidth, cb->cb_name_flags, + width = get_namewidth(zhp, cb->cb_namewidth, cb->cb_vdevs.cb_name_flags, cb->cb_verbose); /* @@ -5626,11 +5671,11 @@ zpool_do_iostat(int argc, char **argv) cb.cb_scripted = scripted; if (guid) - cb.cb_name_flags |= VDEV_NAME_GUID; + cb.cb_vdevs.cb_name_flags |= VDEV_NAME_GUID; if (follow_links) - cb.cb_name_flags |= VDEV_NAME_FOLLOW_LINKS; + cb.cb_vdevs.cb_name_flags |= VDEV_NAME_FOLLOW_LINKS; if (full_name) - cb.cb_name_flags |= VDEV_NAME_PATH; + cb.cb_vdevs.cb_name_flags |= VDEV_NAME_PATH; cb.cb_iteration = 0; cb.cb_namewidth = 0; cb.cb_verbose = verbose; @@ -5647,17 +5692,18 @@ zpool_do_iostat(int argc, char **argv) /* No args, so just print the defaults. 
*/ } else if (are_all_pools(argc, argv)) { /* All the args are pool names */ - } else if (are_vdevs_in_pool(argc, argv, NULL, &cb)) { + } else if (are_vdevs_in_pool(argc, argv, NULL, &cb.cb_vdevs)) { /* All the args are vdevs */ - cb.cb_vdev_names = argv; - cb.cb_vdev_names_count = argc; + cb.cb_vdevs.cb_names = argv; + cb.cb_vdevs.cb_names_count = argc; argc = 0; /* No pools to process */ } else if (are_all_pools(1, argv)) { /* The first arg is a pool name */ - if (are_vdevs_in_pool(argc - 1, argv + 1, argv[0], &cb)) { + if (are_vdevs_in_pool(argc - 1, argv + 1, argv[0], + &cb.cb_vdevs)) { /* ...and the rest are vdev names */ - cb.cb_vdev_names = argv + 1; - cb.cb_vdev_names_count = argc - 1; + cb.cb_vdevs.cb_names = argv + 1; + cb.cb_vdevs.cb_names_count = argc - 1; argc = 1; /* One pool to process */ } else { fprintf(stderr, gettext("Expected either a list of ")); @@ -5665,7 +5711,7 @@ zpool_do_iostat(int argc, char **argv) fprintf(stderr, " \"%s\", ", argv[0]); fprintf(stderr, gettext("but got:\n")); error_list_unresolved_vdevs(argc - 1, argv + 1, - argv[0], &cb); + argv[0], &cb.cb_vdevs); fprintf(stderr, "\n"); usage(B_FALSE); return (1); @@ -5680,7 +5726,7 @@ zpool_do_iostat(int argc, char **argv) return (1); } - if (cb.cb_vdev_names_count != 0) { + if (cb.cb_vdevs.cb_names_count != 0) { /* * If user specified vdevs, it implies verbose. */ @@ -5691,7 +5737,8 @@ zpool_do_iostat(int argc, char **argv) * Construct the list of all interesting pools. 
*/ ret = 0; - if ((list = pool_list_get(argc, argv, NULL, parsable, &ret)) == NULL) + if ((list = pool_list_get(argc, argv, NULL, ZFS_TYPE_POOL, parsable, + &ret)) == NULL) return (1); if (pool_list_count(list) == 0 && argc != 0) { @@ -5799,8 +5846,9 @@ zpool_do_iostat(int argc, char **argv) if (cmd != NULL && cb.cb_verbose && !(cb.cb_flags & IOS_ANYHISTO_M)) { cb.vcdl = all_pools_for_each_vdev_run(argc, - argv, cmd, g_zfs, cb.cb_vdev_names, - cb.cb_vdev_names_count, cb.cb_name_flags); + argv, cmd, g_zfs, cb.cb_vdevs.cb_names, + cb.cb_vdevs.cb_names_count, + cb.cb_vdevs.cb_name_flags); } else { cb.vcdl = NULL; } @@ -5852,7 +5900,7 @@ zpool_do_iostat(int argc, char **argv) if (((npools > 1 && !verbose && !(cb.cb_flags & IOS_ANYHISTO_M)) || (!(cb.cb_flags & IOS_ANYHISTO_M) && - cb.cb_vdev_names_count)) && + cb.cb_vdevs.cb_names_count)) && !cb.cb_scripted) { print_iostat_separator(&cb); if (cb.vcdl != NULL) @@ -6314,6 +6362,7 @@ zpool_do_list(int argc, char **argv) unsigned long count = 0; zpool_list_t *list; boolean_t first = B_TRUE; + current_prop_type = ZFS_TYPE_POOL; /* check options */ while ((c = getopt(argc, argv, ":gHLo:pPT:v")) != -1) { @@ -6365,7 +6414,7 @@ zpool_do_list(int argc, char **argv) for (;;) { if ((list = pool_list_get(argc, argv, &cb.cb_proplist, - cb.cb_literal, &ret)) == NULL) + ZFS_TYPE_POOL, cb.cb_literal, &ret)) == NULL) return (1); if (pool_list_count(list) == 0) @@ -7121,8 +7170,8 @@ zpool_do_reopen(int argc, char **argv) argv += optind; /* if argc == 0 we will execute zpool_reopen_one on all pools */ - ret = for_each_pool(argc, argv, B_TRUE, NULL, B_FALSE, zpool_reopen_one, - &scrub_restart); + ret = for_each_pool(argc, argv, B_TRUE, NULL, ZFS_TYPE_POOL, + B_FALSE, zpool_reopen_one, &scrub_restart); return (ret); } @@ -7251,13 +7300,13 @@ zpool_do_scrub(int argc, char **argv) usage(B_FALSE); } - error = for_each_pool(argc, argv, B_TRUE, NULL, B_FALSE, - scrub_callback, &cb); + error = for_each_pool(argc, argv, B_TRUE, NULL, ZFS_TYPE_POOL, 
+ B_FALSE, scrub_callback, &cb); if (wait && !error) { zpool_wait_activity_t act = ZPOOL_WAIT_SCRUB; - error = for_each_pool(argc, argv, B_TRUE, NULL, B_FALSE, - wait_callback, &act); + error = for_each_pool(argc, argv, B_TRUE, NULL, ZFS_TYPE_POOL, + B_FALSE, wait_callback, &act); } return (error); @@ -7295,8 +7344,8 @@ zpool_do_resilver(int argc, char **argv) usage(B_FALSE); } - return (for_each_pool(argc, argv, B_TRUE, NULL, B_FALSE, - scrub_callback, &cb)); + return (for_each_pool(argc, argv, B_TRUE, NULL, ZFS_TYPE_POOL, + B_FALSE, scrub_callback, &cb)); } /* @@ -8719,8 +8768,8 @@ zpool_do_status(int argc, char **argv) cb.vcdl = all_pools_for_each_vdev_run(argc, argv, cmd, NULL, NULL, 0, 0); - ret = for_each_pool(argc, argv, B_TRUE, NULL, cb.cb_literal, - status_callback, &cb); + ret = for_each_pool(argc, argv, B_TRUE, NULL, ZFS_TYPE_POOL, + cb.cb_literal, status_callback, &cb); if (cb.vcdl != NULL) free_vdev_cmd_data_list(cb.vcdl); @@ -9279,8 +9328,8 @@ zpool_do_upgrade(int argc, char **argv) (void) printf(gettext("\n")); } } else { - ret = for_each_pool(argc, argv, B_FALSE, NULL, B_FALSE, - upgrade_one, &cb); + ret = for_each_pool(argc, argv, B_FALSE, NULL, ZFS_TYPE_POOL, + B_FALSE, upgrade_one, &cb); } return (ret); @@ -9476,8 +9525,8 @@ zpool_do_history(int argc, char **argv) argc -= optind; argv += optind; - ret = for_each_pool(argc, argv, B_FALSE, NULL, B_FALSE, get_history_one, - &cbdata); + ret = for_each_pool(argc, argv, B_FALSE, NULL, ZFS_TYPE_POOL, + B_FALSE, get_history_one, &cbdata); if (argc == 0 && cbdata.first == B_TRUE) { (void) fprintf(stderr, gettext("no pools available\n")); @@ -9875,44 +9924,135 @@ zpool_do_events(int argc, char **argv) } static int -get_callback(zpool_handle_t *zhp, void *data) +get_callback_vdev(zpool_handle_t *zhp, char *vdevname, void *data) { zprop_get_cbdata_t *cbp = (zprop_get_cbdata_t *)data; - char value[MAXNAMELEN]; + char value[ZFS_MAXPROPLEN]; zprop_source_t srctype; - zprop_list_t *pl; - - for (pl = 
cbp->cb_proplist; pl != NULL; pl = pl->pl_next) { + for (zprop_list_t *pl = cbp->cb_proplist; pl != NULL; + pl = pl->pl_next) { + char *prop_name; /* - * Skip the special fake placeholder. This will also skip + * If the first property is pool name, it is a special + * placeholder that we can skip. This will also skip * over the name property when 'all' is specified. */ if (pl->pl_prop == ZPOOL_PROP_NAME && pl == cbp->cb_proplist) continue; - if (pl->pl_prop == ZPROP_INVAL && - (zpool_prop_feature(pl->pl_user_prop) || - zpool_prop_unsupported(pl->pl_user_prop))) { - srctype = ZPROP_SRC_LOCAL; - - if (zpool_prop_get_feature(zhp, pl->pl_user_prop, - value, sizeof (value)) == 0) { - zprop_print_one_property(zpool_get_name(zhp), - cbp, pl->pl_user_prop, value, srctype, - NULL, NULL); - } + if (pl->pl_prop == ZPROP_INVAL) { + prop_name = pl->pl_user_prop; } else { - if (zpool_get_prop(zhp, pl->pl_prop, value, - sizeof (value), &srctype, cbp->cb_literal) != 0) - continue; - - zprop_print_one_property(zpool_get_name(zhp), cbp, - zpool_prop_to_name(pl->pl_prop), value, srctype, - NULL, NULL); + prop_name = (char *)vdev_prop_to_name(pl->pl_prop); + } + if (zpool_get_vdev_prop(zhp, vdevname, pl->pl_prop, + prop_name, value, sizeof (value), &srctype, + cbp->cb_literal) == 0) { + zprop_print_one_property(vdevname, cbp, prop_name, + value, srctype, NULL, NULL); } } + + return (0); +} + +static int +get_callback_vdev_width_cb(void *zhp_data, nvlist_t *nv, void *data) +{ + zpool_handle_t *zhp = zhp_data; + zprop_get_cbdata_t *cbp = (zprop_get_cbdata_t *)data; + char *vdevname = zpool_vdev_name(g_zfs, zhp, nv, + cbp->cb_vdevs.cb_name_flags); + int ret; + + /* Adjust the column widths for the vdev properties */ + ret = vdev_expand_proplist(zhp, vdevname, &cbp->cb_proplist); + + return (ret); +} + +static int +get_callback_vdev_cb(void *zhp_data, nvlist_t *nv, void *data) +{ + zpool_handle_t *zhp = zhp_data; + zprop_get_cbdata_t *cbp = (zprop_get_cbdata_t *)data; + char *vdevname = 
zpool_vdev_name(g_zfs, zhp, nv, + cbp->cb_vdevs.cb_name_flags); + int ret; + + /* Display the properties */ + ret = get_callback_vdev(zhp, vdevname, data); + + return (ret); +} + +static int +get_callback(zpool_handle_t *zhp, void *data) +{ + zprop_get_cbdata_t *cbp = (zprop_get_cbdata_t *)data; + char value[MAXNAMELEN]; + zprop_source_t srctype; + zprop_list_t *pl; + int vid; + + if (cbp->cb_type == ZFS_TYPE_VDEV) { + if (strcmp(cbp->cb_vdevs.cb_names[0], "all-vdevs") == 0) { + for_each_vdev(zhp, get_callback_vdev_width_cb, data); + for_each_vdev(zhp, get_callback_vdev_cb, data); + } else { + /* Adjust column widths for vdev properties */ + for (vid = 0; vid < cbp->cb_vdevs.cb_names_count; + vid++) { + vdev_expand_proplist(zhp, + cbp->cb_vdevs.cb_names[vid], + &cbp->cb_proplist); + } + /* Display the properties */ + for (vid = 0; vid < cbp->cb_vdevs.cb_names_count; + vid++) { + get_callback_vdev(zhp, + cbp->cb_vdevs.cb_names[vid], data); + } + } + } else { + assert(cbp->cb_type == ZFS_TYPE_POOL); + for (pl = cbp->cb_proplist; pl != NULL; pl = pl->pl_next) { + /* + * Skip the special fake placeholder. This will also + * skip over the name property when 'all' is specified. 
+ */ + if (pl->pl_prop == ZPOOL_PROP_NAME && + pl == cbp->cb_proplist) + continue; + + if (pl->pl_prop == ZPROP_INVAL && + (zpool_prop_feature(pl->pl_user_prop) || + zpool_prop_unsupported(pl->pl_user_prop))) { + srctype = ZPROP_SRC_LOCAL; + + if (zpool_prop_get_feature(zhp, + pl->pl_user_prop, value, + sizeof (value)) == 0) { + zprop_print_one_property( + zpool_get_name(zhp), cbp, + pl->pl_user_prop, value, srctype, + NULL, NULL); + } + } else { + if (zpool_get_prop(zhp, pl->pl_prop, value, + sizeof (value), &srctype, + cbp->cb_literal) != 0) + continue; + + zprop_print_one_property(zpool_get_name(zhp), + cbp, zpool_prop_to_name(pl->pl_prop), + value, srctype, NULL, NULL); + } + } + } + return (0); } @@ -9936,6 +10076,7 @@ zpool_do_get(int argc, char **argv) int ret; int c, i; char *value; + char *propstr = NULL; cb.cb_first = B_TRUE; @@ -9948,6 +10089,8 @@ zpool_do_get(int argc, char **argv) cb.cb_columns[2] = GET_COL_VALUE; cb.cb_columns[3] = GET_COL_SOURCE; cb.cb_type = ZFS_TYPE_POOL; + cb.cb_vdevs.cb_name_flags |= VDEV_NAME_TYPE_ID; + current_prop_type = cb.cb_type; /* check options */ while ((c = getopt(argc, argv, ":Hpo:")) != -1) { @@ -10025,13 +10168,52 @@ zpool_do_get(int argc, char **argv) usage(B_FALSE); } - if (zprop_get_list(g_zfs, argv[0], &cb.cb_proplist, - ZFS_TYPE_POOL) != 0) - usage(B_FALSE); + /* Properties list is needed later by zprop_get_list() */ + propstr = argv[0]; argc--; argv++; + if (argc == 0) { + /* No args, so just print the defaults. */ + } else if (are_all_pools(argc, argv)) { + /* All the args are pool names */ + } else if (are_all_pools(1, argv)) { + /* The first arg is a pool name */ + if ((argc == 2 && strcmp(argv[1], "all-vdevs") == 0) || + are_vdevs_in_pool(argc - 1, argv + 1, argv[0], + &cb.cb_vdevs)) { + /* ... 
and the rest are vdev names */ + cb.cb_vdevs.cb_names = argv + 1; + cb.cb_vdevs.cb_names_count = argc - 1; + cb.cb_type = ZFS_TYPE_VDEV; + argc = 1; /* One pool to process */ + } else { + fprintf(stderr, gettext("Expected a list of vdevs in" + " \"%s\", but got:\n"), argv[0]); + error_list_unresolved_vdevs(argc - 1, argv + 1, + argv[0], &cb.cb_vdevs); + fprintf(stderr, "\n"); + usage(B_FALSE); + return (1); + } + } else { + /* + * The first arg isn't a pool name, + */ + fprintf(stderr, gettext("missing pool name.\n")); + fprintf(stderr, "\n"); + usage(B_FALSE); + return (1); + } + + if (zprop_get_list(g_zfs, propstr, &cb.cb_proplist, + cb.cb_type) != 0) { + /* Use correct list of valid properties (pool or vdev) */ + current_prop_type = cb.cb_type; + usage(B_FALSE); + } + if (cb.cb_proplist != NULL) { fake_name.pl_prop = ZPOOL_PROP_NAME; fake_name.pl_width = strlen(gettext("NAME")); @@ -10039,8 +10221,8 @@ zpool_do_get(int argc, char **argv) cb.cb_proplist = &fake_name; } - ret = for_each_pool(argc, argv, B_TRUE, &cb.cb_proplist, cb.cb_literal, - get_callback, &cb); + ret = for_each_pool(argc, argv, B_TRUE, &cb.cb_proplist, cb.cb_type, + cb.cb_literal, get_callback, &cb); if (cb.cb_proplist == &fake_name) zprop_free_list(fake_name.pl_next); @@ -10053,14 +10235,15 @@ zpool_do_get(int argc, char **argv) typedef struct set_cbdata { char *cb_propname; char *cb_value; + zfs_type_t cb_type; + vdev_cbdata_t cb_vdevs; boolean_t cb_any_successful; } set_cbdata_t; static int -set_callback(zpool_handle_t *zhp, void *data) +set_pool_callback(zpool_handle_t *zhp, set_cbdata_t *cb) { int error; - set_cbdata_t *cb = (set_cbdata_t *)data; /* Check if we have out-of-bounds features */ if (strcmp(cb->cb_propname, ZPOOL_CONFIG_COMPATIBILITY) == 0) { @@ -10121,9 +10304,24 @@ set_callback(zpool_handle_t *zhp, void *data) error = zpool_set_prop(zhp, cb->cb_propname, cb->cb_value); - if (!error) - cb->cb_any_successful = B_TRUE; + return (error); +} +static int 
+set_callback(zpool_handle_t *zhp, void *data) +{ + int error; + set_cbdata_t *cb = (set_cbdata_t *)data; + + if (cb->cb_type == ZFS_TYPE_VDEV) { + error = zpool_set_vdev_prop(zhp, *cb->cb_vdevs.cb_names, + cb->cb_propname, cb->cb_value); + } else { + assert(cb->cb_type == ZFS_TYPE_POOL); + error = set_pool_callback(zhp, cb); + } + + cb->cb_any_successful = !error; return (error); } @@ -10133,6 +10331,7 @@ zpool_do_set(int argc, char **argv) set_cbdata_t cb = { 0 }; int error; + current_prop_type = ZFS_TYPE_POOL; if (argc > 1 && argv[1][0] == '-') { (void) fprintf(stderr, gettext("invalid option '%c'\n"), argv[1][1]); @@ -10150,12 +10349,14 @@ zpool_do_set(int argc, char **argv) usage(B_FALSE); } - if (argc > 3) { + if (argc > 4) { (void) fprintf(stderr, gettext("too many pool names\n")); usage(B_FALSE); } cb.cb_propname = argv[1]; + cb.cb_type = ZFS_TYPE_POOL; + cb.cb_vdevs.cb_name_flags |= VDEV_NAME_TYPE_ID; cb.cb_value = strchr(cb.cb_propname, '='); if (cb.cb_value == NULL) { (void) fprintf(stderr, gettext("missing value in " @@ -10165,9 +10366,33 @@ zpool_do_set(int argc, char **argv) *(cb.cb_value) = '\0'; cb.cb_value++; + argc -= 2; + argv += 2; - error = for_each_pool(argc - 2, argv + 2, B_TRUE, NULL, B_FALSE, - set_callback, &cb); + if (are_vdevs_in_pool(argc, argv, NULL, &cb.cb_vdevs)) { + /* Argument is a vdev */ + cb.cb_vdevs.cb_names = argv; + cb.cb_vdevs.cb_names_count = 1; + cb.cb_type = ZFS_TYPE_VDEV; + argc = 0; /* No pools to process */ + } else if (are_all_pools(1, argv)) { + /* The first arg is a pool name */ + if (are_vdevs_in_pool(argc - 1, argv + 1, argv[0], + &cb.cb_vdevs)) { + /* 2nd argument is a vdev */ + cb.cb_vdevs.cb_names = argv + 1; + cb.cb_vdevs.cb_names_count = 1; + cb.cb_type = ZFS_TYPE_VDEV; + argc = 1; /* One pool to process */ + } else if (argc > 1) { + (void) fprintf(stderr, + gettext("too many pool names\n")); + usage(B_FALSE); + } + } + + error = for_each_pool(argc, argv, B_TRUE, NULL, ZFS_TYPE_POOL, + B_FALSE, set_callback, 
&cb); return (error); } diff --git a/cmd/zpool/zpool_util.h b/cmd/zpool/zpool_util.h index 6665eaf0d4..583f48cca8 100644 --- a/cmd/zpool/zpool_util.h +++ b/cmd/zpool/zpool_util.h @@ -65,7 +65,7 @@ nvlist_t *split_mirror_vdev(zpool_handle_t *zhp, char *newname, /* * Pool list functions */ -int for_each_pool(int, char **, boolean_t unavail, zprop_list_t **, +int for_each_pool(int, char **, boolean_t unavail, zprop_list_t **, zfs_type_t, boolean_t, zpool_iter_f, void *); /* Vdev list functions */ @@ -73,7 +73,8 @@ int for_each_vdev(zpool_handle_t *zhp, pool_vdev_iter_f func, void *data); typedef struct zpool_list zpool_list_t; -zpool_list_t *pool_list_get(int, char **, zprop_list_t **, boolean_t, int *); +zpool_list_t *pool_list_get(int, char **, zprop_list_t **, zfs_type_t, + boolean_t, int *); void pool_list_update(zpool_list_t *); int pool_list_iter(zpool_list_t *, int unavail, zpool_iter_f, void *); void pool_list_free(zpool_list_t *); diff --git a/contrib/pyzfs/libzfs_core/_constants.py b/contrib/pyzfs/libzfs_core/_constants.py index 2dfed224c2..3273652f75 100644 --- a/contrib/pyzfs/libzfs_core/_constants.py +++ b/contrib/pyzfs/libzfs_core/_constants.py @@ -99,6 +99,7 @@ zfs_errno = enum_with_offset(1024, [ 'ZFS_ERR_RESILVER_IN_PROGRESS', 'ZFS_ERR_REBUILD_IN_PROGRESS', 'ZFS_ERR_BADPROP', + 'ZFS_ERR_VDEV_NOTSUP', ], {} ) @@ -110,5 +111,6 @@ ZFS_ERR_NO_CHECKPOINT = zfs_errno.ZFS_ERR_NO_CHECKPOINT ZFS_ERR_DEVRM_IN_PROGRESS = zfs_errno.ZFS_ERR_DEVRM_IN_PROGRESS ZFS_ERR_VDEV_TOO_BIG = zfs_errno.ZFS_ERR_VDEV_TOO_BIG ZFS_ERR_WRONG_PARENT = zfs_errno.ZFS_ERR_WRONG_PARENT +ZFS_ERR_VDEV_NOTSUP = zfs_errno.ZFS_ERR_VDEV_NOTSUP # vim: softtabstop=4 tabstop=4 expandtab shiftwidth=4 diff --git a/include/libzfs.h b/include/libzfs.h index c0883a9836..53a778f7be 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -150,6 +150,7 @@ typedef enum zfs_error { EZFS_NO_RESILVER_DEFER, /* pool doesn't support resilver_defer */ EZFS_EXPORT_IN_PROGRESS, /* currently exporting the pool 
*/ EZFS_REBUILDING, /* resilvering (sequential reconstrution) */ + EZFS_VDEV_NOTSUP, /* ops not supported for this type of vdev */ EZFS_UNKNOWN } zfs_error_t; @@ -336,6 +337,24 @@ _LIBZFS_H int zpool_props_refresh(zpool_handle_t *); _LIBZFS_H const char *zpool_prop_to_name(zpool_prop_t); _LIBZFS_H const char *zpool_prop_values(zpool_prop_t); +/* + * Functions to manage vdev properties + */ +_LIBZFS_H int zpool_get_vdev_prop_value(nvlist_t *, vdev_prop_t, char *, char *, + size_t, zprop_source_t *, boolean_t); +_LIBZFS_H int zpool_get_vdev_prop(zpool_handle_t *, const char *, vdev_prop_t, + char *, char *, size_t, zprop_source_t *, boolean_t); +_LIBZFS_H int zpool_get_all_vdev_props(zpool_handle_t *, const char *, + nvlist_t **); +_LIBZFS_H int zpool_set_vdev_prop(zpool_handle_t *, const char *, const char *, + const char *); + +_LIBZFS_H const char *vdev_prop_to_name(vdev_prop_t); +_LIBZFS_H const char *vdev_prop_values(vdev_prop_t); +_LIBZFS_H boolean_t vdev_prop_user(const char *name); +_LIBZFS_H const char *vdev_prop_column_name(vdev_prop_t); +_LIBZFS_H boolean_t vdev_prop_align_right(vdev_prop_t); + /* * Pool health statistics. 
*/ @@ -552,6 +571,8 @@ typedef struct zprop_list { _LIBZFS_H int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **, boolean_t, boolean_t); _LIBZFS_H void zfs_prune_proplist(zfs_handle_t *, uint8_t *); +_LIBZFS_H int vdev_expand_proplist(zpool_handle_t *, const char *, + zprop_list_t **); #define ZFS_MOUNTPOINT_NONE "none" #define ZFS_MOUNTPOINT_LEGACY "legacy" @@ -567,7 +588,7 @@ _LIBZFS_H void zfs_prune_proplist(zfs_handle_t *, uint8_t *); * zpool property management */ _LIBZFS_H int zpool_expand_proplist(zpool_handle_t *, zprop_list_t **, - boolean_t); + zfs_type_t, boolean_t); _LIBZFS_H int zpool_prop_get_feature(zpool_handle_t *, const char *, char *, size_t); _LIBZFS_H const char *zpool_prop_default_string(zpool_prop_t); @@ -598,6 +619,12 @@ typedef enum { /* * Functions for printing zfs or zpool properties */ +typedef struct vdev_cbdata { + int cb_name_flags; + char **cb_names; + unsigned int cb_names_count; +} vdev_cbdata_t; + typedef struct zprop_get_cbdata { int cb_sources; zfs_get_column_t cb_columns[ZFS_GET_NCOLS]; @@ -607,6 +634,7 @@ typedef struct zprop_get_cbdata { boolean_t cb_first; zprop_list_t *cb_proplist; zfs_type_t cb_type; + vdev_cbdata_t cb_vdevs; } zprop_get_cbdata_t; _LIBZFS_H void zprop_print_one_property(const char *, zprop_get_cbdata_t *, @@ -879,7 +907,7 @@ _LIBZFS_H void zfs_commit_shares(const char *); _LIBZFS_H int zfs_nicestrtonum(libzfs_handle_t *, const char *, uint64_t *); /* - * Utility functions to run an external process. + * Utility functions to run an external process.
*/ #define STDOUT_VERBOSE 0x01 #define STDERR_VERBOSE 0x02 diff --git a/include/libzfs_core.h b/include/libzfs_core.h index 9020d70db3..7acc03fc71 100644 --- a/include/libzfs_core.h +++ b/include/libzfs_core.h @@ -146,6 +146,10 @@ _LIBZFS_CORE_H int lzc_wait_fs(const char *, zfs_wait_activity_t, boolean_t *); _LIBZFS_CORE_H int lzc_set_bootenv(const char *, const nvlist_t *); _LIBZFS_CORE_H int lzc_get_bootenv(const char *, nvlist_t **); + +_LIBZFS_CORE_H int lzc_get_vdev_prop(const char *, nvlist_t *, nvlist_t **); +_LIBZFS_CORE_H int lzc_set_vdev_prop(const char *, nvlist_t *, nvlist_t **); + #ifdef __cplusplus } #endif diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 2af11fc719..287b3beae9 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -54,7 +54,8 @@ typedef enum { ZFS_TYPE_SNAPSHOT = (1 << 1), ZFS_TYPE_VOLUME = (1 << 2), ZFS_TYPE_POOL = (1 << 3), - ZFS_TYPE_BOOKMARK = (1 << 4) + ZFS_TYPE_BOOKMARK = (1 << 4), + ZFS_TYPE_VDEV = (1 << 5), } zfs_type_t; /* @@ -252,6 +253,7 @@ typedef enum { /* Small enough to not hog a whole line of printout in zpool(8). */ #define ZPROP_MAX_COMMENT 32 +#define ZPROP_BOOLEAN_NA 2 #define ZPROP_VALUE "value" #define ZPROP_SOURCE "source" @@ -298,6 +300,59 @@ typedef int (*zprop_func)(int, void *); */ #define ZFS_WRITTEN_PROP_PREFIX_LEN 8 +/* + * VDEV properties are identified by these constants and must be added to the + * end of this list to ensure that external consumers are not affected + * by the change. If you make any changes to this list, be sure to update + * the property table in usr/src/common/zfs/zpool_prop.c. 
+ */ +typedef enum { + VDEV_PROP_INVAL = -1, +#define VDEV_PROP_USER VDEV_PROP_INVAL + VDEV_PROP_NAME, + VDEV_PROP_CAPACITY, + VDEV_PROP_STATE, + VDEV_PROP_GUID, + VDEV_PROP_ASIZE, + VDEV_PROP_PSIZE, + VDEV_PROP_ASHIFT, + VDEV_PROP_SIZE, + VDEV_PROP_FREE, + VDEV_PROP_ALLOCATED, + VDEV_PROP_COMMENT, + VDEV_PROP_EXPANDSZ, + VDEV_PROP_FRAGMENTATION, + VDEV_PROP_BOOTSIZE, + VDEV_PROP_PARITY, + VDEV_PROP_PATH, + VDEV_PROP_DEVID, + VDEV_PROP_PHYS_PATH, + VDEV_PROP_ENC_PATH, + VDEV_PROP_FRU, + VDEV_PROP_PARENT, + VDEV_PROP_CHILDREN, + VDEV_PROP_NUMCHILDREN, + VDEV_PROP_READ_ERRORS, + VDEV_PROP_WRITE_ERRORS, + VDEV_PROP_CHECKSUM_ERRORS, + VDEV_PROP_INITIALIZE_ERRORS, + VDEV_PROP_OPS_NULL, + VDEV_PROP_OPS_READ, + VDEV_PROP_OPS_WRITE, + VDEV_PROP_OPS_FREE, + VDEV_PROP_OPS_CLAIM, + VDEV_PROP_OPS_TRIM, + VDEV_PROP_BYTES_NULL, + VDEV_PROP_BYTES_READ, + VDEV_PROP_BYTES_WRITE, + VDEV_PROP_BYTES_FREE, + VDEV_PROP_BYTES_CLAIM, + VDEV_PROP_BYTES_TRIM, + VDEV_PROP_REMOVING, + VDEV_PROP_ALLOCATING, + VDEV_NUM_PROPS +} vdev_prop_t; + /* * Dataset property functions shared between libzfs and kernel. */ @@ -337,6 +392,22 @@ _SYS_FS_ZFS_H int zpool_prop_string_to_index(zpool_prop_t, const char *, uint64_t *); _SYS_FS_ZFS_H uint64_t zpool_prop_random_value(zpool_prop_t, uint64_t seed); +/* + * VDEV property functions shared between libzfs and kernel. 
+ */ +_SYS_FS_ZFS_H vdev_prop_t vdev_name_to_prop(const char *); +_SYS_FS_ZFS_H boolean_t vdev_prop_user(const char *name); +_SYS_FS_ZFS_H const char *vdev_prop_to_name(vdev_prop_t); +_SYS_FS_ZFS_H const char *vdev_prop_default_string(vdev_prop_t); +_SYS_FS_ZFS_H uint64_t vdev_prop_default_numeric(vdev_prop_t); +_SYS_FS_ZFS_H boolean_t vdev_prop_readonly(vdev_prop_t prop); +_SYS_FS_ZFS_H int vdev_prop_index_to_string(vdev_prop_t, uint64_t, + const char **); +_SYS_FS_ZFS_H int vdev_prop_string_to_index(vdev_prop_t, const char *, + uint64_t *); +_SYS_FS_ZFS_H boolean_t zpool_prop_vdev(const char *name); +_SYS_FS_ZFS_H uint64_t vdev_prop_random_value(vdev_prop_t prop, uint64_t seed); + /* * Definitions for the Delegation. */ @@ -712,6 +783,7 @@ typedef struct zpool_load_policy { #define ZPOOL_CONFIG_ORIG_GUID "orig_guid" #define ZPOOL_CONFIG_SPLIT_GUID "split_guid" #define ZPOOL_CONFIG_SPLIT_LIST "guid_list" +#define ZPOOL_CONFIG_NONALLOCATING "non_allocating" #define ZPOOL_CONFIG_REMOVING "removing" #define ZPOOL_CONFIG_RESILVER_TXG "resilver_txg" #define ZPOOL_CONFIG_REBUILD_TXG "rebuild_txg" @@ -1109,6 +1181,7 @@ typedef struct vdev_stat { uint64_t vs_configured_ashift; /* TLV vdev_ashift */ uint64_t vs_logical_ashift; /* vdev_logical_ashift */ uint64_t vs_physical_ashift; /* vdev_physical_ashift */ + uint64_t vs_noalloc; /* allocations halted? */ } vdev_stat_t; /* BEGIN CSTYLED */ @@ -1362,6 +1435,8 @@ typedef enum zfs_ioc { ZFS_IOC_GET_BOOKMARK_PROPS, /* 0x5a52 */ ZFS_IOC_WAIT, /* 0x5a53 */ ZFS_IOC_WAIT_FS, /* 0x5a54 */ + ZFS_IOC_VDEV_GET_PROPS, /* 0x5a55 */ + ZFS_IOC_VDEV_SET_PROPS, /* 0x5a56 */ /* * Per-platform (Optional) - 8/128 numbers reserved. 
@@ -1417,6 +1492,7 @@ typedef enum { ZFS_ERR_RESILVER_IN_PROGRESS, ZFS_ERR_REBUILD_IN_PROGRESS, ZFS_ERR_BADPROP, + ZFS_ERR_VDEV_NOTSUP, } zfs_errno_t; /* @@ -1508,6 +1584,18 @@ typedef enum { #define ZPOOL_WAIT_TAG "wait_tag" #define ZPOOL_WAIT_WAITED "wait_waited" +/* + * The following are names used when invoking ZFS_IOC_VDEV_GET_PROPS. + */ +#define ZPOOL_VDEV_PROPS_GET_VDEV "vdevprops_get_vdev" +#define ZPOOL_VDEV_PROPS_GET_PROPS "vdevprops_get_props" + +/* + * The following are names used when invoking ZFS_IOC_VDEV_SET_PROPS. + */ +#define ZPOOL_VDEV_PROPS_SET_VDEV "vdevprops_set_vdev" +#define ZPOOL_VDEV_PROPS_SET_PROPS "vdevprops_set_props" + /* * The following are names used when invoking ZFS_IOC_WAIT_FS. */ diff --git a/include/sys/spa.h b/include/sys/spa.h index a55dbd66d8..2e365eabe2 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -792,7 +792,8 @@ extern int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing, int rebuild); extern int spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done); -extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare); +extern int spa_vdev_alloc(spa_t *spa, uint64_t guid); +extern int spa_vdev_noalloc(spa_t *spa, uint64_t guid); extern boolean_t spa_vdev_remove_active(spa_t *spa); extern int spa_vdev_initialize(spa_t *spa, nvlist_t *nv, uint64_t cmd_type, nvlist_t *vdev_errlist); diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 9714bbce9c..eee4783fe3 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -308,6 +308,7 @@ struct spa { uint64_t spa_missing_tvds; /* unopenable tvds on load */ uint64_t spa_missing_tvds_allowed; /* allow loading spa?
*/ + uint64_t spa_nonallocating_dspace; spa_removing_phys_t spa_removing_phys; spa_vdev_removal_t *spa_vdev_removal; diff --git a/include/sys/vdev.h b/include/sys/vdev.h index 0a81713a44..4e507d0819 100644 --- a/include/sys/vdev.h +++ b/include/sys/vdev.h @@ -219,6 +219,9 @@ typedef enum { extern int vdev_label_init(vdev_t *vd, uint64_t txg, vdev_labeltype_t reason); +extern int vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl); +extern int vdev_prop_get(vdev_t *vd, nvlist_t *nvprops, nvlist_t *outnvl); + #ifdef __cplusplus } #endif diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index 3cfde40a77..86959725a5 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -295,6 +295,7 @@ struct vdev { list_node_t vdev_state_dirty_node; /* state dirty list */ uint64_t vdev_deflate_ratio; /* deflation ratio (x512) */ uint64_t vdev_islog; /* is an intent log device */ + uint64_t vdev_noalloc; /* device is passivated? */ uint64_t vdev_removing; /* device is being removed? 
*/ boolean_t vdev_ishole; /* is a hole in the namespace */ uint64_t vdev_top_zap; diff --git a/include/sys/zfs_sysfs.h b/include/sys/zfs_sysfs.h index 14ba61fc4b..912ef234f8 100644 --- a/include/sys/zfs_sysfs.h +++ b/include/sys/zfs_sysfs.h @@ -39,6 +39,7 @@ _SYS_ZFS_SYSFS_H boolean_t zfs_mod_supported(const char *, const char *); #endif #define ZFS_SYSFS_POOL_PROPERTIES "properties.pool" +#define ZFS_SYSFS_VDEV_PROPERTIES "properties.vdev" #define ZFS_SYSFS_DATASET_PROPERTIES "properties.dataset" #define ZFS_SYSFS_KERNEL_FEATURES "features.kernel" #define ZFS_SYSFS_POOL_FEATURES "features.pool" diff --git a/include/zfs_prop.h b/include/zfs_prop.h index 91b5032e70..8014c757aa 100644 --- a/include/zfs_prop.h +++ b/include/zfs_prop.h @@ -99,6 +99,13 @@ _ZFS_PROP_H void zpool_prop_init(void); _ZFS_PROP_H zprop_type_t zpool_prop_get_type(zpool_prop_t); _ZFS_PROP_H zprop_desc_t *zpool_prop_get_table(void); +/* + * vdev property functions + */ +_ZFS_PROP_H void vdev_prop_init(void); +_ZFS_PROP_H zprop_type_t vdev_prop_get_type(vdev_prop_t prop); +_ZFS_PROP_H zprop_desc_t *vdev_prop_get_table(void); + /* * Common routines to initialize property tables */ @@ -122,11 +129,13 @@ _ZFS_PROP_H int zprop_iter_common(zprop_func, void *, boolean_t, boolean_t, _ZFS_PROP_H int zprop_name_to_prop(const char *, zfs_type_t); _ZFS_PROP_H int zprop_string_to_index(int, const char *, uint64_t *, zfs_type_t); -_ZFS_PROP_H int zprop_index_to_string(int, uint64_t, const char **, zfs_type_t); +_ZFS_PROP_H int zprop_index_to_string(int, uint64_t, const char **, + zfs_type_t); _ZFS_PROP_H uint64_t zprop_random_value(int, uint64_t, zfs_type_t); _ZFS_PROP_H const char *zprop_values(int, zfs_type_t); _ZFS_PROP_H size_t zprop_width(int, boolean_t *, zfs_type_t); _ZFS_PROP_H boolean_t zprop_valid_for_type(int, zfs_type_t, boolean_t); +_ZFS_PROP_H int zprop_valid_char(char c); #ifdef __cplusplus } diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index ab6d27e913..8a696206a5 100644 --- 
a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -259,6 +259,22 @@ + + + + + + + + + + + + + + + + @@ -463,6 +479,7 @@ + @@ -477,6 +494,8 @@ + + @@ -514,6 +533,7 @@ + @@ -523,6 +543,7 @@ + @@ -557,6 +578,7 @@ + @@ -2683,6 +2705,73 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -2784,6 +2873,10 @@ + + + + @@ -2886,6 +2979,7 @@ + @@ -4216,6 +4310,53 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -4342,9 +4483,16 @@ + + + + + + + @@ -4680,6 +4828,40 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -4922,7 +5104,19 @@ - + + + + + + + + + + + + + @@ -4947,6 +5141,9 @@ + + + diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 8ed96275c4..6e302ad4b3 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -29,6 +29,7 @@ * Copyright (c) 2017, Intel Corporation. * Copyright (c) 2018, loli10K * Copyright (c) 2021, Colm Buckley + * Copyright (c) 2021, Klara Inc. 
*/ #include @@ -61,6 +62,7 @@ static boolean_t zpool_vdev_is_interior(const char *name); typedef struct prop_flags { int create:1; /* Validate property on creation */ int import:1; /* Validate property on import */ + int vdevprop:1; /* Validate property as a VDEV property */ } prop_flags_t; /* @@ -478,6 +480,35 @@ zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname, while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { const char *propname = nvpair_name(elem); + if (flags.vdevprop && zpool_prop_vdev(propname)) { + vdev_prop_t vprop = vdev_name_to_prop(propname); + + if (vdev_prop_readonly(vprop)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' " + "is readonly"), propname); + (void) zfs_error(hdl, EZFS_PROPREADONLY, + errbuf); + goto error; + } + + if (zprop_parse_value(hdl, elem, vprop, ZFS_TYPE_VDEV, + retprops, &strval, &intval, errbuf) != 0) + goto error; + + continue; + } else if (flags.vdevprop && vdev_prop_user(propname)) { + if (nvlist_add_nvpair(retprops, elem) != 0) { + (void) no_memory(hdl); + goto error; + } + continue; + } else if (flags.vdevprop) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "invalid property: '%s'"), propname); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + prop = zpool_name_to_prop(propname); if (prop == ZPOOL_PROP_INVAL && zpool_prop_feature(propname)) { int err; @@ -806,7 +837,7 @@ zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval) int zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp, - boolean_t literal) + zfs_type_t type, boolean_t literal) { libzfs_handle_t *hdl = zhp->zpool_hdl; zprop_list_t *entry; @@ -817,9 +848,12 @@ zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp, boolean_t firstexpand = (NULL == *plp); int i; - if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0) + if (zprop_expand_list(hdl, plp, type) != 0) return (-1); + if (type == ZFS_TYPE_VDEV) + return (0); + last = plp; while (*last != NULL) last = &(*last)->pl_next; @@ 
-899,6 +933,86 @@ zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp, return (0); } +/* + * Widen the column widths in 'plp' to fit the property values of 'vdevname' + * and, when all properties were requested, append one list entry for each + * user-defined property found on the vdev. + */ +int +vdev_expand_proplist(zpool_handle_t *zhp, const char *vdevname, + zprop_list_t **plp) +{ + zprop_list_t *entry; + char buf[ZFS_MAXPROPLEN]; + char *strval = NULL; + int err = 0; + nvpair_t *elem = NULL; + nvlist_t *vprops = NULL; + nvlist_t *propval = NULL; + const char *propname; + vdev_prop_t prop; + zprop_list_t **last; + + for (entry = *plp; entry != NULL; entry = entry->pl_next) { + if (entry->pl_fixed) + continue; + + if (zpool_get_vdev_prop(zhp, vdevname, entry->pl_prop, + entry->pl_user_prop, buf, sizeof (buf), NULL, + B_FALSE) == 0) { + if (strlen(buf) > entry->pl_width) + entry->pl_width = strlen(buf); + } + if (entry->pl_prop == VDEV_PROP_NAME && + strlen(vdevname) > entry->pl_width) + entry->pl_width = strlen(vdevname); + } + + /* Handle the all properties case */ + last = plp; + if (*last != NULL && (*last)->pl_all == B_TRUE) { + while (*last != NULL) + last = &(*last)->pl_next; + + err = zpool_get_all_vdev_props(zhp, vdevname, &vprops); + if (err != 0) + return (err); + + while ((elem = nvlist_next_nvpair(vprops, elem)) != NULL) { + propname = nvpair_name(elem); + + /* Skip properties that are not user defined */ + if ((prop = vdev_name_to_prop(propname)) != + VDEV_PROP_USER) + continue; + + if (nvpair_value_nvlist(elem, &propval) != 0) + continue; + + verify(nvlist_lookup_string(propval, ZPROP_VALUE, + &strval) == 0); + + if ((entry = zfs_alloc(zhp->zpool_hdl, + sizeof (zprop_list_t))) == NULL) { + nvlist_free(vprops); + return (ENOMEM); + } + + entry->pl_prop = prop; + entry->pl_user_prop = zfs_strdup(zhp->zpool_hdl, + propname); + entry->pl_width = strlen(strval); + entry->pl_all = B_TRUE; + *last = entry; + last = &entry->pl_next; + } + /* Entries keep their own copies, so release the nvlist */ + nvlist_free(vprops); + } + + return (0); +} + /* * Get the state for the given feature on the given ZFS pool.
*/ @@ -4959,3 +5064,353 @@ zpool_load_compat(const char *compat, boolean_t *features, char *report, strlcpy(report, gettext("compatibility set ok"), rlen); return (ZPOOL_COMPATIBILITY_OK); } + +static int +zpool_vdev_guid(zpool_handle_t *zhp, const char *vdevname, uint64_t *vdev_guid) +{ + nvlist_t *tgt; + boolean_t avail_spare, l2cache; + + verify(zhp != NULL); + if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { + char errbuf[1024]; + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "pool is in an unavailable state")); + return (zfs_error(zhp->zpool_hdl, EZFS_POOLUNAVAIL, errbuf)); + } + + if ((tgt = zpool_find_vdev(zhp, vdevname, &avail_spare, &l2cache, + NULL)) == NULL) { + char errbuf[1024]; + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "can not find %s in %s"), + vdevname, zhp->zpool_name); + return (zfs_error(zhp->zpool_hdl, EZFS_NODEVICE, errbuf)); + } + + verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, vdev_guid) == 0); + return (0); +} + +/* + * Get a vdev property value for 'prop' and return the value in + * a pre-allocated buffer. 
+ */ +int +zpool_get_vdev_prop_value(nvlist_t *nvprop, vdev_prop_t prop, char *prop_name, + char *buf, size_t len, zprop_source_t *srctype, boolean_t literal) +{ + nvlist_t *nv; + uint64_t intval; + char *strval; + zprop_source_t src = ZPROP_SRC_NONE; + + if (prop == VDEV_PROP_USER) { + /* user property, prop_name must contain the property name */ + assert(prop_name != NULL); + if (nvlist_lookup_nvlist(nvprop, prop_name, &nv) == 0) { + verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, + &intval) == 0); + src = intval; + verify(nvlist_lookup_string(nv, ZPROP_VALUE, + &strval) == 0); + } else { + /* user prop not found */ + return (-1); + } + (void) strlcpy(buf, strval, len); + if (srctype) + *srctype = src; + return (0); + } + + if (prop_name == NULL) + prop_name = (char *)vdev_prop_to_name(prop); + + switch (vdev_prop_get_type(prop)) { + case PROP_TYPE_STRING: + if (nvlist_lookup_nvlist(nvprop, prop_name, &nv) == 0) { + verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, + &intval) == 0); + src = intval; + verify(nvlist_lookup_string(nv, ZPROP_VALUE, + &strval) == 0); + } else { + src = ZPROP_SRC_DEFAULT; + if ((strval = (char *)vdev_prop_default_string(prop)) + == NULL) + strval = "-"; + } + (void) strlcpy(buf, strval, len); + break; + + case PROP_TYPE_NUMBER: + if (nvlist_lookup_nvlist(nvprop, prop_name, &nv) == 0) { + verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, + &intval) == 0); + src = intval; + verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, + &intval) == 0); + } else { + src = ZPROP_SRC_DEFAULT; + intval = vdev_prop_default_numeric(prop); + } + + switch (prop) { + case VDEV_PROP_ASIZE: + case VDEV_PROP_PSIZE: + case VDEV_PROP_SIZE: + case VDEV_PROP_ALLOCATED: + case VDEV_PROP_FREE: + case VDEV_PROP_READ_ERRORS: + case VDEV_PROP_WRITE_ERRORS: + case VDEV_PROP_CHECKSUM_ERRORS: + case VDEV_PROP_INITIALIZE_ERRORS: + case VDEV_PROP_OPS_NULL: + case VDEV_PROP_OPS_READ: + case VDEV_PROP_OPS_WRITE: + case VDEV_PROP_OPS_FREE: + case VDEV_PROP_OPS_CLAIM: + case 
VDEV_PROP_OPS_TRIM: + case VDEV_PROP_BYTES_NULL: + case VDEV_PROP_BYTES_READ: + case VDEV_PROP_BYTES_WRITE: + case VDEV_PROP_BYTES_FREE: + case VDEV_PROP_BYTES_CLAIM: + case VDEV_PROP_BYTES_TRIM: + if (literal) { + (void) snprintf(buf, len, "%llu", + (u_longlong_t)intval); + } else { + (void) zfs_nicenum(intval, buf, len); + } + break; + case VDEV_PROP_EXPANDSZ: + if (intval == 0) { + (void) strlcpy(buf, "-", len); + } else if (literal) { + (void) snprintf(buf, len, "%llu", + (u_longlong_t)intval); + } else { + (void) zfs_nicenum(intval, buf, len); + } + break; + case VDEV_PROP_CAPACITY: + if (literal) { + (void) snprintf(buf, len, "%llu", + (u_longlong_t)intval); + } else { + (void) snprintf(buf, len, "%llu%%", + (u_longlong_t)intval); + } + break; + case VDEV_PROP_FRAGMENTATION: + if (intval == UINT64_MAX) { + (void) strlcpy(buf, "-", len); + } else { + (void) snprintf(buf, len, "%llu%%", + (u_longlong_t)intval); + } + break; + case VDEV_PROP_STATE: + if (literal) { + (void) snprintf(buf, len, "%llu", + (u_longlong_t)intval); + } else { + (void) strlcpy(buf, zpool_state_to_name(intval, + VDEV_AUX_NONE), len); + } + break; + default: + (void) snprintf(buf, len, "%llu", + (u_longlong_t)intval); + } + break; + + case PROP_TYPE_INDEX: + if (nvlist_lookup_nvlist(nvprop, prop_name, &nv) == 0) { + verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, + &intval) == 0); + src = intval; + verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, + &intval) == 0); + } else { + src = ZPROP_SRC_DEFAULT; + intval = vdev_prop_default_numeric(prop); + } + if (vdev_prop_index_to_string(prop, intval, + (const char **)&strval) != 0) + return (-1); + (void) strlcpy(buf, strval, len); + break; + + default: + abort(); + } + + if (srctype) + *srctype = src; + + return (0); +} + +/* + * Get a vdev property value for 'prop_name' and return the value in + * a pre-allocated buffer. 
+ */ +int +zpool_get_vdev_prop(zpool_handle_t *zhp, const char *vdevname, vdev_prop_t prop, + char *prop_name, char *buf, size_t len, zprop_source_t *srctype, + boolean_t literal) +{ + nvlist_t *reqnvl, *reqprops; + nvlist_t *retprops = NULL; + uint64_t vdev_guid; + int ret; + + if ((ret = zpool_vdev_guid(zhp, vdevname, &vdev_guid)) != 0) + return (ret); + + if (nvlist_alloc(&reqnvl, NV_UNIQUE_NAME, 0) != 0) + return (no_memory(zhp->zpool_hdl)); + if (nvlist_alloc(&reqprops, NV_UNIQUE_NAME, 0) != 0) + return (no_memory(zhp->zpool_hdl)); + + fnvlist_add_uint64(reqnvl, ZPOOL_VDEV_PROPS_GET_VDEV, vdev_guid); + + if (prop != VDEV_PROP_USER) { + /* prop_name overrides prop value */ + if (prop_name != NULL) + prop = vdev_name_to_prop(prop_name); + else + prop_name = (char *)vdev_prop_to_name(prop); + assert(prop < VDEV_NUM_PROPS); + } + + assert(prop_name != NULL); + if (nvlist_add_uint64(reqprops, prop_name, prop) != 0) { + nvlist_free(reqnvl); + nvlist_free(reqprops); + return (no_memory(zhp->zpool_hdl)); + } + + fnvlist_add_nvlist(reqnvl, ZPOOL_VDEV_PROPS_GET_PROPS, reqprops); + + ret = lzc_get_vdev_prop(zhp->zpool_name, reqnvl, &retprops); + + if (ret == 0) { + ret = zpool_get_vdev_prop_value(retprops, prop, prop_name, buf, + len, srctype, literal); + } else { + char errbuf[1024]; + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot get vdev property %s from" + " %s in %s"), prop_name, vdevname, zhp->zpool_name); + (void) zpool_standard_error(zhp->zpool_hdl, ret, errbuf); + } + + nvlist_free(reqnvl); + nvlist_free(reqprops); + nvlist_free(retprops); + + return (ret); +} + +/* + * Get all vdev properties + */ +int +zpool_get_all_vdev_props(zpool_handle_t *zhp, const char *vdevname, + nvlist_t **outnvl) +{ + nvlist_t *nvl = NULL; + uint64_t vdev_guid; + int ret; + + if ((ret = zpool_vdev_guid(zhp, vdevname, &vdev_guid)) != 0) + return (ret); + + if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) + return (no_memory(zhp->zpool_hdl)); + + 
fnvlist_add_uint64(nvl, ZPOOL_VDEV_PROPS_GET_VDEV, vdev_guid); + + ret = lzc_get_vdev_prop(zhp->zpool_name, nvl, outnvl); + + nvlist_free(nvl); + + if (ret) { + char errbuf[1024]; + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot get vdev properties for" + " %s in %s"), vdevname, zhp->zpool_name); + (void) zpool_standard_error(zhp->zpool_hdl, ret, errbuf); + } + + return (ret); +} + +/* + * Set the vdev property 'propname' to 'propval' on 'vdevname' in pool 'zhp'. + */ +int +zpool_set_vdev_prop(zpool_handle_t *zhp, const char *vdevname, + const char *propname, const char *propval) +{ + int ret; + nvlist_t *nvl = NULL; + nvlist_t *outnvl = NULL; + nvlist_t *props; + nvlist_t *realprops; + prop_flags_t flags = { 0 }; + uint64_t version; + uint64_t vdev_guid; + + if ((ret = zpool_vdev_guid(zhp, vdevname, &vdev_guid)) != 0) + return (ret); + + if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) + return (no_memory(zhp->zpool_hdl)); + if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) { + nvlist_free(nvl); + return (no_memory(zhp->zpool_hdl)); + } + + fnvlist_add_uint64(nvl, ZPOOL_VDEV_PROPS_SET_VDEV, vdev_guid); + + if (nvlist_add_string(props, propname, propval) != 0) { + nvlist_free(props); + nvlist_free(nvl); + return (no_memory(zhp->zpool_hdl)); + } + + char errbuf[1024]; + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot set property %s for %s on %s"), + propname, vdevname, zhp->zpool_name); + + flags.vdevprop = 1; + version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL); + if ((realprops = zpool_valid_proplist(zhp->zpool_hdl, + zhp->zpool_name, props, version, flags, errbuf)) == NULL) { + nvlist_free(props); + nvlist_free(nvl); + return (-1); + } + + nvlist_free(props); + props = realprops; + + fnvlist_add_nvlist(nvl, ZPOOL_VDEV_PROPS_SET_PROPS, props); + + ret = lzc_set_vdev_prop(zhp->zpool_name, nvl, &outnvl); + + nvlist_free(props); + nvlist_free(nvl); + nvlist_free(outnvl); + + if (ret) + (void) zpool_standard_error(zhp->zpool_hdl, ret, errbuf);
+ return (ret); +} diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c index c3c009ae3a..b3f39afe21 100644 --- a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -296,6 +296,9 @@ libzfs_error_description(libzfs_handle_t *hdl) case EZFS_REBUILDING: return (dgettext(TEXT_DOMAIN, "currently sequentially " "resilvering")); + case EZFS_VDEV_NOTSUP: + return (dgettext(TEXT_DOMAIN, "operation not supported " + "on this type of vdev")); case EZFS_UNKNOWN: return (dgettext(TEXT_DOMAIN, "unknown error")); default: @@ -716,6 +719,9 @@ zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...) case ZFS_ERR_BADPROP: zfs_verror(hdl, EZFS_BADPROP, fmt, ap); break; + case ZFS_ERR_VDEV_NOTSUP: + zfs_verror(hdl, EZFS_VDEV_NOTSUP, fmt, ap); + break; case ZFS_ERR_IOC_CMD_UNAVAIL: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "the loaded zfs " "module does not support this operation. A reboot may " @@ -1034,6 +1040,7 @@ libzfs_init(void) zfs_prop_init(); zpool_prop_init(); zpool_feature_init(); + vdev_prop_init(); libzfs_mnttab_init(hdl); fletcher_4_init(); @@ -1267,7 +1274,8 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type) /* first property is always NAME */ assert(cbp->cb_proplist->pl_prop == - ((type == ZFS_TYPE_POOL) ? ZPOOL_PROP_NAME : ZFS_PROP_NAME)); + ((type == ZFS_TYPE_POOL) ? ZPOOL_PROP_NAME : + ((type == ZFS_TYPE_VDEV) ? VDEV_PROP_NAME : ZFS_PROP_NAME))); /* * Go through and calculate the widths for each column. For the @@ -1284,12 +1292,16 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type) if (pl->pl_prop != ZPROP_INVAL) { const char *propname = (type == ZFS_TYPE_POOL) ? zpool_prop_to_name(pl->pl_prop) : - zfs_prop_to_name(pl->pl_prop); + ((type == ZFS_TYPE_VDEV) ? 
+ vdev_prop_to_name(pl->pl_prop) : + zfs_prop_to_name(pl->pl_prop)); + assert(propname != NULL); len = strlen(propname); if (len > cbp->cb_colwidths[GET_COL_PROPERTY]) cbp->cb_colwidths[GET_COL_PROPERTY] = len; } else { + assert(pl->pl_user_prop != NULL); len = strlen(pl->pl_user_prop); if (len > cbp->cb_colwidths[GET_COL_PROPERTY]) cbp->cb_colwidths[GET_COL_PROPERTY] = len; @@ -1314,9 +1326,10 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type) /* * 'NAME' and 'SOURCE' columns */ - if (pl->pl_prop == (type == ZFS_TYPE_POOL ? ZPOOL_PROP_NAME : - ZFS_PROP_NAME) && - pl->pl_width > cbp->cb_colwidths[GET_COL_NAME]) { + if (pl->pl_prop == ((type == ZFS_TYPE_POOL) ? ZPOOL_PROP_NAME : + ((type == ZFS_TYPE_VDEV) ? VDEV_PROP_NAME : + ZFS_PROP_NAME)) && pl->pl_width > + cbp->cb_colwidths[GET_COL_NAME]) { cbp->cb_colwidths[GET_COL_NAME] = pl->pl_width; cbp->cb_colwidths[GET_COL_SOURCE] = pl->pl_width + strlen(dgettext(TEXT_DOMAIN, "inherited from")); @@ -1597,6 +1610,9 @@ zprop_parse_value(libzfs_handle_t *hdl, nvpair_t *elem, int prop, if (type == ZFS_TYPE_POOL) { proptype = zpool_prop_get_type(prop); propname = zpool_prop_to_name(prop); + } else if (type == ZFS_TYPE_VDEV) { + proptype = vdev_prop_get_type(prop); + propname = vdev_prop_to_name(prop); } else { proptype = zfs_prop_get_type(prop); propname = zfs_prop_to_name(prop); @@ -1747,15 +1763,15 @@ addlist(libzfs_handle_t *hdl, char *propname, zprop_list_t **listp, prop = ZPROP_INVAL; /* - * When no property table entry can be found, return failure if - * this is a pool property or if this isn't a user-defined - * dataset property, + * Return failure if no property table entry was found and this isn't + * a user-defined property. 
*/ if (prop == ZPROP_INVAL && ((type == ZFS_TYPE_POOL && !zpool_prop_feature(propname) && !zpool_prop_unsupported(propname)) || - (type == ZFS_TYPE_DATASET && !zfs_prop_user(propname) && - !zfs_prop_userquota(propname) && !zfs_prop_written(propname)))) { + ((type == ZFS_TYPE_DATASET) && !zfs_prop_user(propname) && + !zfs_prop_userquota(propname) && !zfs_prop_written(propname)) || + ((type == ZFS_TYPE_VDEV) && !vdev_prop_user(propname)))) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid property '%s'"), propname); return (zfs_error(hdl, EZFS_BADPROP, @@ -1938,8 +1954,8 @@ zprop_expand_list(libzfs_handle_t *hdl, zprop_list_t **plp, zfs_type_t type) if ((entry = zfs_alloc(hdl, sizeof (zprop_list_t))) == NULL) return (-1); - entry->pl_prop = (type == ZFS_TYPE_POOL) ? ZPOOL_PROP_NAME : - ZFS_PROP_NAME; + entry->pl_prop = ((type == ZFS_TYPE_POOL) ? ZPOOL_PROP_NAME : + ((type == ZFS_TYPE_VDEV) ? VDEV_PROP_NAME : ZFS_PROP_NAME)); entry->pl_width = zprop_width(entry->pl_prop, &entry->pl_fixed, type); entry->pl_all = B_TRUE; diff --git a/lib/libzfs/os/freebsd/libzfs_compat.c b/lib/libzfs/os/freebsd/libzfs_compat.c index f143f9cb63..e3f17662a1 100644 --- a/lib/libzfs/os/freebsd/libzfs_compat.c +++ b/lib/libzfs/os/freebsd/libzfs_compat.c @@ -305,6 +305,10 @@ zfs_jail(zfs_handle_t *zhp, int jailid, int attach) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "bookmarks can not be jailed")); return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); + case ZFS_TYPE_VDEV: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "vdevs can not be jailed")); + return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); case ZFS_TYPE_POOL: case ZFS_TYPE_FILESYSTEM: /* OK */ diff --git a/lib/libzfs_core/libzfs_core.abi b/lib/libzfs_core/libzfs_core.abi index 5bed6c8e0f..4f4d7f6ab7 100644 --- a/lib/libzfs_core/libzfs_core.abi +++ b/lib/libzfs_core/libzfs_core.abi @@ -167,6 +167,7 @@ + @@ -192,6 +193,7 @@ + @@ -1823,6 +1825,18 @@ + + + + + + + + + + + + diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c 
index cbe486d08b..c7c4482a0c 100644 --- a/lib/libzfs_core/libzfs_core.c +++ b/lib/libzfs_core/libzfs_core.c @@ -1394,6 +1394,18 @@ lzc_channel_program_nosync(const char *pool, const char *program, memlimit, argnvl, outnvl)); } +int +lzc_get_vdev_prop(const char *poolname, nvlist_t *innvl, nvlist_t **outnvl) +{ + return (lzc_ioctl(ZFS_IOC_VDEV_GET_PROPS, poolname, innvl, outnvl)); +} + +int +lzc_set_vdev_prop(const char *poolname, nvlist_t *innvl, nvlist_t **outnvl) +{ + return (lzc_ioctl(ZFS_IOC_VDEV_SET_PROPS, poolname, innvl, outnvl)); +} + /* * Performs key management functions * diff --git a/lib/libzutil/zutil_import.c b/lib/libzutil/zutil_import.c index 9eb55aaf77..f637e680c8 100644 --- a/lib/libzutil/zutil_import.c +++ b/lib/libzutil/zutil_import.c @@ -1888,6 +1888,15 @@ for_each_vdev_cb(void *zhp, nvlist_t *nv, pool_vdev_iter_f func, ZPOOL_CONFIG_CHILDREN }; + if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0) + return (ret); + + /* Don't run our function on root or indirect vdevs */ + if ((strcmp(type, VDEV_TYPE_ROOT) != 0) && + (strcmp(type, VDEV_TYPE_INDIRECT) != 0)) { + ret |= func(zhp, nv, data); + } + for (i = 0; i < ARRAY_SIZE(list); i++) { if (nvlist_lookup_nvlist_array(nv, list[i], &child, &children) == 0) { @@ -1906,14 +1915,6 @@ for_each_vdev_cb(void *zhp, nvlist_t *nv, pool_vdev_iter_f func, } } - if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0) - return (ret); - - /* Don't run our function on root vdevs */ - if (strcmp(type, VDEV_TYPE_ROOT) != 0) { - ret |= func(zhp, nv, data); - } - return (ret); } diff --git a/man/man7/vdevprops.7 b/man/man7/vdevprops.7 new file mode 100644 index 0000000000..ec7b52955e --- /dev/null +++ b/man/man7/vdevprops.7 @@ -0,0 +1,172 @@ +.\" +.\" CDDL HEADER START +.\" +.\" The contents of this file are subject to the terms of the +.\" Common Development and Distribution License (the "License"). +.\" You may not use this file except in compliance with the License. 
+.\" +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +.\" or http://www.opensolaris.org/os/licensing. +.\" See the License for the specific language governing permissions +.\" and limitations under the License. +.\" +.\" When distributing Covered Code, include this CDDL HEADER in each +.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. +.\" If applicable, add the following below this CDDL HEADER, with the +.\" fields enclosed by brackets "[]" replaced with your own identifying +.\" information: Portions Copyright [yyyy] [name of copyright owner] +.\" +.\" CDDL HEADER END +.\" +.\" Copyright (c) 2021 Klara, Inc. +.\" +.Dd November 27, 2021 +.Dt VDEVPROPS 7 +.Os +. +.Sh NAME +.Nm vdevprops +.Nd native and user-defined properties of ZFS vdevs +. +.Sh DESCRIPTION +Properties are divided into two types, native properties and user-defined +.Pq or Qq user +properties. +Native properties either export internal statistics or control ZFS behavior. +In addition, native properties are either editable or read-only. +User properties have no effect on ZFS behavior, but you can use them to annotate +vdevs in a way that is meaningful in your environment. +For more information about user properties, see the +.Sx User Properties +section, below. +. +.Ss Native Properties +Every vdev has a set of properties that export statistics about the vdev +as well as control various behaviors. +Properties are NOT inherited from top-level vdevs. +.Pp +The values of numeric properties can be specified using human-readable suffixes +.Po for example, +.Sy k , KB , M , Gb , +and so forth, up to +.Sy Z +for zettabyte +.Pc . +The following are all valid +.Pq and equal +specifications: +.Li 1536M , 1.5g , 1.50GB . +.Pp +The values of non-numeric properties are case sensitive and must be lowercase. +.Pp +The following native properties consist of read-only statistics about the +vdev. +These properties can not be changed. 
+.Bl -tag -width "fragmentation" +.It Sy capacity +Percentage of vdev space used +.It Sy state +State of this vdev such as online, faulted, or offline +.It Sy guid +Globally unique id of this vdev +.It Sy asize +The allocable size of this vdev +.It Sy psize +The physical size of this vdev +.It Sy ashift +The physical sector size of this vdev expressed as a power of two +.It Sy size +The total size of this vdev +.It Sy free +The amount of remaining free space on this vdev +.It Sy allocated +The amount of allocated space on this vdev +.It Sy expandsize +How much this vdev can expand by +.It Sy fragmentation +Percent of fragmentation in this vdev +.It Sy parity +The level of parity for this vdev +.It Sy devid +The device id for this vdev +.It Sy physpath +The physical path to the device +.It Sy encpath +The enclosure path to the device +.It Sy fru +Field Replaceable Unit, usually a model number +.It Sy parent +Parent of this vdev +.It Sy children +Comma separated list of children of this vdev +.It Sy numchildren +The number of children belonging to this vdev +.It Sy read_errors , write_errors , checksum_errors , initialize_errors +The number of errors of each type encountered by this vdev +.It Sy null_ops , read_ops , write_ops , free_ops , claim_ops , trim_ops +The number of I/O operations of each type performed by this vdev +.It Xo +.Sy null_bytes , read_bytes , write_bytes , free_bytes , claim_bytes , +.Sy trim_bytes +.Xc +The cumulative size of all operations of each type performed by this vdev +.It Sy removing +If this device is currently being removed from the pool +.El +.Pp +The following native properties can be used to change the behavior of a ZFS +vdev.
+.Bl -tag -width "allocating" +.It Sy comment +A text comment up to 8192 characters long +.It Sy bootsize +The amount of space to reserve for the EFI system partition +.It Sy path +The path to the device for this vdev +.It Sy allocating +If this device should perform new allocations, used to disable a device +when it is scheduled for later removal. +See +.Xr zpool-remove 8 . +.El +.Ss User Properties +In addition to the standard native properties, ZFS supports arbitrary user +properties. +User properties have no effect on ZFS behavior, but applications or +administrators can use them to annotate vdevs. +.Pp +User property names must contain a colon +.Pq Qq Sy \&: +character to distinguish them from native properties. +They may contain lowercase letters, numbers, and the following punctuation +characters: colon +.Pq Qq Sy \&: , +dash +.Pq Qq Sy - , +period +.Pq Qq Sy \&. , +and underscore +.Pq Qq Sy _ . +The expected convention is that the property name is divided into two portions +such as +.Ar module : Ns Ar property , +but this namespace is not enforced by ZFS. +User property names can be at most 256 characters, and cannot begin with a dash +.Pq Qq Sy - . +.Pp +When making programmatic use of user properties, it is strongly suggested to use +a reversed DNS domain name for the +.Ar module +component of property names to reduce the chance that two +independently-developed packages use the same property name for different +purposes. +.Pp +The values of user properties are arbitrary strings and +are never validated. +Use the +.Nm zpool Cm set +command with a blank value to clear a user property. +Property values are limited to 8192 bytes. 
+.Sh SEE ALSO +.Xr zpoolprops 7 , +.Xr zpool-set 8 diff --git a/man/man8/zpool-get.8 b/man/man8/zpool-get.8 index 55904f169e..4ef9a1b5ec 100644 --- a/man/man8/zpool-get.8 +++ b/man/man8/zpool-get.8 @@ -40,11 +40,27 @@ .Op Fl o Ar field Ns Oo , Ns Ar field Oc Ns … .Sy all Ns | Ns Ar property Ns Oo , Ns Ar property Oc Ns … .Oo Ar pool Oc Ns … +. +.Nm zpool +.Cm get +.Op Fl Hp +.Op Fl o Ar field Ns Oo , Ns Ar field Oc Ns … +.Sy all Ns | Ns Ar property Ns Oo , Ns Ar property Oc Ns … +.Ar pool +.Oo Sy all-vdevs Ns | Ns +.Ar vdev Oc Ns … +. .Nm zpool .Cm set .Ar property Ns = Ns Ar value .Ar pool . +.Nm zpool +.Cm set +.Ar property Ns = Ns Ar value +.Ar pool +.Ar vdev +. .Sh DESCRIPTION .Bl -tag -width Ds .It Xo @@ -91,6 +107,56 @@ Display numbers in parsable (exact) values. .El .It Xo .Nm zpool +.Cm get +.Op Fl Hp +.Op Fl o Ar field Ns Oo , Ns Ar field Oc Ns … +.Sy all Ns | Ns Ar property Ns Oo , Ns Ar property Oc Ns … +.Ar pool +.Oo Sy all-vdevs Ns | Ns +.Ar vdev Oc Ns … +.Xc +Retrieves the given list of properties +.Po +or all properties if +.Sy all +is used +.Pc +for the specified vdevs +.Po +or all vdevs if +.Sy all-vdevs +is used +.Pc +in the specified pool. +These properties are displayed with the following fields: +.Bl -tag -compact -offset Ds -width "property" +.It Sy name +Name of vdev. +.It Sy property +Property name. +.It Sy value +Property value. +.It Sy source +Property source, either +.Sy default No or Sy local . +.El +.Pp +See the +.Xr vdevprops 7 +manual page for more information on the available vdev properties. +.Bl -tag -compact -offset Ds -width "-o field" +.It Fl H +Scripted mode. +Do not display headers, and separate fields by a single tab instead of arbitrary +space. +.It Fl o Ar field +A comma-separated list of columns to display, defaults to +.Sy name , Ns Sy property , Ns Sy value , Ns Sy source . +.It Fl p +Display numbers in parsable (exact) values.
+.El +.It Xo +.Nm zpool .Cm set .Ar property Ns = Ns Ar value .Ar pool @@ -100,9 +166,22 @@ See the .Xr zpoolprops 7 manual page for more information on what properties can be set and acceptable values. +.It Xo +.Nm zpool +.Cm set +.Ar property Ns = Ns Ar value +.Ar pool +.Ar vdev +.Xc +Sets the given property on the specified vdev in the specified pool. +See the +.Xr vdevprops 7 +manual page for more information on what properties can be set and acceptable +values. .El . .Sh SEE ALSO +.Xr vdevprops 7 , .Xr zpool-features 7 , .Xr zpoolprops 7 , .Xr zpool-list 8 diff --git a/module/os/linux/zfs/zfs_sysfs.c b/module/os/linux/zfs/zfs_sysfs.c index fb7c689873..6f71382cf7 100644 --- a/module/os/linux/zfs/zfs_sysfs.c +++ b/module/os/linux/zfs/zfs_sysfs.c @@ -90,6 +90,7 @@ struct zfs_mod_kobj { static zfs_mod_kobj_t kernel_features_kobj; static zfs_mod_kobj_t pool_features_kobj; static zfs_mod_kobj_t dataset_props_kobj; +static zfs_mod_kobj_t vdev_props_kobj; static zfs_mod_kobj_t pool_props_kobj; /* @@ -333,6 +334,20 @@ dataset_property_show(struct kobject *kobj, struct attribute *attr, char *buf) return (len); } +static ssize_t +vdev_property_show(struct kobject *kobj, struct attribute *attr, char *buf) +{ + vdev_prop_t prop = vdev_name_to_prop(kobject_name(kobj)); + zprop_desc_t *prop_tbl = vdev_prop_get_table(); + ssize_t len; + + ASSERT3U(prop, <, VDEV_NUM_PROPS); + + len = zprop_sysfs_show(attr->name, &prop_tbl[prop], buf, PAGE_SIZE); + + return (len); +} + static ssize_t pool_property_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -577,6 +592,14 @@ zfs_sysfs_properties_init(zfs_mod_kobj_t *zfs_kobj, struct kobject *parent, context.p2k_show_func = pool_property_show; err = zfs_kobj_init(zfs_kobj, 0, ZPOOL_NUM_PROPS, pool_property_show); + } else if (type == ZFS_TYPE_VDEV) { + name = ZFS_SYSFS_VDEV_PROPERTIES; + context.p2k_table = vdev_prop_get_table(); + context.p2k_attr_count = ZPOOL_PROP_ATTR_COUNT; + context.p2k_parent = zfs_kobj; + 
context.p2k_show_func = vdev_property_show; + err = zfs_kobj_init(zfs_kobj, 0, VDEV_NUM_PROPS, + vdev_property_show); } else { name = ZFS_SYSFS_DATASET_PROPERTIES; context.p2k_table = zfs_prop_get_table(); @@ -639,12 +662,22 @@ zfs_sysfs_init(void) return; } + err = zfs_sysfs_properties_init(&vdev_props_kobj, parent, + ZFS_TYPE_VDEV); + if (err) { + zfs_kobj_fini(&kernel_features_kobj); + zfs_kobj_fini(&pool_features_kobj); + zfs_kobj_fini(&pool_props_kobj); + return; + } + err = zfs_sysfs_properties_init(&dataset_props_kobj, parent, ZFS_TYPE_FILESYSTEM); if (err) { zfs_kobj_fini(&kernel_features_kobj); zfs_kobj_fini(&pool_features_kobj); zfs_kobj_fini(&pool_props_kobj); + zfs_kobj_fini(&vdev_props_kobj); return; } } @@ -657,6 +690,7 @@ zfs_sysfs_fini(void) */ zfs_kobj_fini(&kernel_features_kobj); zfs_kobj_fini(&pool_features_kobj); - zfs_kobj_fini(&dataset_props_kobj); zfs_kobj_fini(&pool_props_kobj); + zfs_kobj_fini(&vdev_props_kobj); + zfs_kobj_fini(&dataset_props_kobj); } diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c index 260bf185a3..2a0e26eca9 100644 --- a/module/zcommon/zfs_prop.c +++ b/module/zcommon/zfs_prop.c @@ -723,18 +723,6 @@ zfs_name_to_prop(const char *propname) return (zprop_name_to_prop(propname, ZFS_TYPE_DATASET)); } -/* - * For user property names, we allow all lowercase alphanumeric characters, plus - * a few useful punctuation characters. - */ -static int -valid_char(char c) -{ - return ((c >= 'a' && c <= 'z') || - (c >= '0' && c <= '9') || - c == '-' || c == '_' || c == '.' || c == ':'); -} - /* * Returns true if this is a valid user-defined property (one with a ':'). 
*/ @@ -747,7 +735,7 @@ zfs_prop_user(const char *name) for (i = 0; i < strlen(name); i++) { c = name[i]; - if (!valid_char(c)) + if (!zprop_valid_char(c)) return (B_FALSE); if (c == ':') foundsep = B_TRUE; diff --git a/module/zcommon/zpool_prop.c b/module/zcommon/zpool_prop.c index 6299d371f2..8e7a20e8ef 100644 --- a/module/zcommon/zpool_prop.c +++ b/module/zcommon/zpool_prop.c @@ -23,6 +23,7 @@ * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright (c) 2021, Colm Buckley + * Copyright (c) 2021, Klara Inc. */ #include @@ -40,6 +41,7 @@ #endif static zprop_desc_t zpool_prop_table[ZPOOL_NUM_PROPS]; +static zprop_desc_t vdev_prop_table[VDEV_NUM_PROPS]; zprop_desc_t * zpool_prop_get_table(void) @@ -260,12 +262,249 @@ zpool_prop_align_right(zpool_prop_t prop) } #endif +zprop_desc_t * +vdev_prop_get_table(void) +{ + return (vdev_prop_table); +} + +void +vdev_prop_init(void) +{ + static zprop_index_t boolean_table[] = { + { "off", 0}, + { "on", 1}, + { NULL } + }; + static zprop_index_t boolean_na_table[] = { + { "off", 0}, + { "on", 1}, + { "-", 2}, /* ZPROP_BOOLEAN_NA */ + { NULL } + }; + + /* string properties */ + zprop_register_string(VDEV_PROP_COMMENT, "comment", NULL, + PROP_DEFAULT, ZFS_TYPE_VDEV, "", "COMMENT"); + zprop_register_string(VDEV_PROP_PATH, "path", NULL, + PROP_DEFAULT, ZFS_TYPE_VDEV, "", "PATH"); + zprop_register_string(VDEV_PROP_DEVID, "devid", NULL, + PROP_READONLY, ZFS_TYPE_VDEV, "", "DEVID"); + zprop_register_string(VDEV_PROP_PHYS_PATH, "physpath", NULL, + PROP_READONLY, ZFS_TYPE_VDEV, "", "PHYSPATH"); + zprop_register_string(VDEV_PROP_ENC_PATH, "encpath", NULL, + PROP_READONLY, ZFS_TYPE_VDEV, "", "ENCPATH"); + zprop_register_string(VDEV_PROP_FRU, "fru", NULL, + PROP_READONLY, ZFS_TYPE_VDEV, "", "FRU"); + zprop_register_string(VDEV_PROP_PARENT, "parent", NULL, + PROP_READONLY, ZFS_TYPE_VDEV, "", "PARENT"); + zprop_register_string(VDEV_PROP_CHILDREN, "children", NULL, + 
PROP_READONLY, ZFS_TYPE_VDEV, "", "CHILDREN"); + + /* readonly number properties */ + zprop_register_number(VDEV_PROP_SIZE, "size", 0, PROP_READONLY, + ZFS_TYPE_VDEV, "", "SIZE"); + zprop_register_number(VDEV_PROP_FREE, "free", 0, PROP_READONLY, + ZFS_TYPE_VDEV, "", "FREE"); + zprop_register_number(VDEV_PROP_ALLOCATED, "allocated", 0, + PROP_READONLY, ZFS_TYPE_VDEV, "", "ALLOC"); + zprop_register_number(VDEV_PROP_EXPANDSZ, "expandsize", 0, + PROP_READONLY, ZFS_TYPE_VDEV, "", "EXPANDSZ"); + zprop_register_number(VDEV_PROP_FRAGMENTATION, "fragmentation", 0, + PROP_READONLY, ZFS_TYPE_VDEV, "", "FRAG"); + zprop_register_number(VDEV_PROP_CAPACITY, "capacity", 0, PROP_READONLY, + ZFS_TYPE_VDEV, "", "CAP"); + zprop_register_number(VDEV_PROP_GUID, "guid", 0, PROP_READONLY, + ZFS_TYPE_VDEV, "", "GUID"); + zprop_register_number(VDEV_PROP_STATE, "state", 0, PROP_READONLY, + ZFS_TYPE_VDEV, "", "STATE"); + zprop_register_number(VDEV_PROP_BOOTSIZE, "bootsize", 0, PROP_READONLY, + ZFS_TYPE_VDEV, "", "BOOTSIZE"); + zprop_register_number(VDEV_PROP_ASIZE, "asize", 0, PROP_READONLY, + ZFS_TYPE_VDEV, "", "ASIZE"); + zprop_register_number(VDEV_PROP_PSIZE, "psize", 0, PROP_READONLY, + ZFS_TYPE_VDEV, "", "PSIZE"); + zprop_register_number(VDEV_PROP_ASHIFT, "ashift", 0, PROP_READONLY, + ZFS_TYPE_VDEV, "", "ASHIFT"); + zprop_register_number(VDEV_PROP_PARITY, "parity", 0, PROP_READONLY, + ZFS_TYPE_VDEV, "", "PARITY"); + zprop_register_number(VDEV_PROP_NUMCHILDREN, "numchildren", 0, + PROP_READONLY, ZFS_TYPE_VDEV, "", "NUMCHILD"); + zprop_register_number(VDEV_PROP_READ_ERRORS, "read_errors", 0, + PROP_READONLY, ZFS_TYPE_VDEV, "", "RDERR"); + zprop_register_number(VDEV_PROP_WRITE_ERRORS, "write_errors", 0, + PROP_READONLY, ZFS_TYPE_VDEV, "", "WRERR"); + zprop_register_number(VDEV_PROP_CHECKSUM_ERRORS, "checksum_errors", 0, + PROP_READONLY, ZFS_TYPE_VDEV, "", "CKERR"); + zprop_register_number(VDEV_PROP_INITIALIZE_ERRORS, + "initialize_errors", 0, PROP_READONLY, ZFS_TYPE_VDEV, "", + "INITERR"); 
+ zprop_register_number(VDEV_PROP_OPS_NULL, "null_ops", 0, + PROP_READONLY, ZFS_TYPE_VDEV, "", "NULLOP"); + zprop_register_number(VDEV_PROP_OPS_READ, "read_ops", 0, + PROP_READONLY, ZFS_TYPE_VDEV, "", "READOP"); + zprop_register_number(VDEV_PROP_OPS_WRITE, "write_ops", 0, + PROP_READONLY, ZFS_TYPE_VDEV, "", "WRITEOP"); + zprop_register_number(VDEV_PROP_OPS_FREE, "free_ops", 0, + PROP_READONLY, ZFS_TYPE_VDEV, "", "FREEOP"); + zprop_register_number(VDEV_PROP_OPS_CLAIM, "claim_ops", 0, + PROP_READONLY, ZFS_TYPE_VDEV, "", "CLAIMOP"); + zprop_register_number(VDEV_PROP_OPS_TRIM, "trim_ops", 0, + PROP_READONLY, ZFS_TYPE_VDEV, "", "TRIMOP"); + zprop_register_number(VDEV_PROP_BYTES_NULL, "null_bytes", 0, + PROP_READONLY, ZFS_TYPE_VDEV, "", "NULLBYTE"); + zprop_register_number(VDEV_PROP_BYTES_READ, "read_bytes", 0, + PROP_READONLY, ZFS_TYPE_VDEV, "", "READBYTE"); + zprop_register_number(VDEV_PROP_BYTES_WRITE, "write_bytes", 0, + PROP_READONLY, ZFS_TYPE_VDEV, "", "WRITEBYTE"); + zprop_register_number(VDEV_PROP_BYTES_FREE, "free_bytes", 0, + PROP_READONLY, ZFS_TYPE_VDEV, "", "FREEBYTE"); + zprop_register_number(VDEV_PROP_BYTES_CLAIM, "claim_bytes", 0, + PROP_READONLY, ZFS_TYPE_VDEV, "", "CLAIMBYTE"); + zprop_register_number(VDEV_PROP_BYTES_TRIM, "trim_bytes", 0, + PROP_READONLY, ZFS_TYPE_VDEV, "", "TRIMBYTE"); + + /* default numeric properties */ + + /* default index (boolean) properties */ + zprop_register_index(VDEV_PROP_REMOVING, "removing", 0, + PROP_READONLY, ZFS_TYPE_VDEV, "on | off", "REMOVING", + boolean_table); + zprop_register_index(VDEV_PROP_ALLOCATING, "allocating", 1, + PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "ALLOCATING", + boolean_na_table); + + /* default index properties */ + + /* hidden properties */ + zprop_register_hidden(VDEV_PROP_NAME, "name", PROP_TYPE_STRING, + PROP_READONLY, ZFS_TYPE_VDEV, "NAME"); +} + +/* + * Given a property name and its type, returns the corresponding property ID. 
+ */ +vdev_prop_t +vdev_name_to_prop(const char *propname) +{ + return (zprop_name_to_prop(propname, ZFS_TYPE_VDEV)); +} + +/* + * Returns true if this is a valid user-defined property (one with a ':'). + */ +boolean_t +vdev_prop_user(const char *name) +{ + int i; + char c; + boolean_t foundsep = B_FALSE; + + for (i = 0; i < strlen(name); i++) { + c = name[i]; + if (!zprop_valid_char(c)) + return (B_FALSE); + if (c == ':') + foundsep = B_TRUE; + } + + return (foundsep); +} + +/* + * Given a vdev property ID, returns the corresponding name. + * Assumes the vdev property ID is valid. + */ +const char * +vdev_prop_to_name(vdev_prop_t prop) +{ + return (vdev_prop_table[prop].pd_name); +} + +zprop_type_t +vdev_prop_get_type(vdev_prop_t prop) +{ + return (vdev_prop_table[prop].pd_proptype); +} + +boolean_t +vdev_prop_readonly(vdev_prop_t prop) +{ + return (vdev_prop_table[prop].pd_attr == PROP_READONLY); +} + +const char * +vdev_prop_default_string(vdev_prop_t prop) +{ + return (vdev_prop_table[prop].pd_strdefault); +} + +uint64_t +vdev_prop_default_numeric(vdev_prop_t prop) +{ + return (vdev_prop_table[prop].pd_numdefault); +} + +int +vdev_prop_string_to_index(vdev_prop_t prop, const char *string, + uint64_t *index) +{ + return (zprop_string_to_index(prop, string, index, ZFS_TYPE_VDEV)); +} + +int +vdev_prop_index_to_string(vdev_prop_t prop, uint64_t index, + const char **string) +{ + return (zprop_index_to_string(prop, index, string, ZFS_TYPE_VDEV)); +} + +/* + * Returns true if this is a valid vdev property.
+ */ +boolean_t +zpool_prop_vdev(const char *name) +{ + return (vdev_name_to_prop(name) != VDEV_PROP_INVAL); +} + +uint64_t +vdev_prop_random_value(vdev_prop_t prop, uint64_t seed) +{ + return (zprop_random_value(prop, seed, ZFS_TYPE_VDEV)); +} + +#ifndef _KERNEL +const char * +vdev_prop_values(vdev_prop_t prop) +{ + return (vdev_prop_table[prop].pd_values); +} + +const char * +vdev_prop_column_name(vdev_prop_t prop) +{ + return (vdev_prop_table[prop].pd_colname); +} + +boolean_t +vdev_prop_align_right(vdev_prop_t prop) +{ + return (vdev_prop_table[prop].pd_rightalign); +} +#endif + #if defined(_KERNEL) /* zpool property functions */ EXPORT_SYMBOL(zpool_prop_init); EXPORT_SYMBOL(zpool_prop_get_type); EXPORT_SYMBOL(zpool_prop_get_table); +/* vdev property functions */ +EXPORT_SYMBOL(vdev_prop_init); +EXPORT_SYMBOL(vdev_prop_get_type); +EXPORT_SYMBOL(vdev_prop_get_table); + /* Pool property functions shared between libzfs and kernel. */ EXPORT_SYMBOL(zpool_name_to_prop); EXPORT_SYMBOL(zpool_prop_to_name); @@ -276,4 +515,15 @@ EXPORT_SYMBOL(zpool_prop_feature); EXPORT_SYMBOL(zpool_prop_unsupported); EXPORT_SYMBOL(zpool_prop_index_to_string); EXPORT_SYMBOL(zpool_prop_string_to_index); +EXPORT_SYMBOL(zpool_prop_vdev); + +/* vdev property functions shared between libzfs and kernel. 
*/ +EXPORT_SYMBOL(vdev_name_to_prop); +EXPORT_SYMBOL(vdev_prop_user); +EXPORT_SYMBOL(vdev_prop_to_name); +EXPORT_SYMBOL(vdev_prop_default_string); +EXPORT_SYMBOL(vdev_prop_default_numeric); +EXPORT_SYMBOL(vdev_prop_readonly); +EXPORT_SYMBOL(vdev_prop_index_to_string); +EXPORT_SYMBOL(vdev_prop_string_to_index); #endif diff --git a/module/zcommon/zprop_common.c b/module/zcommon/zprop_common.c index faab9d9a74..17a48361f9 100644 --- a/module/zcommon/zprop_common.c +++ b/module/zcommon/zprop_common.c @@ -53,6 +53,8 @@ zprop_get_proptable(zfs_type_t type) { if (type == ZFS_TYPE_POOL) return (zpool_prop_get_table()); + else if (type == ZFS_TYPE_VDEV) + return (vdev_prop_get_table()); else return (zfs_prop_get_table()); } @@ -62,6 +64,8 @@ zprop_get_numprops(zfs_type_t type) { if (type == ZFS_TYPE_POOL) return (ZPOOL_NUM_PROPS); + else if (type == ZFS_TYPE_VDEV) + return (VDEV_NUM_PROPS); else return (ZFS_NUM_PROPS); } @@ -81,7 +85,8 @@ zfs_mod_supported_prop(const char *name, zfs_type_t type) return (B_TRUE); #else return (zfs_mod_supported(type == ZFS_TYPE_POOL ? - ZFS_SYSFS_POOL_PROPERTIES : ZFS_SYSFS_DATASET_PROPERTIES, name)); + ZFS_SYSFS_POOL_PROPERTIES : (type == ZFS_TYPE_VDEV ? + ZFS_SYSFS_VDEV_PROPERTIES : ZFS_SYSFS_DATASET_PROPERTIES), name)); #endif } @@ -235,6 +240,8 @@ propname_match(const char *p, size_t len, zprop_desc_t *prop_entry) int c; #endif + ASSERT(propname != NULL); + if (len == strlen(propname) && strncmp(p, propname, len) == 0) return (B_TRUE); @@ -391,6 +398,18 @@ zprop_valid_for_type(int prop, zfs_type_t type, boolean_t headcheck) return ((prop_tbl[prop].pd_types & type) != 0); } +/* + * For user property names, we allow all lowercase alphanumeric characters, plus + * a few useful punctuation characters. + */ +int +zprop_valid_char(char c) +{ + return ((c >= 'a' && c <= 'z') || + (c >= '0' && c <= '9') || + c == '-' || c == '_' || c == '.' 
|| c == ':'); +} + #ifndef _KERNEL /* @@ -477,4 +496,5 @@ EXPORT_SYMBOL(zprop_index_to_string); EXPORT_SYMBOL(zprop_random_value); EXPORT_SYMBOL(zprop_values); EXPORT_SYMBOL(zprop_valid_for_type); +EXPORT_SYMBOL(zprop_valid_char); #endif diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 7546e3e414..30a442ab8c 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -786,7 +786,7 @@ spa_prop_set(spa_t *spa, nvlist_t *nvp) continue; if (prop == ZPOOL_PROP_VERSION || prop == ZPOOL_PROP_INVAL) { - uint64_t ver; + uint64_t ver = 0; if (prop == ZPOOL_PROP_VERSION) { VERIFY(nvpair_value_uint64(elem, &ver) == 0); diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 1ecd2294db..3f74631983 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -1833,36 +1833,27 @@ spa_update_dspace(spa_t *spa) { spa->spa_dspace = metaslab_class_get_dspace(spa_normal_class(spa)) + ddt_get_dedup_dspace(spa); - if (spa->spa_vdev_removal != NULL) { + if (spa->spa_nonallocating_dspace > 0) { /* - * We can't allocate from the removing device, so subtract - * its size if it was included in dspace (i.e. if this is a - * normal-class vdev, not special/dedup). This prevents the - * DMU/DSL from filling up the (now smaller) pool while we - * are in the middle of removing the device. + * Subtract the space provided by all non-allocating vdevs that + * contribute to dspace. If a file is overwritten, its old + * blocks are freed and new blocks are allocated. If there are + * no snapshots of the file, the available space should remain + * the same. The old blocks could be freed from the + * non-allocating vdev, but the new blocks must be allocated on + * other (allocating) vdevs. By reserving the entire size of + * the non-allocating vdevs (including allocated space), we + * ensure that there will be enough space on the allocating + * vdevs for this file overwrite to succeed. 
* * Note that the DMU/DSL doesn't actually know or care * how much space is allocated (it does its own tracking * of how much space has been logically used). So it * doesn't matter that the data we are moving may be - * allocated twice (on the old device and the new - * device). + * allocated twice (on the old device and the new device). */ - spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); - vdev_t *vd = - vdev_lookup_top(spa, spa->spa_vdev_removal->svr_vdev_id); - /* - * If the stars align, we can wind up here after - * vdev_remove_complete() has cleared vd->vdev_mg but before - * spa->spa_vdev_removal gets cleared, so we must check before - * we dereference. - */ - if (vd->vdev_mg && - vd->vdev_mg->mg_class == spa_normal_class(spa)) { - spa->spa_dspace -= spa_deflate(spa) ? - vd->vdev_stat.vs_dspace : vd->vdev_stat.vs_space; - } - spa_config_exit(spa, SCL_VDEV, FTAG); + ASSERT3U(spa->spa_dspace, >=, spa->spa_nonallocating_dspace); + spa->spa_dspace -= spa->spa_nonallocating_dspace; } } @@ -2429,6 +2420,7 @@ spa_init(spa_mode_t mode) zpool_prop_init(); zpool_feature_init(); spa_config_load(); + vdev_prop_init(); l2arc_start(); scan_init(); qat_init(); diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 4a67ba85f5..5784bd2a09 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -28,6 +28,7 @@ * Copyright 2017 Joyent, Inc. * Copyright (c) 2017, Intel Corporation. * Copyright (c) 2019, Datto Inc. All rights reserved. + * Copyright (c) 2021, Klara Inc. * Copyright [2021] Hewlett Packard Enterprise Development LP */ @@ -59,6 +60,7 @@ #include #include #include +#include "zfs_prop.h" /* * One metaslab from each (normal-class) vdev is used by the ZIL. 
These are @@ -865,6 +867,8 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, &vd->vdev_ms_shift); (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASIZE, &vd->vdev_asize); + (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NONALLOCATING, + &vd->vdev_noalloc); (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVING, &vd->vdev_removing); (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_VDEV_TOP_ZAP, @@ -1183,8 +1187,10 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd) ASSERT3P(tvd->vdev_indirect_mapping, ==, NULL); ASSERT3P(tvd->vdev_indirect_births, ==, NULL); ASSERT3P(tvd->vdev_obsolete_sm, ==, NULL); + ASSERT0(tvd->vdev_noalloc); ASSERT0(tvd->vdev_removing); ASSERT0(tvd->vdev_rebuilding); + tvd->vdev_noalloc = svd->vdev_noalloc; tvd->vdev_removing = svd->vdev_removing; tvd->vdev_rebuilding = svd->vdev_rebuilding; tvd->vdev_rebuild_config = svd->vdev_rebuild_config; @@ -1200,6 +1206,7 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd) svd->vdev_indirect_mapping = NULL; svd->vdev_indirect_births = NULL; svd->vdev_obsolete_sm = NULL; + svd->vdev_noalloc = 0; svd->vdev_removing = 0; svd->vdev_rebuilding = 0; @@ -1498,11 +1505,15 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg) spa_config_enter(spa, SCL_ALLOC, FTAG, RW_WRITER); /* - * If the vdev is being removed we don't activate - * the metaslabs since we want to ensure that no new - * allocations are performed on this device. + * If the vdev is marked as non-allocating then don't + * activate the metaslabs since we want to ensure that + * no allocations are performed on this device. */ - if (!expanding && !vd->vdev_removing) { + if (vd->vdev_noalloc) { + /* track non-allocating vdev space */ + spa->spa_nonallocating_dspace += spa_deflate(spa) ? 
+ vd->vdev_stat.vs_dspace : vd->vdev_stat.vs_space; + } else if (!expanding) { metaslab_group_activate(vd->vdev_mg); if (vd->vdev_log_mg != NULL) metaslab_group_activate(vd->vdev_log_mg); @@ -4469,6 +4480,8 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx) vs->vs_fragmentation = (vd->vdev_mg != NULL) ? vd->vdev_mg->mg_fragmentation : 0; } + vs->vs_noalloc = MAX(vd->vdev_noalloc, + tvd ? tvd->vdev_noalloc : 0); } vdev_get_stats_ex_impl(vd, vs, vsx); @@ -5375,6 +5388,23 @@ vdev_xlate_walk(vdev_t *vd, const range_seg64_t *logical_rs, } } +static char * +vdev_name(vdev_t *vd, char *buf, int buflen) +{ + if (vd->vdev_path == NULL) { + if (strcmp(vd->vdev_ops->vdev_op_type, "root") == 0) { + strlcpy(buf, vd->vdev_spa->spa_name, buflen); + } else if (!vd->vdev_ops->vdev_op_leaf) { + snprintf(buf, buflen, "%s-%llu", + vd->vdev_ops->vdev_op_type, + (u_longlong_t)vd->vdev_id); + } + } else { + strlcpy(buf, vd->vdev_path, buflen); + } + return (buf); +} + /* * Look at the vdev tree and determine whether any devices are currently being * replaced. @@ -5404,6 +5434,594 @@ vdev_replace_in_progress(vdev_t *vdev) return (B_FALSE); } +/* + * Add a (source=src, propname=propval) list to an nvlist. 
+ */ +static void +vdev_prop_add_list(nvlist_t *nvl, const char *propname, char *strval, + uint64_t intval, zprop_source_t src) +{ + nvlist_t *propval; + + propval = fnvlist_alloc(); + fnvlist_add_uint64(propval, ZPROP_SOURCE, src); + + if (strval != NULL) + fnvlist_add_string(propval, ZPROP_VALUE, strval); + else + fnvlist_add_uint64(propval, ZPROP_VALUE, intval); + + fnvlist_add_nvlist(nvl, propname, propval); + nvlist_free(propval); +} + +static void +vdev_props_set_sync(void *arg, dmu_tx_t *tx) +{ + vdev_t *vd; + nvlist_t *nvp = arg; + spa_t *spa = dmu_tx_pool(tx)->dp_spa; + objset_t *mos = spa->spa_meta_objset; + nvpair_t *elem = NULL; + uint64_t vdev_guid; + nvlist_t *nvprops; + + vdev_guid = fnvlist_lookup_uint64(nvp, ZPOOL_VDEV_PROPS_SET_VDEV); + nvprops = fnvlist_lookup_nvlist(nvp, ZPOOL_VDEV_PROPS_SET_PROPS); + vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE); + VERIFY(vd != NULL); + + mutex_enter(&spa->spa_props_lock); + + while ((elem = nvlist_next_nvpair(nvprops, elem)) != NULL) { + uint64_t intval, objid = 0; + char *strval; + vdev_prop_t prop; + const char *propname = nvpair_name(elem); + zprop_type_t proptype; + + /* + * Set vdev property values in the vdev props mos object. 
+ */ + if (vd->vdev_top_zap != 0) { + objid = vd->vdev_top_zap; + } else if (vd->vdev_leaf_zap != 0) { + objid = vd->vdev_leaf_zap; + } else { + panic("vdev not top or leaf"); + } + + switch (prop = vdev_name_to_prop(propname)) { + case VDEV_PROP_USER: + if (vdev_prop_user(propname)) { + strval = fnvpair_value_string(elem); + if (strlen(strval) == 0) { + /* remove the property if value == "" */ + (void) zap_remove(mos, objid, propname, + tx); + } else { + VERIFY0(zap_update(mos, objid, propname, + 1, strlen(strval) + 1, strval, tx)); + } + spa_history_log_internal(spa, "vdev set", tx, + "vdev_guid=%llu: %s=%s", + (u_longlong_t)vdev_guid, nvpair_name(elem), + strval); + } + break; + default: + /* normalize the property name */ + propname = vdev_prop_to_name(prop); + proptype = vdev_prop_get_type(prop); + + if (nvpair_type(elem) == DATA_TYPE_STRING) { + ASSERT(proptype == PROP_TYPE_STRING); + strval = fnvpair_value_string(elem); + VERIFY0(zap_update(mos, objid, propname, + 1, strlen(strval) + 1, strval, tx)); + spa_history_log_internal(spa, "vdev set", tx, + "vdev_guid=%llu: %s=%s", + (u_longlong_t)vdev_guid, nvpair_name(elem), + strval); + } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { + intval = fnvpair_value_uint64(elem); + + if (proptype == PROP_TYPE_INDEX) { + const char *unused; + VERIFY0(vdev_prop_index_to_string( + prop, intval, &unused)); + } + VERIFY0(zap_update(mos, objid, propname, + sizeof (uint64_t), 1, &intval, tx)); + spa_history_log_internal(spa, "vdev set", tx, + "vdev_guid=%llu: %s=%lld", + (u_longlong_t)vdev_guid, + nvpair_name(elem), (longlong_t)intval); + } else { + panic("invalid vdev property type %u", + nvpair_type(elem)); + } + } + + } + + mutex_exit(&spa->spa_props_lock); +} + +int +vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) +{ + spa_t *spa = vd->vdev_spa; + nvpair_t *elem = NULL; + uint64_t vdev_guid; + nvlist_t *nvprops; + int error; + + ASSERT(vd != NULL); + + if (nvlist_lookup_uint64(innvl, 
ZPOOL_VDEV_PROPS_SET_VDEV, + &vdev_guid) != 0) + return (SET_ERROR(EINVAL)); + + if (nvlist_lookup_nvlist(innvl, ZPOOL_VDEV_PROPS_SET_PROPS, + &nvprops) != 0) + return (SET_ERROR(EINVAL)); + + if ((vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE)) == NULL) + return (SET_ERROR(EINVAL)); + + while ((elem = nvlist_next_nvpair(nvprops, elem)) != NULL) { + char *propname = nvpair_name(elem); + vdev_prop_t prop = vdev_name_to_prop(propname); + uint64_t intval = 0; + char *strval = NULL; + + if (prop == VDEV_PROP_USER && !vdev_prop_user(propname)) { + error = EINVAL; + goto end; + } + + if (vdev_prop_readonly(prop)) { + error = EROFS; + goto end; + } + + /* Special Processing */ + switch (prop) { + case VDEV_PROP_PATH: + if (vd->vdev_path == NULL) { + error = EROFS; + break; + } + if (nvpair_value_string(elem, &strval) != 0) { + error = EINVAL; + break; + } + /* New path must start with /dev/ */ + if (strncmp(strval, "/dev/", 5)) { + error = EINVAL; + break; + } + error = spa_vdev_setpath(spa, vdev_guid, strval); + break; + case VDEV_PROP_ALLOCATING: + if (nvpair_value_uint64(elem, &intval) != 0) { + error = EINVAL; + break; + } + if (intval != vd->vdev_noalloc) + break; + if (intval == 0) + error = spa_vdev_noalloc(spa, vdev_guid); + else + error = spa_vdev_alloc(spa, vdev_guid); + break; + default: + /* Most processing is done in vdev_props_set_sync */ + break; + } +end: + if (error != 0) { + intval = error; + vdev_prop_add_list(outnvl, propname, strval, intval, 0); + return (error); + } + } + + return (dsl_sync_task(spa->spa_name, NULL, vdev_props_set_sync, + innvl, 6, ZFS_SPACE_CHECK_EXTRA_RESERVED)); +} + +int +vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) +{ + spa_t *spa = vd->vdev_spa; + objset_t *mos = spa->spa_meta_objset; + int err = 0; + uint64_t objid; + uint64_t vdev_guid; + nvpair_t *elem = NULL; + nvlist_t *nvprops = NULL; + uint64_t intval = 0; + char *strval = NULL; + const char *propname = NULL; + vdev_prop_t prop; + + ASSERT(vd != NULL); + 
ASSERT(mos != NULL); + + if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_GET_VDEV, + &vdev_guid) != 0) + return (SET_ERROR(EINVAL)); + + nvlist_lookup_nvlist(innvl, ZPOOL_VDEV_PROPS_GET_PROPS, &nvprops); + + if (vd->vdev_top_zap != 0) { + objid = vd->vdev_top_zap; + } else if (vd->vdev_leaf_zap != 0) { + objid = vd->vdev_leaf_zap; + } else { + return (SET_ERROR(EINVAL)); + } + ASSERT(objid != 0); + + mutex_enter(&spa->spa_props_lock); + + if (nvprops != NULL) { + char namebuf[64] = { 0 }; + + while ((elem = nvlist_next_nvpair(nvprops, elem)) != NULL) { + intval = 0; + strval = NULL; + propname = nvpair_name(elem); + prop = vdev_name_to_prop(propname); + zprop_source_t src = ZPROP_SRC_DEFAULT; + uint64_t integer_size, num_integers; + + switch (prop) { + /* Special Read-only Properties */ + case VDEV_PROP_NAME: + strval = vdev_name(vd, namebuf, + sizeof (namebuf)); + if (strval == NULL) + continue; + vdev_prop_add_list(outnvl, propname, strval, 0, + ZPROP_SRC_NONE); + continue; + case VDEV_PROP_CAPACITY: + /* percent used */ + intval = (vd->vdev_stat.vs_dspace == 0) ? 
0 : + (vd->vdev_stat.vs_alloc * 100 / + vd->vdev_stat.vs_dspace); + vdev_prop_add_list(outnvl, propname, NULL, + intval, ZPROP_SRC_NONE); + continue; + case VDEV_PROP_STATE: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_state, ZPROP_SRC_NONE); + continue; + case VDEV_PROP_GUID: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_guid, ZPROP_SRC_NONE); + continue; + case VDEV_PROP_ASIZE: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_asize, ZPROP_SRC_NONE); + continue; + case VDEV_PROP_PSIZE: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_psize, ZPROP_SRC_NONE); + continue; + case VDEV_PROP_ASHIFT: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_ashift, ZPROP_SRC_NONE); + continue; + case VDEV_PROP_SIZE: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_stat.vs_dspace, ZPROP_SRC_NONE); + continue; + case VDEV_PROP_FREE: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_stat.vs_dspace - + vd->vdev_stat.vs_alloc, ZPROP_SRC_NONE); + continue; + case VDEV_PROP_ALLOCATED: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_stat.vs_alloc, ZPROP_SRC_NONE); + continue; + case VDEV_PROP_EXPANDSZ: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_stat.vs_esize, ZPROP_SRC_NONE); + continue; + case VDEV_PROP_FRAGMENTATION: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_stat.vs_fragmentation, + ZPROP_SRC_NONE); + continue; + case VDEV_PROP_PARITY: + vdev_prop_add_list(outnvl, propname, NULL, + vdev_get_nparity(vd), ZPROP_SRC_NONE); + continue; + case VDEV_PROP_PATH: + if (vd->vdev_path == NULL) + continue; + vdev_prop_add_list(outnvl, propname, + vd->vdev_path, 0, ZPROP_SRC_NONE); + continue; + case VDEV_PROP_DEVID: + if (vd->vdev_devid == NULL) + continue; + vdev_prop_add_list(outnvl, propname, + vd->vdev_devid, 0, ZPROP_SRC_NONE); + continue; + case VDEV_PROP_PHYS_PATH: + if (vd->vdev_physpath == NULL) + continue; + vdev_prop_add_list(outnvl, propname, + vd->vdev_physpath, 0, ZPROP_SRC_NONE); + continue; + 
case VDEV_PROP_ENC_PATH: + if (vd->vdev_enc_sysfs_path == NULL) + continue; + vdev_prop_add_list(outnvl, propname, + vd->vdev_enc_sysfs_path, 0, ZPROP_SRC_NONE); + continue; + case VDEV_PROP_FRU: + if (vd->vdev_fru == NULL) + continue; + vdev_prop_add_list(outnvl, propname, + vd->vdev_fru, 0, ZPROP_SRC_NONE); + continue; + case VDEV_PROP_PARENT: + if (vd->vdev_parent != NULL) { + strval = vdev_name(vd->vdev_parent, + namebuf, sizeof (namebuf)); + vdev_prop_add_list(outnvl, propname, + strval, 0, ZPROP_SRC_NONE); + } + continue; + case VDEV_PROP_CHILDREN: + if (vd->vdev_children > 0) + strval = kmem_zalloc(ZAP_MAXVALUELEN, + KM_SLEEP); + for (uint64_t i = 0; i < vd->vdev_children; + i++) { + char *vname; + + vname = vdev_name(vd->vdev_child[i], + namebuf, sizeof (namebuf)); + if (vname == NULL) + vname = "(unknown)"; + if (strlen(strval) > 0) + strlcat(strval, ",", + ZAP_MAXVALUELEN); + strlcat(strval, vname, ZAP_MAXVALUELEN); + } + if (strval != NULL) { + vdev_prop_add_list(outnvl, propname, + strval, 0, ZPROP_SRC_NONE); + kmem_free(strval, ZAP_MAXVALUELEN); + } + continue; + case VDEV_PROP_NUMCHILDREN: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_children, ZPROP_SRC_NONE); + continue; + case VDEV_PROP_READ_ERRORS: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_stat.vs_read_errors, + ZPROP_SRC_NONE); + continue; + case VDEV_PROP_WRITE_ERRORS: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_stat.vs_write_errors, + ZPROP_SRC_NONE); + continue; + case VDEV_PROP_CHECKSUM_ERRORS: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_stat.vs_checksum_errors, + ZPROP_SRC_NONE); + continue; + case VDEV_PROP_INITIALIZE_ERRORS: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_stat.vs_initialize_errors, + ZPROP_SRC_NONE); + continue; + case VDEV_PROP_OPS_NULL: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_stat.vs_ops[ZIO_TYPE_NULL], + ZPROP_SRC_NONE); + continue; + case VDEV_PROP_OPS_READ: + vdev_prop_add_list(outnvl, propname, 
NULL, + vd->vdev_stat.vs_ops[ZIO_TYPE_READ], + ZPROP_SRC_NONE); + continue; + case VDEV_PROP_OPS_WRITE: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_stat.vs_ops[ZIO_TYPE_WRITE], + ZPROP_SRC_NONE); + continue; + case VDEV_PROP_OPS_FREE: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_stat.vs_ops[ZIO_TYPE_FREE], + ZPROP_SRC_NONE); + continue; + case VDEV_PROP_OPS_CLAIM: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_stat.vs_ops[ZIO_TYPE_CLAIM], + ZPROP_SRC_NONE); + continue; + case VDEV_PROP_OPS_TRIM: + /* + * TRIM ops and bytes are reported to user + * space as ZIO_TYPE_IOCTL. This is done to + * preserve the vdev_stat_t structure layout + * for user space. + */ + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_stat.vs_ops[ZIO_TYPE_IOCTL], + ZPROP_SRC_NONE); + continue; + case VDEV_PROP_BYTES_NULL: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_stat.vs_bytes[ZIO_TYPE_NULL], + ZPROP_SRC_NONE); + continue; + case VDEV_PROP_BYTES_READ: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_stat.vs_bytes[ZIO_TYPE_READ], + ZPROP_SRC_NONE); + continue; + case VDEV_PROP_BYTES_WRITE: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_stat.vs_bytes[ZIO_TYPE_WRITE], + ZPROP_SRC_NONE); + continue; + case VDEV_PROP_BYTES_FREE: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_stat.vs_bytes[ZIO_TYPE_FREE], + ZPROP_SRC_NONE); + continue; + case VDEV_PROP_BYTES_CLAIM: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_stat.vs_bytes[ZIO_TYPE_CLAIM], + ZPROP_SRC_NONE); + continue; + case VDEV_PROP_BYTES_TRIM: + /* + * TRIM ops and bytes are reported to user + * space as ZIO_TYPE_IOCTL. This is done to + * preserve the vdev_stat_t structure layout + * for user space. 
+ */ + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_stat.vs_bytes[ZIO_TYPE_IOCTL], + ZPROP_SRC_NONE); + continue; + case VDEV_PROP_REMOVING: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_removing, ZPROP_SRC_NONE); + continue; + /* Numeric Properties */ + case VDEV_PROP_ALLOCATING: + src = ZPROP_SRC_LOCAL; + strval = NULL; + + err = zap_lookup(mos, objid, nvpair_name(elem), + sizeof (uint64_t), 1, &intval); + if (err == ENOENT) { + intval = + vdev_prop_default_numeric(prop); + err = 0; + } else if (err) + break; + if (intval == vdev_prop_default_numeric(prop)) + src = ZPROP_SRC_DEFAULT; + + /* Leaf vdevs cannot have this property */ + if (vd->vdev_mg == NULL && + vd->vdev_top != NULL) { + src = ZPROP_SRC_NONE; + intval = ZPROP_BOOLEAN_NA; + } + + vdev_prop_add_list(outnvl, propname, strval, + intval, src); + break; + /* Text Properties */ + case VDEV_PROP_COMMENT: + /* Exists in the ZAP below */ + /* FALLTHRU */ + case VDEV_PROP_USER: + /* User Properties */ + src = ZPROP_SRC_LOCAL; + + err = zap_length(mos, objid, nvpair_name(elem), + &integer_size, &num_integers); + if (err) + break; + + switch (integer_size) { + case 8: + /* User properties cannot be integers */ + err = EINVAL; + break; + case 1: + /* string property */ + strval = kmem_alloc(num_integers, + KM_SLEEP); + err = zap_lookup(mos, objid, + nvpair_name(elem), 1, + num_integers, strval); + if (err) { + kmem_free(strval, + num_integers); + break; + } + vdev_prop_add_list(outnvl, propname, + strval, 0, src); + kmem_free(strval, num_integers); + break; + } + break; + default: + err = ENOENT; + break; + } + if (err) + break; + } + } else { + /* + * Get all properties from the MOS vdev property object. 
+ */ + zap_cursor_t zc; + zap_attribute_t za; + for (zap_cursor_init(&zc, mos, objid); + (err = zap_cursor_retrieve(&zc, &za)) == 0; + zap_cursor_advance(&zc)) { + intval = 0; + strval = NULL; + zprop_source_t src = ZPROP_SRC_DEFAULT; + propname = za.za_name; + prop = vdev_name_to_prop(propname); + + switch (za.za_integer_length) { + case 8: + /* We do not allow integer user properties */ + /* This is likely an internal value */ + break; + case 1: + /* string property */ + strval = kmem_alloc(za.za_num_integers, + KM_SLEEP); + err = zap_lookup(mos, objid, za.za_name, 1, + za.za_num_integers, strval); + if (err) { + kmem_free(strval, za.za_num_integers); + break; + } + vdev_prop_add_list(outnvl, propname, strval, 0, + src); + kmem_free(strval, za.za_num_integers); + break; + + default: + break; + } + } + zap_cursor_fini(&zc); + } + + mutex_exit(&spa->spa_props_lock); + if (err && err != ENOENT) { + return (err); + } + + return (0); +} + EXPORT_SYMBOL(vdev_fault); EXPORT_SYMBOL(vdev_degrade); EXPORT_SYMBOL(vdev_online); diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index daf53f0a0c..6252ee135b 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -496,6 +496,10 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASIZE, vd->vdev_asize); fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_LOG, vd->vdev_islog); + if (vd->vdev_noalloc) { + fnvlist_add_uint64(nv, ZPOOL_CONFIG_NONALLOCATING, + vd->vdev_noalloc); + } if (vd->vdev_removing) { fnvlist_add_uint64(nv, ZPOOL_CONFIG_REMOVING, vd->vdev_removing); diff --git a/module/zfs/vdev_removal.c b/module/zfs/vdev_removal.c index f762c1df96..02dbdfb6c5 100644 --- a/module/zfs/vdev_removal.c +++ b/module/zfs/vdev_removal.c @@ -167,6 +167,176 @@ spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid) return (NULL); } +static void +vdev_activate(vdev_t *vd) +{ + metaslab_group_t *mg = vd->vdev_mg; + spa_t *spa = vd->vdev_spa; + uint64_t 
vdev_space = spa_deflate(spa) ? + vd->vdev_stat.vs_dspace : vd->vdev_stat.vs_space; + + ASSERT(!vd->vdev_islog); + ASSERT(vd->vdev_noalloc); + + metaslab_group_activate(mg); + metaslab_group_activate(vd->vdev_log_mg); + + ASSERT3U(spa->spa_nonallocating_dspace, >=, vdev_space); + + spa->spa_nonallocating_dspace -= vdev_space; + + vd->vdev_noalloc = B_FALSE; +} + +static int +vdev_passivate(vdev_t *vd, uint64_t *txg) +{ + spa_t *spa = vd->vdev_spa; + int error; + + ASSERT(!vd->vdev_noalloc); + + vdev_t *rvd = spa->spa_root_vdev; + metaslab_group_t *mg = vd->vdev_mg; + metaslab_class_t *normal = spa_normal_class(spa); + if (mg->mg_class == normal) { + /* + * We must check that this is not the only allocating device in + * the pool before passivating, otherwise we will not be able + * to make progress because we can't allocate from any vdevs. + */ + boolean_t last = B_TRUE; + for (uint64_t id = 0; id < rvd->vdev_children; id++) { + vdev_t *cvd = rvd->vdev_child[id]; + + if (cvd == vd || + cvd->vdev_ops == &vdev_indirect_ops) + continue; + + metaslab_class_t *mc = cvd->vdev_mg->mg_class; + if (mc != normal) + continue; + + if (!cvd->vdev_noalloc) { + last = B_FALSE; + break; + } + } + if (last) + return (SET_ERROR(EINVAL)); + } + + metaslab_group_passivate(mg); + ASSERT(!vd->vdev_islog); + metaslab_group_passivate(vd->vdev_log_mg); + + /* + * Wait for the youngest allocations and frees to sync, + * and then wait for the deferral of those frees to finish. + */ + spa_vdev_config_exit(spa, NULL, + *txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG); + + /* + * We must ensure that no "stubby" log blocks are allocated + * on the device to be removed. These blocks could be + * written at any time, including while we are in the middle + * of copying them. 
+ */ + error = spa_reset_logs(spa); + + *txg = spa_vdev_config_enter(spa); + + if (error != 0) { + metaslab_group_activate(mg); + ASSERT(!vd->vdev_islog); + if (vd->vdev_log_mg != NULL) + metaslab_group_activate(vd->vdev_log_mg); + return (error); + } + + spa->spa_nonallocating_dspace += spa_deflate(spa) ? + vd->vdev_stat.vs_dspace : vd->vdev_stat.vs_space; + vd->vdev_noalloc = B_TRUE; + + return (0); +} + +/* + * Turn off allocations for a top-level device from the pool. + * + * Turning off allocations for a top-level device can take a significant + * amount of time. As a result we use the spa_vdev_config_[enter/exit] + * functions which allow us to grab and release the spa_config_lock while + * still holding the namespace lock. During each step the configuration + * is synced out. + */ +int +spa_vdev_noalloc(spa_t *spa, uint64_t guid) +{ + vdev_t *vd; + uint64_t txg; + int error = 0; + + ASSERT(!MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_writeable(spa)); + + txg = spa_vdev_enter(spa); + + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + + vd = spa_lookup_by_guid(spa, guid, B_FALSE); + + if (vd == NULL) + error = SET_ERROR(ENOENT); + else if (vd->vdev_mg == NULL) + error = SET_ERROR(ZFS_ERR_VDEV_NOTSUP); + else if (!vd->vdev_noalloc) + error = vdev_passivate(vd, &txg); + + if (error == 0) { + vdev_dirty_leaves(vd, VDD_DTL, txg); + vdev_config_dirty(vd); + } + + error = spa_vdev_exit(spa, NULL, txg, error); + + return (error); +} + +int +spa_vdev_alloc(spa_t *spa, uint64_t guid) +{ + vdev_t *vd; + uint64_t txg; + int error = 0; + + ASSERT(!MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_writeable(spa)); + + txg = spa_vdev_enter(spa); + + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + + vd = spa_lookup_by_guid(spa, guid, B_FALSE); + + if (vd == NULL) + error = SET_ERROR(ENOENT); + else if (vd->vdev_mg == NULL) + error = SET_ERROR(ZFS_ERR_VDEV_NOTSUP); + else if (!vd->vdev_removing) + vdev_activate(vd); + + if (error == 0) { + vdev_dirty_leaves(vd, VDD_DTL, txg); + 
vdev_config_dirty(vd); + } + + (void) spa_vdev_exit(spa, NULL, txg, error); + + return (error); +} + static void spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count, nvlist_t *dev_to_remove) @@ -1193,6 +1363,8 @@ vdev_remove_complete(spa_t *spa) ASSERT3P(vd->vdev_initialize_thread, ==, NULL); ASSERT3P(vd->vdev_trim_thread, ==, NULL); ASSERT3P(vd->vdev_autotrim_thread, ==, NULL); + uint64_t vdev_space = spa_deflate(spa) ? + vd->vdev_stat.vs_dspace : vd->vdev_stat.vs_space; sysevent_t *ev = spa_event_create(spa, vd, NULL, ESC_ZFS_VDEV_REMOVE_DEV); @@ -1200,6 +1372,12 @@ vdev_remove_complete(spa_t *spa) zfs_dbgmsg("finishing device removal for vdev %llu in txg %llu", (u_longlong_t)vd->vdev_id, (u_longlong_t)txg); + ASSERT3U(0, !=, vdev_space); + ASSERT3U(spa->spa_nonallocating_dspace, >=, vdev_space); + + /* the vdev is no longer part of the dspace */ + spa->spa_nonallocating_dspace -= vdev_space; + /* * Discard allocation state. */ @@ -1619,6 +1797,28 @@ spa_vdev_remove_suspend(spa_t *spa) mutex_exit(&svr->svr_lock); } +/* + * Return true if the "allocating" property has been set to "off" + */ +static boolean_t +vdev_prop_allocating_off(vdev_t *vd) +{ + uint64_t objid = vd->vdev_top_zap; + uint64_t allocating = 1; + + /* no vdev property object => no props */ + if (objid != 0) { + spa_t *spa = vd->vdev_spa; + objset_t *mos = spa->spa_meta_objset; + + mutex_enter(&spa->spa_props_lock); + (void) zap_lookup(mos, objid, "allocating", sizeof (uint64_t), + 1, &allocating); + mutex_exit(&spa->spa_props_lock); + } + return (allocating == 0); +} + /* ARGSUSED */ static int spa_vdev_remove_cancel_check(void *arg, dmu_tx_t *tx) @@ -1761,6 +1961,13 @@ spa_vdev_remove_cancel_sync(void *arg, dmu_tx_t *tx) spa_finish_removal(spa, DSS_CANCELED, tx); vd->vdev_removing = B_FALSE; + + if (!vdev_prop_allocating_off(vd)) { + spa_config_enter(spa, SCL_ALLOC | SCL_VDEV, FTAG, RW_WRITER); + vdev_activate(vd); + spa_config_exit(spa, SCL_ALLOC | SCL_VDEV, FTAG); + } + 
vdev_config_dirty(vd); zfs_dbgmsg("canceled device removal for vdev %llu in %llu", @@ -1774,21 +1981,9 @@ spa_vdev_remove_cancel_sync(void *arg, dmu_tx_t *tx) static int spa_vdev_remove_cancel_impl(spa_t *spa) { - uint64_t vdid = spa->spa_vdev_removal->svr_vdev_id; - int error = dsl_sync_task(spa->spa_name, spa_vdev_remove_cancel_check, spa_vdev_remove_cancel_sync, NULL, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED); - - if (error == 0) { - spa_config_enter(spa, SCL_ALLOC | SCL_VDEV, FTAG, RW_WRITER); - vdev_t *vd = vdev_lookup_top(spa, vdid); - metaslab_group_activate(vd->vdev_mg); - ASSERT(!vd->vdev_islog); - metaslab_group_activate(vd->vdev_log_mg); - spa_config_exit(spa, SCL_ALLOC | SCL_VDEV, FTAG); - } - return (error); } @@ -1984,6 +2179,11 @@ spa_vdev_remove_top_check(vdev_t *vd) if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REMOVAL)) return (SET_ERROR(ENOTSUP)); + /* + * This device is already being removed + */ + if (vd->vdev_removing) + return (SET_ERROR(EALREADY)); metaslab_class_t *mc = vd->vdev_mg->mg_class; metaslab_class_t *normal = spa_normal_class(spa); @@ -2002,20 +2202,12 @@ spa_vdev_remove_top_check(vdev_t *vd) ASSERT3U(available, >=, vd->vdev_stat.vs_alloc); if (available < vd->vdev_stat.vs_alloc) return (SET_ERROR(ENOSPC)); - } else { + } else if (!vd->vdev_noalloc) { /* available space in the pool's normal class */ uint64_t available = dsl_dir_space_available( spa->spa_dsl_pool->dp_root_dir, NULL, 0, B_TRUE); - if (available < - vd->vdev_stat.vs_dspace + spa_get_slop_space(spa)) { - /* - * This is a normal device. There has to be enough free - * space to remove the device and leave double the - * "slop" space (i.e. we must leave at least 3% of the - * pool free, in addition to the normal slop space). 
- */ + if (available < vd->vdev_stat.vs_dspace) return (SET_ERROR(ENOSPC)); - } } /* @@ -2108,6 +2300,7 @@ static int spa_vdev_remove_top(vdev_t *vd, uint64_t *txg) { spa_t *spa = vd->vdev_spa; + boolean_t set_noalloc = B_FALSE; int error; /* @@ -2116,8 +2309,6 @@ spa_vdev_remove_top(vdev_t *vd, uint64_t *txg) * are errors. */ error = spa_vdev_remove_top_check(vd); - if (error != 0) - return (error); /* * Stop allocating from this vdev. Note that we must check @@ -2127,31 +2318,22 @@ spa_vdev_remove_top(vdev_t *vd, uint64_t *txg) * The above check for sufficient free space serves this * purpose. */ - metaslab_group_t *mg = vd->vdev_mg; - metaslab_group_passivate(mg); - ASSERT(!vd->vdev_islog); - metaslab_group_passivate(vd->vdev_log_mg); + if (error == 0 && !vd->vdev_noalloc) { + set_noalloc = B_TRUE; + error = vdev_passivate(vd, txg); + } - /* - * Wait for the youngest allocations and frees to sync, - * and then wait for the deferral of those frees to finish. - */ - spa_vdev_config_exit(spa, NULL, - *txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG); - - /* - * We must ensure that no "stubby" log blocks are allocated - * on the device to be removed. These blocks could be - * written at any time, including while we are in the middle - * of copying them. - */ - error = spa_reset_logs(spa); + if (error != 0) + return (error); /* * We stop any initializing and TRIM that is currently in progress * but leave the state as "active". This will allow the process to * resume if the removal is canceled sometime later. */ + + spa_vdev_config_exit(spa, NULL, *txg, 0, FTAG); + vdev_initialize_stop_all(vd, VDEV_INITIALIZE_ACTIVE); vdev_trim_stop_all(vd, VDEV_TRIM_ACTIVE); vdev_autotrim_stop_wait(vd); @@ -2162,13 +2344,11 @@ spa_vdev_remove_top(vdev_t *vd, uint64_t *txg) * Things might have changed while the config lock was dropped * (e.g. space usage). Check for errors again. 
*/ - if (error == 0) - error = spa_vdev_remove_top_check(vd); + error = spa_vdev_remove_top_check(vd); if (error != 0) { - metaslab_group_activate(mg); - ASSERT(!vd->vdev_islog); - metaslab_group_activate(vd->vdev_log_mg); + if (set_noalloc) + vdev_activate(vd); spa_async_request(spa, SPA_ASYNC_INITIALIZE_RESTART); spa_async_request(spa, SPA_ASYNC_TRIM_RESTART); spa_async_request(spa, SPA_ASYNC_AUTOTRIM_RESTART); diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 96a021acbc..ca2da56122 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -38,7 +38,7 @@ * Copyright (c) 2017 Open-E, Inc. All Rights Reserved. * Copyright (c) 2019 Datto Inc. * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved. - * Copyright (c) 2019, Klara Inc. + * Copyright (c) 2019, 2021, Klara Inc. * Copyright (c) 2019, Allan Jude */ @@ -2981,6 +2981,96 @@ zfs_ioc_pool_get_props(zfs_cmd_t *zc) return (error); } +/* + * innvl: { + * "vdevprops_set_vdev" -> guid + * "vdevprops_set_props" -> { prop -> value } + * } + * + * outnvl: propname -> error code (int32) + */ +static const zfs_ioc_key_t zfs_keys_vdev_set_props[] = { + {ZPOOL_VDEV_PROPS_SET_VDEV, DATA_TYPE_UINT64, 0}, + {ZPOOL_VDEV_PROPS_SET_PROPS, DATA_TYPE_NVLIST, 0} +}; + +static int +zfs_ioc_vdev_set_props(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) +{ + spa_t *spa; + int error; + vdev_t *vd; + uint64_t vdev_guid; + + /* Early validation */ + if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_SET_VDEV, + &vdev_guid) != 0) + return (SET_ERROR(EINVAL)); + + if (outnvl == NULL) + return (SET_ERROR(EINVAL)); + + if ((error = spa_open(poolname, &spa, FTAG)) != 0) + return (error); + + ASSERT(spa_writeable(spa)); + + if ((vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE)) == NULL) { + spa_close(spa, FTAG); + return (SET_ERROR(ENOENT)); + } + + error = vdev_prop_set(vd, innvl, outnvl); + + spa_close(spa, FTAG); + + return (error); +} + +/* + * innvl: { + * "vdevprops_get_vdev" -> guid + * 
(optional) "vdevprops_get_props" -> { propname -> propid } + * } + * + * outnvl: propname -> value + */ +static const zfs_ioc_key_t zfs_keys_vdev_get_props[] = { + {ZPOOL_VDEV_PROPS_GET_VDEV, DATA_TYPE_UINT64, 0}, + {ZPOOL_VDEV_PROPS_GET_PROPS, DATA_TYPE_NVLIST, ZK_OPTIONAL} +}; + +static int +zfs_ioc_vdev_get_props(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) +{ + spa_t *spa; + int error; + vdev_t *vd; + uint64_t vdev_guid; + + /* Early validation */ + if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_GET_VDEV, + &vdev_guid) != 0) + return (SET_ERROR(EINVAL)); + + if (outnvl == NULL) + return (SET_ERROR(EINVAL)); + + if ((error = spa_open(poolname, &spa, FTAG)) != 0) + return (error); + + if ((vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE)) == NULL) { + spa_close(spa, FTAG); + return (SET_ERROR(ENOENT)); + } + + error = vdev_prop_get(vd, innvl, outnvl); + + spa_close(spa, FTAG); + + return (error); +} + /* * inputs: * zc_name name of filesystem @@ -7107,6 +7197,16 @@ zfs_ioctl_init(void) POOL_CHECK_SUSPENDED, B_FALSE, B_TRUE, zfs_keys_get_bootenv, ARRAY_SIZE(zfs_keys_get_bootenv)); + zfs_ioctl_register("zpool_vdev_get_props", ZFS_IOC_VDEV_GET_PROPS, + zfs_ioc_vdev_get_props, zfs_secpolicy_read, POOL_NAME, + POOL_CHECK_NONE, B_FALSE, B_FALSE, zfs_keys_vdev_get_props, + ARRAY_SIZE(zfs_keys_vdev_get_props)); + + zfs_ioctl_register("zpool_vdev_set_props", ZFS_IOC_VDEV_SET_PROPS, + zfs_ioc_vdev_set_props, zfs_secpolicy_config, POOL_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE, + zfs_keys_vdev_set_props, ARRAY_SIZE(zfs_keys_vdev_set_props)); + /* IOCTLS that use the legacy function signature */ zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze, diff --git a/module/zfs/zio.c b/module/zfs/zio.c index c016fa323b..2079a1e0a9 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -3755,7 +3755,7 @@ zio_vdev_io_start(zio_t *zio) * Note: the code can handle other kinds of writes, * but we don't expect them. 
*/ - if (zio->io_vd->vdev_removing) { + if (zio->io_vd->vdev_noalloc) { ASSERT(zio->io_flags & (ZIO_FLAG_PHYSICAL | ZIO_FLAG_SELF_HEAL | ZIO_FLAG_RESILVER | ZIO_FLAG_INDUCE_DAMAGE));