From 76e55911da8c1fa2d68277eca6b3825b270153c8 Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Wed, 27 Dec 2023 16:46:07 -0800 Subject: [PATCH] Backup allocation class vdev data to the pool This commit allows you to automatically backup allocation class vdevs to the pool. If the alloc class vdev is fully backed up, it can fail without the pool losing any data. This also means you can safely create pools with non-matching alloc class redundancy (like a mirrored pool with a single special device). It works by making sure all alloc class writes have at least two DVA copies, and then having the 2nd copy always go to the pool itself. So whenever you write to an alloc class vdev, another copy of the data is also written to the pool. This behavior is controlled via three properties: 1. feature@allow_backup_to_pool - This feature flag enables the backup subsystem. It also prevents the backed-up pool from being imported read/write on an older version of ZFS that does not support alloc class backups. 2. backup_alloc_class_to_pool - This pool property is the main on/off switch to control the backup feature. It is on by default but can be turned off at any time. Once it is turned off, then all existing vdevs will no longer be considered fully backed up. 3. backup_to_pool - This is a read-only vdev property that will report "on" if all the data on the vdev is fully backed up to the pool. Note that the backup to pool feature is now enabled by default on all new pools. This may create a performance penalty over pure alloc class writes due to the extra backup copy write to the pool. Alloc class reads should not be affected as they always read from DVA 0 first (the copy of the data on the special device). 
Closes: #15118 Signed-off-by: Tony Hutter --- cmd/zpool/zpool_vdev.c | 56 +++- include/sys/fs/zfs.h | 4 + include/sys/spa.h | 3 +- include/sys/spa_impl.h | 9 + include/sys/vdev.h | 18 ++ include/sys/vdev_impl.h | 12 + include/zfeature_common.h | 1 + lib/libnvpair/libnvpair.abi | 10 +- lib/libuutil/libuutil.abi | 47 ++- lib/libzfs/libzfs.abi | 24 +- lib/libzfs/libzfs_util.c | 6 + lib/libzfs_core/libzfs_core.abi | 51 +++- lib/libzutil/zutil_import.c | 10 +- man/man7/vdevprops.7 | 19 ++ man/man7/zpool-features.7 | 40 +++ man/man7/zpoolconcepts.7 | 22 +- man/man7/zpoolprops.7 | 31 ++ module/zcommon/zfeature_common.c | 12 + module/zcommon/zpool_prop.c | 7 + module/zfs/metaslab.c | 28 +- module/zfs/spa.c | 228 +++++++++++++- module/zfs/spa_misc.c | 15 +- module/zfs/vdev.c | 284 +++++++++++++++++- module/zfs/vdev_label.c | 49 ++- module/zfs/vdev_root.c | 18 +- module/zfs/zio.c | 29 ++ tests/runfiles/common.run | 8 + tests/zfs-tests/include/libtest.shlib | 37 ++- tests/zfs-tests/tests/Makefile.am | 12 + .../alloc_class/alloc_class_001_pos.ksh | 16 +- .../alloc_class/alloc_class_002_neg.ksh | 35 ++- .../alloc_class/alloc_class_003_pos.ksh | 40 +-- .../alloc_class/alloc_class_004_pos.ksh | 48 +-- .../alloc_class/alloc_class_005_pos.ksh | 66 ++-- .../alloc_class/alloc_class_006_pos.ksh | 14 +- .../alloc_class/alloc_class_007_pos.ksh | 14 +- .../alloc_class/alloc_class_008_pos.ksh | 34 ++- .../alloc_class/alloc_class_009_pos.ksh | 58 ++-- .../alloc_class/alloc_class_010_pos.ksh | 27 +- .../alloc_class/alloc_class_011_neg.ksh | 16 +- .../alloc_class/alloc_class_012_pos.ksh | 116 +++++-- .../alloc_class/alloc_class_013_pos.ksh | 41 +-- .../alloc_class/alloc_class_014_neg.ksh | 16 +- .../alloc_class/alloc_class_015_pos.ksh | 28 +- .../backup_alloc_class/backup_alloc_class.cfg | 36 +++ .../backup_alloc_class.kshlib | 283 +++++++++++++++++ .../backup_alloc_class_add.ksh | 94 ++++++ .../backup_alloc_class_create.ksh | 86 ++++++ .../backup_alloc_class_files.ksh | 124 ++++++++ 
.../backup_alloc_class_import.ksh | 95 ++++++ .../backup_alloc_class_offline.ksh | 126 ++++++++ .../backup_alloc_class_prop.ksh | 98 ++++++ .../backup_alloc_class_scrub.ksh | 112 +++++++ .../backup_alloc_class_split.ksh | 101 +++++++ .../functional/backup_alloc_class/cleanup.ksh | 27 ++ .../functional/backup_alloc_class/setup.ksh | 24 ++ 56 files changed, 2567 insertions(+), 298 deletions(-) create mode 100644 tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class.cfg create mode 100644 tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class.kshlib create mode 100755 tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_add.ksh create mode 100755 tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_create.ksh create mode 100755 tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_files.ksh create mode 100755 tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_import.ksh create mode 100755 tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_offline.ksh create mode 100755 tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_prop.ksh create mode 100755 tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_scrub.ksh create mode 100755 tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_split.ksh create mode 100755 tests/zfs-tests/tests/functional/backup_alloc_class/cleanup.ksh create mode 100755 tests/zfs-tests/tests/functional/backup_alloc_class/setup.ksh diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c index fbd4b81dfa..74e24f9ee8 100644 --- a/cmd/zpool/zpool_vdev.c +++ b/cmd/zpool/zpool_vdev.c @@ -480,13 +480,43 @@ is_raidz_draid(replication_level_t *a, replication_level_t *b) return (B_FALSE); } +/* + * Return true if 'props' contains either: + * + * feature@allow_backup_to_pool=disabled + * + * or + * + * backup_alloc_class_to_pool=off + */ +static boolean_t 
+is_backup_to_pool_disabled_in_props(nvlist_t *props) +{ + const char *str = NULL; + if (nvlist_lookup_string(props, "feature@allow_backup_to_pool", + &str) == 0) { + if ((str != NULL) && strcmp(str, "disabled") == 0) { + return (B_TRUE); /* It is disabled */ + } + } + + if (nvlist_lookup_string(props, "backup_alloc_class_to_pool", + &str) == 0) { + if ((str != NULL) && strcmp(str, "off") == 0) { + return (B_TRUE); /* It is disabled */ + } + } + + return (B_FALSE); +} + /* * Given a list of toplevel vdevs, return the current replication level. If * the config is inconsistent, then NULL is returned. If 'fatal' is set, then * an error message will be displayed for each self-inconsistent vdev. */ static replication_level_t * -get_replication(nvlist_t *nvroot, boolean_t fatal) +get_replication(nvlist_t *props, nvlist_t *nvroot, boolean_t fatal) { nvlist_t **top; uint_t t, toplevels; @@ -507,6 +537,7 @@ get_replication(nvlist_t *nvroot, boolean_t fatal) for (t = 0; t < toplevels; t++) { uint64_t is_log = B_FALSE; + const char *str = NULL; nv = top[t]; @@ -518,6 +549,21 @@ get_replication(nvlist_t *nvroot, boolean_t fatal) if (is_log) continue; + /* + * By default, all alloc class devices have their backup to pool + * props enabled, so their replication level doesn't matter. + * However, if they're disabled for any reason, then we do need + * to force redundancy. + */ + (void) nvlist_lookup_string(nv, ZPOOL_CONFIG_ALLOCATION_BIAS, + &str); + if (str && + ((strcmp(str, VDEV_ALLOC_BIAS_SPECIAL) == 0) || + (strcmp(str, VDEV_ALLOC_BIAS_DEDUP) == 0))) { + if (!is_backup_to_pool_disabled_in_props(props)) + continue; /* We're backed up, skip redundancy */ + } + /* * Ignore holes introduced by removing aux devices, along * with indirect vdevs introduced by previously removed @@ -808,7 +854,7 @@ get_replication(nvlist_t *nvroot, boolean_t fatal) * report any difference between the two. 
*/ static int -check_replication(nvlist_t *config, nvlist_t *newroot) +check_replication(nvlist_t *props, nvlist_t *config, nvlist_t *newroot) { nvlist_t **child; uint_t children; @@ -825,7 +871,7 @@ check_replication(nvlist_t *config, nvlist_t *newroot) verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); - if ((current = get_replication(nvroot, B_FALSE)) == NULL) + if ((current = get_replication(props, nvroot, B_FALSE)) == NULL) return (0); } /* @@ -850,7 +896,7 @@ check_replication(nvlist_t *config, nvlist_t *newroot) * Get the replication level of the new vdev spec, reporting any * inconsistencies found. */ - if ((new = get_replication(newroot, B_TRUE)) == NULL) { + if ((new = get_replication(props, newroot, B_TRUE)) == NULL) { free(current); return (-1); } @@ -1888,7 +1934,7 @@ make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force, int check_rep, * found. We include the existing pool spec, if any, as we need to * catch changes against the existing replication level. 
*/ - if (check_rep && check_replication(poolconfig, newroot) != 0) { + if (check_rep && check_replication(props, poolconfig, newroot) != 0) { nvlist_free(newroot); return (NULL); } diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 21f99baccc..dc2087f188 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -258,6 +258,7 @@ typedef enum { ZPOOL_PROP_BCLONEUSED, ZPOOL_PROP_BCLONESAVED, ZPOOL_PROP_BCLONERATIO, + ZPOOL_PROP_BACKUP_ALLOC_CLASS_TO_POOL, ZPOOL_NUM_PROPS } zpool_prop_t; @@ -368,6 +369,7 @@ typedef enum { VDEV_PROP_RAIDZ_EXPANDING, VDEV_PROP_SLOW_IO_N, VDEV_PROP_SLOW_IO_T, + VDEV_PROP_BACKUP_TO_POOL, VDEV_NUM_PROPS } vdev_prop_t; @@ -845,6 +847,7 @@ typedef struct zpool_load_policy { #define ZPOOL_CONFIG_EXPANSION_TIME "expansion_time" /* not stored */ #define ZPOOL_CONFIG_REBUILD_STATS "org.openzfs:rebuild_stats" #define ZPOOL_CONFIG_COMPATIBILITY "compatibility" +#define ZPOOL_CONFIG_BACKUP_TO_POOL "backup_to_pool" /* * The persistent vdev state is stored as separate values rather than a single @@ -1604,6 +1607,7 @@ typedef enum { ZFS_ERR_CRYPTO_NOTSUP, ZFS_ERR_RAIDZ_EXPAND_IN_PROGRESS, ZFS_ERR_ASHIFT_MISMATCH, + ZFS_ERR_BACKUP_DISABLED_BUT_REQUESTED, } zfs_errno_t; /* diff --git a/include/sys/spa.h b/include/sys/spa.h index b969f05afe..6405b04965 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -1113,7 +1113,8 @@ extern boolean_t spa_remap_blkptr(spa_t *spa, blkptr_t *bp, extern uint64_t spa_get_last_removal_txg(spa_t *spa); extern boolean_t spa_trust_config(spa_t *spa); extern uint64_t spa_missing_tvds_allowed(spa_t *spa); -extern void spa_set_missing_tvds(spa_t *spa, uint64_t missing); +extern void spa_set_missing_tvds(spa_t *spa, uint64_t missing, + uint64_t missing_special); extern boolean_t spa_top_vdevs_spacemap_addressable(spa_t *spa); extern uint64_t spa_total_metaslabs(spa_t *spa); extern boolean_t spa_multihost(spa_t *spa); diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 0cd0c4720f..39643256f9 
100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -327,6 +327,12 @@ struct spa { uint64_t spa_missing_tvds; /* unopenable tvds on load */ uint64_t spa_missing_tvds_allowed; /* allow loading spa? */ + /* + * number of 'spa_missing_tvds' that are alloc class devices + * backed up to the pool, and thus recoverable from errors. + */ + uint64_t spa_missing_recovered_tvds; + uint64_t spa_nonallocating_dspace; spa_removing_phys_t spa_removing_phys; spa_vdev_removal_t *spa_vdev_removal; @@ -465,6 +471,9 @@ struct spa { */ spa_config_lock_t spa_config_lock[SCL_LOCKS]; /* config changes */ zfs_refcount_t spa_refcount; /* number of opens */ + + /* Backup special/dedup devices data to the pool */ + boolean_t spa_backup_alloc_class; }; extern char *spa_config_path; diff --git a/include/sys/vdev.h b/include/sys/vdev.h index 38f62b07dc..b6401a5f7b 100644 --- a/include/sys/vdev.h +++ b/include/sys/vdev.h @@ -172,6 +172,24 @@ extern uint32_t vdev_queue_length(vdev_t *vd); extern uint64_t vdev_queue_last_offset(vdev_t *vd); extern uint64_t vdev_queue_class_length(vdev_t *vq, zio_priority_t p); +typedef enum { + /* (special flag) dry-run, get count only */ + VDEV_ARRAY_COUNT = 1ULL << 0, + + VDEV_ARRAY_ANY_LEAF = 1ULL << 1, /* match any leaf */ + VDEV_ARRAY_SPECIAL_LEAF = 1ULL << 2, /* match special vdev leaves */ + VDEV_ARRAY_DEDUP_LEAF = 1ULL << 3, /* match dedup vdev leaves */ +} vdev_array_flag_t; + +struct vdev_array +{ + vdev_t **vds; /* Array of vdev_t's */ + int count; +}; + +extern struct vdev_array *vdev_array_alloc(vdev_t *rvd, uint64_t flags); +extern void vdev_array_free(struct vdev_array *vda); + extern void vdev_config_dirty(vdev_t *vd); extern void vdev_config_clean(vdev_t *vd); extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg); diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index 2a93f7c680..68aa3ae7da 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -284,6 +284,13 @@ struct vdev { uint64_t 
vdev_failfast; /* device failfast setting */ boolean_t vdev_rz_expanding; /* raidz is being expanded? */ boolean_t vdev_ishole; /* is a hole in the namespace */ + + /* + * If this is set to true, then all the data on this vdev is backed up + * to the pool. This is only used by allocation class devices. + */ + boolean_t vdev_backup_to_pool; + uint64_t vdev_top_zap; vdev_alloc_bias_t vdev_alloc_bias; /* metaslab allocation bias */ @@ -641,6 +648,11 @@ extern int vdev_obsolete_counts_are_precise(vdev_t *vd, boolean_t *are_precise); int vdev_checkpoint_sm_object(vdev_t *vd, uint64_t *sm_obj); void vdev_metaslab_group_create(vdev_t *vd); uint64_t vdev_best_ashift(uint64_t logical, uint64_t a, uint64_t b); +extern boolean_t vdev_is_fully_backed_up(vdev_t *vd); +extern boolean_t vdev_is_leaf(vdev_t *vd); +extern boolean_t vdev_is_special(vdev_t *vd); +extern boolean_t vdev_is_dedup(vdev_t *vd); +extern boolean_t vdev_is_alloc_class(vdev_t *vd); /* * Vdev ashift optimization tunables diff --git a/include/zfeature_common.h b/include/zfeature_common.h index 2515ba3217..5d878f8783 100644 --- a/include/zfeature_common.h +++ b/include/zfeature_common.h @@ -82,6 +82,7 @@ typedef enum spa_feature { SPA_FEATURE_AVZ_V2, SPA_FEATURE_REDACTION_LIST_SPILL, SPA_FEATURE_RAIDZ_EXPANSION, + SPA_FEATURE_ALLOW_BACKUP_TO_POOL, SPA_FEATURES } spa_feature_t; diff --git a/lib/libnvpair/libnvpair.abi b/lib/libnvpair/libnvpair.abi index ef92f3e9bd..b99a0d6a33 100644 --- a/lib/libnvpair/libnvpair.abi +++ b/lib/libnvpair/libnvpair.abi @@ -1156,6 +1156,11 @@ + + + + + @@ -2536,11 +2541,6 @@ - - - - - diff --git a/lib/libuutil/libuutil.abi b/lib/libuutil/libuutil.abi index e942d24c65..620f384d8f 100644 --- a/lib/libuutil/libuutil.abi +++ b/lib/libuutil/libuutil.abi @@ -596,14 +596,11 @@ - + - - - - + @@ -800,9 +797,16 @@ + + + + + + + @@ -912,6 +916,25 @@ + + + + + + + + + + + + + + + + + + + @@ -920,12 +943,23 @@ + + + + + + + + + + + @@ -937,8 +971,9 @@ - + + diff --git a/lib/libzfs/libzfs.abi 
b/lib/libzfs/libzfs.abi index 2bbaae6345..12f0b01a70 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -606,7 +606,7 @@ - + @@ -2895,7 +2895,8 @@ - + + @@ -5675,7 +5676,8 @@ - + + @@ -5936,7 +5938,8 @@ - + + @@ -6251,6 +6254,11 @@ + + + + + @@ -6362,7 +6370,7 @@ - + @@ -8987,8 +8995,8 @@ - - + + @@ -9065,7 +9073,7 @@ - + diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c index 73ae0950cc..02b7add294 100644 --- a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -774,6 +774,12 @@ zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...) case ZFS_ERR_ASHIFT_MISMATCH: zfs_verror(hdl, EZFS_ASHIFT_MISMATCH, fmt, ap); break; + case ZFS_ERR_BACKUP_DISABLED_BUT_REQUESTED: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Cannot enable backup to pool since " + "feature@allow_backup_to_pool is not active.")); + zfs_verror(hdl, EZFS_IOC_NOTSUPPORTED, fmt, ap); + break; default: zfs_error_aux(hdl, "%s", zfs_strerror(error)); zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap); diff --git a/lib/libzfs_core/libzfs_core.abi b/lib/libzfs_core/libzfs_core.abi index 5b95c8f779..ab7231971c 100644 --- a/lib/libzfs_core/libzfs_core.abi +++ b/lib/libzfs_core/libzfs_core.abi @@ -594,14 +594,11 @@ - + - - - - + @@ -770,6 +767,13 @@ + + + + + + + @@ -873,12 +877,42 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -895,8 +929,9 @@ - + + @@ -1119,7 +1154,7 @@ - + @@ -1127,7 +1162,7 @@ - + diff --git a/lib/libzutil/zutil_import.c b/lib/libzutil/zutil_import.c index 06705ff4d9..6e349920d2 100644 --- a/lib/libzutil/zutil_import.c +++ b/lib/libzutil/zutil_import.c @@ -1924,7 +1924,7 @@ zpool_find_config(libpc_handle_t *hdl, const char *target, nvlist_t **configp, /* Return if a vdev is a leaf vdev. Note: draid spares are leaf vdevs. 
*/ static boolean_t -vdev_is_leaf(nvlist_t *nv) +vdev_is_leaf_nv(nvlist_t *nv) { uint_t children = 0; nvlist_t **child; @@ -1937,10 +1937,10 @@ vdev_is_leaf(nvlist_t *nv) /* Return if a vdev is a leaf vdev and a real device (disk or file) */ static boolean_t -vdev_is_real_leaf(nvlist_t *nv) +vdev_is_real_leaf_nv(nvlist_t *nv) { const char *type = NULL; - if (!vdev_is_leaf(nv)) + if (!vdev_is_leaf_nv(nv)) return (B_FALSE); (void) nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type); @@ -1973,7 +1973,7 @@ __for_each_vdev_macro_helper_func(void *state, nvlist_t *nv, void *last_nv, /* The very first entry in the NV list is a special case */ if (*((nvlist_t **)state) == (nvlist_t *)FIRST_NV) { - if (real_leaves_only && !vdev_is_real_leaf(nv)) + if (real_leaves_only && !vdev_is_real_leaf_nv(nv)) return (0); *((nvlist_t **)last_nv) = nv; @@ -1996,7 +1996,7 @@ __for_each_vdev_macro_helper_func(void *state, nvlist_t *nv, void *last_nv, * we want. */ if (*(nvlist_t **)state == (nvlist_t *)NEXT_IS_MATCH) { - if (real_leaves_only && !vdev_is_real_leaf(nv)) + if (real_leaves_only && !vdev_is_real_leaf_nv(nv)) return (0); *((nvlist_t **)last_nv) = nv; diff --git a/man/man7/vdevprops.7 b/man/man7/vdevprops.7 index 5ec37df179..9cfac09f00 100644 --- a/man/man7/vdevprops.7 +++ b/man/man7/vdevprops.7 @@ -148,6 +148,25 @@ If this device should perform new allocations, used to disable a device when it is scheduled for later removal. See .Xr zpool-remove 8 . +.It Sy backup_to_pool +When +.Sy backup_to_pool +is "on" it means the vdev is fully backed up to the pool. +That is, there is an extra copy of all the vdev's data on the pool itself. +This allows vdevs with +.Sy backup_to_pool=on +to fail without losing data, regardless +of their redundancy level. +.Sy backup_to_pool +is only used for alloc class devices +(special and dedup) and is controlled by the +.Sy feature@allow_backup_to_pool +feature flag and +.Sy backup_alloc_class_to_pool +pool property. 
+The +.Sy backup_to_pool +vdev property is read-only. .El .Ss User Properties In addition to the standard native properties, ZFS supports arbitrary user diff --git a/man/man7/zpool-features.7 b/man/man7/zpool-features.7 index ea3c68dc60..a1f6921239 100644 --- a/man/man7/zpool-features.7 +++ b/man/man7/zpool-features.7 @@ -322,6 +322,46 @@ With device removal, it can be returned to the .Sy enabled state if all the dedicated allocation class vdevs are removed. . +.feature org.zfsonlinux allow_backup_to_pool yes allocation_classes +This feature allows the +.Sy backup_alloc_class_to_pool +pool property to be used. +When the +.Sy backup_alloc_class_to_pool +pool property is set to "on" all subsequent writes to allocation class vdevs +(like special and dedup vdevs) will also generate an additional copy of the data +to be written to the pool. +This allows alloc class vdev data to be "backed up" to the pool. +A fully backed up allocation class vdev can fail without causing the pool to be +suspended, even if the alloc class device is not redundant. +.Pp +It is important to note the difference between the +.Sy allow_backup_to_pool +feature flag and a +.Sy backup_alloc_class_to_pool +pool property since they appear similar. +The +.Sy allow_backup_to_pool +feature flag is a safeguard to prevent a pool that is backed up from being +imported read/write on an older version of ZFS that does not support backup to +pool (and possibly compromising the integrity of the backup guarantees). +The pool property is what actually allows you to turn on/off the backup copy +writes. +You can think of it as if the +.Sy allow_backup_to_pool +feature "unlocks" the +.Sy backup_alloc_class_to_pool +pool property. +See the +.Sy backup_alloc_class_to_pool +pool property and +.Sy backup_to_pool +vdev property for more details. +.Pp +This feature becomes +.Sy active +by default on new pools (unless explicitly disabled at zpool creation time). +. 
.feature com.delphix async_destroy yes Destroying a file system requires traversing all of its data in order to return its used space to the pool. diff --git a/man/man7/zpoolconcepts.7 b/man/man7/zpoolconcepts.7 index 18dfca6dc8..804ba6a086 100644 --- a/man/man7/zpoolconcepts.7 +++ b/man/man7/zpoolconcepts.7 @@ -180,17 +180,31 @@ For more information, see the section. .It Sy dedup A device solely dedicated for deduplication tables. -The redundancy of this device should match the redundancy of the other normal -devices in the pool. If more than one dedup device is specified, then allocations are load-balanced between those devices. +The dedup vdevs only need to match the redundancy level of the normal devices +if they are not being backed-up to the pool (backed-up is the default). +See the +.Sy feature@allow_backup_to_pool +feature flag, +.Sy backup_alloc_class_to_pool +pool property and +.Sy backup_to_pool +vdev property for more details. .It Sy special A device dedicated solely for allocating various kinds of internal metadata, and optionally small file blocks. -The redundancy of this device should match the redundancy of the other normal -devices in the pool. If more than one special device is specified, then allocations are load-balanced between those devices. +The special vdevs only need to match the redundancy level of the normal devices +if they are not being backed-up to the pool (backed-up is the default). +See the +.Sy feature@allow_backup_to_pool +feature flag, +.Sy backup_alloc_class_to_pool +pool property and +.Sy backup_to_pool +vdev property for more details. .Pp For more information on special allocations, see the .Sx Special Allocation Class diff --git a/man/man7/zpoolprops.7 b/man/man7/zpoolprops.7 index 5428ab8d30..0792cc5307 100644 --- a/man/man7/zpoolprops.7 +++ b/man/man7/zpoolprops.7 @@ -437,6 +437,37 @@ command, though this property can be used when a specific version is needed for backwards compatibility. 
Once feature flags are enabled on a pool this property will no longer have a value. +.It Sy backup_alloc_class_to_pool Ns = Ns Sy on Ns | Ns Sy off +When set to "on" all subsequent writes to allocation class vdevs (like special +and dedup vdevs) will also write an additional copy of the data to the pool. +This allows alloc class vdev data to be "backed up" to the pool. +If an alloc class vdev has all of its data backed up to the pool, then the vdev +will be considered "fully backed up" and will have the +.Sy backup_to_pool +vdev property set to "on". +Fully backed up alloc class vdevs can fail regardless of their redundancy level +without the pool losing data. +If +.Sy backup_alloc_class_to_pool +is set to "off" then all alloc class vdevs will no longer be considered fully +backed up, and will have their +.Sy backup_to_pool +vdev property automatically +set to "off". +If +.Sy backup_alloc_class_to_pool +is set to "on" after being set to "off", the alloc class data writes will still +write an extra copy of the data to the pool, and new top-level alloc class +vdevs added after that point will be fully backed up, but the older alloc class +vdevs will remain not fully backed up. +Essentially, an alloc class vdev must have had +.Sy backup_alloc_class_to_pool +set to "on" for the entirety of its lifetime to be considered fully backed up. +Note that the +.Sy feature@allow_backup_to_pool +feature flag must be active in order to use the +.Sy backup_alloc_class_to_pool +pool property. .El . 
.Ss User Properties diff --git a/module/zcommon/zfeature_common.c b/module/zcommon/zfeature_common.c index 309d9bf14c..1ccefc4b40 100644 --- a/module/zcommon/zfeature_common.c +++ b/module/zcommon/zfeature_common.c @@ -753,6 +753,18 @@ zpool_feature_init(void) "org.openzfs:raidz_expansion", "raidz_expansion", "Support for raidz expansion", ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL, sfeatures); + { + static const spa_feature_t allow_backup_to_pool_deps[] = { + SPA_FEATURE_ALLOCATION_CLASSES, + SPA_FEATURE_NONE + }; + zfeature_register(SPA_FEATURE_ALLOW_BACKUP_TO_POOL, + "org.openzfs:allow_backup_to_pool", "allow_backup_to_pool", + "Allow backing up allocation class device data to pool", + ZFEATURE_FLAG_MOS | ZFEATURE_FLAG_ACTIVATE_ON_ENABLE, + ZFEATURE_TYPE_BOOLEAN, allow_backup_to_pool_deps, + sfeatures); + } zfs_mod_list_supported_free(sfeatures); } diff --git a/module/zcommon/zpool_prop.c b/module/zcommon/zpool_prop.c index e2e3bf5be6..b2dd02bf26 100644 --- a/module/zcommon/zpool_prop.c +++ b/module/zcommon/zpool_prop.c @@ -153,6 +153,10 @@ zpool_prop_init(void) zprop_register_index(ZPOOL_PROP_MULTIHOST, "multihost", 0, PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "MULTIHOST", boolean_table, sfeatures); + zprop_register_index(ZPOOL_PROP_BACKUP_ALLOC_CLASS_TO_POOL, + "backup_alloc_class_to_pool", 1, PROP_DEFAULT, ZFS_TYPE_POOL, + "on | off", "BACKUP_ALLOC_CLASS_TO_POOL", boolean_table, + sfeatures); /* default index properties */ zprop_register_index(ZPOOL_PROP_FAILUREMODE, "failmode", @@ -448,6 +452,9 @@ vdev_prop_init(void) zprop_register_index(VDEV_PROP_RAIDZ_EXPANDING, "raidz_expanding", 0, PROP_READONLY, ZFS_TYPE_VDEV, "on | off", "RAIDZ_EXPANDING", boolean_table, sfeatures); + zprop_register_index(VDEV_PROP_BACKUP_TO_POOL, "backup_to_pool", B_TRUE, + PROP_READONLY, ZFS_TYPE_VDEV, "on | off", "BACKUP_TO_POOL", + boolean_table, sfeatures); /* default index properties */ zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE, diff --git 
a/module/zfs/metaslab.c b/module/zfs/metaslab.c index c4aa98ced4..4ed28266aa 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -5845,10 +5845,22 @@ metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp, dva_t *dva = bp->blk_dva; dva_t *hintdva = (hintbp != NULL) ? hintbp->blk_dva : NULL; int error = 0; + boolean_t is_backup_alloc_class = B_FALSE; + + if ((spa->spa_backup_alloc_class && ((mc == spa_special_class(spa)) || + (mc == spa_dedup_class(spa))))) { + is_backup_alloc_class = B_TRUE; + } ASSERT0(BP_GET_LOGICAL_BIRTH(bp)); ASSERT0(BP_GET_PHYSICAL_BIRTH(bp)); + /* + * Earlier layers of the code should set ndvas > 1 if the + * alloc class vdev is being backed up. + */ + ASSERT(!(is_backup_alloc_class && ndvas == 1)); + spa_config_enter(spa, SCL_ALLOC, FTAG, RW_READER); if (mc->mc_allocator[allocator].mca_rotor == NULL) { @@ -5863,7 +5875,21 @@ metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp, ASSERT3P(zal, !=, NULL); for (int d = 0; d < ndvas; d++) { - error = metaslab_alloc_dva(spa, mc, psize, dva, d, hintdva, + metaslab_class_t *_mc; + if (is_backup_alloc_class && (d == 1)) { + /* + * If we have the backup to pool props set, then make + * the 2nd copy of the data we are going to write go to + * the regular pool rather than yet another copy to the + * alloc class device. That way, if the special device + * is lost, there's still a backup in the pool. 
+ */ + _mc = spa_normal_class(spa); + } else { + _mc = mc; + } + + error = metaslab_alloc_dva(spa, _mc, psize, dva, d, hintdva, txg, flags, zal, allocator); if (error != 0) { for (d--; d >= 0; d--) { diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 3704ffd088..daa02266c9 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -471,6 +471,15 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp) DNODE_MIN_SIZE, ZPROP_SRC_NONE); } + if (spa_feature_is_active(spa, SPA_FEATURE_ALLOW_BACKUP_TO_POOL)) { + spa_prop_add_list(*nvp, ZPOOL_PROP_BACKUP_ALLOC_CLASS_TO_POOL, + NULL, spa->spa_backup_alloc_class, ZPROP_SRC_NONE); + } else { + /* Feature not active, turn off backup to pool */ + spa_prop_add_list(*nvp, ZPOOL_PROP_BACKUP_ALLOC_CLASS_TO_POOL, + NULL, B_FALSE, ZPROP_SRC_NONE); + } + if ((dp = list_head(&spa->spa_config_list)) != NULL) { if (dp->scd_path == NULL) { spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, @@ -604,6 +613,8 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) int error = 0, reset_bootfs = 0; uint64_t objnum = 0; boolean_t has_feature = B_FALSE; + boolean_t allow_backup_to_pool = B_FALSE; + boolean_t backup_alloc_class_to_pool = B_FALSE; elem = NULL; while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { @@ -611,6 +622,7 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) const char *strval, *slash, *check, *fname; const char *propname = nvpair_name(elem); zpool_prop_t prop = zpool_name_to_prop(propname); + spa_feature_t fid = 0; switch (prop) { case ZPOOL_PROP_INVAL: @@ -645,11 +657,30 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) } fname = strchr(propname, '@') + 1; - if (zfeature_lookup_name(fname, NULL) != 0) { + if (zfeature_lookup_name(fname, &fid) != 0) { error = SET_ERROR(EINVAL); break; } + /* + * Special case - If both: + * + * SPA_FEATURE_ALLOW_BACKUP_TO_POOL = disabled + * + * ... and ... + * + * ZPOOL_PROP_BACKUP_ALLOC_CLASS_TO_POOL = on + * + * then we need to fail. 
Note that the presence + * of SPA_FEATURE_ALLOW_BACKUP_TO_POOL in the + * nvlist means it is enabled (although its + * intval will be 0). If it's disabled, then + * SPA_FEATURE_ALLOW_BACKUP_TO_POOL will not + * be in the nvlist at all. + */ + if (fid == SPA_FEATURE_ALLOW_BACKUP_TO_POOL) { + allow_backup_to_pool = B_TRUE; + } has_feature = B_TRUE; } else { error = SET_ERROR(EINVAL); } @@ -793,6 +824,15 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) if (strlen(strval) > ZPROP_MAX_COMMENT) error = SET_ERROR(E2BIG); break; + case ZPOOL_PROP_BACKUP_ALLOC_CLASS_TO_POOL: + error = nvpair_value_uint64(elem, &intval); + if (!error && intval > 1) + error = SET_ERROR(EINVAL); + + if (!error && intval == 1) { + backup_alloc_class_to_pool = B_TRUE; + } + break; default: break; @@ -805,6 +845,18 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) (void) nvlist_remove_all(props, zpool_prop_to_name(ZPOOL_PROP_DEDUPDITTO)); + if (spa_feature_is_active(spa, SPA_FEATURE_ALLOW_BACKUP_TO_POOL)) { + allow_backup_to_pool = B_TRUE; + } + + if (!allow_backup_to_pool && backup_alloc_class_to_pool) { + /* + * We can't enable pool props BACKUP_ALLOC_CLASS_TO_POOL if the + * feature flag SPA_FEATURE_ALLOW_BACKUP_TO_POOL is disabled. + */ + error = SET_ERROR(ZFS_ERR_BACKUP_DISABLED_BUT_REQUESTED); + } + if (!error && reset_bootfs) { error = nvlist_remove(props, zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); @@ -2485,6 +2537,52 @@ spa_check_removed(vdev_t *vd) } } +/* + * Decide what to do if we have missing/corrupted alloc class devices. + * + * If we have missing top-level vdevs and they are all alloc class devices with + * backup_to_pool set, then we may still be able to import the pool. + */ +static int +spa_check_for_bad_alloc_class_devices(spa_t *spa) +{ + if (spa->spa_missing_recovered_tvds == 0) + return (0); + + /* + * Are there missing alloc class devices but + * SPA_FEATURE_ALLOW_BACKUP_TO_POOL is not enabled? If so, + * then we can't import. 
+ */ + if (!spa_feature_is_active(spa, SPA_FEATURE_ALLOW_BACKUP_TO_POOL)) { + spa_load_note(spa, "some alloc class devices are missing, " + "cannot import."); + return (SET_ERROR(ENXIO)); + } + + /* + * If all the missing top-level devices are alloc class devices, and + * if they have all their data backed up to the pool, then we can still + * import the pool. + */ + if (spa->spa_missing_tvds > 0 && + spa->spa_missing_tvds == spa->spa_missing_recovered_tvds) { + spa_load_note(spa, "only alloc class devices are missing, and " + "the normal pool has copies of the alloc class data, so " + "it's still possible to import."); + return (0); + } + + /* + * If we're here, then it means that not all the missing top-level vdevs + * were alloc class devices. This should have been caught earlier. + */ + spa_load_note(spa, "some alloc class devices that are not backed up to " + "the pool are amongst those that are missing, cannot import"); + + return (SET_ERROR(ENXIO)); +} + static int spa_check_for_missing_logs(spa_t *spa) { @@ -3914,7 +4012,24 @@ spa_ld_open_vdevs(spa_t *spa) error = vdev_open(spa->spa_root_vdev); spa_config_exit(spa, SCL_ALL, FTAG); - if (spa->spa_missing_tvds != 0) { + if (spa->spa_missing_tvds != 0 && + spa->spa_missing_tvds == spa->spa_missing_recovered_tvds && + (error == 0 || error == ENOENT)) { + /* + * Special case: If all the missing top-level vdevs are special + * devices, we may or may not be able to import the pool, + * depending on if the relevant "backup to pool" feature and + * properties are set. At this early stage of import we do not + * have the feature flags loaded yet, so for now proceed + * with the import. We will do the backup checks later after + * the feature flags are loaded. + */ + spa_load_note(spa, "vdev tree has %lld missing special " + "top-level vdevs. 
Keep importing for now until we " + "can check the feature flags.", + (u_longlong_t)spa->spa_missing_tvds); + error = 0; + } else if (spa->spa_missing_tvds != 0) { spa_load_note(spa, "vdev tree has %lld missing top-level " "vdevs.", (u_longlong_t)spa->spa_missing_tvds); if (spa->spa_trust_config && (spa->spa_mode & SPA_MODE_WRITE)) { @@ -5337,6 +5452,13 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) if (error != 0) return (error); + spa_import_progress_set_notes(spa, "Checking for bad alloc class " + "devices"); + error = spa_check_for_bad_alloc_class_devices(spa); + if (error != 0) + return (error); + + spa_import_progress_set_notes(spa, "Loading dedup tables"); error = spa_ld_load_dedup_tables(spa); if (error != 0) @@ -6240,6 +6362,47 @@ spa_create_check_encryption_params(dsl_crypto_params_t *dcp, return (dmu_objset_create_crypt_check(NULL, dcp, NULL)); } +/* + * For each special or dedup vdev, disable backing up its data to the pool. + * + * Return 0 on success, non-zero otherwise. + */ +static int +spa_disable_alloc_class_backup(spa_t *spa) +{ + struct vdev_array *vda; + int rc = 0; + + /* + * TODO: I don't know what locks are required here + * + * I need to iterate over the vdev tree and write + * vd->vdev_backup_to_pool. + * + * Take more locks than I need to just to be sure. 
+ */ + int locks = SCL_CONFIG | SCL_STATE | SCL_VDEV; + + spa_config_enter(spa, locks, FTAG, RW_READER); + + /* Get an array of alloc class vdev_t's */ + vda = vdev_array_alloc(spa->spa_root_vdev, VDEV_ARRAY_SPECIAL_LEAF | + VDEV_ARRAY_DEDUP_LEAF); + if (vda == NULL) { + spa_config_exit(spa, locks, FTAG); + return (-1); + } + + for (int i = 0; i < vda->count; i++) { + vda->vds[i]->vdev_backup_to_pool = B_FALSE; + } + + spa_config_exit(spa, locks, FTAG); + + vdev_array_free(vda); + return (rc); +} + /* * Pool Creation */ @@ -6521,11 +6684,40 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, spa->spa_multihost = zpool_prop_default_numeric(ZPOOL_PROP_MULTIHOST); spa->spa_autotrim = zpool_prop_default_numeric(ZPOOL_PROP_AUTOTRIM); + /* + * Set initial backup settings. These may change after the nvlist + * properties are processed a little later in spa_sync_props(). + */ + spa->spa_backup_alloc_class = (boolean_t) + zpool_prop_default_numeric(ZPOOL_PROP_BACKUP_ALLOC_CLASS_TO_POOL); + if (props != NULL) { spa_configfile_set(spa, props, B_FALSE); spa_sync_props(props, tx); } + /* + * At this point in the code, the pool features are loaded and + * we can query them. If SPA_FEATURE_ALLOW_BACKUP_TO_POOL is disabled, + * then disable the pool prop. + */ + if (!spa_feature_is_active(spa, SPA_FEATURE_ALLOW_BACKUP_TO_POOL)) { + spa->spa_backup_alloc_class = B_FALSE; + } + + /* + * We now have the spa->spa_backup_alloc_class correctly set. + * + * Unfortunately, our vdev's vd->vdev_backup_to_pool values were + * already set earlier in spa_config_parse(). We need to update + * these vdev values to reflect our pool backup settings. + * + * Make things right by setting the vd->backup_to_pool to the correct + * value on all the alloc class vdevs. 
+ */ + if (!spa->spa_backup_alloc_class) + spa_disable_alloc_class_backup(spa); + for (int i = 0; i < ndraid; i++) spa_feature_incr(spa, SPA_FEATURE_DRAID, tx); @@ -7353,6 +7545,14 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing, return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); } + /* + * Our new/replaced alloc class vdev's backup setting should inherit + * the current pool property. + */ + if (vdev_is_leaf(oldvd) && vdev_is_alloc_class(oldvd)) { + newvd->vdev_backup_to_pool = spa->spa_backup_alloc_class; + } + /* * A dRAID spare can only replace a child of its parent dRAID vdev. */ @@ -9381,6 +9581,7 @@ spa_sync_props(void *arg, dmu_tx_t *tx) const char *elemname = nvpair_name(elem); zprop_type_t proptype; spa_feature_t fid; + boolean_t boolval; switch (prop = zpool_name_to_prop(elemname)) { case ZPOOL_PROP_VERSION: @@ -9444,6 +9645,29 @@ spa_sync_props(void *arg, dmu_tx_t *tx) "%s=%s", nvpair_name(elem), strval); break; + case ZPOOL_PROP_BACKUP_ALLOC_CLASS_TO_POOL: + boolval = (boolean_t)fnvpair_value_uint64(elem); + + /* + * If we're disabling backup, then mark all the alloc + * class vdevs as not fully backed-up anymore. + */ + spa->spa_backup_alloc_class = boolval; + if (boolval == B_FALSE) + spa_disable_alloc_class_backup(spa); + + /* + * Dirty the configuration on vdevs as above. + */ + if (tx->tx_txg != TXG_INITIAL) { + vdev_config_dirty(spa->spa_root_vdev); + spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); + } + + spa_history_log_internal(spa, "set", tx, + "%s=%s", nvpair_name(elem), boolval ? 
"on" : "off"); + break; + case ZPOOL_PROP_INVAL: if (zpool_prop_feature(elemname)) { fname = strchr(elemname, '@') + 1; diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 68b9076141..36bdaaada7 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -721,6 +721,9 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime_ms); spa->spa_deadman_ziotime = MSEC2NSEC(zfs_deadman_ziotime_ms); + + spa->spa_backup_alloc_class = B_TRUE; + spa_set_deadman_failmode(spa, zfs_deadman_failmode); spa_set_allocator(spa, zfs_active_allocator); @@ -2812,10 +2815,20 @@ spa_syncing_log_sm(spa_t *spa) return (spa->spa_syncing_log_sm); } +/* + * Record the total number of missing top-level vdevs ('missing'), and the + * number of missing top-level vdevs that are recoverable ('missing_recovered'). + * In this case, missing_recovered is the number of top-level alloc class vdevs + * that are fully backed up to the pool, and thus their data is recoverable. + * + * The separete 'missing_recovered' count is used during pool import to + * determine if we can import a pool with missing alloc class vdevs. 
+ */ void -spa_set_missing_tvds(spa_t *spa, uint64_t missing) +spa_set_missing_tvds(spa_t *spa, uint64_t missing, uint64_t missing_recovered) { spa->spa_missing_tvds = missing; + spa->spa_missing_recovered_tvds = missing_recovered; } /* diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index ebba453e2b..86e6ca484d 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -722,6 +722,60 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) return (vd); } +boolean_t +vdev_is_leaf(vdev_t *vd) +{ + return (vd->vdev_children == 0); +} + +/* Return true if vdev or TLD vdev is special alloc class */ +boolean_t +vdev_is_special(vdev_t *vd) +{ + if (vd->vdev_alloc_bias == VDEV_BIAS_SPECIAL) + return (B_TRUE); + + /* + * If the vdev is a leaf vdev, and is part of a mirror, its parent + * 'mirror' TLD will have vdev_alloc_bias == VDEV_BIAS_SPECIAL, but the + * leaf vdev itself will not. So we also need to check the parent + * in those cases. + */ + if (vdev_is_leaf(vd) && + (vd->vdev_parent != NULL && vdev_is_special(vd->vdev_parent))) { + return (B_TRUE); + } + + return (B_FALSE); +} + +/* Return true if vdev or TLD vdev is dedup alloc class */ +boolean_t +vdev_is_dedup(vdev_t *vd) +{ + if (vd->vdev_alloc_bias == VDEV_BIAS_DEDUP) + return (B_TRUE); + + /* + * If the vdev is a leaf vdev, and is part of a mirror, it's parent + * 'mirror' TLD will have vdev_alloc_bias == VDEV_BIAS_DEDUP, but the + * leaf vdev itself will not. So we also need to check the parent + * in those cases. + */ + if (vdev_is_leaf(vd) && + (vd->vdev_parent != NULL && vdev_is_dedup(vd->vdev_parent))) { + return (B_TRUE); + } + + return (B_FALSE); +} + +boolean_t +vdev_is_alloc_class(vdev_t *vd) +{ + return (vdev_is_special(vd) || vdev_is_dedup(vd)); +} + /* * Allocate a new vdev. 
The 'alloctype' is used to control whether we are * creating a new vdev or loading an existing one - the behavior is slightly @@ -740,6 +794,7 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, int rc; vdev_alloc_bias_t alloc_bias = VDEV_BIAS_NONE; boolean_t top_level = (parent && !parent->vdev_parent); + const char *bias = NULL; ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); @@ -791,8 +846,6 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, return (SET_ERROR(ENOTSUP)); if (top_level && alloctype == VDEV_ALLOC_ADD) { - const char *bias; - /* * If creating a top-level vdev, check for allocation * classes input. @@ -834,6 +887,11 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, vd->vdev_tsd = tsd; vd->vdev_islog = islog; + if (nvlist_lookup_string(nv, ZPOOL_CONFIG_ALLOCATION_BIAS, + &bias) == 0) { + alloc_bias = vdev_derive_alloc_bias(bias); + } + if (top_level && alloc_bias != VDEV_BIAS_NONE) vd->vdev_alloc_bias = alloc_bias; @@ -1028,6 +1086,40 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, */ vdev_add_child(parent, vd); + /* + * Now that we're added to our parent, we can lookup if we're an alloc + * class device. Functions like vdev_is_special() and vdev_is_dedup() + * will look at the parent. 
+ */ + vd->vdev_backup_to_pool = B_FALSE; + if (vdev_is_alloc_class(vd)) { + if (alloctype == VDEV_ALLOC_LOAD || + alloctype == VDEV_ALLOC_SPLIT) { + /* + * If ZPOOL_CONFIG_BACKUP_TO_POOL exists then + * vdev_backup_to_pool is true + */ + if (nvlist_lookup_boolean(nv, + ZPOOL_CONFIG_BACKUP_TO_POOL) == 0) { + vd->vdev_backup_to_pool = B_TRUE; + } + } else if (alloctype == VDEV_ALLOC_ADD) { + vd->vdev_backup_to_pool = spa->spa_backup_alloc_class; + } + } else if ((nvlist_lookup_boolean(nv, + ZPOOL_CONFIG_BACKUP_TO_POOL) == 0) && + alloctype == VDEV_ALLOC_SPLIT) { + /* + * Special case: our vd may not be marked as alloc class if + * it's in a mirror and its parent 'mirror-1' device is not + * initialized fully (as in the case of a split). If the user + * is doing a split, and the old vdev had + * ZPOOL_CONFIG_BACKUP_TO_POOL set, then also set it for the + * new vdev. + */ + vd->vdev_backup_to_pool = B_TRUE; + } + *vdp = vd; return (0); @@ -3680,8 +3772,9 @@ vdev_load(vdev_t *vd) VDEV_TOP_ZAP_ALLOCATION_BIAS, 1, sizeof (bias_str), bias_str); if (error == 0) { - ASSERT(vd->vdev_alloc_bias == VDEV_BIAS_NONE); - vd->vdev_alloc_bias = vdev_derive_alloc_bias(bias_str); + if (vd->vdev_alloc_bias == VDEV_BIAS_NONE) + vd->vdev_alloc_bias = + vdev_derive_alloc_bias(bias_str); } else if (error != ENOENT) { vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, VDEV_AUX_CORRUPT_DATA); @@ -4140,7 +4233,8 @@ vdev_fault(spa_t *spa, uint64_t guid, vdev_aux_t aux) * If this device has the only valid copy of the data, then * back off and simply mark the vdev as degraded instead. */ - if (!tvd->vdev_islog && vd->vdev_aux == NULL && vdev_dtl_required(vd)) { + if (!tvd->vdev_islog && !vdev_is_fully_backed_up(vd) && + vd->vdev_aux == NULL && vdev_dtl_required(vd)) { vd->vdev_degraded = 1ULL; vd->vdev_faulted = 0ULL; @@ -4356,8 +4450,8 @@ top: * don't allow it to be offlined. Log devices are always * expendable. 
*/ - if (!tvd->vdev_islog && vd->vdev_aux == NULL && - vdev_dtl_required(vd)) + if (!tvd->vdev_islog && !vdev_is_fully_backed_up(vd) && + vd->vdev_aux == NULL && vdev_dtl_required(vd)) return (spa_vdev_state_exit(spa, NULL, SET_ERROR(EBUSY))); @@ -4413,7 +4507,8 @@ top: vd->vdev_offline = B_TRUE; vdev_reopen(tvd); - if (!tvd->vdev_islog && vd->vdev_aux == NULL && + if (!tvd->vdev_islog && !vdev_is_fully_backed_up(vd) && + vd->vdev_aux == NULL && vdev_is_dead(tvd)) { vd->vdev_offline = B_FALSE; vdev_reopen(tvd); @@ -5095,6 +5190,104 @@ vdev_space_update(vdev_t *vd, int64_t alloc_delta, int64_t defer_delta, /* Note: metaslab_class_space_update moved to metaslab_space_update */ } + +/* If the vdev matches any of the flags, then return true. */ +static boolean_t vdev_array_vdev_is_in_flags(vdev_t *vd, uint64_t flags) +{ + uint64_t vdflags = 0; + + if (vdev_is_leaf(vd)) { + vdflags |= VDEV_ARRAY_ANY_LEAF; + if (vdev_is_special(vd)) + vdflags |= VDEV_ARRAY_SPECIAL_LEAF; + + if (vdev_is_dedup(vd)) + vdflags |= VDEV_ARRAY_DEDUP_LEAF; + } + + /* If any flags match then success */ + if (flags & vdflags) + return (B_TRUE); + + return (B_FALSE); +} + +/* + * We assume vda->vds[] is already allocated with the correct number of entries. + */ +static void +vdev_array_visit(vdev_t *vd, uint64_t flags, struct vdev_array *vda) +{ + if (vdev_array_vdev_is_in_flags(vd, flags)) { + if (!(flags & VDEV_ARRAY_COUNT)) { + /* Add it to our array */ + vda->vds[vda->count] = vd; + } + vda->count++; + } + + for (uint64_t i = 0; i < vd->vdev_children; i++) { + vdev_array_visit(vd->vdev_child[i], flags, vda); + } +} + +void +vdev_array_free(struct vdev_array *vda) +{ + if (vda->vds != NULL) + kmem_free(vda->vds, sizeof (*vda->vds) * vda->count); + + kmem_free(vda, sizeof (*vda)); +} + +/* + * Convenience function to iterate over the vdev tree, selecting only the vdevs + * you want, and return an array of the vdevs. + * + * Flags are OR'd to include vdevs. When flags == 0x0, no vdevs are matched. 
+ * + * Array entries are returned in breadth first search order. + * + * The vdev_array returned needs to be freed with vdev_array_free() when you + * are done with it. + * + * You must have SCL_VDEV held so that the vdev tree doesn't change out from + * under you while calling this function or using the vdev_array returned. + */ +struct vdev_array * +vdev_array_alloc(vdev_t *rvd, uint64_t flags) +{ + struct vdev_array *vda; + + ASSERT(spa_config_held(rvd->vdev_spa, SCL_VDEV, RW_READER)); + + vda = kmem_zalloc(sizeof (*vda), KM_SLEEP); + if (!vda) + return (NULL); + + /* + * We're going to do a first pass where we visit all the vdevs + * to get the count. After we get the count, we can then do the + * real visit to all the vdevs and add them to the array. + */ + vda->count = 0; + vda->vds = NULL; + + /* Do a dry run to get the count only */ + vdev_array_visit(rvd, VDEV_ARRAY_COUNT | flags, vda); + + /* We have the count, allocate the array */ + vda->vds = kmem_zalloc(sizeof (vda->vds[0]) * vda->count, KM_SLEEP); + if (vda->vds == NULL) { + vdev_array_free(vda); + return (NULL); + } + + vda->count = 0; /* init count to 0 again for vdev_array_visit() */ + vdev_array_visit(rvd, flags, vda); + return (vda); +} + /* * Mark a top-level vdev's config as dirty, placing it on the dirty list * so that it will be written out next time the vdev configuration is synced. @@ -5259,10 +5452,14 @@ vdev_propagate_state(vdev_t *vd) * device, treat the root vdev as if it were * degraded. 
*/ - if (child->vdev_islog && vd == rvd) + if ((child->vdev_islog || + vdev_is_fully_backed_up(child)) && + (vd == rvd)) { degraded++; - else + } else { faulted++; + } + } else if (child->vdev_state <= VDEV_STATE_DEGRADED) { degraded++; } @@ -5438,8 +5635,9 @@ vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux) zfs_post_state_change(spa, vd, save_state); } - if (!isopen && vd->vdev_parent) + if (!isopen && vd->vdev_parent) { vdev_propagate_state(vd->vdev_parent); + } } boolean_t @@ -5488,6 +5686,54 @@ vdev_is_concrete(vdev_t *vd) } } +/* + * Given a TLD vdev one level under the root vdev, return true if the TLD + * is fully backed-up to the pool. Backed-up means: + * + * 1. TLD is an alloc class device + * 2. The TLD has vdev_backup_to_pool set. If the TLD is a group (like a + * mirror) then all the child devices in the group must have + * vdev_backup_to_pool set. + * 3. vdev_backup_to_pool has always been set since the creation of this vdev. + * That means that all its data has a copy in the pool. + */ +static boolean_t +tld_is_fully_backed_up(vdev_t *tvd) +{ + if (!vdev_is_alloc_class(tvd)) + return (B_FALSE); + + /* Just a single device under the root */ + if (vdev_is_leaf(tvd)) + return (tvd->vdev_backup_to_pool); + + for (int c = 0; c < tvd->vdev_children; c++) { + vdev_t *cvd = tvd->vdev_child[c]; + + if (!cvd->vdev_backup_to_pool) + return (B_FALSE); + } + + return (B_TRUE); + } + + /* + * Is the vdev an alloc class vdev that is fully backed up to the pool? + * + * This function works for both top-level vdevs and leaf vdevs. + */ +boolean_t +vdev_is_fully_backed_up(vdev_t *vd) +{ + if (!vdev_is_alloc_class(vd)) + return (B_FALSE); + + if (!vdev_is_leaf(vd)) + return (tld_is_fully_backed_up(vd)); + + return (vd->vdev_backup_to_pool); +} + /* * Determine if a log device has valid content. 
If the vdev was * removed or faulted in the MOS config then we know that @@ -6296,8 +6542,22 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) } continue; /* Numeric Properites */ + case VDEV_PROP_BACKUP_TO_POOL: + /* + * Property is only used on leaf alloc class + * vdevs. + */ + if (vdev_is_leaf(vd) && vdev_is_alloc_class(vd)) + vdev_prop_add_list(outnvl, propname, + NULL, vd->vdev_backup_to_pool, + ZPROP_SRC_NONE); + else + vdev_prop_add_list(outnvl, propname, + NULL, ZPROP_BOOLEAN_NA, + ZPROP_SRC_NONE); + continue; + case VDEV_PROP_ALLOCATING: - /* Leaf vdevs cannot have this property */ if (vd->vdev_mg == NULL && vd->vdev_top != NULL) { src = ZPROP_SRC_NONE; diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index c31f48028b..745dc1062e 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -521,8 +521,7 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, vd->vdev_removing); } - /* zpool command expects alloc class data */ - if (getstats && vd->vdev_alloc_bias != VDEV_BIAS_NONE) { + if (vd->vdev_alloc_bias != VDEV_BIAS_NONE) { const char *bias = NULL; switch (vd->vdev_alloc_bias) { @@ -539,11 +538,17 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, ASSERT3U(vd->vdev_alloc_bias, ==, VDEV_BIAS_NONE); } + fnvlist_add_string(nv, ZPOOL_CONFIG_ALLOCATION_BIAS, bias); } } + if (vdev_is_alloc_class(vd) && vdev_is_leaf(vd) && + vd->vdev_backup_to_pool) { + fnvlist_add_boolean(nv, ZPOOL_CONFIG_BACKUP_TO_POOL); + } + if (vd->vdev_dtl_sm != NULL) { fnvlist_add_uint64(nv, ZPOOL_CONFIG_DTL, space_map_object(vd->vdev_dtl_sm)); @@ -1804,9 +1809,10 @@ vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags) spa_t *spa = svd[0]->vdev_spa; zio_t *zio; uint64_t good_writes = 0; + boolean_t all_failures_are_backed_up = B_FALSE; + int rc; zio = zio_root(spa, NULL, NULL, flags); - for (int v = 0; v < svdcount; v++) vdev_uberblock_sync(zio, &good_writes, ub, svd[v], flags); @@ -1850,7 
+1856,38 @@ vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags) (void) zio_wait(zio); - return (good_writes >= 1 ? 0 : EIO); + /* + * Special case: + * + * If we had zero good writes, but all the writes were to alloc class + * disks that were fully backed up to the pool, then it's not fatal. + */ + if (good_writes == 0) { + all_failures_are_backed_up = B_TRUE; + + for (int v = 0; v < svdcount; v++) { + if (!vdev_is_fully_backed_up(svd[v])) { + all_failures_are_backed_up = B_FALSE; + break; + } + } + } + + if (good_writes >= 1) { + /* success */ + rc = 0; + } else if (all_failures_are_backed_up) { + /* + * All the failures are on allocation class disks that were + * fully backed up to the pool, so this isn't fatal. + */ + rc = 0; + } else { + /* failure */ + rc = EIO; + } + + return (rc); } /* @@ -1966,7 +2003,8 @@ vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags) good_writes = kmem_zalloc(sizeof (uint64_t), KM_SLEEP); zio_t *vio = zio_null(zio, spa, NULL, - (vd->vdev_islog || vd->vdev_aux != NULL) ? + (vd->vdev_islog || vd->vdev_aux != NULL || + vdev_is_fully_backed_up(vd)) ? vdev_label_sync_ignore_done : vdev_label_sync_top_done, good_writes, flags); vdev_label_sync(vio, good_writes, vd, l, txg, flags); @@ -2019,6 +2057,7 @@ retry: if (error != 0) { if ((flags & ZIO_FLAG_TRYHARD) != 0) return (error); + flags |= ZIO_FLAG_TRYHARD; } diff --git a/module/zfs/vdev_root.c b/module/zfs/vdev_root.c index e132643dc3..5ce21a6338 100644 --- a/module/zfs/vdev_root.c +++ b/module/zfs/vdev_root.c @@ -32,6 +32,7 @@ #include #include #include +#include /* * Virtual device vector for the pool's root vdev. 
@@ -46,6 +47,7 @@ vdev_root_core_tvds(vdev_t *vd) vdev_t *cvd = vd->vdev_child[c]; if (!cvd->vdev_ishole && !cvd->vdev_islog && + !vdev_is_fully_backed_up(cvd) && cvd->vdev_ops != &vdev_indirect_ops) { tvds++; } @@ -87,6 +89,7 @@ vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, spa_t *spa = vd->vdev_spa; int lasterror = 0; int numerrors = 0; + int numerrors_recovered = 0; if (vd->vdev_children == 0) { vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; @@ -97,18 +100,25 @@ for (int c = 0; c < vd->vdev_children; c++) { vdev_t *cvd = vd->vdev_child[c]; - if (cvd->vdev_open_error && !cvd->vdev_islog && cvd->vdev_ops != &vdev_indirect_ops) { lasterror = cvd->vdev_open_error; numerrors++; + if (vdev_is_fully_backed_up(cvd)) + numerrors_recovered++; } } - if (spa_load_state(spa) != SPA_LOAD_NONE) - spa_set_missing_tvds(spa, numerrors); + if (spa_load_state(spa) != SPA_LOAD_NONE) { + spa_set_missing_tvds(spa, numerrors, numerrors_recovered); + } - if (too_many_errors(vd, numerrors)) { + if (numerrors != 0 && (numerrors == numerrors_recovered)) { + vdev_dbgmsg(vd, "there were %lu top-level errors, but they were" + " all on backed up alloc class devices. Keep trying to " + "import.", + (long unsigned) numerrors); + } else if (too_many_errors(vd, numerrors)) { vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS; return (lasterror); } diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 08d56eef83..7fc6794b32 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -3505,6 +3505,19 @@ zio_ddt_write(zio_t *zio) ASSERT(BP_IS_HOLE(bp) || zio->io_bp_override); ASSERT(!(zio->io_bp_override && (zio->io_flags & ZIO_FLAG_RAW))); + /* + * Dedup writes can either go to a dedicated dedup device or to a + * dedicated special device. If we have alloc class backups on, we need + * to make an extra copy of the data to go on the pool. 
To do this + * we need to adjust the ZIO's copies here so the later stages in the + * ZIO pipeline work correctly. + */ + if (spa->spa_backup_alloc_class && zp->zp_copies == 1) { + zp->zp_copies = 2; + } + + p = zp->zp_copies; + ddt_enter(ddt); dde = ddt_lookup(ddt, bp, B_TRUE); ddp = &dde->dde_phys[p]; @@ -3635,6 +3648,22 @@ zio_dva_throttle(zio_t *zio) mc = spa_preferred_class(spa, zio->io_size, zio->io_prop.zp_type, zio->io_prop.zp_level, zio->io_prop.zp_zpl_smallblk); + /* + * If backup alloc classes is enabled, we will do the regular + * write to the special/dedup device and an additional "backup" + * write to the normal pool. That way if the special/dedup devices + * all fail, we don't lose all data in our pool. + * + * Reserve that 2nd write to the regular pool here. The DVAs + * for both writes will later be allocated in the + * next step in the ZIO pipeline in + * zio_dva_allocate()->metaslab_alloc(). + */ + if ((spa->spa_backup_alloc_class && (mc == spa_special_class(spa) || + mc == spa_dedup_class(spa))) && zio->io_prop.zp_copies == 1) { + zio->io_prop.zp_copies = 2; + } + if (zio->io_priority == ZIO_PRIORITY_SYNC_WRITE || !mc->mc_alloc_throttle_enabled || zio->io_child_type == ZIO_CHILD_GANG || diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 912344b4ed..a648fabe39 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -53,6 +53,14 @@ tags = ['functional', 'arc'] tests = ['atime_001_pos', 'atime_002_neg', 'root_atime_off', 'root_atime_on'] tags = ['functional', 'atime'] + +[tests/functional/backup_alloc_class] +tests = ['backup_alloc_class_add', 'backup_alloc_class_create', + 'backup_alloc_class_files', 'backup_alloc_class_import', + 'backup_alloc_class_offline', 'backup_alloc_class_prop', + 'backup_alloc_class_scrub', 'backup_alloc_class_split'] +tags = ['functional', 'backup_alloc_class'] + [tests/functional/bclone] tests = ['bclone_crossfs_corner_cases_limited', 'bclone_crossfs_data', diff --git 
a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib index dfab48d2cd..49a508cc59 100644 --- a/tests/zfs-tests/include/libtest.shlib +++ b/tests/zfs-tests/include/libtest.shlib @@ -1081,6 +1081,16 @@ function get_pool_prop # property pool zpool get -Hpo value "$prop" "$pool" || log_fail "zpool get $prop $pool" } +# Get the specified vdev property in parsable format or fail +function get_vdev_prop +{ + typeset prop=$1 + typeset pool=$2 + typeset vdev=$3 + + zpool get -Hpo value "$prop" "$pool" "$vdev" || log_fail "zpool get $prop $pool $vdev" +} + # Return 0 if a pool exists; $? otherwise # # $1 - pool name @@ -1815,7 +1825,7 @@ function verify_pool function get_disklist # pool { echo $(zpool iostat -v $1 | awk '(NR > 4) {print $1}' | \ - grep -vEe '^-----' -e "^(mirror|raidz[1-3]|draid[1-3]|spare|log|cache|special|dedup)|\-[0-9]$") + grep -vEe '^-----' -e "^(mirror|raidz[1-3]|draid[1-3]|spare|log|cache|special|dedup)(\-[0-9])+$") } # @@ -3907,3 +3917,28 @@ function pop_coredump_pattern ;; esac } + +# Get a list of all vdevs in the pool that are a certain type. 
+# +# The returned list is in a space-separated string, with the full path of each +# vdev included: +# +# "/dev/sda /dev/sdb /dev/sdc" +# +# $1: Type of disk to get ('special', 'dedup', 'log', 'cache', 'spare') +# $2: (optional) pool name +function get_list_of_vdevs_that_are { + poolname=${2:-$TESTPOOL} + + zpool status -P $poolname | sed -r '/\s+(mirror|draid|raidz)/d' | \ + awk -v token="$1" '{ + if (tmp == 1 && substr($1,1,1) == "/") { + if (first != 1) { + printf "%s", $1; + first=1; + } else { + printf " %s", $1; + } + } else {tmp=0}; if ($1 == token) {tmp=1}} + END {print ""}' +} diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index db6b4c0146..51d75cf516 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -90,6 +90,8 @@ nobase_dist_datadir_zfs_tests_tests_DATA += \ functional/alloc_class/alloc_class.kshlib \ functional/atime/atime.cfg \ functional/atime/atime_common.kshlib \ + functional/backup_alloc_class/backup_alloc_class.cfg \ + functional/backup_alloc_class/backup_alloc_class.kshlib \ functional/bclone/bclone.cfg \ functional/bclone/bclone_common.kshlib \ functional/bclone/bclone_corner_cases.kshlib \ @@ -441,6 +443,16 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/atime/root_atime_on.ksh \ functional/atime/root_relatime_on.ksh \ functional/atime/setup.ksh \ + functional/backup_alloc_class/backup_alloc_class_add.ksh \ + functional/backup_alloc_class/backup_alloc_class_create.ksh \ + functional/backup_alloc_class/backup_alloc_class_files.ksh \ + functional/backup_alloc_class/backup_alloc_class_import.ksh \ + functional/backup_alloc_class/backup_alloc_class_prop.ksh \ + functional/backup_alloc_class/backup_alloc_class_offline.ksh \ + functional/backup_alloc_class/backup_alloc_class_scrub.ksh \ + functional/backup_alloc_class/backup_alloc_class_split.ksh \ + functional/backup_alloc_class/cleanup.ksh \ + functional/backup_alloc_class/setup.ksh \ 
functional/bclone/bclone_crossfs_corner_cases.ksh \ functional/bclone/bclone_crossfs_corner_cases_limited.ksh \ functional/bclone/bclone_crossfs_data.ksh \ diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_001_pos.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_001_pos.ksh index 3237d7cb78..7fbae70681 100755 --- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_001_pos.ksh @@ -32,12 +32,16 @@ log_assert $claim log_onexit cleanup log_must disk_setup -for type in special dedup; do - log_mustnot zpool create -d $TESTPOOL $CLASS_DISK0 $type $CLASS_DISK1 + +for arg in '-o feature@allow_backup_to_pool=disabled' '' ; do + for type in special dedup; do + log_mustnot zpool create $args -d $TESTPOOL $CLASS_DISK0 $type \ + $CLASS_DISK1 + done + log_must zpool create $TESTPOOL raidz $ZPOOL_DISKS special mirror \ + $CLASS_DISK0 $CLASS_DISK1 + log_must display_status "$TESTPOOL" + log_must zpool destroy -f "$TESTPOOL" done -log_must zpool create $TESTPOOL raidz $ZPOOL_DISKS special mirror \ - $CLASS_DISK0 $CLASS_DISK1 -log_must display_status "$TESTPOOL" -log_must zpool destroy -f "$TESTPOOL" log_pass $claim diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_002_neg.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_002_neg.ksh index b2cac59fd4..7f11ccfe34 100755 --- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_002_neg.ksh +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_002_neg.ksh @@ -31,18 +31,31 @@ log_onexit cleanup log_must disk_setup -log_mustnot zpool create $TESTPOOL raidz $ZPOOL_DISKS special $CLASS_DISK0 -log_mustnot display_status $TESTPOOL -log_mustnot zpool destroy -f $TESTPOOL +# Test with the older mode where there was no allow_backup_to_pool +# feature. With this configuration, the special device redundancy needs +# to match the pool. 
+arg='-o feature@allow_backup_to_pool=disabled' +for atype in "special" "dedup" ; do + log_mustnot zpool create $arg $TESTPOOL raidz $ZPOOL_DISKS $atype $CLASS_DISK0 -log_mustnot zpool create $TESTPOOL $ZPOOL_DISKS special mirror \ - $CLASS_DISK0 $CLASS_DISK1 -log_mustnot display_status $TESTPOOL -log_mustnot zpool destroy -f $TESTPOOL + log_mustnot zpool create $arg $TESTPOOL $ZPOOL_DISKS $atype mirror \ + $CLASS_DISK0 $CLASS_DISK1 -log_mustnot zpool create $TESTPOOL raidz $ZPOOL_DISKS special raidz \ - $CLASS_DISK0 $CLASS_DISK1 $CLASS_DISK2 -log_mustnot display_status $TESTPOOL -log_mustnot zpool destroy -f $TESTPOOL + log_mustnot zpool create $arg $TESTPOOL raidz $ZPOOL_DISKS $atype raidz \ + $CLASS_DISK0 $CLASS_DISK1 $CLASS_DISK2 + + # Now test with backup_allocation_classes=enabled (default setting) + log_must zpool create $TESTPOOL raidz $ZPOOL_DISKS $atype $CLASS_DISK0 + log_must zpool destroy $TESTPOOL + + log_must zpool create $TESTPOOL $ZPOOL_DISKS $atype mirror \ + $CLASS_DISK0 $CLASS_DISK1 + + log_must zpool destroy $TESTPOOL + + log_mustnot zpool create $TESTPOOL raidz $ZPOOL_DISKS $atype raidz \ + $CLASS_DISK0 $CLASS_DISK1 $CLASS_DISK2 + +done log_pass $claim diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_003_pos.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_003_pos.ksh index 78d40ce56d..1aa8f59eeb 100755 --- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_003_pos.ksh +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_003_pos.ksh @@ -31,27 +31,29 @@ log_onexit cleanup log_must disk_setup -for type in "" "mirror" "raidz" -do - log_must zpool create $TESTPOOL $type $ZPOOL_DISKS +for arg in '-o feature@allow_backup_to_pool=disabled' '' ; do + for type in "" "mirror" "raidz" + do + log_must zpool create $arg $TESTPOOL $type $ZPOOL_DISKS - if [ "$type" = "mirror" ]; then - log_must zpool add $TESTPOOL special mirror \ - $CLASS_DISK0 $CLASS_DISK1 $CLASS_DISK2 - log_must zpool iostat -H $TESTPOOL 
$CLASS_DISK0 - log_must zpool iostat -H $TESTPOOL $CLASS_DISK1 - log_must zpool iostat -H $TESTPOOL $CLASS_DISK2 - elif [ "$type" = "raidz" ]; then - log_must zpool add $TESTPOOL special mirror \ - $CLASS_DISK0 $CLASS_DISK1 - log_must zpool iostat -H $TESTPOOL $CLASS_DISK0 - log_must zpool iostat -H $TESTPOOL $CLASS_DISK1 - else - log_must zpool add $TESTPOOL special $CLASS_DISK0 - log_must zpool iostat -H $TESTPOOL $CLASS_DISK0 - fi + if [ "$type" = "mirror" ]; then + log_must zpool add $TESTPOOL special mirror \ + $CLASS_DISK0 $CLASS_DISK1 $CLASS_DISK2 + log_must zpool iostat -H $TESTPOOL $CLASS_DISK0 + log_must zpool iostat -H $TESTPOOL $CLASS_DISK1 + log_must zpool iostat -H $TESTPOOL $CLASS_DISK2 + elif [ "$type" = "raidz" ]; then + log_must zpool add $TESTPOOL special mirror \ + $CLASS_DISK0 $CLASS_DISK1 + log_must zpool iostat -H $TESTPOOL $CLASS_DISK0 + log_must zpool iostat -H $TESTPOOL $CLASS_DISK1 + else + log_must zpool add $TESTPOOL special $CLASS_DISK0 + log_must zpool iostat -H $TESTPOOL $CLASS_DISK0 + fi - log_must zpool destroy -f $TESTPOOL + log_must zpool destroy -f $TESTPOOL + done done log_pass $claim diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_004_pos.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_004_pos.ksh index 04ce486adb..7347c89bb7 100755 --- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_004_pos.ksh +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_004_pos.ksh @@ -36,31 +36,35 @@ typeset ac_value typeset stype="" typeset sdisks="" -for type in "" "mirror" "raidz" -do - if [ "$type" = "mirror" ]; then - stype="mirror" - sdisks="${CLASS_DISK0} ${CLASS_DISK1} ${CLASS_DISK2}" - elif [ "$type" = "raidz" ]; then - stype="mirror" - sdisks="${CLASS_DISK0} ${CLASS_DISK1}" - else - stype="" - sdisks="${CLASS_DISK0}" - fi +for arg in '-o feature@allow_backup_to_pool=disabled' '' ; do + for type in "" "mirror" "raidz" + do + if [ "$type" = "mirror" ]; then + stype="mirror" + 
sdisks="${CLASS_DISK0} ${CLASS_DISK1} ${CLASS_DISK2}" + elif [ "$type" = "raidz" ]; then + stype="mirror" + sdisks="${CLASS_DISK0} ${CLASS_DISK1}" + else + stype="" + sdisks="${CLASS_DISK0}" + fi - log_must zpool create $TESTPOOL $type $ZPOOL_DISKS \ - special $stype $sdisks + log_must zpool create $arg $TESTPOOL $type $ZPOOL_DISKS \ + special $stype $sdisks - ac_value="$(zpool get -H -o property,value all | awk '/allocation_classes/ {print $2}')" - if [ "$ac_value" = "active" ]; then - log_note "feature@allocation_classes is active" - else - log_fail "feature@allocation_classes not active, \ - status = $ac_value" - fi + ac_value="$(zpool get -H -o property,value \ + feature@allocation_classes | \ + awk '/allocation_classes/ {print $2}')" + if [ "$ac_value" = "active" ]; then + log_note "feature@allocation_classes is active" + else + log_fail "feature@allocation_classes not active, \ + status = $ac_value" + fi - log_must zpool destroy -f $TESTPOOL + log_must zpool destroy -f $TESTPOOL + done done log_pass $claim diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_005_pos.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_005_pos.ksh index 08c703e21a..734488290c 100755 --- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_005_pos.ksh +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_005_pos.ksh @@ -34,38 +34,44 @@ log_must disk_setup typeset ac_value -for type in "" "mirror" "raidz" -do - if [ "$type" = "mirror" ]; then - log_must zpool create $TESTPOOL $type $ZPOOL_DISK0 $ZPOOL_DISK1 - else - log_must zpool create $TESTPOOL $type $ZPOOL_DISKS - fi - ac_value="$(zpool get -H -o property,value all | \ - awk '/allocation_classes/ {print $2}')" - if [ "$ac_value" = "enabled" ]; then - log_note "feature@allocation_classes is enabled" - else - log_fail "feature@allocation_classes not enabled, \ - status = $ac_value" - fi +for arg in '-o feature@allow_backup_to_pool=disabled' '' ; do + for type in "" "mirror" "raidz" + 
do + if [ "$type" = "mirror" ]; then + log_must zpool create $arg $TESTPOOL $type $ZPOOL_DISK0 \ + $ZPOOL_DISK1 + else + log_must zpool create $arg $TESTPOOL $type $ZPOOL_DISKS + fi + ac_value="$(zpool get -H -o property,value \ + feature@allocation_classes | \ + awk '/allocation_classes/ {print $2}')" + if [ "$ac_value" = "enabled" ]; then + log_note "feature@allocation_classes is enabled" + else + log_fail "feature@allocation_classes not enabled, \ + status = $ac_value" + fi - if [ "$type" = "" ]; then - log_must zpool add $TESTPOOL special $CLASS_DISK0 - else - log_must zpool add $TESTPOOL special mirror \ - $CLASS_DISK0 $CLASS_DISK1 - fi - ac_value="$(zpool get -H -o property,value all | \ - awk '/allocation_classes/ {print $2}')" - if [ "$ac_value" = "active" ]; then - log_note "feature@allocation_classes is active" - else - log_fail "feature@allocation_classes not active, \ - status = $ac_value" - fi + if [ "$type" = "" ]; then + log_must zpool add $TESTPOOL special $CLASS_DISK0 + else + log_must zpool add $TESTPOOL special mirror \ + $CLASS_DISK0 $CLASS_DISK1 + fi + ac_value="$(zpool get -H -o property,value \ + feature@allocation_classes | \ + awk '/allocation_classes/ {print $2}')" - log_must zpool destroy -f $TESTPOOL + if [ "$ac_value" = "active" ]; then + log_note "feature@allocation_classes is active" + else + log_fail "feature@allocation_classes not active, \ + status = $ac_value" + fi + + log_must zpool destroy -f $TESTPOOL + done done log_pass "Values of allocation_classes feature flag correct." 
diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_006_pos.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_006_pos.ksh index 5852b2876e..60b485d7dc 100755 --- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_006_pos.ksh +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_006_pos.ksh @@ -32,10 +32,14 @@ log_onexit cleanup log_must disk_setup -log_must zpool create $TESTPOOL \ - mirror $ZPOOL_DISK0 $ZPOOL_DISK1 \ - special mirror $CLASS_DISK0 $CLASS_DISK1 -log_must zpool split $TESTPOOL split_pool -log_must zpool destroy -f $TESTPOOL +for arg in '-o feature@allow_backup_to_pool=disabled' '' ; do + log_must zpool create $arg $TESTPOOL \ + mirror $ZPOOL_DISK0 $ZPOOL_DISK1 \ + special mirror $CLASS_DISK0 $CLASS_DISK1 + log_must zpool split $TESTPOOL split_pool + log_must zpool import -d $(dirname $CLASS_DISK1) split_pool + log_must zpool destroy -f $TESTPOOL + log_must zpool destroy -f split_pool +done log_pass $claim diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_007_pos.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_007_pos.ksh index 106a6d933a..cc7df310d1 100755 --- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_007_pos.ksh +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_007_pos.ksh @@ -31,11 +31,13 @@ log_onexit cleanup log_must disk_setup -log_must zpool create $TESTPOOL raidz $ZPOOL_DISKS \ - special mirror $CLASS_DISK0 $CLASS_DISK1 -log_must zpool replace $TESTPOOL $CLASS_DISK1 $CLASS_DISK2 -log_must sleep 10 -log_must zpool iostat -H $TESTPOOL $CLASS_DISK2 -log_must zpool destroy -f $TESTPOOL +for arg in '-o feature@allow_backup_to_pool=disabled' '' ; do + log_must zpool create $arg $TESTPOOL raidz $ZPOOL_DISKS \ + special mirror $CLASS_DISK0 $CLASS_DISK1 + log_must zpool replace $TESTPOOL $CLASS_DISK1 $CLASS_DISK2 + log_must sleep 10 + log_must zpool iostat -H $TESTPOOL $CLASS_DISK2 + log_must zpool destroy -f $TESTPOOL +done log_pass $claim 
diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_008_pos.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_008_pos.ksh index f73fbbe38c..772b9e77ee 100755 --- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_008_pos.ksh +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_008_pos.ksh @@ -35,22 +35,24 @@ typeset special_type="" typeset create_disks="" typeset added_disks="" -for type in "" "raidz" -do - if [ "$type" = "raidz" ]; then - special_type="mirror" - create_disks="${CLASS_DISK0} ${CLASS_DISK1}" - added_disks="${CLASS_DISK2} ${CLASS_DISK3}" - else - special_type="" - create_disks="${CLASS_DISK0}" - added_disks="${CLASS_DISK1}" - fi - log_must zpool create $TESTPOOL $type $ZPOOL_DISKS \ - special $special_type $create_disks - log_must zpool add $TESTPOOL special $special_type $added_disks - log_must zpool iostat $TESTPOOL $added_disks - log_must zpool destroy -f $TESTPOOL +for arg in '-o feature@allow_backup_to_pool=disabled' '' ; do + for type in "" "raidz" + do + if [ "$type" = "raidz" ]; then + special_type="mirror" + create_disks="${CLASS_DISK0} ${CLASS_DISK1}" + added_disks="${CLASS_DISK2} ${CLASS_DISK3}" + else + special_type="" + create_disks="${CLASS_DISK0}" + added_disks="${CLASS_DISK1}" + fi + log_must zpool create $arg $TESTPOOL $type $ZPOOL_DISKS \ + special $special_type $create_disks + log_must zpool add $TESTPOOL special $special_type $added_disks + log_must zpool iostat $TESTPOOL $added_disks + log_must zpool destroy -f $TESTPOOL + done done log_pass $claim diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_009_pos.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_009_pos.ksh index e8061fdabc..e6f807d5d5 100755 --- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_009_pos.ksh +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_009_pos.ksh @@ -35,35 +35,39 @@ typeset stype="" typeset sdisks="" typeset props="" -for type in "" "mirror" 
"raidz" -do - if [ "$type" = "mirror" ]; then - stype="mirror" - sdisks="${CLASS_DISK0} ${CLASS_DISK1} ${CLASS_DISK2}" - props="-o ashift=12" - elif [ "$type" = "raidz" ]; then - stype="mirror" - sdisks="${CLASS_DISK0} ${CLASS_DISK1}" - else - stype="" - sdisks="${CLASS_DISK0}" - fi +for arg in '-o feature@allow_backup_to_pool=disabled' '' ; do + for type in "" "mirror" "raidz" + do + if [ "$type" = "mirror" ]; then + stype="mirror" + sdisks="${CLASS_DISK0} ${CLASS_DISK1} ${CLASS_DISK2}" + props="-o ashift=12" + elif [ "$type" = "raidz" ]; then + stype="mirror" + sdisks="${CLASS_DISK0} ${CLASS_DISK1}" + else + stype="" + sdisks="${CLASS_DISK0}" + fi - # - # 1/3 of the time add the special vdev after creating the pool - # - if [ $((RANDOM % 3)) -eq 0 ]; then - log_must zpool create ${props} $TESTPOOL $type $ZPOOL_DISKS - log_must zpool add ${props} $TESTPOOL special $stype $sdisks - else - log_must zpool create ${props} $TESTPOOL $type $ZPOOL_DISKS \ - special $stype $sdisks - fi + # + # 1/3 of the time add the special vdev after creating the pool + # + if [ $((RANDOM % 3)) -eq 0 ]; then + log_must zpool create $arg ${props} $TESTPOOL $type \ + $ZPOOL_DISKS + log_must zpool add ${props} $TESTPOOL special $stype \ + $sdisks + else + log_must zpool create $arg ${props} $TESTPOOL $type \ + $ZPOOL_DISKS special $stype $sdisks + fi - log_must zpool export $TESTPOOL - log_must zpool import -d $TEST_BASE_DIR -s $TESTPOOL - log_must display_status $TESTPOOL - log_must zpool destroy -f $TESTPOOL + log_must zpool export $TESTPOOL + log_must zpool import -d $TEST_BASE_DIR -s $TESTPOOL + log_must display_status $TESTPOOL + log_must zpool destroy -f $TESTPOOL + done done log_pass $claim diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_010_pos.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_010_pos.ksh index cbf5cbf89b..344725d9e5 100755 --- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_010_pos.ksh +++ 
b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_010_pos.ksh @@ -32,19 +32,22 @@ log_onexit cleanup log_must disk_setup -log_must zpool create $TESTPOOL raidz $ZPOOL_DISKS special mirror \ - $CLASS_DISK0 $CLASS_DISK1 +for arg in '-o feature@allow_backup_to_pool=disabled' '' ; do + log_must zpool create $arg $TESTPOOL raidz $ZPOOL_DISKS special mirror \ + $CLASS_DISK0 $CLASS_DISK1 -for value in 0 512 1024 2048 4096 8192 16384 32768 65536 131072 -do - log_must zfs set special_small_blocks=$value $TESTPOOL - ACTUAL=$(zfs get -p special_small_blocks $TESTPOOL | \ - awk '/special_small_blocks/ {print $3}') - if [ "$ACTUAL" != "$value" ] - then - log_fail "v. $ACTUAL set for $TESTPOOL, expected v. $value!" - fi + for value in 0 512 1024 2048 4096 8192 16384 32768 65536 131072 + do + log_must zfs set special_small_blocks=$value $TESTPOOL + ACTUAL=$(zfs get -p special_small_blocks $TESTPOOL | \ + awk '/special_small_blocks/ {print $3}') + if [ "$ACTUAL" != "$value" ] + then + log_fail "v. $ACTUAL set for $TESTPOOL, expected v. 
$value" + fi + done + + log_must zpool destroy -f "$TESTPOOL" done -log_must zpool destroy -f "$TESTPOOL" log_pass $claim diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_011_neg.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_011_neg.ksh index 0be49b8587..4b07900752 100755 --- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_011_neg.ksh +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_011_neg.ksh @@ -32,13 +32,17 @@ log_assert $claim log_onexit cleanup log_must disk_setup -log_must zpool create $TESTPOOL raidz $ZPOOL_DISKS special mirror \ - $CLASS_DISK0 $CLASS_DISK1 -for value in 256 1025 33554432 -do - log_mustnot zfs set special_small_blocks=$value $TESTPOOL +for arg in '-o feature@allow_backup_to_pool=disabled' '' ; do + log_must zpool create $arg $TESTPOOL raidz $ZPOOL_DISKS special mirror \ + $CLASS_DISK0 $CLASS_DISK1 + + for value in 256 1025 33554432 + do + log_mustnot zfs set special_small_blocks=$value $TESTPOOL + done + + log_must zpool destroy -f "$TESTPOOL" done -log_must zpool destroy -f "$TESTPOOL" log_pass $claim diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_012_pos.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_012_pos.ksh index 0b1c18bafd..9902e6922d 100755 --- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_012_pos.ksh +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_012_pos.ksh @@ -25,20 +25,20 @@ verify_runnable "global" # -# Verify the file identified by the input is written on a special vdev -# According to the pool layout used in this test vdev_id 3 and 4 are special -# XXX: move this function to libtest.shlib once we get "Vdev Properties" +# Given a dataset and an inode number, return a list of all the vdev numbers +# that the inode has blocks on. 
# -function file_in_special_vdev # +# For example, if the inode has blocks on vdevs 0, 1 and 2, this would return +# the string "0 1 2" +# +function vdevs_file_is_on # { typeset dataset="$1" typeset inum="$2" - typeset num_normal=$(echo $ZPOOL_DISKS | wc -w) - num_normal=${num_normal##* } - - zdb -dddddd $dataset $inum | awk -v d=$num_normal '{ + zdb -dddddd $dataset $inum | awk ' +/L0 [0-9]+/{ # find DVAs from string "offset level dva" only for L0 (data) blocks -if (match($0,"L0 [0-9]+")) { +# if (match($0,"L0 [0-9]+")) { dvas[0]=$3 dvas[1]=$4 dvas[2]=$5 @@ -50,25 +50,46 @@ if (match($0,"L0 [0-9]+")) { print "Error parsing DVA: <" dva ">"; exit 1; } - # verify vdev is "special" - if (arr[1] < d) { - exit 1; - } + count[arr[1]]++; } } -}}' +#} +} +END { + # Print out the unique vdev numbers that had data + firstprint=1; + for (i in count) { + if (firstprint==1) { + printf("%d", i); + firstprint=0; + } else { + printf(" %d", i); + } + } +} +' } # # Check that device removal works for special class vdevs # +# $1: Set to 1 to backup alloc class data to the pool. Leave blank to disable +# backup. function check_removal { + typeset backup + if [ "$1" == "1" ] ; then + backup=1 + args="" + else + backup=0 + args="-o feature@allow_backup_to_pool=disabled" + fi + # # Create a non-raidz pool so we can remove top-level vdevs # - log_must disk_setup - log_must zpool create $TESTPOOL $ZPOOL_DISKS \ + log_must zpool create $args $TESTPOOL $ZPOOL_DISKS \ special $CLASS_DISK0 special $CLASS_DISK1 log_must display_status "$TESTPOOL" @@ -93,19 +114,49 @@ function check_removal for i in 1 2 3 4; do dataset="$TESTPOOL/$TESTFS" inum="$(get_objnum /$TESTPOOL/$TESTFS/testfile.$i)" - log_must file_in_special_vdev $dataset $inum + + # Get a list of all the vdevs 'testfile.$i' has blocks on. + # The list will be string like "0 1 2 3" if the blocks are on + # vdevs 0-3. 
+ on_vdevs="$(vdevs_file_is_on $dataset $inum)" + + # Get the number of normal (non-special) pool disks + num_pool_disks=$(echo $ZPOOL_DISKS | wc -w) + num_pool_disks=${num_pool_disks##* } + + if [ "$backup" == "1" ] ; then + # Data should be on all vdevs (both pool and special + # devices). + lowest_data_disk=0 + highest_data_disk=$(($num_pool_disks + 1)) + else + + # Data should only be on special devices + lowest_data_disk=$num_pool_disks + highest_data_disk=$(($lowest_data_disk + 1)) + fi + + # Get the starting disks that we expect the data to be on. + # We assume two special devices are attached to the pool. + # Disk numbers start at zero. + expected_on_vdevs="$(seq -s ' ' $lowest_data_disk $highest_data_disk)" + + # Compare the disks we expect to see the blocks on with + # the actual disks they're on. + if [ "$on_vdevs" != "$expected_on_vdevs" ] ; then + # Data distribution is not what we expected, break out of + # the loop so we can properly tear down the pool. We will + # error out after the loop. + break; + fi done log_must zpool remove $TESTPOOL $CLASS_DISK0 - - sleep 5 - sync_pool $TESTPOOL - sleep 1 - - log_must zdb -bbcc $TESTPOOL - log_must zpool list -v $TESTPOOL log_must zpool destroy -f "$TESTPOOL" - log_must disk_cleanup + + if [ "$on_vdevs" != "$expected_on_vdevs" ] ; then + log_fail "Expected data on disks $expected_on_vdevs, got $on_vdevs" + fi } claim="Removing a special device from a pool succeeds." @@ -113,12 +164,15 @@ claim="Removing a special device from a pool succeeds." 
log_assert $claim log_onexit cleanup -typeset CLASS_DEVSIZE=$CLASS_DEVSIZE -for CLASS_DEVSIZE in $CLASS_DEVSIZE $ZPOOL_DEVSIZE; do - typeset ZPOOL_DISKS=$ZPOOL_DISKS - for ZPOOL_DISKS in "$ZPOOL_DISKS" $ZPOOL_DISK0; do - check_removal +log_must disk_setup +for backup in "1" "" ; do + typeset CLASS_DEVSIZE=$CLASS_DEVSIZE + for CLASS_DEVSIZE in $CLASS_DEVSIZE $ZPOOL_DEVSIZE; do + typeset ZPOOL_DISKS=$ZPOOL_DISKS + for ZPOOL_DISKS in "$ZPOOL_DISKS" $ZPOOL_DISK0; do + check_removal $backup + done done done - +log_must disk_cleanup log_pass $claim diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_013_pos.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_013_pos.ksh index 624cab88af..97e177e10a 100755 --- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_013_pos.ksh +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_013_pos.ksh @@ -33,31 +33,34 @@ log_onexit cleanup # Create a non-raidz pool so we can remove top-level vdevs # log_must disk_setup -log_must zpool create $TESTPOOL $ZPOOL_DISKS dedup $CLASS_DISK0 -log_must display_status "$TESTPOOL" -# -# Generate some dedup data in the dedup class before removal -# +for arg in '-o feature@allow_backup_to_pool=disabled' '' ; do + log_must zpool create $arg $TESTPOOL $ZPOOL_DISKS dedup $CLASS_DISK0 + log_must display_status "$TESTPOOL" -log_must zfs create -o dedup=on -V 2G $TESTPOOL/$TESTVOL -block_device_wait "$ZVOL_DEVDIR/$TESTPOOL/$TESTVOL" -log_must eval "new_fs $ZVOL_DEVDIR/$TESTPOOL/$TESTVOL >/dev/null" + # + # Generate some dedup data in the dedup class before removal + # -sync_pool -log_must zpool list -v $TESTPOOL + log_must zfs create -o dedup=on -V 2G $TESTPOOL/$TESTVOL + block_device_wait "$ZVOL_DEVDIR/$TESTPOOL/$TESTVOL" + log_must eval "new_fs $ZVOL_DEVDIR/$TESTPOOL/$TESTVOL >/dev/null" -# -# remove a dedup allocation vdev -# -log_must zpool remove $TESTPOOL $CLASS_DISK0 + sync_pool + log_must zpool list -v $TESTPOOL -sleep 5 -sync_pool $TESTPOOL -sleep 1 + 
# + # remove a dedup allocation vdev + # + log_must zpool remove $TESTPOOL $CLASS_DISK0 -log_must zdb -bbcc $TESTPOOL + sleep 5 + sync_pool $TESTPOOL + sleep 1 -log_must zpool destroy -f "$TESTPOOL" + log_must zdb -bbcc $TESTPOOL + + log_must zpool destroy -f "$TESTPOOL" +done log_pass $claim diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_014_neg.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_014_neg.ksh index 1b52014fd2..1b83b8cc09 100755 --- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_014_neg.ksh +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_014_neg.ksh @@ -26,13 +26,15 @@ log_assert $claim log_onexit cleanup log_must disk_setup -for size in 512 4096 32768 131072 524288 1048576 -do - let bigger=$size*2 - log_mustnot zpool create -O recordsize=$size \ - -O special_small_blocks=$bigger \ - $TESTPOOL raidz $ZPOOL_DISKS special mirror \ - $CLASS_DISK0 $CLASS_DISK1 +for arg in '-o feature@allow_backup_to_pool=disabled' '' ; do + for size in 512 4096 32768 131072 524288 1048576 + do + let bigger=$size*2 + log_mustnot zpool create $arg -O recordsize=$size \ + -O special_small_blocks=$bigger \ + $TESTPOOL raidz $ZPOOL_DISKS special mirror \ + $CLASS_DISK0 $CLASS_DISK1 + done done log_pass $claim diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_015_pos.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_015_pos.ksh index 49c468af67..693f68ac36 100755 --- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_015_pos.ksh +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_015_pos.ksh @@ -26,20 +26,22 @@ log_assert $claim log_onexit cleanup log_must disk_setup -for size in 8192 32768 131072 524288 1048576 -do - let smaller=$size/2 - log_must zpool create -O recordsize=$size \ - -O special_small_blocks=$smaller \ - $TESTPOOL raidz $ZPOOL_DISKS special mirror \ - $CLASS_DISK0 $CLASS_DISK1 - log_must zpool destroy -f "$TESTPOOL" +for arg in '-o 
feature@allow_backup_to_pool=disabled' '' ; do + for size in 8192 32768 131072 524288 1048576 + do + let smaller=$size/2 + log_must zpool create $arg -O recordsize=$size \ + -O special_small_blocks=$smaller \ + $TESTPOOL raidz $ZPOOL_DISKS special mirror \ + $CLASS_DISK0 $CLASS_DISK1 + log_must zpool destroy -f "$TESTPOOL" - log_must zpool create -O recordsize=$size \ - -O special_small_blocks=$size \ - $TESTPOOL raidz $ZPOOL_DISKS special mirror \ - $CLASS_DISK0 $CLASS_DISK1 - log_must zpool destroy -f "$TESTPOOL" + log_must zpool create $arg -O recordsize=$size \ + -O special_small_blocks=$size \ + $TESTPOOL raidz $ZPOOL_DISKS special mirror \ + $CLASS_DISK0 $CLASS_DISK1 + log_must zpool destroy -f "$TESTPOOL" + done done log_pass $claim diff --git a/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class.cfg b/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class.cfg new file mode 100644 index 0000000000..84200593eb --- /dev/null +++ b/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class.cfg @@ -0,0 +1,36 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2017, Intel Corporation. +# Copyright (c) 2018 by Delphix. All rights reserved. +# + +. 
$STF_SUITE/include/libtest.shlib + +export ZPOOL_DISK0="$TEST_BASE_DIR/device-0" +export ZPOOL_DISK1="$TEST_BASE_DIR/device-1" +export ZPOOL_DISK2="$TEST_BASE_DIR/device-2" +export ZPOOL_DISKS="${ZPOOL_DISK0} ${ZPOOL_DISK1} ${ZPOOL_DISK2}" + +export CLASS_DISK0="$TEST_BASE_DIR/device-3" +export CLASS_DISK1="$TEST_BASE_DIR/device-4" +export CLASS_DISK2="$TEST_BASE_DIR/device-5" +export CLASS_DISK3="$TEST_BASE_DIR/device-6" +export CLASS_DISK4="$TEST_BASE_DIR/device-7" +export CLASS_DISK5="$TEST_BASE_DIR/device-8" + +export CLASS_DISKS="${CLASS_DISK0} ${CLASS_DISK1} ${CLASS_DISK2} ${CLASS_DISK3} ${CLASS_DISK4} ${CLASS_DISK5}" + +export ZPOOL_DEVSIZE=200M +export CLASS_DEVSIZE=200M + +export IMPORTDIR="$TEST_BASE_DIR" diff --git a/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class.kshlib b/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class.kshlib new file mode 100644 index 0000000000..06ed4f8fd0 --- /dev/null +++ b/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class.kshlib @@ -0,0 +1,283 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2017, Intel Corporation. +# Copyright (c) 2018 by Delphix. All rights reserved. +# Copyright (C) 2024 Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. 
$STF_SUITE/tests/functional/backup_alloc_class/backup_alloc_class.cfg + +BACKUP_DIR=$TEST_BASE_DIR/backups + +function disk_setup +{ + truncate -s $ZPOOL_DEVSIZE $ZPOOL_DISKS + truncate -s $CLASS_DEVSIZE $CLASS_DISKS + + if [ -d $BACKUP_DIR ] ; then + log_fail "Existing $TEST_BASE_DIR/backups directory (maybe leftover from failed test run?)" + fi + + mkdir -p $BACKUP_DIR +} + +function disk_cleanup +{ + rm -f $ZPOOL_DISKS 2> /dev/null + rm -f $CLASS_DISKS 2> /dev/null + + rm -f backup_alloc_class.key + rm -fr $BACKUP_DIR +} + +function cleanup +{ + if datasetexists $TESTPOOL ; then + zpool destroy -f $TESTPOOL 2> /dev/null + fi + + disk_cleanup +} + +# Write zeros to an existing file, keeping the same size. +function zero_file { + dd status=none if=/dev/zero of="$1" bs=$(stat_size "$1") count=1 +} + +# Write a verifiable file that will end up on a 'dedup' or 'special' vdev. +# The filename will include the sha256 of the file for easy verification later. +# +# $1: Write type - "dedup" or "special" +# $2: Path to directory to write the file to +# +# Note: we don't use log_must here since this can get really chatty and +# we don't want to spam the logs. It will log_fail if there is an error. 
+function write_verifiable_file { + class="$1" + writedir="$2" + + if [[ "$class" == "dedup" ]] ; then + # Our dedup file size can be up to a megabyte-ish + filesize=$((32768 + ($RANDOM * $RANDOM % 1000000))) + + # Make write a multiple of the recordsize for dedup + bs=32768 + count=$(($filesize / $bs)) + + # Fill data with the letter 'a' for dedup + file_write -b $bs -c $count -d 'a' -o create -f $writedir/tmp || return + else + # Make all files less than the 32k special_small_blocks size we + # setup at dataset creation time + filesize=$((($RANDOM % 32767) + 1)) + bs=$filesize + count=1 + dd status=none if=/dev/urandom bs=$bs count=$count of="$writedir/tmp" || return + fi + + + csum=$(sha256digest "$writedir/tmp") + newfile=$csum.$class$totalwritten + mv "$writedir/tmp" "$writedir/$newfile" + + # Basic sanity that we created our final file, and it has a non-zero size + expectedsize=$(($bs * $count)) + actualsize=$(stat_size "$writedir/$newfile") + if [[ "$actualsize" != "$expectedsize" ]] || [[ "$actualsize" == "0" ]] ; then + log_fail "File $writedir/$newfile bad size $actualsize (expected $expectedsize)" + return + fi + + totalwritten=$(($totalwritten + 1)) +} + +# Write some files to all our datasets. +# +# For each dataset: +# +# - 10 files should hit special vdevs +# - 10 files should hit dedup vdevs +function write_some_files { + typeset i + for i in $TESTFS 2copies 3copies encrypted encrypted2copies encrypted3copies ; do + for j in $(seq 1 10) ; do + write_verifiable_file special /$TESTPOOL/$i + write_verifiable_file dedup /$TESTPOOL/$i + done + done +} + +# Given a directory containing only files created by write_verifiable_file(), +# verify that the contents of the file match the sha256sum in the file's name. 
+# +# $1: Dir path with files to verify +function verify_directory { + typeset verifydir="$1" + typeset i + for i in $(ls $verifydir) ; do + + # Files will look like: + # + # ed324386045fa39d3f41d4f13c8c3e6a4698466e2b694c327f7e490be9e4e33f.dedup13 + # + # Just grab the sha256 part + + shaname="$(echo $i | cut -f1 -d'.')" + if [[ $(sha256digest "$verifydir/$i") != "$shaname" ]] ; then + log_fail "$verifydir/$i sha256 not $shaname" + false + return + fi + done + true +} + +function backup_alloc_class_disks { + typeset i + for i in $@ ; do + cp ${i} $BACKUP_DIR/$(basename $i) + done +} + +function restore_alloc_class_disks { + typeset i + for i in $@ ; do + mv $BACKUP_DIR/$(basename $i) ${i} + done +} + +function zero_alloc_class_disks { + typeset i + for i in $@ ; do + zero_file "${i}" + done +} + +# Create multiple datasets with different permutations of copies and encryption +function backup_alloc_class_make_datasets { + + log_must zfs create -o compression=off -o special_small_blocks=32K -o recordsize=32K \ + -o dedup=on $TESTPOOL/$TESTFS + + keyfile=$(pwd)/backup_alloc_class.key + dd if=/dev/random of=$keyfile bs=32 count=1 + + log_must zfs create -o copies=2 -o special_small_blocks=32K -o recordsize=32K -o dedup=on \ + $TESTPOOL/2copies + + log_must zfs create -o copies=3 -o special_small_blocks=32K -o recordsize=32K -o dedup=on \ + $TESTPOOL/3copies + + log_must zfs create -o encryption=on -o keylocation=file:///$keyfile -o keyformat=raw -o special_small_blocks=32K -o recordsize=32K -o dedup=on \ + $TESTPOOL/encrypted + + log_must zfs create -o copies=2 -o encryption=on -o keylocation=file:///$keyfile -o keyformat=raw -o special_small_blocks=32K -o recordsize=32K -o dedup=on \ + $TESTPOOL/encrypted2copies + + log_must zfs create -o copies=3 -o encryption=on -o keylocation=file:///$keyfile -o keyformat=raw -o special_small_blocks=32K -o recordsize=32K -o dedup=on \ + $TESTPOOL/encrypted3copies +} + +# For each dataset we created in 
backup_alloc_class_make_datasets, go through +and check that all the files in the datasets have the correct data. +function verify_all_directories { + typeset i + for i in $TESTFS 2copies 3copies encrypted encrypted2copies encrypted3copies ; do + verify_directory /$TESTPOOL/$i + done + + # ...we should also have the correct number of files + totalfiles=0 + for i in $TESTFS 2copies 3copies encrypted encrypted2copies encrypted3copies ; do + totalfiles=$(($totalfiles + $(ls /$TESTPOOL/$i | wc -w))) + done + + if [[ "$totalfiles" != "$totalwritten" ]] ; then + log_fail "Wrong file count: expected $totalwritten, got $totalfiles" + else + log_note "Verified $totalfiles files" + fi +} + +# Return a space separated string of disks that are alloc class vdevs. Disk +# names will include the full path. +function get_list_of_alloc_class_disks { + typeset special_disks=$(get_list_of_vdevs_that_are "special") + typeset dedup_disks=$(get_list_of_vdevs_that_are "dedup") + typeset disks="$dedup_disks" + + if [ -n "$special_disks" ] ; then + disks="$special_disks $disks" + fi + + echo "$disks" +} + +# Check that backup_to_pool is set to $1 on all disks in $2. +function check_backup_to_pool_is { + typeset val=$1 + typeset disks="$2" + typeset i + for i in $disks ; do + # Backup to pool should be enabled on all leaf vdevs + str="$(zpool get -H -o value backup_to_pool $TESTPOOL $i)" + if [ "$str" != "$val" ] ; then + log_fail "$i reported $str, expected $val" + fi + done +} + +# Check that the pool/vdev properties and features for alloc class backups +# are sane. For example, if the feature is disabled, then backup_to_pool +# should not be enabled on any of the disks. 
+function check_pool_alloc_class_props { + typeset allow_backup_to_pool=$(get_pool_prop feature@allow_backup_to_pool $TESTPOOL) + typeset backup_alloc_class_to_pool=$(get_pool_prop backup_alloc_class_to_pool $TESTPOOL) + typeset alloc_class_disks="$(get_list_of_alloc_class_disks)" + + if [ "$allow_backup_to_pool" == "disabled" ] ; then + log_must [ "$backup_alloc_class_to_pool" == "off" ] + fi + + if [ "$backup_alloc_class_to_pool" == "off" ] ; then + check_backup_to_pool_is "off" "$alloc_class_disks" + fi +} + + +# Simple function to check pool and vdev properties are what we expect. The +# values we expect are passed to this function: +# +# $1: 'feature@allow_backup_to_pool' pool feature +# $2: 'backup_alloc_class_to_pool' pool prop +# $3: All alloc class vdev's 'backup_to_pool' vdev prop +# +# This function will log_fail on error. +function boilerplate_check { + typeset allow_backup_to_pool=$1 + typeset backup_alloc_class_to_pool=$2 + typeset special_val=$3 + + typeset alloc_class_disks="$(get_list_of_alloc_class_disks)" + + if [ "$(get_pool_prop feature@allow_backup_to_pool $TESTPOOL)" != "$allow_backup_to_pool" ] ; then + log_fail "feature@allow_backup_to_pool = $(get_pool_prop feature@allow_backup_to_pool $TESTPOOL), expected $allow_backup_to_pool" + fi + + if [ "$(get_pool_prop backup_alloc_class_to_pool $TESTPOOL)" != "$backup_alloc_class_to_pool" ] ; then + log_fail "backup_alloc_class_to_pool = $(get_pool_prop backup_alloc_class_to_pool $TESTPOOL), expected $backup_alloc_class_to_pool" + fi + + check_backup_to_pool_is "$special_val" "$alloc_class_disks" +} diff --git a/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_add.ksh b/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_add.ksh new file mode 100755 index 0000000000..82697d269d --- /dev/null +++ b/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_add.ksh @@ -0,0 +1,94 @@ +#!/bin/ksh -p + +# Copyright (C) 2024 Lawrence Livermore National 
Security, LLC. +# Refer to the OpenZFS git commit log for authoritative copyright attribution. +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License Version 1.0 (CDDL-1.0). +# You can obtain a copy of the license from the top-level file +# "OPENSOLARIS.LICENSE" or at . +# You may not use this file except in compliance with the license. +# +# Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049) + +. $STF_SUITE/tests/functional/backup_alloc_class/backup_alloc_class.kshlib + +# +# DESCRIPTION: +# Verify that 'zpool add' and 'zpool attach' disks have the correct backup +# to pool settings. + +verify_runnable "global" + +claim="zpool add|attach disks have correct backup_to_pool settings" + +log_assert $claim +log_onexit cleanup + +# Try different pool configurations +configs="mirror $ZPOOL_DISK0 $ZPOOL_DISK1 special mirror $CLASS_DISK0 $CLASS_DISK1 +mirror $ZPOOL_DISK0 $ZPOOL_DISK1 dedup mirror $CLASS_DISK0 $CLASS_DISK1" + +log_must disk_setup + +function do_test { + typeset config="$1" + typeset initial=$2 + typeset new=$3 + + log_must zpool create -o backup_alloc_class_to_pool=$initial $TESTPOOL $config + totalwritten=0 + + boilerplate_check "active" "$initial" "$initial" + backup_alloc_class_make_datasets + write_some_files + + log_must zpool set backup_alloc_class_to_pool=$new $TESTPOOL + + # We've just set backup_alloc_class_to_pool to (possibly) a new value. Check + # that our new value still gives us the right props. + if [ $new == "off" ] || [ $initial == "off" ] ; then + initial_expected="off" + else + initial_expected="on" + fi + + # Attach to our special/dedup mirror. New device should be fully + # backed up, but the old devices should remain not backed up. 
+ alloc_class_disks="$(get_list_of_alloc_class_disks)" + log_must zpool attach $TESTPOOL $CLASS_DISK0 $CLASS_DISK2 + check_backup_to_pool_is "$initial_expected" "$alloc_class_disks" + check_backup_to_pool_is "$new" "$CLASS_DISK2" + write_some_files + + # Now add a new special/dedup disk. It should be backed up. + log_must zpool add $TESTPOOL special $CLASS_DISK4 + + check_backup_to_pool_is "$initial_expected" "$alloc_class_disks" + check_backup_to_pool_is "$new" "$CLASS_DISK2 $CLASS_DISK4" + + write_some_files + verify_all_directories + + log_must zpool export $TESTPOOL + log_must zpool import -l -d $IMPORTDIR $TESTPOOL + + verify_all_directories + + log_must zpool destroy $TESTPOOL +} + +# Create a pool that is initially not backed up. Then, enable backups +# and add/attach a disk. The new disks should be backed up, but the +# old disks should not be backed up. +echo "$configs" | while read config ; do + for initial in "on" "off" ; do + for new in "on" "off" ; do + do_test "$config" $initial $new + done + done +done + +cleanup + +log_pass $claim diff --git a/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_create.ksh b/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_create.ksh new file mode 100755 index 0000000000..4f5ac99ada --- /dev/null +++ b/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_create.ksh @@ -0,0 +1,86 @@ +#!/bin/ksh -p + +# Copyright (C) 2024 Lawrence Livermore National Security, LLC. +# Refer to the OpenZFS git commit log for authoritative copyright attribution. +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License Version 1.0 (CDDL-1.0). +# You can obtain a copy of the license from the top-level file +# "OPENSOLARIS.LICENSE" or at . +# You may not use this file except in compliance with the license. +# +# Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049) + +. 
$STF_SUITE/tests/functional/backup_alloc_class/backup_alloc_class.kshlib + +# DESCRIPTION: +# Verify zpool create with different alloc class redundancy +# levels correctly succeed or fail. + +verify_runnable "global" + +claim="zpool create with different backup and disk permutations work" + +log_assert $claim +log_onexit cleanup + +# These should always pass since they have same redundancy level +configs_pass="mirror $ZPOOL_DISK1 $ZPOOL_DISK2 special mirror $CLASS_DISK0 $CLASS_DISK1 +mirror $ZPOOL_DISK1 $ZPOOL_DISK2 dedup mirror $CLASS_DISK0 $CLASS_DISK1 +mirror $ZPOOL_DISK1 $ZPOOL_DISK2 special mirror $CLASS_DISK0 $CLASS_DISK1 dedup mirror $CLASS_DISK2 $CLASS_DISK3" + +# These should always pass with backup_to_pool enabled or when '-f' is passed. +# They should fail otherwise. +configs_pass_backup="mirror $ZPOOL_DISK1 $ZPOOL_DISK2 special $CLASS_DISK0 +mirror $ZPOOL_DISK1 $ZPOOL_DISK2 dedup $CLASS_DISK0 +mirror $ZPOOL_DISK1 $ZPOOL_DISK2 special $CLASS_DISK0 dedup $CLASS_DISK2 +mirror $ZPOOL_DISK1 $ZPOOL_DISK2 special mirror $CLASS_DISK0 $CLASS_DISK1 dedup $CLASS_DISK2" + +log_must disk_setup + +# Try configs with matching redundancy levels. They should all pass. 
+echo "$configs_pass" | while read config ; do + log_must zpool create -o feature@allow_backup_to_pool=disabled $TESTPOOL $config + log_must zpool destroy $TESTPOOL + + log_must zpool create -o feature@allow_backup_to_pool=enabled $TESTPOOL $config + log_must zpool destroy $TESTPOOL + + log_must zpool create -f -o feature@allow_backup_to_pool=disabled $TESTPOOL $config + log_must zpool destroy $TESTPOOL + + log_must zpool create -f -o feature@allow_backup_to_pool=enabled $TESTPOOL $config + log_must zpool destroy $TESTPOOL + + log_must zpool create -o feature@allow_backup_to_pool=disabled -o backup_alloc_class_to_pool=off $TESTPOOL $config + log_must zpool destroy $TESTPOOL + + log_must zpool create -o feature@allow_backup_to_pool=enabled -o backup_alloc_class_to_pool=on $TESTPOOL $config + log_must zpool destroy $TESTPOOL +done + +# Try configs with lower redundancy level. They should fail if backup to +# pool is turned off and -f is not used. +echo "$configs_pass_backup" | while read config ; do + log_mustnot zpool create -o feature@allow_backup_to_pool=disabled $TESTPOOL $config + + log_must zpool create -o feature@allow_backup_to_pool=enabled $TESTPOOL $config + log_must zpool destroy $TESTPOOL + + log_must zpool create -f -o feature@allow_backup_to_pool=disabled $TESTPOOL $config + log_must zpool destroy $TESTPOOL + + log_must zpool create -f -o feature@allow_backup_to_pool=enabled $TESTPOOL $config + log_must zpool destroy $TESTPOOL + + log_mustnot zpool create -o feature@allow_backup_to_pool=disabled -o backup_alloc_class_to_pool=off $TESTPOOL $config + + log_must zpool create -f -o feature@allow_backup_to_pool=disabled -o backup_alloc_class_to_pool=off $TESTPOOL $config + log_must zpool destroy $TESTPOOL + + log_mustnot zpool create -o feature@allow_backup_to_pool=enabled -o backup_alloc_class_to_pool=off $TESTPOOL $config +done + +cleanup + +log_pass $claim diff --git a/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_files.ksh 
b/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_files.ksh new file mode 100755 index 0000000000..76c3a6a679 --- /dev/null +++ b/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_files.ksh @@ -0,0 +1,124 @@ +#!/bin/ksh -p + +# Copyright (C) 2024 Lawrence Livermore National Security, LLC. +# Refer to the OpenZFS git commit log for authoritative copyright attribution. +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License Version 1.0 (CDDL-1.0). +# You can obtain a copy of the license from the top-level file +# "OPENSOLARIS.LICENSE" or at . +# You may not use this file except in compliance with the license. +# +# Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049) + +. $STF_SUITE/tests/functional/backup_alloc_class/backup_alloc_class.kshlib + +# +# DESCRIPTION: +# Test multiple different backup to pool permutations. After each step +# write a bunch of known files. Verify all files are present and correct +# after all the steps are complete. 
+ +verify_runnable "global" + +claim="Files on backed-up disks do not get corrupted" + +log_assert $claim +log_onexit cleanup + +# Try different pool configurations +configs="mirror $ZPOOL_DISKS special $CLASS_DISK0 $CLASS_DISK1 dedup $CLASS_DISK2 $CLASS_DISK3 +raidz $ZPOOL_DISKS special mirror $CLASS_DISK0 $CLASS_DISK1 dedup mirror $CLASS_DISK2 $CLASS_DISK3 +$ZPOOL_DISKS special $CLASS_DISK0 dedup $CLASS_DISK1 +$ZPOOL_DISKS special $CLASS_DISK0 +$ZPOOL_DISKS dedup $CLASS_DISK0" + +echo "$configs" | while read config ; do + log_must disk_setup + log_must zpool create -o backup_alloc_class_to_pool=on $TESTPOOL $config + totalwritten=0 + backup_alloc_class_make_datasets + + write_some_files + verify_all_directories + + alloc_class_disks="$(get_list_of_alloc_class_disks)" + log_must zpool export $TESTPOOL + + backup_alloc_class_disks $alloc_class_disks + zero_alloc_class_disks $alloc_class_disks + + log_must zpool import -l -d "$IMPORTDIR" $TESTPOOL + + # Our pool is imported but has all its special devices zeroed out. Try + # writing some files to it and export the pool + write_some_files + + log_must zpool export $TESTPOOL + log_must zpool import -l -d "$IMPORTDIR" $TESTPOOL + + write_some_files + + log_must zpool export $TESTPOOL + log_must zpool import -l -d "$IMPORTDIR" $TESTPOOL + + write_some_files + + # Make our old disks appear again (which have older data). Do a zpool + # clear to make them come back online and resilver. + restore_alloc_class_disks $alloc_class_disks + log_must zpool clear $TESTPOOL + + write_some_files + + # At this point the pool should be normal. The next test is to + # corrupt the alloc class devices while the pool is running. + zero_alloc_class_disks $alloc_class_disks + + # Trigger a scrub with our newly-zeroed alloc class disks + log_must zpool scrub $TESTPOOL + + # The pool should be degraded, but still alive. + check_state $TESTPOOL "" "DEGRADED" + + write_some_files + + # Replace all the alloc class disks. 
This should get the pool + # back to normal. + for disk in $alloc_class_disks ; do + log_must zpool replace $TESTPOOL $disk + done + + write_some_files + + log_must zpool export $TESTPOOL + + # Backup special disks, then totally remove them. + backup_alloc_class_disks $alloc_class_disks + + rm -f $alloc_class_disks + + # Try to import with the alloc class disks missing - it should work. + log_must zpool import -l -d "$IMPORTDIR" $TESTPOOL + + # After all the pain we've put our pool though, it should still have all the + # correct file data. + log_must verify_all_directories + + if [[ "$totalwritten" != "840" ]] ; then + log_fail "Didn't see 840 files, saw $totalwritten" + fi + + # We've checked all the files. Do some more verifications. + verify_pool $TESTPOOL + verify_filesys $TESTPOOL $TESTPOOL $IMPORTDIR + + # Record a few stats that show metadata re in use + zpool get dedup $TESTPOOL + zdb -bb $TESTPOOL 2>&1 | grep -Ei 'normal|special|dedup|ddt' + + log_must zpool destroy $TESTPOOL + cleanup +done + +log_pass $claim diff --git a/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_import.ksh b/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_import.ksh new file mode 100755 index 0000000000..7f026f748a --- /dev/null +++ b/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_import.ksh @@ -0,0 +1,95 @@ +#!/bin/ksh -p + +# Copyright (C) 2024 Lawrence Livermore National Security, LLC. +# Refer to the OpenZFS git commit log for authoritative copyright attribution. +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License Version 1.0 (CDDL-1.0). +# You can obtain a copy of the license from the top-level file +# "OPENSOLARIS.LICENSE" or at . +# You may not use this file except in compliance with the license. +# +# Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049) + +. 
$STF_SUITE/tests/functional/backup_alloc_class/backup_alloc_class.kshlib + +# +# DESCRIPTION: +# Verify we can import a backed-up pool even if all its alloc class +# devices are missing. +# +verify_runnable "global" + +claim="Verify imports work when backed-up devices are missing" + +log_assert $claim +log_onexit cleanup + +TWO_ZPOOL_DISKS="$ZPOOL_DISK0 $ZPOOL_DISK1" +REPLACE_DISK="$ZPOOL_DISK2" + +# Try a bunch of different pool configurations +configs="$TWO_ZPOOL_DISKS special $CLASS_DISK0 $CLASS_DISK1 dedup $CLASS_DISK2 $CLASS_DISK3 +raidz $TWO_ZPOOL_DISKS special mirror $CLASS_DISK0 $CLASS_DISK1 dedup mirror $CLASS_DISK2 $CLASS_DISK3 +$TWO_ZPOOL_DISKS special $CLASS_DISK0 dedup $CLASS_DISK1 +$TWO_ZPOOL_DISKS special $CLASS_DISK0 +$TWO_ZPOOL_DISKS dedup $CLASS_DISK0" + +function do_test { + typeset config="$1" + typeset action="$2" + typeset onoff="$3" + + totalwritten=0 + log_must disk_setup + log_must zpool create -o backup_alloc_class_to_pool=$onoff $TESTPOOL $config + + alloc_class_disks="$(get_list_of_alloc_class_disks)" + + check_backup_to_pool_is "$onoff" "$alloc_class_disks" + + backup_alloc_class_make_datasets + write_some_files + verify_all_directories + + log_must zpool export $TESTPOOL + + # Backup alloc class disk before removing them + backup_alloc_class_disks $alloc_class_disks + if [ "$action" == "remove" ] ; then + rm -f $alloc_class_disks + else + zero_alloc_class_disks $alloc_class_disks + fi + + # import should succeed or fail depending on how we're backed up + if [ "$onoff" == "on" ] ; then + log_must zpool import -l -d "$IMPORTDIR" $TESTPOOL + else + log_mustnot zpool import -l -d "$IMPORTDIR" $TESTPOOL + + # With the disks restored, we should be able to import + restore_alloc_class_disks $alloc_class_disks + log_must zpool import -l -d "$IMPORTDIR" $TESTPOOL + fi + write_some_files + + # Do a scrub and verify everything is correct + verify_pool $TESTPOOL + + verify_all_directories + + zpool destroy $TESTPOOL + + cleanup +} + +echo 
"$configs" | while read config ; do + for action in "remove" "zero" ; do + for onoff in "off" "on" ; do + do_test "$config" "$action" "$onoff" + done + done +done + +log_pass $claim diff --git a/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_offline.ksh b/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_offline.ksh new file mode 100755 index 0000000000..7bbdd7e1ff --- /dev/null +++ b/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_offline.ksh @@ -0,0 +1,126 @@ +#!/bin/ksh -p + +# Copyright (C) 2024 Lawrence Livermore National Security, LLC. +# Refer to the OpenZFS git commit log for authoritative copyright attribution. +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License Version 1.0 (CDDL-1.0). +# You can obtain a copy of the license from the top-level file +# "OPENSOLARIS.LICENSE" or at . +# You may not use this file except in compliance with the license. +# +# Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049) + +. $STF_SUITE/tests/functional/backup_alloc_class/backup_alloc_class.kshlib + +# +# DESCRIPTION: +# Verify we can offline backed-up alloc class disks. +# Verify we cannot offline non-backed-up alloc class disks. 
+# +verify_runnable "global" + +claim="Verify correct behavior when we force fault an alloc class disk" + +log_assert $claim +log_onexit cleanup + +# Try a bunch of different pool configurations +configs="mirror $ZPOOL_DISKS special $CLASS_DISK0 $CLASS_DISK1 dedup $CLASS_DISK2 $CLASS_DISK3 +raidz $ZPOOL_DISKS special mirror $CLASS_DISK0 $CLASS_DISK1 dedup mirror $CLASS_DISK2 $CLASS_DISK3 +$ZPOOL_DISKS special $CLASS_DISK0 dedup $CLASS_DISK1 +$ZPOOL_DISKS special $CLASS_DISK0 +$ZPOOL_DISKS dedup $CLASS_DISK0" + +function do_test { + prop="$1" + config="$2" + log_must disk_setup + log_must zpool create -f $prop $TESTPOOL $config + check_pool_alloc_class_props + + backup_alloc_class_make_datasets + totalwritten=0 + write_some_files + + alloc_class_disks=$(get_list_of_alloc_class_disks) + alloc_class_disks_arr=($alloc_class_disks) + + if [ "$prop" == "" ] ; then + log_must [ "$(get_pool_prop feature@allow_backup_to_pool $TESTPOOL)" == "active" ] + else + log_must [ "$(get_pool_prop feature@allow_backup_to_pool $TESTPOOL)" == "disabled" ] + fi + + for ((i = 0; i < ${#alloc_class_disks_arr[@]}; i++)); do + disk="${alloc_class_disks_arr[$i]}" + if [ "$prop" == "" ] ; then + # Everything is backed-up. We should be able to + # offline all the disks. + log_must zpool offline $TESTPOOL $disk + log_note "$(zpool status)" + log_must check_state $TESTPOOL "$disk" "OFFLINE" + log_must check_state $TESTPOOL "" "DEGRADED" + else + PARENT=$(get_vdev_prop parent $TESTPOOL $disk) + if [ "$PARENT" == "$TESTPOOL" ] ; then + # Leaf is TLD, offline should fail + log_mustnot zpool offline $TESTPOOL $disk + log_must check_state $TESTPOOL "$disk" "ONLINE" + log_must check_state $TESTPOOL "" "ONLINE" + else + # We're part of a mirror. We know all + # mirrors in our test pool are two disk + # so we should be able to offline the + # first disk, but not the second. + if [ "$i" == "0" ] ; then + # First alloc class disk - pretend + # "previous" disk was online to + # make things easy. 
+ prev_online=1 + else + if check_state $TESTPOOL "${alloc_class_disks_arr[$i - 1]}" "ONLINE" ; then + prev_online=1 + else + prev_online=0 + fi + fi + + if [ "$prev_online" == "1" ] ; then + # First disk in mirror, can offline + log_must zpool offline $TESTPOOL $disk + log_must check_state $TESTPOOL "$disk" "OFFLINE" + log_must check_state $TESTPOOL "" "DEGRADED" + else + # Second disk in mirror, can't offline + # but we should still be in a pool + # degraded state from the first disk + # going offline. + log_note "$(zpool status)" + log_mustnot zpool offline $TESTPOOL $disk + log_must check_state $TESTPOOL "$disk" "ONLINE" + log_must check_state $TESTPOOL "" "DEGRADED" + fi + fi + fi + done + + write_some_files + verify_all_directories + + # We've checked all the files. Do some more verifications. + verify_pool $TESTPOOL + verify_filesys $TESTPOOL $TESTPOOL $IMPORTDIR + + zpool clear $TESTPOOL + zpool destroy $TESTPOOL + cleanup +} + +for prop in "-o feature@allow_backup_to_pool=disabled" "" ; do + echo "$configs" | while read config ; do + do_test "$prop" "$config" + done +done + +log_pass $claim diff --git a/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_prop.ksh b/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_prop.ksh new file mode 100755 index 0000000000..5096d22499 --- /dev/null +++ b/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_prop.ksh @@ -0,0 +1,98 @@ +#!/bin/ksh -p + +# Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049). +# Copyright (C) 2024 Lawrence Livermore National Security, LLC. +# Refer to the OpenZFS git commit log for authoritative copyright attribution. +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License Version 1.0 (CDDL-1.0). +# You can obtain a copy of the license from the top-level file +# "OPENSOLARIS.LICENSE" or at . +# You may not use this file except in compliance with the license. 
+# +# Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049) + +. $STF_SUITE/tests/functional/backup_alloc_class/backup_alloc_class.kshlib + +# +# DESCRIPTION: +# Verify that alloc class backups to pool do not work if +# SPA_FEATURE_ALLOW_BACKUP_TO_POOL is disabled. Also, test upgrades. + +verify_runnable "global" + +claim="alloc class backups shouldn't work without SPA_FEATURE_ALLOW_BACKUP_TO_POOL" + +log_assert $claim +log_onexit cleanup + +IMPORTDIR="$(dirname ${CLASS_DISK0})" + +# Try a bunch of different pool configurations +configs="$ZPOOL_DISKS special $CLASS_DISK0 $CLASS_DISK1 dedup $CLASS_DISK2 $CLASS_DISK3 +raidz $ZPOOL_DISKS special mirror $CLASS_DISK0 $CLASS_DISK1 dedup mirror $CLASS_DISK2 $CLASS_DISK3 +$ZPOOL_DISKS special $CLASS_DISK0 dedup $CLASS_DISK1 +$ZPOOL_DISKS special $CLASS_DISK0 +$ZPOOL_DISKS dedup $CLASS_DISK0" + +# Make the pool disks smaller to make them quicker to back up. We don't use +# much data on them. +export ZPOOL_DEVSIZE=200M +export CLASS_DEVSIZE=200M + +log_must disk_setup + +echo "$configs" | while read config ; do + # We should not be able to set backup_alloc_class_to_pool=on if feature + # flag is disabled. 
+ log_mustnot zpool create -o feature@allow_backup_to_pool=disabled -o backup_alloc_class_to_pool=on $TESTPOOL $config + + # Try a few permutations that should succeed + log_must zpool create -o backup_alloc_class_to_pool=off $TESTPOOL $config + boilerplate_check "active" "off" "off" + log_must zpool destroy $TESTPOOL + + log_must zpool create -o backup_alloc_class_to_pool=on $TESTPOOL $config + boilerplate_check "active" "on" "on" + log_must zpool destroy $TESTPOOL + + log_must zpool create -o feature@allow_backup_to_pool=enabled -o backup_alloc_class_to_pool=on $TESTPOOL $config + boilerplate_check "active" "on" "on" + log_must zpool destroy $TESTPOOL + + # Now let's do a multi-step test + for cmd in "zpool set feature@allow_backup_to_pool=enabled $TESTPOOL" "zpool upgrade $TESTPOOL" ; do + log_note "config='$config'" + log_must zpool create -o feature@allow_backup_to_pool=disabled -o backup_alloc_class_to_pool=off $TESTPOOL $config + totalwritten=0 + + boilerplate_check "disabled" "off" "off" + backup_alloc_class_make_datasets + write_some_files + + # Test enabling the feature in two different ways: + # + # zpool set allow_backup_to_pool=enabled ... + # zpool upgrade ... 
+ # + log_must eval "$cmd" + boilerplate_check "active" "off" "off" + write_some_files + + log_must zpool set backup_alloc_class_to_pool=on $TESTPOOL + boilerplate_check "active" "on" "off" + write_some_files + + log_must zpool export $TESTPOOL + log_must zpool import -l -d $IMPORTDIR $TESTPOOL + + verify_all_directories + + log_must zpool destroy $TESTPOOL + done + +done + +cleanup + +log_pass $claim diff --git a/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_scrub.ksh b/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_scrub.ksh new file mode 100755 index 0000000000..210fa53bd9 --- /dev/null +++ b/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_scrub.ksh @@ -0,0 +1,112 @@ +#!/bin/ksh -p + +# Copyright (C) 2024 Lawrence Livermore National Security, LLC. +# Refer to the OpenZFS git commit log for authoritative copyright attribution. +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License Version 1.0 (CDDL-1.0). +# You can obtain a copy of the license from the top-level file +# "OPENSOLARIS.LICENSE" or at . +# You may not use this file except in compliance with the license. +# +# Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049) + +. $STF_SUITE/tests/functional/backup_alloc_class/backup_alloc_class.kshlib + +# +# DESCRIPTION: +# Destroy alloc class disks and then do a scrub on both a +# backed-up and non-backed-up pool. The backed-up pool +# should only be DEGRADED, while the no-backed-up pool should be +# SUSPENDED. 
+ +verify_runnable "global" + +claim="Backed-up pools survive a normally fatal scrub with bad disks" + +log_assert $claim +log_onexit cleanup + +# Try different pool configurations +configs="$ZPOOL_DISKS special $CLASS_DISK0 $CLASS_DISK1 dedup $CLASS_DISK2 $CLASS_DISK3 +raidz $ZPOOL_DISKS special mirror $CLASS_DISK0 $CLASS_DISK1 dedup mirror $CLASS_DISK2 $CLASS_DISK3 +$ZPOOL_DISKS special $CLASS_DISK0 dedup $CLASS_DISK1 +$ZPOOL_DISKS special $CLASS_DISK0 +$ZPOOL_DISKS dedup $CLASS_DISK0" + +function do_test { + typeset config="$1" + typeset action="$2" + typeset onoff="$3" + totalwritten=0 + + log_must disk_setup + log_must zpool create -o feature@allow_backup_to_pool=enabled -o backup_alloc_class_to_pool=$onoff $TESTPOOL $config + + backup_alloc_class_make_datasets + + totalwritten=0 + write_some_files + + alloc_class_disks="$(get_list_of_alloc_class_disks)" + log_note "$(zpool status)" + check_backup_to_pool_is "$onoff" "$alloc_class_disks" + + # When we do a scrub later, we will either want it to suspend or not + # suspended the pool, depending on our backup settings. + # Make sure we are able to ride though the suspended pool so we + # can continue with our tests. + log_must zpool set failmode=continue $TESTPOOL + + backup_alloc_class_disks $alloc_class_disks + + check_backup_to_pool_is "$onoff" "$alloc_class_disks" + + zero_alloc_class_disks $alloc_class_disks + + # Spawn scrub into the background since the pool may be suspended and + # it will hang. We need to continue pass the hung scrub so we + # can restore the bad disks and do a 'zpool clear' to remove the + # suspended pool. + zpool scrub $TESTPOOL & + + wait_scrubbed $TESTPOOL 3 + if [ "$onoff" == "on" ] ; then + log_must check_state $TESTPOOL "" "DEGRADED" + + verify_pool $TESTPOOL + + write_some_files + verify_all_directories + else + log_must check_state $TESTPOOL "" "SUSPENDED" + + # Pool should be suspended. Restore the old disks so we can + # clear the suspension. 
'zpool clear' here will delete the + # pool. + restore_alloc_class_disks $alloc_class_disks + log_must zpool clear $TESTPOOL + fi + + cleanup +} + +# Stop zed in case we left it running from an old, aborted, test run. +zed_stop +zed_cleanup + +log_must zed_setup +log_must zed_start +log_must zed_events_drain + +# Verify scrubs work as expected with different permutations of backup_to_pool. +echo "$configs" | while read config ; do + for i in "on" "off" ; do + do_test "$config" "zero" "$i" + done +done + +log_must zed_stop +log_must zed_cleanup + +log_pass $claim diff --git a/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_split.ksh b/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_split.ksh new file mode 100755 index 0000000000..1de18a8f61 --- /dev/null +++ b/tests/zfs-tests/tests/functional/backup_alloc_class/backup_alloc_class_split.ksh @@ -0,0 +1,101 @@ +#!/bin/ksh -p + +# Copyright (C) 2024 Lawrence Livermore National Security, LLC. +# Refer to the OpenZFS git commit log for authoritative copyright attribution. +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License Version 1.0 (CDDL-1.0). +# You can obtain a copy of the license from the top-level file +# "OPENSOLARIS.LICENSE" or at . +# You may not use this file except in compliance with the license. +# +# Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049) + +. $STF_SUITE/tests/functional/backup_alloc_class/backup_alloc_class.kshlib + +# +# DESCRIPTION: +# Verify we can split a pool with backup to pool, and the new pool +# keeps the backup to pool settings. Also verify the new pool has +# all the data if the pool is backed up. 
+# +verify_runnable "global" + +claim="zpool split works with backup to pool" + +log_assert $claim +log_onexit cleanup + +IMPORTDIR="$(dirname ${CLASS_DISK0})" + + +# Create a normal, backed-up pool +log_must disk_setup +log_must zpool create -o backup_alloc_class_to_pool=on $TESTPOOL mirror \ + $ZPOOL_DISK0 $ZPOOL_DISK1 special mirror $CLASS_DISK0 $CLASS_DISK1 dedup \ + mirror $CLASS_DISK2 $CLASS_DISK3 + +totalwritten=0 +backup_alloc_class_make_datasets +write_some_files +verify_all_directories + +# Split the pool and verify the old pool has all the data +newpool="${TESTPOOL}-2" + +log_must zpool split $TESTPOOL $newpool +check_backup_to_pool_is "on" +check_pool_alloc_class_props +verify_all_directories + +# Forcefault alloc class devices on the old pool and verify we have all the +# data. +log_must zpool offline -f $TESTPOOL $CLASS_DISK0 +log_must zpool offline -f $TESTPOOL $CLASS_DISK2 +log_must check_state $TESTPOOL $CLASS_DISK0 "FAULTED" +log_must check_state $TESTPOOL $CLASS_DISK2 "FAULTED" + +log_must check_state $TESTPOOL "" "DEGRADED" +verify_all_directories + +log_must zpool clear $TESTPOOL + +# All done with the old pool +log_must zpool destroy $TESTPOOL + +# Import the new split pool and rename it $TESTPOOL since all our verification +# functions expect the pool to be called $TESTPOOL. +log_must zpool import -l -f -d $IMPORTDIR $newpool $TESTPOOL + +check_backup_to_pool_is "on" +check_pool_alloc_class_props +verify_all_directories + +# zero alloc class devices on the old pool and verify we have all the +# data. +log_must zpool export $TESTPOOL + +zero_file $CLASS_DISK1 +zero_file $CLASS_DISK3 + +log_must zpool import -l -f -d $IMPORTDIR $TESTPOOL + +verify_all_directories +log_must zpool destroy $TESTPOOL + +# Create a non-backed-up pool, split it, and verify the split pool is also +# not backed-up. 
+log_must zpool create -o backup_alloc_class_to_pool=off $TESTPOOL mirror \ + $ZPOOL_DISK0 $ZPOOL_DISK1 special mirror $CLASS_DISK0 $CLASS_DISK1 dedup \ + mirror $CLASS_DISK2 $CLASS_DISK3 + +log_must zpool split $TESTPOOL $newpool +check_backup_to_pool_is "off" +check_pool_alloc_class_props +log_must zpool destroy $TESTPOOL +log_must zpool import -l -f -d $IMPORTDIR $newpool $TESTPOOL +check_backup_to_pool_is "off" +check_pool_alloc_class_props +log_must zpool destroy $TESTPOOL + +log_pass $claim diff --git a/tests/zfs-tests/tests/functional/backup_alloc_class/cleanup.ksh b/tests/zfs-tests/tests/functional/backup_alloc_class/cleanup.ksh new file mode 100755 index 0000000000..a3c60a3925 --- /dev/null +++ b/tests/zfs-tests/tests/functional/backup_alloc_class/cleanup.ksh @@ -0,0 +1,27 @@ +#!/bin/ksh -p + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2017, Intel Corporation. +# Copyright (c) 2018, Delphix +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/backup_alloc_class/backup_alloc_class.kshlib + +verify_runnable "global" + +default_cleanup_noexit +disk_cleanup + +log_pass diff --git a/tests/zfs-tests/tests/functional/backup_alloc_class/setup.ksh b/tests/zfs-tests/tests/functional/backup_alloc_class/setup.ksh new file mode 100755 index 0000000000..bf044daa92 --- /dev/null +++ b/tests/zfs-tests/tests/functional/backup_alloc_class/setup.ksh @@ -0,0 +1,24 @@ +#!/bin/ksh -p + +# Copyright (C) 2024 Lawrence Livermore National Security, LLC. +# Refer to the OpenZFS git commit log for authoritative copyright attribution. 
+# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License Version 1.0 (CDDL-1.0). +# You can obtain a copy of the license from the top-level file +# "OPENSOLARIS.LICENSE" or at . +# You may not use this file except in compliance with the license. +# +# Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049) +# +# Copyright (c) 2017, Intel Corporation. +# Copyright (c) 2018 by Delphix. All rights reserved. + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/backup_alloc_class/backup_alloc_class.kshlib + +verify_runnable "global" + +disk_cleanup + +log_pass