diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index 4b9921d47b..87499cdc95 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -5293,8 +5293,18 @@ dump_one_objset(const char *dsname, void *arg)
avl_first(&dmu_objset_ds(os)->ds_bookmarks); dbn != NULL;
dbn = AVL_NEXT(&dmu_objset_ds(os)->ds_bookmarks, dbn)) {
mos_obj_refd(dbn->dbn_phys.zbm_redaction_obj);
- if (dbn->dbn_phys.zbm_redaction_obj != 0)
- global_feature_count[SPA_FEATURE_REDACTION_BOOKMARKS]++;
+ if (dbn->dbn_phys.zbm_redaction_obj != 0) {
+ global_feature_count[
+ SPA_FEATURE_REDACTION_BOOKMARKS]++;
+ objset_t *mos = os->os_spa->spa_meta_objset;
+ dnode_t *rl;
+ VERIFY0(dnode_hold(mos,
+ dbn->dbn_phys.zbm_redaction_obj, FTAG, &rl));
+ if (rl->dn_have_spill) {
+ global_feature_count[
+ SPA_FEATURE_REDACTION_LIST_SPILL]++;
+ }
+ }
if (dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN)
global_feature_count[SPA_FEATURE_BOOKMARK_WRITTEN]++;
}
@@ -8135,6 +8145,7 @@ dump_zpool(spa_t *spa)
for (spa_feature_t f = 0; f < SPA_FEATURES; f++)
global_feature_count[f] = UINT64_MAX;
global_feature_count[SPA_FEATURE_REDACTION_BOOKMARKS] = 0;
+ global_feature_count[SPA_FEATURE_REDACTION_LIST_SPILL] = 0;
global_feature_count[SPA_FEATURE_BOOKMARK_WRITTEN] = 0;
global_feature_count[SPA_FEATURE_LIVELIST] = 0;
diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c
index 5ed25d1ea7..ea73bb018a 100644
--- a/cmd/zfs/zfs_main.c
+++ b/cmd/zfs/zfs_main.c
@@ -3978,6 +3978,10 @@ zfs_do_redact(int argc, char **argv)
(void) fprintf(stderr, gettext("potentially invalid redaction "
"snapshot; full dataset names required\n"));
break;
+ case ESRCH:
+ (void) fprintf(stderr, gettext("attempted to resume redaction "
+ " with a mismatched redaction list\n"));
+ break;
default:
(void) fprintf(stderr, gettext("internal error: %s\n"),
strerror(errno));
diff --git a/include/sys/dsl_bookmark.h b/include/sys/dsl_bookmark.h
index 353c5c2d26..d4e559a090 100644
--- a/include/sys/dsl_bookmark.h
+++ b/include/sys/dsl_bookmark.h
@@ -72,6 +72,7 @@ typedef struct redaction_list_phys {
typedef struct redaction_list {
dmu_buf_user_t rl_dbu;
redaction_list_phys_t *rl_phys;
+ dmu_buf_t *rl_bonus;
dmu_buf_t *rl_dbuf;
uint64_t rl_object;
zfs_refcount_t rl_longholds;
diff --git a/include/zfeature_common.h b/include/zfeature_common.h
index 7066c699e2..1025c44738 100644
--- a/include/zfeature_common.h
+++ b/include/zfeature_common.h
@@ -80,6 +80,7 @@ typedef enum spa_feature {
SPA_FEATURE_BLAKE3,
SPA_FEATURE_BLOCK_CLONING,
SPA_FEATURE_AVZ_V2,
+ SPA_FEATURE_REDACTION_LIST_SPILL,
SPA_FEATURES
} spa_feature_t;
diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi
index 6e53bcb41a..0a8e9bcbd7 100644
--- a/lib/libzfs/libzfs.abi
+++ b/lib/libzfs/libzfs.abi
@@ -596,7 +596,7 @@
-
+
@@ -5809,7 +5809,8 @@
-
+
+
@@ -8706,8 +8707,8 @@
-
-
+
+
diff --git a/man/man7/zpool-features.7 b/man/man7/zpool-features.7
index b901ce6c29..3c7b0b345d 100644
--- a/man/man7/zpool-features.7
+++ b/man/man7/zpool-features.7
@@ -947,6 +947,18 @@ once all filesystems that have ever had their
property set to
.Sy zstd
are destroyed.
+.
+.feature com.delphix redaction_list_spill no redaction_bookmarks
+This feature enables the redaction list created by zfs redact to store
+many more entries.
+It becomes
+.Sy active
+when a redaction list is created with more than 36 entries,
+and returns to being
+.Sy enabled
+when no long redaction lists remain in the pool.
+For more information about redacted sends, see
+.Xr zfs-send 8 .
.El
.
.Sh SEE ALSO
diff --git a/module/zcommon/zfeature_common.c b/module/zcommon/zfeature_common.c
index 4c9b7ed72a..2c74d10f43 100644
--- a/module/zcommon/zfeature_common.c
+++ b/module/zcommon/zfeature_common.c
@@ -737,6 +737,18 @@ zpool_feature_init(void)
ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL,
sfeatures);
+ {
+ static const spa_feature_t redact_list_spill_deps[] = {
+ SPA_FEATURE_REDACTION_BOOKMARKS,
+ SPA_FEATURE_NONE
+ };
+ zfeature_register(SPA_FEATURE_REDACTION_LIST_SPILL,
+ "com.delphix:redaction_list_spill", "redaction_list_spill",
+ "Support for increased number of redaction_snapshot "
+ "arguments in zfs redact.", 0, ZFEATURE_TYPE_BOOLEAN,
+ redact_list_spill_deps, sfeatures);
+ }
+
zfs_mod_list_supported_free(sfeatures);
}
diff --git a/module/zfs/dmu_redact.c b/module/zfs/dmu_redact.c
index 6bd35713ff..5ac14edfca 100644
--- a/module/zfs/dmu_redact.c
+++ b/module/zfs/dmu_redact.c
@@ -746,7 +746,7 @@ perform_thread_merge(bqueue_t *q, uint32_t num_threads,
bqueue_enqueue(q, record, sizeof (*record));
return (0);
}
- redact_nodes = kmem_zalloc(num_threads *
+ redact_nodes = vmem_zalloc(num_threads *
sizeof (*redact_nodes), KM_SLEEP);
avl_create(&start_tree, redact_node_compare_start,
@@ -820,7 +820,7 @@ perform_thread_merge(bqueue_t *q, uint32_t num_threads,
avl_destroy(&start_tree);
avl_destroy(&end_tree);
- kmem_free(redact_nodes, num_threads * sizeof (*redact_nodes));
+ vmem_free(redact_nodes, num_threads * sizeof (*redact_nodes));
if (current_record != NULL)
bqueue_enqueue(q, current_record, sizeof (*current_record));
return (err);
@@ -1030,7 +1030,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
numsnaps = fnvlist_num_pairs(redactnvl);
if (numsnaps > 0)
- args = kmem_zalloc(numsnaps * sizeof (*args), KM_SLEEP);
+ args = vmem_zalloc(numsnaps * sizeof (*args), KM_SLEEP);
nvpair_t *pair = NULL;
for (int i = 0; i < numsnaps; i++) {
@@ -1079,7 +1079,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
kmem_free(newredactbook,
sizeof (char) * ZFS_MAX_DATASET_NAME_LEN);
if (args != NULL)
- kmem_free(args, numsnaps * sizeof (*args));
+ vmem_free(args, numsnaps * sizeof (*args));
return (SET_ERROR(ENAMETOOLONG));
}
err = dsl_bookmark_lookup(dp, newredactbook, NULL, &bookmark);
@@ -1119,7 +1119,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
} else {
uint64_t *guids = NULL;
if (numsnaps > 0) {
- guids = kmem_zalloc(numsnaps * sizeof (uint64_t),
+ guids = vmem_zalloc(numsnaps * sizeof (uint64_t),
KM_SLEEP);
}
for (int i = 0; i < numsnaps; i++) {
@@ -1131,10 +1131,9 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
dp = NULL;
err = dsl_bookmark_create_redacted(newredactbook, snapname,
numsnaps, guids, FTAG, &new_rl);
- kmem_free(guids, numsnaps * sizeof (uint64_t));
- if (err != 0) {
+ vmem_free(guids, numsnaps * sizeof (uint64_t));
+ if (err != 0)
goto out;
- }
}
for (int i = 0; i < numsnaps; i++) {
@@ -1188,7 +1187,7 @@ out:
}
if (args != NULL)
- kmem_free(args, numsnaps * sizeof (*args));
+ vmem_free(args, numsnaps * sizeof (*args));
if (dp != NULL)
dsl_pool_rele(dp, FTAG);
if (ds != NULL) {
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
index 7cf03264dc..79fd02dcb9 100644
--- a/module/zfs/dnode.c
+++ b/module/zfs/dnode.c
@@ -720,6 +720,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
ASSERT(DMU_OT_IS_VALID(ot));
ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
(bonustype == DMU_OT_SA && bonuslen == 0) ||
+ (bonustype == DMU_OTN_UINT64_METADATA && bonuslen == 0) ||
(bonustype != DMU_OT_NONE && bonuslen != 0));
ASSERT(DMU_OT_IS_VALID(bonustype));
ASSERT3U(bonuslen, <=, DN_SLOTS_TO_BONUSLEN(dn_slots));
diff --git a/module/zfs/dsl_bookmark.c b/module/zfs/dsl_bookmark.c
index e04796a081..03d9420dbd 100644
--- a/module/zfs/dsl_bookmark.c
+++ b/module/zfs/dsl_bookmark.c
@@ -34,6 +34,7 @@
#include
#include
#include
+#include
static int
dsl_bookmark_hold_ds(dsl_pool_t *dp, const char *fullname,
@@ -459,25 +460,42 @@ dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot,
SPA_FEATURE_REDACTED_DATASETS, &dsnumsnaps, &dsredactsnaps);
if (redaction_list != NULL || bookmark_redacted) {
redaction_list_t *local_rl;
+ boolean_t spill = B_FALSE;
if (bookmark_redacted) {
redact_snaps = dsredactsnaps;
num_redact_snaps = dsnumsnaps;
}
+ int bonuslen = sizeof (redaction_list_phys_t) +
+ num_redact_snaps * sizeof (uint64_t);
+ if (bonuslen > dmu_bonus_max())
+ spill = B_TRUE;
dbn->dbn_phys.zbm_redaction_obj = dmu_object_alloc(mos,
DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE,
- DMU_OTN_UINT64_METADATA, sizeof (redaction_list_phys_t) +
- num_redact_snaps * sizeof (uint64_t), tx);
+ DMU_OTN_UINT64_METADATA, spill ? 0 : bonuslen, tx);
spa_feature_incr(dp->dp_spa,
SPA_FEATURE_REDACTION_BOOKMARKS, tx);
+ if (spill) {
+ spa_feature_incr(dp->dp_spa,
+ SPA_FEATURE_REDACTION_LIST_SPILL, tx);
+ }
VERIFY0(dsl_redaction_list_hold_obj(dp,
dbn->dbn_phys.zbm_redaction_obj, tag, &local_rl));
dsl_redaction_list_long_hold(dp, local_rl, tag);
- ASSERT3U((local_rl)->rl_dbuf->db_size, >=,
- sizeof (redaction_list_phys_t) + num_redact_snaps *
- sizeof (uint64_t));
- dmu_buf_will_dirty(local_rl->rl_dbuf, tx);
+ if (!spill) {
+ ASSERT3U(local_rl->rl_bonus->db_size, >=, bonuslen);
+ dmu_buf_will_dirty(local_rl->rl_bonus, tx);
+ } else {
+ dmu_buf_t *db;
+ VERIFY0(dmu_spill_hold_by_bonus(local_rl->rl_bonus,
+ DB_RF_MUST_SUCCEED, FTAG, &db));
+ dmu_buf_will_fill(db, tx);
+ VERIFY0(dbuf_spill_set_blksz(db, P2ROUNDUP(bonuslen,
+ SPA_MINBLOCKSIZE), tx));
+ local_rl->rl_phys = db->db_data;
+ local_rl->rl_dbuf = db;
+ }
memcpy(local_rl->rl_phys->rlp_snaps, redact_snaps,
sizeof (uint64_t) * num_redact_snaps);
local_rl->rl_phys->rlp_num_snaps = num_redact_snaps;
@@ -636,11 +654,15 @@ dsl_bookmark_create_redacted_check(void *arg, dmu_tx_t *tx)
SPA_FEATURE_REDACTION_BOOKMARKS))
return (SET_ERROR(ENOTSUP));
/*
- * If the list of redact snaps will not fit in the bonus buffer with
- * the furthest reached object and offset, fail.
+ * If the list of redact snaps will not fit in the bonus buffer (or
+ * spill block, with the REDACTION_LIST_SPILL feature) with the
+ * furthest reached object and offset, fail.
*/
- if (dbcra->dbcra_numsnaps > (dmu_bonus_max() -
- sizeof (redaction_list_phys_t)) / sizeof (uint64_t))
+ uint64_t snaplimit = ((spa_feature_is_enabled(dp->dp_spa,
+ SPA_FEATURE_REDACTION_LIST_SPILL) ? spa_maxblocksize(dp->dp_spa) :
+ dmu_bonus_max()) -
+ sizeof (redaction_list_phys_t)) / sizeof (uint64_t);
+ if (dbcra->dbcra_numsnaps > snaplimit)
return (SET_ERROR(E2BIG));
if (dsl_bookmark_create_nvl_validate_pair(
@@ -1040,6 +1062,14 @@ dsl_bookmark_destroy_sync_impl(dsl_dataset_t *ds, const char *name,
}
if (dbn->dbn_phys.zbm_redaction_obj != 0) {
+ dnode_t *rl;
+ VERIFY0(dnode_hold(mos,
+ dbn->dbn_phys.zbm_redaction_obj, FTAG, &rl));
+ if (rl->dn_have_spill) {
+ spa_feature_decr(dmu_objset_spa(mos),
+ SPA_FEATURE_REDACTION_LIST_SPILL, tx);
+ }
+ dnode_rele(rl, FTAG);
VERIFY0(dmu_object_free(mos,
dbn->dbn_phys.zbm_redaction_obj, tx));
spa_feature_decr(dmu_objset_spa(mos),
@@ -1213,7 +1243,9 @@ redaction_list_evict_sync(void *rlu)
void
dsl_redaction_list_rele(redaction_list_t *rl, const void *tag)
{
- dmu_buf_rele(rl->rl_dbuf, tag);
+ if (rl->rl_bonus != rl->rl_dbuf)
+ dmu_buf_rele(rl->rl_dbuf, tag);
+ dmu_buf_rele(rl->rl_bonus, tag);
}
int
@@ -1221,7 +1253,7 @@ dsl_redaction_list_hold_obj(dsl_pool_t *dp, uint64_t rlobj, const void *tag,
redaction_list_t **rlp)
{
objset_t *mos = dp->dp_meta_objset;
- dmu_buf_t *dbuf;
+ dmu_buf_t *dbuf, *spill_dbuf;
redaction_list_t *rl;
int err;
@@ -1236,13 +1268,18 @@ dsl_redaction_list_hold_obj(dsl_pool_t *dp, uint64_t rlobj, const void *tag,
redaction_list_t *winner = NULL;
rl = kmem_zalloc(sizeof (redaction_list_t), KM_SLEEP);
- rl->rl_dbuf = dbuf;
+ rl->rl_bonus = dbuf;
+ if (dmu_spill_hold_existing(dbuf, tag, &spill_dbuf) == 0) {
+ rl->rl_dbuf = spill_dbuf;
+ } else {
+ rl->rl_dbuf = dbuf;
+ }
rl->rl_object = rlobj;
- rl->rl_phys = dbuf->db_data;
+ rl->rl_phys = rl->rl_dbuf->db_data;
rl->rl_mos = dp->dp_meta_objset;
zfs_refcount_create(&rl->rl_longholds);
dmu_buf_init_user(&rl->rl_dbu, redaction_list_evict_sync, NULL,
- &rl->rl_dbuf);
+ &rl->rl_bonus);
if ((winner = dmu_buf_set_user_ie(dbuf, &rl->rl_dbu)) != NULL) {
kmem_free(rl, sizeof (*rl));
rl = winner;
diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c
index 053f26878c..d9d88a981e 100644
--- a/module/zfs/dsl_destroy.c
+++ b/module/zfs/dsl_destroy.c
@@ -1125,6 +1125,16 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
while ((dbn = avl_destroy_nodes(&ds->ds_bookmarks, &cookie)) !=
NULL) {
if (dbn->dbn_phys.zbm_redaction_obj != 0) {
+ dnode_t *rl;
+ VERIFY0(dnode_hold(mos,
+ dbn->dbn_phys.zbm_redaction_obj, FTAG,
+ &rl));
+ if (rl->dn_have_spill) {
+ spa_feature_decr(dmu_objset_spa(mos),
+ SPA_FEATURE_REDACTION_LIST_SPILL,
+ tx);
+ }
+ dnode_rele(rl, FTAG);
VERIFY0(dmu_object_free(mos,
dbn->dbn_phys.zbm_redaction_obj, tx));
spa_feature_decr(dmu_objset_spa(mos),
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg
index 160a0ca2e6..4248578cde 100644
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg
@@ -86,6 +86,7 @@ typeset -a properties=(
"feature@log_spacemap"
"feature@device_rebuild"
"feature@draid"
+ "feature@redaction_list_spill"
)
if is_linux || is_freebsd; then
diff --git a/tests/zfs-tests/tests/functional/redacted_send/redacted_many_clones.ksh b/tests/zfs-tests/tests/functional/redacted_send/redacted_many_clones.ksh
index 3386643b29..f2150be3bc 100755
--- a/tests/zfs-tests/tests/functional/redacted_send/redacted_many_clones.ksh
+++ b/tests/zfs-tests/tests/functional/redacted_send/redacted_many_clones.ksh
@@ -27,7 +27,6 @@
# second (the last block in the file) is common to them all.
# 2. Verify a redacted stream with a reasonable redaction list length can
# be correctly processed.
-# 3. Verify that if the list is too long, the send fails gracefully.
#
typeset ds_name="many_clones"
@@ -56,13 +55,18 @@ for i in {1..64}; do
log_must zfs snapshot ${clone}$i@snap
done
-# The limit isn't necessarily 32 snapshots. The maximum number of snapshots in
+# The limit isn't necessarily 64 snapshots. The maximum number of snapshots in
# the redacted list is determined in dsl_bookmark_create_redacted_check().
-log_must zfs redact $sendfs@snap book1 $clone{1..32}@snap
+log_must zfs redact $sendfs@snap book1 $clone{1..64}@snap
log_must eval "zfs send --redact book1 $sendfs@snap >$stream"
log_must eval "zfs recv $recvfs <$stream"
compare_files $sendfs $recvfs "f2" "$RANGE8"
-log_mustnot zfs redact $sendfs@snap book2 $clone{1..64}@snap
+rls_value="$(zpool get -H -o value feature@redaction_list_spill $POOL)"
+if [ "$rls_value" = "active" ]; then
+ log_note "redaction_list_spill feature active"
+else
+ log_fail "redaction_list_spill feature not active"
+fi
log_pass "Redacted send can deal with a large redaction list."