Fix send/recv lost spill block

When receiving a DRR_OBJECT record the receive_object() function
needs to determine how to handle a spill block associated with the
object.  It may need to be removed or kept depending on how the
object was modified at the source.

This determination is currently accomplished using a heuristic which
takes into account the DRR_OBJECT record and the existing object
properties.  This is a problem because there isn't quite enough
information available to do the right thing under all circumstances.
For example, when only the block size changes the spill block is
removed when it should be kept.

What's needed to resolve this is an additional flag in the DRR_OBJECT
which indicates if the object being received references a spill block.
The DRR_OBJECT_SPILL flag was added for this purpose.  When set, the
object references a spill block and it must be kept.  Either it is
up to date, or it will be replaced by a subsequent DRR_SPILL
record.  Conversely, if the object being received doesn't reference
a spill block then any existing spill block should always be removed.

Since previous versions of ZFS do not understand this new flag,
additional DRR_SPILL records will be inserted into the stream.
This has the advantage of being fully backward compatible.  Existing
ZFS systems receiving this stream will recreate the spill block if
it was incorrectly removed.  Updated ZFS versions will correctly
ignore the additional spill blocks which can be identified by
checking for the DRR_SPILL_UNMODIFIED flag.

The small downside to this approach is that it may increase the size
of the stream and of the received snapshot on previous versions of
ZFS.  Additionally, when receiving streams generated by previous
unpatched versions of ZFS, spill blocks may still be lost.

OpenZFS-issue: https://www.illumos.org/issues/9952
FreeBSD-issue: https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=233277

Reviewed-by: Paul Dagnelie <pcd@delphix.com>
Reviewed-by: Matt Ahrens <mahrens@delphix.com>
Reviewed-by: Tom Caputi <tcaputi@datto.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #8668
This commit is contained in:
Brian Behlendorf 2019-05-07 15:18:44 -07:00 committed by GitHub
parent 9c53e51616
commit caf9dd209f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 398 additions and 42 deletions

View File

@ -420,7 +420,8 @@ int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *txp);
int dmu_object_reclaim_dnsize(objset_t *os, uint64_t object,
dmu_object_type_t ot, int blocksize, dmu_object_type_t bonustype,
int bonuslen, int dnodesize, dmu_tx_t *txp);
int bonuslen, int dnodesize, boolean_t keep_spill, dmu_tx_t *tx);
int dmu_object_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx);
/*
* Free an object from this objset.

View File

@ -265,6 +265,7 @@ typedef struct dmu_sendarg {
objset_t *dsa_os;
zio_cksum_t dsa_zc;
uint64_t dsa_toguid;
uint64_t dsa_fromtxg;
int dsa_err;
dmu_pendop_t dsa_pending_op;
uint64_t dsa_featureflags;

View File

@ -48,6 +48,7 @@ typedef struct dmu_recv_cookie {
boolean_t drc_resumable;
boolean_t drc_raw;
boolean_t drc_clone;
boolean_t drc_spill;
struct avl_tree *drc_guid_to_ds_map;
nvlist_t *drc_keynvl;
zio_cksum_t drc_cksum;

View File

@ -267,8 +267,8 @@ typedef struct dnode_phys {
};
} dnode_phys_t;
#define DN_SPILL_BLKPTR(dnp) (blkptr_t *)((char *)(dnp) + \
(((dnp)->dn_extra_slots + 1) << DNODE_SHIFT) - (1 << SPA_BLKPTRSHIFT))
#define DN_SPILL_BLKPTR(dnp) ((blkptr_t *)((char *)(dnp) + \
(((dnp)->dn_extra_slots + 1) << DNODE_SHIFT) - (1 << SPA_BLKPTRSHIFT)))
struct dnode {
/*
@ -420,7 +420,8 @@ void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx);
void dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx);
dmu_object_type_t bonustype, int bonuslen, int dn_slots,
boolean_t keep_spill, dmu_tx_t *tx);
void dnode_free(dnode_t *dn, dmu_tx_t *tx);
void dnode_byteswap(dnode_phys_t *dnp);
void dnode_buf_byteswap(void *buf, size_t size);

View File

@ -1317,6 +1317,7 @@ typedef enum {
ZFS_ERR_WRONG_PARENT,
ZFS_ERR_FROM_IVSET_GUID_MISSING,
ZFS_ERR_FROM_IVSET_GUID_MISMATCH,
ZFS_ERR_SPILL_BLOCK_FLAG_MISSING,
} zfs_errno_t;
/*

View File

@ -101,7 +101,7 @@ typedef enum drr_headertype {
/* flag #18 is reserved for a Delphix feature */
#define DMU_BACKUP_FEATURE_LARGE_BLOCKS (1 << 19)
#define DMU_BACKUP_FEATURE_RESUMING (1 << 20)
/* flag #21 is reserved for a Delphix feature */
/* flag #21 is reserved for the redacted send/receive feature */
#define DMU_BACKUP_FEATURE_COMPRESSED (1 << 22)
#define DMU_BACKUP_FEATURE_LARGE_DNODE (1 << 23)
#define DMU_BACKUP_FEATURE_RAW (1 << 24)
@ -160,16 +160,38 @@ typedef enum dmu_send_resume_token_version {
* cannot necessarily be received as a clone correctly.
*/
#define DRR_FLAG_FREERECORDS (1<<2)
/*
* When DRR_FLAG_SPILL_BLOCK is set it indicates the DRR_OBJECT_SPILL
* and DRR_SPILL_UNMODIFIED flags are meaningful in the send stream.
*
* When DRR_FLAG_SPILL_BLOCK is set, DRR_OBJECT records will have
* DRR_OBJECT_SPILL set if and only if they should have a spill block
* (either an existing one, or a new one in the send stream). When clear
* the object does not have a spill block and any existing spill block
* should be freed.
*
* Similarly, when DRR_FLAG_SPILL_BLOCK is set, DRR_SPILL records will
* have DRR_SPILL_UNMODIFIED set if and only if they were included for
* backward compatibility purposes, and can be safely ignored by new versions
* of zfs receive. Previous versions of ZFS which do not understand the
* DRR_FLAG_SPILL_BLOCK will process this record and recreate any missing
* spill blocks.
*/
#define DRR_FLAG_SPILL_BLOCK (1<<3)
/*
* flags in the drr_flags field in the DRR_WRITE, DRR_SPILL, DRR_OBJECT,
* DRR_WRITE_BYREF, and DRR_OBJECT_RANGE blocks
*/
#define DRR_CHECKSUM_DEDUP (1<<0) /* not used for DRR_SPILL blocks */
#define DRR_CHECKSUM_DEDUP (1<<0) /* not used for SPILL records */
#define DRR_RAW_BYTESWAP (1<<1)
#define DRR_OBJECT_SPILL (1<<2) /* OBJECT record has a spill block */
#define DRR_SPILL_UNMODIFIED (1<<2) /* SPILL record for unmodified block */
#define DRR_IS_DEDUP_CAPABLE(flags) ((flags) & DRR_CHECKSUM_DEDUP)
#define DRR_IS_RAW_BYTESWAPPED(flags) ((flags) & DRR_RAW_BYTESWAP)
#define DRR_OBJECT_HAS_SPILL(flags) ((flags) & DRR_OBJECT_SPILL)
#define DRR_SPILL_IS_UNMODIFIED(flags) ((flags) & DRR_SPILL_UNMODIFIED)
/* deal with compressed drr_write replay records */
#define DRR_WRITE_COMPRESSED(drrw) ((drrw)->drr_compressiontype != 0)

View File

@ -4466,6 +4466,13 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
"of raw encrypted send streams."));
(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
break;
case ZFS_ERR_SPILL_BLOCK_FLAG_MISSING:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"Spill block flag missing for raw send.\n"
"The zfs software on the sending system must "
"be updated."));
(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
break;
case EBUSY:
if (hastoken) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,

View File

@ -2337,6 +2337,21 @@ Allow sending of corrupt data (ignore read/checksum errors when sending data)
Use \fB1\fR for yes and \fB0\fR for no (default).
.RE
.sp
.ne 2
.na
\fBzfs_send_unmodified_spill_blocks\fR (int)
.ad
.RS 12n
Include unmodified spill blocks in the send stream. Under certain circumstances
previous versions of ZFS could incorrectly remove the spill block from an
existing object. Including unmodified copies of the spill blocks creates a
backwards compatible stream which will recreate a spill block if it was
incorrectly removed.
.sp
Use \fB1\fR for yes (default) and \fB0\fR for no.
.RE
.sp
.ne 2
.na
@ -2355,7 +2370,6 @@ Default value: \fB16,777,216\fR.
\fBzfs_recv_queue_length\fR (int)
.ad
.RS 12n
.sp
The maximum number of bytes allowed in the \fBzfs receive\fR queue. This value
must be at least twice the maximum block size in use.
.sp

View File

@ -2466,7 +2466,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
ASSERT(db->db_level == 0);
ASSERT3U(dbuf_is_metadata(db), ==, arc_is_metadata(buf));
ASSERT(buf != NULL);
ASSERT(arc_buf_lsize(buf) == db->db.db_size);
ASSERT3U(arc_buf_lsize(buf), ==, db->db.db_size);
ASSERT(tx->tx_txg != 0);
arc_return_buf(buf, db);

View File

@ -24,6 +24,7 @@
* Copyright 2014 HybridCluster. All rights reserved.
*/
#include <sys/dbuf.h>
#include <sys/dmu.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_tx.h>
@ -304,13 +305,13 @@ dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
return (dmu_object_reclaim_dnsize(os, object, ot, blocksize, bonustype,
bonuslen, DNODE_MIN_SIZE, tx));
bonuslen, DNODE_MIN_SIZE, B_FALSE, tx));
}
int
dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
int blocksize, dmu_object_type_t bonustype, int bonuslen, int dnodesize,
dmu_tx_t *tx)
boolean_t keep_spill, dmu_tx_t *tx)
{
dnode_t *dn;
int dn_slots = dnodesize >> DNODE_SHIFT;
@ -327,7 +328,30 @@ dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
if (err)
return (err);
dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, dn_slots, tx);
dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, dn_slots,
keep_spill, tx);
dnode_rele(dn, FTAG);
return (err);
}
int
dmu_object_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx)
{
dnode_t *dn;
int err;
err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, 0,
FTAG, &dn);
if (err)
return (err);
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
dbuf_rm_spill(dn, tx);
dnode_rm_spill(dn, tx);
}
rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG);
return (err);
@ -489,6 +513,7 @@ EXPORT_SYMBOL(dmu_object_claim);
EXPORT_SYMBOL(dmu_object_claim_dnsize);
EXPORT_SYMBOL(dmu_object_reclaim);
EXPORT_SYMBOL(dmu_object_reclaim_dnsize);
EXPORT_SYMBOL(dmu_object_rm_spill);
EXPORT_SYMBOL(dmu_object_free);
EXPORT_SYMBOL(dmu_object_next);
EXPORT_SYMBOL(dmu_object_zapify);

View File

@ -274,6 +274,10 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
/* embedded data is incompatible with encryption and raw recv */
if (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA)
return (SET_ERROR(EINVAL));
/* raw receives require spill block allocation flag */
if (!(flags & DRR_FLAG_SPILL_BLOCK))
return (SET_ERROR(ZFS_ERR_SPILL_BLOCK_FLAG_MISSING));
} else {
dsflags |= DS_HOLD_FLAG_DECRYPT;
}
@ -615,8 +619,13 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx)
(void) snprintf(recvname, sizeof (recvname), "%s/%s",
tofs, recv_clone_name);
if ((featureflags & DMU_BACKUP_FEATURE_RAW) == 0)
if (featureflags & DMU_BACKUP_FEATURE_RAW) {
/* raw receives require spill block allocation flag */
if (!(drrb->drr_flags & DRR_FLAG_SPILL_BLOCK))
return (SET_ERROR(ZFS_ERR_SPILL_BLOCK_FLAG_MISSING));
} else {
dsflags |= DS_HOLD_FLAG_DECRYPT;
}
if (dsl_dataset_hold_flags(dp, recvname, dsflags, FTAG, &ds) != 0) {
/* %recv does not exist; continue in tofs */
@ -764,6 +773,9 @@ dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin,
return (SET_ERROR(EINVAL));
}
if (drc->drc_drrb->drr_flags & DRR_FLAG_SPILL_BLOCK)
drc->drc_spill = B_TRUE;
drba.drba_origin = origin;
drba.drba_cookie = drc;
drba.drba_cred = CRED();
@ -835,7 +847,8 @@ struct receive_writer_arg {
/* A map from guid to dataset to help handle dedup'd streams. */
avl_tree_t *guid_to_ds_map;
boolean_t resumable;
boolean_t raw;
boolean_t raw; /* DMU_BACKUP_FEATURE_RAW set */
boolean_t spill; /* DRR_FLAG_SPILL_BLOCK set */
uint64_t last_object;
uint64_t last_offset;
uint64_t max_object; /* highest object ID referenced in stream */
@ -1151,12 +1164,21 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
drro->drr_raw_bonuslen)
return (SET_ERROR(EINVAL));
} else {
if (drro->drr_flags != 0 || drro->drr_raw_bonuslen != 0 ||
drro->drr_indblkshift != 0 || drro->drr_nlevels != 0 ||
drro->drr_nblkptr != 0)
/*
* The DRR_OBJECT_SPILL flag is valid when the DRR_BEGIN
* record indicates this by setting DRR_FLAG_SPILL_BLOCK.
*/
if (((drro->drr_flags & ~(DRR_OBJECT_SPILL))) ||
(!rwa->spill && DRR_OBJECT_HAS_SPILL(drro->drr_flags))) {
return (SET_ERROR(EINVAL));
}
if (drro->drr_raw_bonuslen != 0 || drro->drr_nblkptr != 0 ||
drro->drr_indblkshift != 0 || drro->drr_nlevels != 0) {
return (SET_ERROR(EINVAL));
}
}
err = dmu_object_info(rwa->os, drro->drr_object, &doi);
if (err != 0 && err != ENOENT && err != EEXIST)
return (SET_ERROR(EINVAL));
@ -1312,7 +1334,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
}
if (object == DMU_NEW_OBJECT) {
/* currently free, want to be allocated */
/* Currently free, wants to be allocated */
err = dmu_object_claim_dnsize(rwa->os, drro->drr_object,
drro->drr_type, drro->drr_blksz,
drro->drr_bonustype, drro->drr_bonuslen,
@ -1321,11 +1343,19 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
drro->drr_blksz != doi.doi_data_block_size ||
drro->drr_bonustype != doi.doi_bonus_type ||
drro->drr_bonuslen != doi.doi_bonus_size) {
/* currently allocated, but with different properties */
/* Currently allocated, but with different properties */
err = dmu_object_reclaim_dnsize(rwa->os, drro->drr_object,
drro->drr_type, drro->drr_blksz,
drro->drr_bonustype, drro->drr_bonuslen,
dn_slots << DNODE_SHIFT, tx);
dn_slots << DNODE_SHIFT, rwa->spill ?
DRR_OBJECT_HAS_SPILL(drro->drr_flags) : B_FALSE, tx);
} else if (rwa->spill && !DRR_OBJECT_HAS_SPILL(drro->drr_flags)) {
/*
* Currently allocated, the existing version of this object
* may reference a spill block that is no longer allocated
* at the source and needs to be freed.
*/
err = dmu_object_rm_spill(rwa->os, drro->drr_object, tx);
}
if (err != 0) {
@ -1665,6 +1695,17 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
drrs->drr_length > spa_maxblocksize(dmu_objset_spa(rwa->os)))
return (SET_ERROR(EINVAL));
/*
* This is an unmodified spill block which was added to the stream
* to resolve an issue with incorrectly removing spill blocks. It
* should be ignored by current versions of the code which support
* the DRR_FLAG_SPILL_BLOCK flag.
*/
if (rwa->spill && DRR_SPILL_IS_UNMODIFIED(drrs->drr_flags)) {
dmu_return_arcbuf(abuf);
return (0);
}
if (rwa->raw) {
if (!DMU_OT_IS_VALID(drrs->drr_type) ||
drrs->drr_compressiontype >= ZIO_COMPRESS_FUNCTIONS ||
@ -1699,9 +1740,16 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
return (err);
}
if (db_spill->db_size < drrs->drr_length)
/*
* Spill blocks may both grow and shrink. When a change in size
* occurs any existing dbuf must be updated to match the logical
* size of the provided arc_buf_t.
*/
if (db_spill->db_size != drrs->drr_length) {
dmu_buf_will_fill(db_spill, tx);
VERIFY(0 == dbuf_spill_set_blksz(db_spill,
drrs->drr_length, tx));
}
if (rwa->byteswap && !arc_is_encrypted(abuf) &&
arc_get_compression(abuf) == ZIO_COMPRESS_OFF) {
@ -2575,6 +2623,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
rwa->byteswap = drc->drc_byteswap;
rwa->resumable = drc->drc_resumable;
rwa->raw = drc->drc_raw;
rwa->spill = drc->drc_spill;
rwa->os->os_raw_receive = drc->drc_raw;
(void) thread_create(NULL, 0, receive_writer_thread, rwa, 0, curproc,

View File

@ -64,6 +64,8 @@ int zfs_send_corrupt_data = B_FALSE;
int zfs_send_queue_length = SPA_MAXBLOCKSIZE;
/* Set this tunable to FALSE to disable setting of DRR_FLAG_FREERECORDS */
int zfs_send_set_freerecords_bit = B_TRUE;
/* Set this tunable to FALSE to disable sending unmodified spill blocks. */
int zfs_send_unmodified_spill_blocks = B_TRUE;
/*
* Use this to override the recordsize calculation for fast zfs send estimates.
@ -99,6 +101,8 @@ typedef struct dump_bytes_io {
int dbi_len;
} dump_bytes_io_t;
static int do_dump(dmu_sendarg_t *dsa, struct send_block_record *data);
static void
dump_bytes_cb(void *arg)
{
@ -436,6 +440,12 @@ dump_spill(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object, void *data)
drrs->drr_length = blksz;
drrs->drr_toguid = dsp->dsa_toguid;
/* See comment in dump_dnode() for full details */
if (zfs_send_unmodified_spill_blocks &&
(bp->blk_birth <= dsp->dsa_fromtxg)) {
drrs->drr_flags |= DRR_SPILL_UNMODIFIED;
}
/* handle raw send fields */
if (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) {
ASSERT(BP_IS_PROTECTED(bp));
@ -587,6 +597,14 @@ dump_dnode(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object,
}
}
/*
* DRR_OBJECT_SPILL is set for every dnode which references a
* spill block. This allows the receiving pool to definitively
* determine when a spill block should be kept or freed.
*/
if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)
drro->drr_flags |= DRR_OBJECT_SPILL;
if (dump_record(dsp, DN_BONUS(dnp), bonuslen) != 0)
return (SET_ERROR(EINTR));
@ -594,8 +612,34 @@ dump_dnode(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object,
if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
(dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), DMU_OBJECT_END) != 0)
return (SET_ERROR(EINTR));
/*
* Send DRR_SPILL records for unmodified spill blocks. This is useful
* because changing certain attributes of the object (e.g. blocksize)
* can cause old versions of ZFS to incorrectly remove a spill block.
* Including these records in the stream forces an up to date version
* to always be written ensuring they're never lost. Current versions
* of the code which understand the DRR_FLAG_SPILL_BLOCK feature can
* ignore these unmodified spill blocks.
*/
if (zfs_send_unmodified_spill_blocks &&
(dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) &&
(DN_SPILL_BLKPTR(dnp)->blk_birth <= dsp->dsa_fromtxg)) {
struct send_block_record record;
bzero(&record, sizeof (struct send_block_record));
record.eos_marker = B_FALSE;
record.bp = *DN_SPILL_BLKPTR(dnp);
SET_BOOKMARK(&(record.zb), dmu_objset_id(dsp->dsa_os),
object, 0, DMU_SPILL_BLKID);
if (do_dump(dsp, &record) != 0)
return (SET_ERROR(EINTR));
}
if (dsp->dsa_err != 0)
return (SET_ERROR(EINTR));
return (0);
}
@ -1036,6 +1080,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
/* raw send implies compressok */
if (compressok || rawok)
featureflags |= DMU_BACKUP_FEATURE_COMPRESSED;
if (rawok && os->os_encrypted)
featureflags |= DMU_BACKUP_FEATURE_RAW;
@ -1064,6 +1109,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
if (zfs_send_set_freerecords_bit)
drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_FREERECORDS;
drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_SPILL_BLOCK;
if (ancestor_zb != NULL) {
drr->drr_u.drr_begin.drr_fromguid =
ancestor_zb->zbm_guid;
@ -1084,6 +1131,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
dsp->dsa_os = os;
dsp->dsa_off = off;
dsp->dsa_toguid = dsl_dataset_phys(to_ds)->ds_guid;
dsp->dsa_fromtxg = fromtxg;
dsp->dsa_pending_op = PENDING_NONE;
dsp->dsa_featureflags = featureflags;
dsp->dsa_resume_object = resumeobj;
@ -1552,4 +1600,8 @@ MODULE_PARM_DESC(zfs_send_corrupt_data, "Allow sending corrupt data");
module_param(zfs_send_queue_length, int, 0644);
MODULE_PARM_DESC(zfs_send_queue_length, "Maximum send queue length");
module_param(zfs_send_unmodified_spill_blocks, int, 0644);
MODULE_PARM_DESC(zfs_send_unmodified_spill_blocks,
"Send unmodified spill blocks");
#endif

View File

@ -660,7 +660,8 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
void
dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx)
dmu_object_type_t bonustype, int bonuslen, int dn_slots,
boolean_t keep_spill, dmu_tx_t *tx)
{
int nblkptr;
@ -710,7 +711,7 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
dn->dn_next_bonustype[tx->tx_txg & TXG_MASK] = bonustype;
if (dn->dn_nblkptr != nblkptr)
dn->dn_next_nblkptr[tx->tx_txg & TXG_MASK] = nblkptr;
if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR && !keep_spill) {
dbuf_rm_spill(dn, tx);
dnode_rm_spill(dn, tx);
}

View File

@ -807,8 +807,8 @@ tests = ['rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos',
'send-c_recv_dedup', 'send_encrypted_files', 'send_encrypted_hierarchy',
'send_encrypted_props', 'send_encrypted_truncated_files',
'send_freeobjects', 'send_realloc_dnode_size', 'send_realloc_files',
'send_realloc_encrypted_files', 'send_holds', 'send_hole_birth',
'send_mixed_raw', 'send-wDR_encrypted_zvol']
'send_realloc_encrypted_files', 'send_spill_block', 'send_holds',
'send_hole_birth', 'send_mixed_raw', 'send-wDR_encrypted_zvol']
tags = ['functional', 'rsend']
[tests/functional/scrub_mirror]

View File

@ -44,6 +44,7 @@ dist_pkgdata_SCRIPTS = \
send_realloc_dnode_size.ksh \
send_realloc_files.ksh \
send_realloc_encrypted_files.ksh \
send_spill_block.ksh \
send_holds.ksh \
send_hole_birth.ksh \
send_mixed_raw.ksh \

View File

@ -30,6 +30,7 @@
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/include/math.shlib
. $STF_SUITE/tests/functional/cli_root/zfs_set/zfs_set_common.kshlib
. $STF_SUITE/tests/functional/rsend/rsend.cfg
#
@ -518,9 +519,13 @@ function churn_files
value=$((RANDOM % 5))
if [ $value -eq 0 -a $xattrs -ne 0 ]; then
attrname="testattr$((RANDOM % 3))"
attrlen="$(((RANDOM % 1000) + 1))"
attrvalue="$(random_string VALID_NAME_CHAR \
$attrlen)"
attr -qr $attrname $file_name || \
log_fail "Failed to remove $attrname"
attr -qs $attrname -V TestValue $file_name || \
attr -qs $attrname \
-V "$attrvalue" $file_name || \
log_fail "Failed to set $attrname"
elif [ $value -eq 1 ]; then
dd if=/dev/urandom of=$file_name \
@ -548,9 +553,12 @@ function churn_files
if [ $xattrs -ne 0 ]; then
for j in {0..2}; do
attrname="testattr$j"
attr -qs $attrname -V TestValue \
$file_name || log_fail \
"Failed to set $attrname"
attrlen="$(((RANDOM % 1000) + 1))"
attrvalue="$(random_string \
VALID_NAME_CHAR $attrlen)"
attr -qs $attrname \
-V "$attrvalue" $file_name || \
log_fail "Failed to set $attrname"
done
fi
fi
@ -791,10 +799,11 @@ function rand_set_prop
log_must eval "zfs set $prop='$value' $dtst"
}
# Generate a recursive checksum of a filesystems contents. Only file
# data is included in the checksum (no meta data, or xattrs).
# Generate a recursive checksum of a filesystem which includes the file
# contents and any associated xattrs.
function recursive_cksum
{
find $1 -type f -exec sha256sum {} \; | \
find $1 -type f -exec sh -c 'sha256sum {}; getfattr \
--absolute-names --only-values -d {} | sha256sum' \; | \
sort -k 2 | awk '{ print $1 }' | sha256sum
}

View File

@ -65,7 +65,16 @@ log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs@snap${last_snap}"
# Set atime=off to prevent the recursive_cksum from modifying newfs.
log_must zfs set atime=off $POOL/newfs
for i in {1..5}; do
# Due to reduced performance on debug kernels use fewer files by default.
if is_kmemleak; then
nr_files=100
passes=2
else
nr_files=1000
passes=3
fi
for i in {1..$passes}; do
# Randomly modify several dataset properties in order to generate
# more interesting incremental send streams.
rand_set_prop $POOL/fs checksum "off" "fletcher4" "sha256"
@ -76,12 +85,8 @@ for i in {1..5}; do
# Churn the filesystem in such a way that we're likely to be both
# allocating and reallocating objects in the incremental stream.
#
# Disable xattrs until the following spill block issue is resolved:
# https://github.com/openzfs/openzfs/pull/705
#
log_must churn_files 1000 524288 $POOL/fs 0
expected_cksum=$(recursive_cksum /$fs)
log_must churn_files $nr_files 524288 $POOL/fs
expected_cksum=$(recursive_cksum /$POOL/fs)
# Create a snapshot and use it to send an incremental stream.
this_snap=$((last_snap + 1))

View File

@ -35,6 +35,8 @@
# e) Destroy the incremental stream and old snapshot.
#
verify_runnable "both"
log_assert "Verify incremental receive handles reallocation"
function cleanup
@ -56,7 +58,16 @@ log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs@snap${last_snap}"
# Set atime=off to prevent the recursive_cksum from modifying newfs.
log_must zfs set atime=off $POOL/newfs
for i in {1..5}; do
# Due to reduced performance on debug kernels use fewer files by default.
if is_kmemleak; then
nr_files=100
passes=2
else
nr_files=1000
passes=3
fi
for i in {1..$passes}; do
# Randomly modify several dataset properties in order to generate
# more interesting incremental send streams.
rand_set_prop $POOL/fs checksum "off" "fletcher4" "sha256"
@ -67,8 +78,8 @@ for i in {1..5}; do
# Churn the filesystem in such a way that we're likely to be both
# allocating and reallocating objects in the incremental stream.
log_must churn_files 1000 524288 $POOL/fs
expected_cksum=$(recursive_cksum /$fs)
log_must churn_files $nr_files 524288 $POOL/fs
expected_cksum=$(recursive_cksum /$POOL/fs)
# Create a snapshot and use it to send an incremental stream.
this_snap=$((last_snap + 1))

View File

@ -0,0 +1,155 @@
#!/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2019 by Lawrence Livermore National Security, LLC.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/rsend/rsend.kshlib
#
# Description:
# Verify spill blocks are correctly preserved.
#
# Strategy:
# 1) Create a set of files each containing some file data.
# 2) Add enough xattrs to the file to require a spill block.
# 3) Snapshot and send these files to a new dataset.
# 4) Modify the files and spill blocks in a variety of ways.
# 5) Send the changes using an incremental send stream.
# 6) Verify that all the xattrs (and thus the spill block) were
# preserved when receiving the incremental stream.
#
verify_runnable "both"
log_assert "Verify spill blocks are correctly preserved"
#
# Test teardown, registered with log_onexit below.  Removes the send
# stream files written to $BACKDIR/fs@* and destroys both the source
# dataset ($POOL/fs) and the received dataset ($POOL/newfs).  The "-rR"
# argument is handed to destroy_dataset (presumably forwarded to
# 'zfs destroy' so snapshots and dependents are removed as well).
#
function cleanup
{
rm -f $BACKDIR/fs@*
destroy_dataset $POOL/fs "-rR"
destroy_dataset $POOL/newfs "-rR"
}
attrvalue="abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"
log_onexit cleanup
log_must zfs create $POOL/fs
log_must zfs set xattr=sa $POOL/fs
log_must zfs set dnodesize=legacy $POOL/fs
log_must zfs set recordsize=128k $POOL/fs
# Create 40 files each with a spill block containing xattrs. Each file
# will be modified in a different way to validate the incremental receive.
for i in {1..40}; do
file="/$POOL/fs/file$i"
log_must mkfile 16384 $file
for j in {1..20}; do
log_must attr -qs "testattr$j" -V "$attrvalue" $file
done
done
# Snapshot the source dataset and send it to the new dataset.
log_must zfs snapshot $POOL/fs@snap1
log_must eval "zfs send -e $POOL/fs@snap1 >$BACKDIR/fs@snap1"
log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs@snap1"
#
# Modify file[1-6]'s contents but not the spill blocks.
#
# file1 - Increase record size; single block
# file2 - Increase record size; multiple blocks
# file3 - Truncate file to zero size; single block
# file4 - Truncate file to smaller size; single block
# file5 - Truncate file to much larger size; add holes
# file6 - Truncate file to embedded size; embedded data
#
log_must mkfile 32768 /$POOL/fs/file1
log_must mkfile 1048576 /$POOL/fs/file2
log_must truncate -s 0 /$POOL/fs/file3
log_must truncate -s 8192 /$POOL/fs/file4
log_must truncate -s 1073741824 /$POOL/fs/file5
log_must truncate -s 50 /$POOL/fs/file6
#
# Modify file[11-16]'s contents and their spill blocks.
#
# file11 - Increase record size; single block
# file12 - Increase record size; multiple blocks
# file13 - Truncate file to zero size; single block
# file14 - Truncate file to smaller size; single block
# file15 - Truncate file to much larger size; add holes
# file16 - Truncate file to embedded size; embedded data
#
log_must mkfile 32768 /$POOL/fs/file11
log_must mkfile 1048576 /$POOL/fs/file12
log_must truncate -s 0 /$POOL/fs/file13
log_must truncate -s 8192 /$POOL/fs/file14
log_must truncate -s 1073741824 /$POOL/fs/file15
log_must truncate -s 50 /$POOL/fs/file16
for i in {11..20}; do
log_must attr -qr testattr1 /$POOL/fs/file$i
done
#
# Modify file[21-26]'s contents and remove their spill blocks.
#
# file21 - Increase record size; single block
# file22 - Increase record size; multiple blocks
# file23 - Truncate file to zero size; single block
# file24 - Truncate file to smaller size; single block
# file25 - Truncate file to much larger size; add holes
# file26 - Truncate file to embedded size; embedded data
#
log_must mkfile 32768 /$POOL/fs/file21
log_must mkfile 1048576 /$POOL/fs/file22
log_must truncate -s 0 /$POOL/fs/file23
log_must truncate -s 8192 /$POOL/fs/file24
log_must truncate -s 1073741824 /$POOL/fs/file25
log_must truncate -s 50 /$POOL/fs/file26
for i in {21..30}; do
for j in {1..20}; do
log_must attr -qr testattr$j /$POOL/fs/file$i
done
done
#
# Modify file[31-40]'s spill blocks but not the file contents.
#
for i in {31..40}; do
file="/$POOL/fs/file$i"
log_must attr -qr testattr$(((RANDOM % 20) + 1)) $file
log_must attr -qs testattr$(((RANDOM % 20) + 1)) -V "$attrvalue" $file
done
# Calculate the expected recursive checksum for the source.
expected_cksum=$(recursive_cksum /$POOL/fs)
# Snapshot the source dataset and send the incremental snapshot.
log_must zfs snapshot $POOL/fs@snap2
log_must eval "zfs send -e -i $POOL/fs@snap1 $POOL/fs@snap2 >$BACKDIR/fs@snap2"
log_must eval "zfs recv -F $POOL/newfs < $BACKDIR/fs@snap2"
# Validate the received copy using the received recursive checksum.
actual_cksum=$(recursive_cksum /$POOL/newfs)
if [[ "$expected_cksum" != "$actual_cksum" ]]; then
log_fail "Checksums differ ($expected_cksum != $actual_cksum)"
fi
log_pass "Verify spill blocks are correctly preserved"