Raw DRR_OBJECT records must write raw data
b1d21733
made it possible for empty metadnode blocks to be
compressed to a hole, fixing a bug that would cause invalid
metadnode MACs when a send stream attempted to free objects
and allowing the blocks to be reclaimed when they were no
longer needed. However, this patch also introduced a race
condition; if a txg sync occurred after a DRR_OBJECT_RANGE
record was received but before any objects were added, the
metadnode block would be compressed to a hole and lose all
of its encryption parameters. This would cause subsequent
DRR_OBJECT records to fail when they attempted to write
their data into an unencrypted block. This patch defers the
DRR_OBJECT_RANGE handling to receive_object() so that the
encryption parameters are set with each object that is
written into that block.
Reviewed-by: Kash Pande <kash@tripleback.net>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #7215
Closes #7236
This commit is contained in:
parent
8b5814393f
commit
095495e008
|
@ -821,8 +821,9 @@ void dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset,
|
||||||
void dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset,
|
void dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset,
|
||||||
struct arc_buf *buf, dmu_tx_t *tx);
|
struct arc_buf *buf, dmu_tx_t *tx);
|
||||||
#define dmu_assign_arcbuf dmu_assign_arcbuf_by_dbuf
|
#define dmu_assign_arcbuf dmu_assign_arcbuf_by_dbuf
|
||||||
void dmu_convert_to_raw(dmu_buf_t *handle, boolean_t byteorder,
|
int dmu_convert_mdn_block_to_raw(objset_t *os, uint64_t firstobj,
|
||||||
const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx);
|
boolean_t byteorder, const uint8_t *salt, const uint8_t *iv,
|
||||||
|
const uint8_t *mac, dmu_tx_t *tx);
|
||||||
void dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
|
void dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
|
||||||
dmu_buf_t *handle, dmu_tx_t *tx);
|
dmu_buf_t *handle, dmu_tx_t *tx);
|
||||||
#ifdef HAVE_UIO_ZEROCOPY
|
#ifdef HAVE_UIO_ZEROCOPY
|
||||||
|
|
|
@ -3465,8 +3465,7 @@ dbuf_check_crypt(dbuf_dirty_record_t *dr)
|
||||||
* Writing raw encrypted data requires the db's arc buffer
|
* Writing raw encrypted data requires the db's arc buffer
|
||||||
* to be converted to raw by the caller.
|
* to be converted to raw by the caller.
|
||||||
*/
|
*/
|
||||||
ASSERT(arc_is_encrypted(db->db_buf) ||
|
ASSERT(arc_is_encrypted(db->db_buf));
|
||||||
db->db.db_object == DMU_META_DNODE_OBJECT);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1542,29 +1542,39 @@ dmu_return_arcbuf(arc_buf_t *buf)
|
||||||
arc_buf_destroy(buf, FTAG);
|
arc_buf_destroy(buf, FTAG);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
int
|
||||||
dmu_convert_to_raw(dmu_buf_t *handle, boolean_t byteorder, const uint8_t *salt,
|
dmu_convert_mdn_block_to_raw(objset_t *os, uint64_t firstobj,
|
||||||
const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx)
|
boolean_t byteorder, const uint8_t *salt, const uint8_t *iv,
|
||||||
|
const uint8_t *mac, dmu_tx_t *tx)
|
||||||
{
|
{
|
||||||
dmu_object_type_t type;
|
int ret;
|
||||||
dmu_buf_impl_t *db = (dmu_buf_impl_t *)handle;
|
dmu_buf_t *handle = NULL;
|
||||||
uint64_t dsobj = dmu_objset_id(db->db_objset);
|
dmu_buf_impl_t *db = NULL;
|
||||||
|
uint64_t offset = firstobj * DNODE_MIN_SIZE;
|
||||||
|
uint64_t dsobj = dmu_objset_id(os);
|
||||||
|
|
||||||
ASSERT3P(db->db_buf, !=, NULL);
|
ret = dmu_buf_hold_by_dnode(DMU_META_DNODE(os), offset, FTAG, &handle,
|
||||||
ASSERT3U(dsobj, !=, 0);
|
DMU_READ_PREFETCH | DMU_READ_NO_DECRYPT);
|
||||||
|
if (ret != 0)
|
||||||
|
return (ret);
|
||||||
|
|
||||||
dmu_buf_will_change_crypt_params(handle, tx);
|
dmu_buf_will_change_crypt_params(handle, tx);
|
||||||
|
|
||||||
DB_DNODE_ENTER(db);
|
db = (dmu_buf_impl_t *)handle;
|
||||||
type = DB_DNODE(db)->dn_type;
|
ASSERT3P(db->db_buf, !=, NULL);
|
||||||
DB_DNODE_EXIT(db);
|
ASSERT3U(dsobj, !=, 0);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This technically violates the assumption the dmu code makes
|
* This technically violates the assumption the dmu code makes
|
||||||
* that dnode blocks are only released in syncing context.
|
* that dnode blocks are only released in syncing context.
|
||||||
*/
|
*/
|
||||||
(void) arc_release(db->db_buf, db);
|
(void) arc_release(db->db_buf, db);
|
||||||
arc_convert_to_raw(db->db_buf, dsobj, byteorder, type, salt, iv, mac);
|
arc_convert_to_raw(db->db_buf, dsobj, byteorder, DMU_OT_DNODE,
|
||||||
|
salt, iv, mac);
|
||||||
|
|
||||||
|
dmu_buf_rele(handle, FTAG);
|
||||||
|
|
||||||
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
|
@ -2167,6 +2167,14 @@ struct receive_writer_arg {
|
||||||
uint64_t last_offset;
|
uint64_t last_offset;
|
||||||
uint64_t max_object; /* highest object ID referenced in stream */
|
uint64_t max_object; /* highest object ID referenced in stream */
|
||||||
uint64_t bytes_read; /* bytes read when current record created */
|
uint64_t bytes_read; /* bytes read when current record created */
|
||||||
|
|
||||||
|
/* Encryption parameters for the last received DRR_OBJECT_RANGE */
|
||||||
|
uint64_t or_firstobj;
|
||||||
|
uint64_t or_numslots;
|
||||||
|
uint8_t or_salt[ZIO_DATA_SALT_LEN];
|
||||||
|
uint8_t or_iv[ZIO_DATA_IV_LEN];
|
||||||
|
uint8_t or_mac[ZIO_DATA_MAC_LEN];
|
||||||
|
boolean_t or_byteorder;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct objlist {
|
struct objlist {
|
||||||
|
@ -2448,7 +2456,13 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rwa->raw) {
|
if (rwa->raw) {
|
||||||
if (drro->drr_raw_bonuslen < drro->drr_bonuslen ||
|
/*
|
||||||
|
* We should have received a DRR_OBJECT_RANGE record
|
||||||
|
* containing this block and stored it in rwa.
|
||||||
|
*/
|
||||||
|
if (drro->drr_object < rwa->or_firstobj ||
|
||||||
|
drro->drr_object >= rwa->or_firstobj + rwa->or_numslots ||
|
||||||
|
drro->drr_raw_bonuslen < drro->drr_bonuslen ||
|
||||||
drro->drr_indblkshift > SPA_MAXBLOCKSHIFT ||
|
drro->drr_indblkshift > SPA_MAXBLOCKSHIFT ||
|
||||||
drro->drr_nlevels > DN_MAX_LEVELS ||
|
drro->drr_nlevels > DN_MAX_LEVELS ||
|
||||||
drro->drr_nblkptr > DN_MAX_NBLKPTR ||
|
drro->drr_nblkptr > DN_MAX_NBLKPTR ||
|
||||||
|
@ -2611,8 +2625,27 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
|
||||||
return (SET_ERROR(EINVAL));
|
return (SET_ERROR(EINVAL));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rwa->raw)
|
if (rwa->raw) {
|
||||||
VERIFY0(dmu_object_dirty_raw(rwa->os, drro->drr_object, tx));
|
/*
|
||||||
|
* Convert the buffer associated with this range of dnodes
|
||||||
|
* to a raw buffer. This ensures that it will be written out
|
||||||
|
* as a raw buffer when we fill in the dnode object. Since we
|
||||||
|
* are committing this tx now, it is possible for the dnode
|
||||||
|
* block to end up on-disk with the incorrect MAC. Despite
|
||||||
|
* this, the dataset is marked as inconsistent so no other
|
||||||
|
* code paths (apart from scrubs) will attempt to read this
|
||||||
|
* data. Scrubs will not be effected by this either since
|
||||||
|
* scrubs only read raw data and do not attempt to check
|
||||||
|
* the MAC.
|
||||||
|
*/
|
||||||
|
err = dmu_convert_mdn_block_to_raw(rwa->os, rwa->or_firstobj,
|
||||||
|
rwa->or_byteorder, rwa->or_salt, rwa->or_iv, rwa->or_mac,
|
||||||
|
tx);
|
||||||
|
if (err != 0) {
|
||||||
|
dmu_tx_commit(tx);
|
||||||
|
return (SET_ERROR(EINVAL));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
dmu_object_set_checksum(rwa->os, drro->drr_object,
|
dmu_object_set_checksum(rwa->os, drro->drr_object,
|
||||||
drro->drr_checksumtype, tx);
|
drro->drr_checksumtype, tx);
|
||||||
|
@ -2984,12 +3017,6 @@ static int
|
||||||
receive_object_range(struct receive_writer_arg *rwa,
|
receive_object_range(struct receive_writer_arg *rwa,
|
||||||
struct drr_object_range *drror)
|
struct drr_object_range *drror)
|
||||||
{
|
{
|
||||||
int ret;
|
|
||||||
dmu_tx_t *tx;
|
|
||||||
dnode_t *mdn = NULL;
|
|
||||||
dmu_buf_t *db = NULL;
|
|
||||||
uint64_t offset;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* By default, we assume this block is in our native format
|
* By default, we assume this block is in our native format
|
||||||
* (ZFS_HOST_BYTEORDER). We then take into account whether
|
* (ZFS_HOST_BYTEORDER). We then take into account whether
|
||||||
|
@ -3019,38 +3046,18 @@ receive_object_range(struct receive_writer_arg *rwa,
|
||||||
if (drror->drr_firstobj > rwa->max_object)
|
if (drror->drr_firstobj > rwa->max_object)
|
||||||
rwa->max_object = drror->drr_firstobj;
|
rwa->max_object = drror->drr_firstobj;
|
||||||
|
|
||||||
offset = drror->drr_firstobj * sizeof (dnode_phys_t);
|
|
||||||
mdn = DMU_META_DNODE(rwa->os);
|
|
||||||
|
|
||||||
tx = dmu_tx_create(rwa->os);
|
|
||||||
ret = dmu_tx_assign(tx, TXG_WAIT);
|
|
||||||
if (ret != 0) {
|
|
||||||
dmu_tx_abort(tx);
|
|
||||||
return (ret);
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = dmu_buf_hold_by_dnode(mdn, offset, FTAG, &db,
|
|
||||||
DMU_READ_PREFETCH | DMU_READ_NO_DECRYPT);
|
|
||||||
if (ret != 0) {
|
|
||||||
dmu_tx_commit(tx);
|
|
||||||
return (ret);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Convert the buffer associated with this range of dnodes to a
|
* The DRR_OBJECT_RANGE handling must be deferred to receive_object()
|
||||||
* raw buffer. This ensures that it will be written out as a raw
|
* so that the encryption parameters are set with each object that is
|
||||||
* buffer when we fill in the dnode objects in future records.
|
* written into that block.
|
||||||
* Since we are commiting this tx now, it is technically possible
|
|
||||||
* for the dnode block to end up on-disk with the incorrect MAC.
|
|
||||||
* Despite this, the dataset is marked as inconsistent so no other
|
|
||||||
* code paths (apart from scrubs) will attempt to read this data.
|
|
||||||
* Scrubs will not be effected by this either since scrubs only
|
|
||||||
* read raw data and do not attempt to check the MAC.
|
|
||||||
*/
|
*/
|
||||||
dmu_convert_to_raw(db, byteorder, drror->drr_salt, drror->drr_iv,
|
rwa->or_firstobj = drror->drr_firstobj;
|
||||||
drror->drr_mac, tx);
|
rwa->or_numslots = drror->drr_numslots;
|
||||||
dmu_buf_rele(db, FTAG);
|
bcopy(drror->drr_salt, rwa->or_salt, ZIO_DATA_SALT_LEN);
|
||||||
dmu_tx_commit(tx);
|
bcopy(drror->drr_iv, rwa->or_iv, ZIO_DATA_IV_LEN);
|
||||||
|
bcopy(drror->drr_mac, rwa->or_mac, ZIO_DATA_MAC_LEN);
|
||||||
|
rwa->or_byteorder = byteorder;
|
||||||
|
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue