Fixes for #6639
Several issues were uncovered by running stress tests with zfs encryption
and raw sends in particular. The issues and their associated fixes are as
follows:

* arc_read_done() has the ability to chain several requests for the same
  block of data via the arc_callback_t struct. In these cases, the ARC
  would only use the first request's dsobj from the bookmark to decrypt
  the data. This is problematic because the first request might be a
  prefetch zio which is able to handle the key not being loaded, while
  the second might use a different key that it is sure will work. The fix
  here is to pass the dsobj with each individual arc_callback_t so that
  each request can attempt to decrypt the data separately.

* DRR_FREE and DRR_FREEOBJECT records in a send file were not having
  their transactions properly tagged as raw during raw sends, which
  caused a panic when the dbuf code attempted to decrypt these blocks.

* traverse_prefetch_metadata() did not properly set ZIO_FLAG_SPECULATIVE
  when issuing prefetch IOs.

* Added a few asserts and code cleanups to ensure these issues are more
  detectable in the future.

Signed-off-by: Tom Caputi <tcaputi@datto.com>
This commit is contained in:
parent 4807c0badb
commit 440a3eb939
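
The heart of the first fix is visible in the arc_read_done() hunks below. As a condensed sketch (simplified from the hunks in this commit, with unrelated bookkeeping omitted), each chained callback now decrypts with the dataset object it was issued under rather than the first caller's bookmark:

    /*
     * Condensed sketch of the fixed arc_read_done() loop (see the full
     * hunk below). Each chained arc_callback_t records the dataset
     * object it was issued under (acb_dsobj), so a demand read queued
     * behind a speculative prefetch can still locate its own key.
     */
    for (acb = callback_list; acb != NULL; acb = acb->acb_next) {
        if (acb->acb_done == NULL)
            continue;

        /* Decrypt with this caller's dsobj, not the first caller's. */
        int error = arc_buf_alloc_impl(hdr, zio->io_spa,
            acb->acb_dsobj, acb->acb_private, acb->acb_encrypted,
            acb->acb_compressed, acb->acb_noauth, no_zio_error,
            &acb->acb_buf);

        /* Only speculative (prefetch) zios may fail for a missing key. */
        ASSERT((zio->io_flags & ZIO_FLAG_SPECULATIVE) || error == 0);
    }
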
@@ -1958,7 +1958,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
         dmu_write(os, lr->lr_foid, offset, length, data, tx);
     } else {
         bcopy(data, abuf->b_data, length);
-        dmu_assign_arcbuf(db, offset, abuf, tx);
+        dmu_assign_arcbuf_by_dbuf(db, offset, abuf, tx);
     }

     (void) ztest_log_write(zd, tx, lr);
@@ -4346,7 +4346,7 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
      * bigobj, at the tail of the nth chunk
      *
      * The chunk size is set equal to bigobj block size so that
-     * dmu_assign_arcbuf() can be tested for object updates.
+     * dmu_assign_arcbuf_by_dbuf() can be tested for object updates.
      */

     /*
@@ -4408,7 +4408,7 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
             /*
              * In iteration 5 (i == 5) use arcbufs
              * that don't match bigobj blksz to test
-             * dmu_assign_arcbuf() when it can't directly
+             * dmu_assign_arcbuf_by_dbuf() when it can't directly
              * assign an arcbuf to a dbuf.
              */
             for (j = 0; j < s; j++) {
@@ -4454,8 +4454,8 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)

         /*
          * 50% of the time don't read objects in the 1st iteration to
-         * test dmu_assign_arcbuf() for the case when there're no
-         * existing dbufs for the specified offsets.
+         * test dmu_assign_arcbuf_by_dbuf() for the case when there are
+         * no existing dbufs for the specified offsets.
          */
         if (i != 0 || ztest_random(2) != 0) {
             error = dmu_read(os, packobj, packoff,
@@ -4500,12 +4500,12 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
                 FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0);
         }
         if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
-            dmu_assign_arcbuf(bonus_db, off,
+            dmu_assign_arcbuf_by_dbuf(bonus_db, off,
                 bigbuf_arcbufs[j], tx);
         } else {
-            dmu_assign_arcbuf(bonus_db, off,
+            dmu_assign_arcbuf_by_dbuf(bonus_db, off,
                 bigbuf_arcbufs[2 * j], tx);
-            dmu_assign_arcbuf(bonus_db,
+            dmu_assign_arcbuf_by_dbuf(bonus_db,
                 off + chunksize / 2,
                 bigbuf_arcbufs[2 * j + 1], tx);
         }

@@ -96,6 +96,7 @@ struct arc_callback {
     boolean_t               acb_encrypted;
     boolean_t               acb_compressed;
     boolean_t               acb_noauth;
+    uint64_t                acb_dsobj;
     zio_t                   *acb_zio_dummy;
     arc_callback_t          *acb_next;
 };

@@ -759,10 +759,13 @@ void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
  * -1, the range from offset to end-of-file is freed.
  */
 int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
     uint64_t size, dmu_tx_t *tx);
 int dmu_free_long_range(objset_t *os, uint64_t object, uint64_t offset,
     uint64_t size);
+int dmu_free_long_range_raw(objset_t *os, uint64_t object, uint64_t offset,
+    uint64_t size);
 int dmu_free_long_object(objset_t *os, uint64_t object);
+int dmu_free_long_object_raw(objset_t *os, uint64_t object);

 /*
  * Convenience functions.
@@ -797,10 +800,11 @@ int dmu_write_uio_dnode(dnode_t *dn, struct uio *uio, uint64_t size,
 #endif
 struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size);
 void dmu_return_arcbuf(struct arc_buf *buf);
-void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf,
-    dmu_tx_t *tx);
-void dmu_assign_arcbuf_impl(dmu_buf_t *handle, struct arc_buf *buf,
-    dmu_tx_t *tx);
+void dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset,
+    struct arc_buf *buf, dmu_tx_t *tx);
+void dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset,
+    struct arc_buf *buf, dmu_tx_t *tx);
+#define dmu_assign_arcbuf dmu_assign_arcbuf_by_dbuf
 void dmu_convert_to_raw(dmu_buf_t *handle, boolean_t byteorder,
     const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx);
 void dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,

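A quick usage sketch of the renamed interfaces (the variable names here are illustrative; the calling patterns match the ztest and receive_write() hunks elsewhere in this commit): the by-dbuf flavor serves callers that hold a dmu_buf_t, while the by-dnode flavor lets callers that already hold the dnode skip the DB_DNODE_ENTER()/DB_DNODE_EXIT() round trip; the #define keeps legacy dmu_assign_arcbuf() callers compiling.

    /* With a dbuf handle (e.g. a bonus or SA dbuf): */
    dmu_assign_arcbuf_by_dbuf(db, offset, abuf, tx);

    /* With a dnode hold, avoiding the dbuf-to-dnode lookup: */
    dnode_t *dn;
    VERIFY0(dnode_hold(os, object, FTAG, &dn));
    dmu_assign_arcbuf_by_dnode(dn, offset, abuf, tx);
    dnode_rele(dn, FTAG);
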
@@ -3155,13 +3155,14 @@ arc_buf_destroy_impl(arc_buf_t *buf)
         hdr->b_crypt_hdr.b_ebufcnt -= 1;

         /*
-         * if we have no more encrypted buffers and we've already
+         * If we have no more encrypted buffers and we've already
          * gotten a copy of the decrypted data we can free b_rabd to
          * save some space.
          */
         if (hdr->b_crypt_hdr.b_ebufcnt == 0 && HDR_HAS_RABD(hdr) &&
-            hdr->b_l1hdr.b_pabd != NULL)
+            hdr->b_l1hdr.b_pabd != NULL && !HDR_IO_IN_PROGRESS(hdr)) {
             arc_hdr_free_abd(hdr, B_TRUE);
+        }
     }

     arc_buf_t *lastbuf = arc_buf_remove(hdr, buf);
@@ -3716,9 +3717,8 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr)
             arc_hdr_free_abd(hdr, B_FALSE);
         }

-        if (HDR_HAS_RABD(hdr)) {
+        if (HDR_HAS_RABD(hdr))
             arc_hdr_free_abd(hdr, B_TRUE);
-        }
     }

     ASSERT3P(hdr->b_hash_next, ==, NULL);
@@ -5746,16 +5746,15 @@ arc_read_done(zio_t *zio)
             callback_cnt++;

             int error = arc_buf_alloc_impl(hdr, zio->io_spa,
-                zio->io_bookmark.zb_objset, acb->acb_private,
-                acb->acb_encrypted, acb->acb_compressed, acb->acb_noauth,
-                no_zio_error, &acb->acb_buf);
+                acb->acb_dsobj, acb->acb_private, acb->acb_encrypted,
+                acb->acb_compressed, acb->acb_noauth, no_zio_error,
+                &acb->acb_buf);

             /*
-             * assert non-speculative zios didn't fail because an
+             * Assert non-speculative zios didn't fail because an
              * encryption key wasn't loaded
              */
-            ASSERT((zio->io_flags & ZIO_FLAG_SPECULATIVE) ||
-                error == 0 || error != ENOENT);
+            ASSERT((zio->io_flags & ZIO_FLAG_SPECULATIVE) || error == 0);

             /*
              * If we failed to decrypt, report an error now (as the zio
@@ -5778,10 +5777,8 @@ arc_read_done(zio_t *zio)
     }
     hdr->b_l1hdr.b_acb = NULL;
     arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS);
-    if (callback_cnt == 0) {
-        ASSERT(HDR_PREFETCH(hdr) || HDR_HAS_RABD(hdr));
+    if (callback_cnt == 0)
         ASSERT(hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr));
-    }

     ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt) ||
         callback_list != NULL);
@@ -5943,6 +5940,9 @@ top:
             acb->acb_done = done;
             acb->acb_private = private;
             acb->acb_compressed = compressed_read;
+            acb->acb_encrypted = encrypted_read;
+            acb->acb_noauth = noauth_read;
+            acb->acb_dsobj = zb->zb_objset;
             if (pio != NULL)
                 acb->acb_zio_dummy = zio_null(pio,
                     spa, NULL, NULL, NULL, zio_flags);
@@ -5981,9 +5981,7 @@ top:
             rc = arc_buf_alloc_impl(hdr, spa, zb->zb_objset,
                 private, encrypted_read, compressed_read,
                 noauth_read, B_TRUE, &buf);
-            ASSERT((zio_flags & ZIO_FLAG_SPECULATIVE) ||
-                rc == 0 || rc != ENOENT);
-
+            ASSERT((zio_flags & ZIO_FLAG_SPECULATIVE) || rc == 0);
         } else if (*arc_flags & ARC_FLAG_PREFETCH &&
             refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) {
             arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH);
@@ -6008,7 +6006,7 @@ top:
         uint64_t addr = 0;
         boolean_t devw = B_FALSE;
         uint64_t size;
-        void *hdr_abd;
+        abd_t *hdr_abd;

         /*
          * Gracefully handle a damaged logical block size as a
@@ -6131,6 +6129,7 @@ top:
         acb->acb_compressed = compressed_read;
         acb->acb_encrypted = encrypted_read;
         acb->acb_noauth = noauth_read;
+        acb->acb_dsobj = zb->zb_objset;

         ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL);
         hdr->b_l1hdr.b_acb = acb;
@@ -7961,9 +7960,15 @@ l2arc_untransform(zio_t *zio, l2arc_read_callback_t *cb)
      */
     ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_INTENT_LOG);
     ASSERT(MUTEX_HELD(HDR_LOCK(hdr)));
+    ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);

-    /* If the data was encrypted, decrypt it now */
-    if (HDR_ENCRYPTED(hdr)) {
+    /*
+     * If the data was encrypted, decrypt it now. Note that
+     * we must check the bp here and not the hdr, since the
+     * hdr does not have its encryption parameters updated
+     * until arc_read_done().
+     */
+    if (BP_IS_ENCRYPTED(bp)) {
         abd_t *eabd = arc_get_data_abd(hdr,
             arc_hdr_size(hdr), hdr);

@@ -8089,7 +8094,16 @@ l2arc_read_done(zio_t *zio)
          */
         abd_free(cb->l2rcb_abd);
         zio->io_size = zio->io_orig_size = arc_hdr_size(hdr);
-        zio->io_abd = zio->io_orig_abd = hdr->b_l1hdr.b_pabd;
+
+        if (BP_IS_ENCRYPTED(&cb->l2rcb_bp) &&
+            (cb->l2rcb_flags & ZIO_FLAG_RAW_ENCRYPT)) {
+            ASSERT(HDR_HAS_RABD(hdr));
+            zio->io_abd = zio->io_orig_abd =
+                hdr->b_crypt_hdr.b_rabd;
+        } else {
+            ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+            zio->io_abd = zio->io_orig_abd = hdr->b_l1hdr.b_pabd;
+        }
     }

     ASSERT3P(zio->io_abd, !=, NULL);

@@ -2153,6 +2153,13 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)

     if (db->db_state == DB_CACHED &&
         refcount_count(&db->db_holds) - 1 > db->db_dirtycnt) {
+        /*
+         * In practice, we will never have a case where we have an
+         * encrypted arc buffer while additional holds exist on the
+         * dbuf. We don't handle this here so we simply assert that
+         * fact instead.
+         */
+        ASSERT(!arc_is_encrypted(buf));
         mutex_exit(&db->db_mtx);
         (void) dbuf_dirty(db, tx);
         bcopy(buf->b_data, db->db.db_data, db->db.db_size);
@@ -2168,6 +2175,8 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
         ASSERT(db->db_buf != NULL);
         if (dr != NULL && dr->dr_txg == tx->tx_txg) {
             ASSERT(dr->dt.dl.dr_data == db->db_buf);
+            IMPLY(arc_is_encrypted(buf), dr->dt.dl.dr_raw);
+
             if (!arc_released(db->db_buf)) {
                 ASSERT(dr->dt.dl.dr_override_state ==
                     DR_OVERRIDDEN);

module/zfs/dmu.c (103 changed lines)
@@ -761,7 +761,7 @@ dmu_objset_zfs_unmounting(objset_t *os)

 static int
 dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
-    uint64_t length)
+    uint64_t length, boolean_t raw)
 {
     uint64_t object_size;
     int err;
@@ -844,6 +844,17 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
             uint64_t, long_free_dirty_all_txgs, uint64_t, chunk_len,
             uint64_t, dmu_tx_get_txg(tx));
         dnode_free_range(dn, chunk_begin, chunk_len, tx);
+
+        /* if this is a raw free, mark the dirty record as such */
+        if (raw) {
+            dbuf_dirty_record_t *dr = dn->dn_dbuf->db_last_dirty;
+
+            while (dr != NULL && dr->dr_txg > tx->tx_txg)
+                dr = dr->dr_next;
+            if (dr != NULL && dr->dr_txg == tx->tx_txg)
+                dr->dt.dl.dr_raw = B_TRUE;
+        }
+
         dmu_tx_commit(tx);

         length -= chunk_len;
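
The walk added above depends on db_last_dirty being ordered newest-first by txg. A hypothetical helper, not part of this commit, that captures the same idiom:

    /*
     * Illustration only: locate the dirty record for a given txg.
     * Records on db_last_dirty are sorted newest first, so skip
     * records from later txgs, then check for an exact match.
     */
    static dbuf_dirty_record_t *
    dbuf_find_dirty(dbuf_dirty_record_t *dr, uint64_t txg)
    {
        while (dr != NULL && dr->dr_txg > txg)
            dr = dr->dr_next;
        return ((dr != NULL && dr->dr_txg == txg) ? dr : NULL);
    }
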
@@ -861,7 +872,7 @@ dmu_free_long_range(objset_t *os, uint64_t object,
     err = dnode_hold(os, object, FTAG, &dn);
     if (err != 0)
         return (err);
-    err = dmu_free_long_range_impl(os, dn, offset, length);
+    err = dmu_free_long_range_impl(os, dn, offset, length, B_FALSE);

     /*
      * It is important to zero out the maxblkid when freeing the entire
@@ -876,8 +887,37 @@ dmu_free_long_range(objset_t *os, uint64_t object,
     return (err);
 }

+/*
+ * This function is equivalent to dmu_free_long_range(), but also
+ * marks the new dirty record as a raw write.
+ */
 int
-dmu_free_long_object(objset_t *os, uint64_t object)
+dmu_free_long_range_raw(objset_t *os, uint64_t object,
+    uint64_t offset, uint64_t length)
+{
+    dnode_t *dn;
+    int err;
+
+    err = dnode_hold(os, object, FTAG, &dn);
+    if (err != 0)
+        return (err);
+    err = dmu_free_long_range_impl(os, dn, offset, length, B_TRUE);
+
+    /*
+     * It is important to zero out the maxblkid when freeing the entire
+     * file, so that (a) subsequent calls to dmu_free_long_range_impl()
+     * will take the fast path, and (b) dnode_reallocate() can verify
+     * that the entire file has been freed.
+     */
+    if (err == 0 && offset == 0 && length == DMU_OBJECT_END)
+        dn->dn_maxblkid = 0;
+
+    dnode_rele(dn, FTAG);
+    return (err);
+}
+
+static int
+dmu_free_long_object_impl(objset_t *os, uint64_t object, boolean_t raw)
 {
     dmu_tx_t *tx;
     int err;
@@ -893,6 +933,9 @@ dmu_free_long_object(objset_t *os, uint64_t object)
     err = dmu_tx_assign(tx, TXG_WAIT);
     if (err == 0) {
         err = dmu_object_free(os, object, tx);
+        if (err == 0 && raw)
+            VERIFY0(dmu_object_dirty_raw(os, object, tx));
+
         dmu_tx_commit(tx);
     } else {
         dmu_tx_abort(tx);
@@ -901,6 +944,19 @@ dmu_free_long_object(objset_t *os, uint64_t object)
     return (err);
 }

+int
+dmu_free_long_object(objset_t *os, uint64_t object)
+{
+    return (dmu_free_long_object_impl(os, object, B_FALSE));
+}
+
+int
+dmu_free_long_object_raw(objset_t *os, uint64_t object)
+{
+    return (dmu_free_long_object_impl(os, object, B_TRUE));
+}
+
+
 int
 dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
     uint64_t size, dmu_tx_t *tx)
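
With the impl split in place, the two thin wrappers above give raw receives a way to tag frees. For example, a receive path can dispatch on whether the stream is raw, mirroring the receive_freeobjects() hunk later in this commit:

    /* Illustrative dispatch, as done by the receive path below. */
    if (rwa->raw)
        err = dmu_free_long_object_raw(rwa->os, obj);
    else
        err = dmu_free_long_object(rwa->os, obj);
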
@@ -1486,13 +1542,6 @@ dmu_return_arcbuf(arc_buf_t *buf)
     arc_buf_destroy(buf, FTAG);
 }

-void
-dmu_assign_arcbuf_impl(dmu_buf_t *handle, arc_buf_t *buf, dmu_tx_t *tx)
-{
-    dmu_buf_impl_t *db = (dmu_buf_impl_t *)handle;
-    dbuf_assign_arcbuf(db, buf, tx);
-}
-
 void
 dmu_convert_to_raw(dmu_buf_t *handle, boolean_t byteorder, const uint8_t *salt,
     const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx)
@@ -1569,22 +1618,19 @@ dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
  * dmu_write().
  */
 void
-dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
+dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf,
     dmu_tx_t *tx)
 {
-    dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)handle;
-    dnode_t *dn;
     dmu_buf_impl_t *db;
+    objset_t *os = dn->dn_objset;
+    uint64_t object = dn->dn_object;
     uint32_t blksz = (uint32_t)arc_buf_lsize(buf);
     uint64_t blkid;

-    DB_DNODE_ENTER(dbuf);
-    dn = DB_DNODE(dbuf);
     rw_enter(&dn->dn_struct_rwlock, RW_READER);
     blkid = dbuf_whichblock(dn, 0, offset);
     VERIFY((db = dbuf_hold(dn, blkid, FTAG)) != NULL);
     rw_exit(&dn->dn_struct_rwlock);
-    DB_DNODE_EXIT(dbuf);

     /*
      * We can only assign if the offset is aligned, the arc buf is the
@@ -1594,19 +1640,10 @@ dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
         dbuf_assign_arcbuf(db, buf, tx);
         dbuf_rele(db, FTAG);
     } else {
-        objset_t *os;
-        uint64_t object;
-
         /* compressed bufs must always be assignable to their dbuf */
         ASSERT3U(arc_get_compression(buf), ==, ZIO_COMPRESS_OFF);
         ASSERT(!(buf->b_flags & ARC_BUF_FLAG_COMPRESSED));

-        DB_DNODE_ENTER(dbuf);
-        dn = DB_DNODE(dbuf);
-        os = dn->dn_objset;
-        object = dn->dn_object;
-        DB_DNODE_EXIT(dbuf);
-
         dbuf_rele(db, FTAG);
         dmu_write(os, object, offset, blksz, buf->b_data, tx);
         dmu_return_arcbuf(buf);
@@ -1614,6 +1651,17 @@ dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
     }
 }

+void
+dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
+    dmu_tx_t *tx)
+{
+    dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)handle;
+
+    DB_DNODE_ENTER(dbuf);
+    dmu_assign_arcbuf_by_dnode(DB_DNODE(dbuf), offset, buf, tx);
+    DB_DNODE_EXIT(dbuf);
+}
+
 typedef struct {
     dbuf_dirty_record_t *dsa_dr;
     dmu_sync_cb_t *dsa_done;
@@ -2424,7 +2472,9 @@ EXPORT_SYMBOL(dmu_buf_rele_array);
 EXPORT_SYMBOL(dmu_prefetch);
 EXPORT_SYMBOL(dmu_free_range);
 EXPORT_SYMBOL(dmu_free_long_range);
+EXPORT_SYMBOL(dmu_free_long_range_raw);
 EXPORT_SYMBOL(dmu_free_long_object);
+EXPORT_SYMBOL(dmu_free_long_object_raw);
 EXPORT_SYMBOL(dmu_read);
 EXPORT_SYMBOL(dmu_read_by_dnode);
 EXPORT_SYMBOL(dmu_write);
@@ -2443,7 +2493,8 @@ EXPORT_SYMBOL(dmu_write_policy);
 EXPORT_SYMBOL(dmu_sync);
 EXPORT_SYMBOL(dmu_request_arcbuf);
 EXPORT_SYMBOL(dmu_return_arcbuf);
-EXPORT_SYMBOL(dmu_assign_arcbuf);
+EXPORT_SYMBOL(dmu_assign_arcbuf_by_dnode);
+EXPORT_SYMBOL(dmu_assign_arcbuf_by_dbuf);
 EXPORT_SYMBOL(dmu_buf_hold);
 EXPORT_SYMBOL(dmu_ot);

@@ -2592,7 +2592,11 @@ receive_freeobjects(struct receive_writer_arg *rwa,
         else if (err != 0)
             return (err);

-        err = dmu_free_long_object(rwa->os, obj);
+        if (rwa->raw)
+            err = dmu_free_long_object_raw(rwa->os, obj);
+        else
+            err = dmu_free_long_object(rwa->os, obj);
+
         if (err != 0)
             return (err);

@@ -2608,9 +2612,9 @@ noinline static int
 receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
     arc_buf_t *abuf)
 {
-    dmu_tx_t *tx;
-    dmu_buf_t *bonus;
     int err;
+    dmu_tx_t *tx;
+    dnode_t *dn;

     if (drrw->drr_offset + drrw->drr_logical_size < drrw->drr_offset ||
         !DMU_OT_IS_VALID(drrw->drr_type))
@@ -2635,7 +2639,6 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
         return (SET_ERROR(EINVAL));

     tx = dmu_tx_create(rwa->os);
-
     dmu_tx_hold_write(tx, drrw->drr_object,
         drrw->drr_offset, drrw->drr_logical_size);
     err = dmu_tx_assign(tx, TXG_WAIT);
@@ -2655,10 +2658,9 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
             DRR_WRITE_PAYLOAD_SIZE(drrw));
     }

-    /* use the bonus buf to look up the dnode in dmu_assign_arcbuf */
-    if (dmu_bonus_hold(rwa->os, drrw->drr_object, FTAG, &bonus) != 0)
-        return (SET_ERROR(EINVAL));
-    dmu_assign_arcbuf(bonus, drrw->drr_offset, abuf, tx);
+    VERIFY0(dnode_hold(rwa->os, drrw->drr_object, FTAG, &dn));
+    dmu_assign_arcbuf_by_dnode(dn, drrw->drr_offset, abuf, tx);
+    dnode_rele(dn, FTAG);

     /*
      * Note: If the receive fails, we want the resume stream to start
@@ -2668,7 +2670,6 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
      */
     save_resume_state(rwa, drrw->drr_object, drrw->drr_offset, tx);
     dmu_tx_commit(tx);
-    dmu_buf_rele(bonus, FTAG);

     return (0);
 }
@@ -2767,6 +2768,8 @@ receive_write_embedded(struct receive_writer_arg *rwa,
         return (SET_ERROR(EINVAL));
     if (drrwe->drr_compression >= ZIO_COMPRESS_FUNCTIONS)
         return (SET_ERROR(EINVAL));
+    if (rwa->raw)
+        return (SET_ERROR(EINVAL));

     if (drrwe->drr_object > rwa->max_object)
         rwa->max_object = drrwe->drr_object;
@@ -2841,7 +2844,7 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
     if (db_spill->db_size < drrs->drr_length)
         VERIFY(0 == dbuf_spill_set_blksz(db_spill,
             drrs->drr_length, tx));
-    dmu_assign_arcbuf_impl(db_spill, abuf, tx);
+    dbuf_assign_arcbuf((dmu_buf_impl_t *)db_spill, abuf, tx);

     dmu_buf_rele(db, FTAG);
     dmu_buf_rele(db_spill, FTAG);
@@ -2866,8 +2869,13 @@ receive_free(struct receive_writer_arg *rwa, struct drr_free *drrf)
     if (drrf->drr_object > rwa->max_object)
         rwa->max_object = drrf->drr_object;

-    err = dmu_free_long_range(rwa->os, drrf->drr_object,
-        drrf->drr_offset, drrf->drr_length);
+    if (rwa->raw) {
+        err = dmu_free_long_range_raw(rwa->os, drrf->drr_object,
+            drrf->drr_offset, drrf->drr_length);
+    } else {
+        err = dmu_free_long_range(rwa->os, drrf->drr_object,
+            drrf->drr_offset, drrf->drr_length);
+    }

     return (err);
 }

@@ -181,7 +181,7 @@ traverse_prefetch_metadata(traverse_data_t *td,
     const blkptr_t *bp, const zbookmark_phys_t *zb)
 {
     arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
-    int zio_flags = ZIO_FLAG_CANFAIL;
+    int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;

     if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA))
         return;
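
This one-line change pairs with the tightened assert in arc_read_done() above: only zios flagged ZIO_FLAG_SPECULATIVE are allowed to fail because an encryption key is not loaded. As a sketch of the resulting prefetch issue path (the arc_read() argument list is assumed from the surrounding traverse code, so treat it as illustrative):

    /*
     * Metadata prefetches are marked speculative so that a missing
     * encryption key fails them quietly instead of tripping the
     * assert in arc_read_done().
     */
    arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
    int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;

    (void) arc_read(NULL, td->td_spa, bp, NULL, NULL,
        ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb);
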
@@ -841,8 +841,8 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
             xuio_stat_wbuf_copied();
         } else {
             ASSERT(xuio || tx_bytes == max_blksz);
-            dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl),
-                woff, abuf, tx);
+            dmu_assign_arcbuf_by_dbuf(
+                sa_get_db(zp->z_sa_hdl), woff, abuf, tx);
         }
         ASSERT(tx_bytes <= uio->uio_resid);
         uioskip(uio, tx_bytes);