Fix issues with raw receive_write_byref()

This patch fixes 2 issues with raw, deduplicated send streams. The
first is that datasets who had been completely received earlier in
the stream were not still marked as raw receives. This caused
problems when newly received datasets attempted to fetch raw data
from these datasets without this flag set.

The second problem was that the arc freeze checksum code was not
consistent about which locks needed to be held while performing
its asserts. The proper locking needed to run these asserts is
actually fairly nuanced, since the asserts touch the linked list
of buffers (requiring the header lock), the arc_state (requiring
the b_evict_lock), and the b_freeze_cksum (requiring the
b_freeze_lock). This seems like a large performance sacrifice and
a lot of unneeded complexity to verify that this relatively small
debug feature is working as intended, so this patch simply removes
these asserts instead.

Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #7701
This commit is contained in:
Tom Caputi 2018-08-20 14:03:56 -04:00 committed by Brian Behlendorf
parent c962fd6c4e
commit 149ce888bb
7 changed files with 144 additions and 43 deletions

View File

@ -1561,6 +1561,9 @@ arc_cksum_free(arc_buf_hdr_t *hdr)
static boolean_t static boolean_t
arc_hdr_has_uncompressed_buf(arc_buf_hdr_t *hdr) arc_hdr_has_uncompressed_buf(arc_buf_hdr_t *hdr)
{ {
ASSERT(hdr->b_l1hdr.b_state == arc_anon ||
MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr));
for (arc_buf_t *b = hdr->b_l1hdr.b_buf; b != NULL; b = b->b_next) { for (arc_buf_t *b = hdr->b_l1hdr.b_buf; b != NULL; b = b->b_next) {
if (!ARC_BUF_COMPRESSED(b)) { if (!ARC_BUF_COMPRESSED(b)) {
return (B_TRUE); return (B_TRUE);
@ -1584,15 +1587,13 @@ arc_cksum_verify(arc_buf_t *buf)
if (!(zfs_flags & ZFS_DEBUG_MODIFY)) if (!(zfs_flags & ZFS_DEBUG_MODIFY))
return; return;
if (ARC_BUF_COMPRESSED(buf)) { if (ARC_BUF_COMPRESSED(buf))
ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL ||
arc_hdr_has_uncompressed_buf(hdr));
return; return;
}
ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT(HDR_HAS_L1HDR(hdr));
mutex_enter(&hdr->b_l1hdr.b_freeze_lock); mutex_enter(&hdr->b_l1hdr.b_freeze_lock);
if (hdr->b_l1hdr.b_freeze_cksum == NULL || HDR_IO_ERROR(hdr)) { if (hdr->b_l1hdr.b_freeze_cksum == NULL || HDR_IO_ERROR(hdr)) {
mutex_exit(&hdr->b_l1hdr.b_freeze_lock); mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
return; return;
@ -1650,11 +1651,7 @@ arc_cksum_compute(arc_buf_t *buf)
ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT(HDR_HAS_L1HDR(hdr));
mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock); mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock);
if (hdr->b_l1hdr.b_freeze_cksum != NULL) { if (hdr->b_l1hdr.b_freeze_cksum != NULL || ARC_BUF_COMPRESSED(buf)) {
ASSERT(arc_hdr_has_uncompressed_buf(hdr));
mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
return;
} else if (ARC_BUF_COMPRESSED(buf)) {
mutex_exit(&hdr->b_l1hdr.b_freeze_lock); mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
return; return;
} }
@ -1746,14 +1743,10 @@ arc_buf_thaw(arc_buf_t *buf)
arc_cksum_verify(buf); arc_cksum_verify(buf);
/* /*
* Compressed buffers do not manipulate the b_freeze_cksum or * Compressed buffers do not manipulate the b_freeze_cksum.
* allocate b_thawed.
*/ */
if (ARC_BUF_COMPRESSED(buf)) { if (ARC_BUF_COMPRESSED(buf))
ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL ||
arc_hdr_has_uncompressed_buf(hdr));
return; return;
}
ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT(HDR_HAS_L1HDR(hdr));
arc_cksum_free(hdr); arc_cksum_free(hdr);
@ -1763,26 +1756,14 @@ arc_buf_thaw(arc_buf_t *buf)
void void
arc_buf_freeze(arc_buf_t *buf) arc_buf_freeze(arc_buf_t *buf)
{ {
arc_buf_hdr_t *hdr = buf->b_hdr;
kmutex_t *hash_lock;
if (!(zfs_flags & ZFS_DEBUG_MODIFY)) if (!(zfs_flags & ZFS_DEBUG_MODIFY))
return; return;
if (ARC_BUF_COMPRESSED(buf)) { if (ARC_BUF_COMPRESSED(buf))
ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL ||
arc_hdr_has_uncompressed_buf(hdr));
return; return;
}
hash_lock = HDR_LOCK(hdr); ASSERT(HDR_HAS_L1HDR(buf->b_hdr));
mutex_enter(hash_lock);
ASSERT(HDR_HAS_L1HDR(hdr));
ASSERT(hdr->b_l1hdr.b_freeze_cksum != NULL ||
hdr->b_l1hdr.b_state == arc_anon);
arc_cksum_compute(buf); arc_cksum_compute(buf);
mutex_exit(hash_lock);
} }
/* /*
@ -1901,7 +1882,7 @@ arc_hdr_authenticate(arc_buf_hdr_t *hdr, spa_t *spa, uint64_t dsobj)
void *tmpbuf = NULL; void *tmpbuf = NULL;
abd_t *abd = hdr->b_l1hdr.b_pabd; abd_t *abd = hdr->b_l1hdr.b_pabd;
ASSERT(HDR_LOCK(hdr) == NULL || MUTEX_HELD(HDR_LOCK(hdr))); ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr));
ASSERT(HDR_AUTHENTICATED(hdr)); ASSERT(HDR_AUTHENTICATED(hdr));
ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
@ -1971,7 +1952,7 @@ arc_hdr_decrypt(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb)
boolean_t no_crypt = B_FALSE; boolean_t no_crypt = B_FALSE;
boolean_t bswap = (hdr->b_l1hdr.b_byteswap != DMU_BSWAP_NUMFUNCS); boolean_t bswap = (hdr->b_l1hdr.b_byteswap != DMU_BSWAP_NUMFUNCS);
ASSERT(HDR_LOCK(hdr) == NULL || MUTEX_HELD(HDR_LOCK(hdr))); ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr));
ASSERT(HDR_ENCRYPTED(hdr)); ASSERT(HDR_ENCRYPTED(hdr));
arc_hdr_alloc_abd(hdr, B_FALSE); arc_hdr_alloc_abd(hdr, B_FALSE);
@ -2090,7 +2071,7 @@ arc_buf_untransform_in_place(arc_buf_t *buf, kmutex_t *hash_lock)
ASSERT(HDR_ENCRYPTED(hdr)); ASSERT(HDR_ENCRYPTED(hdr));
ASSERT3U(hdr->b_crypt_hdr.b_ot, ==, DMU_OT_DNODE); ASSERT3U(hdr->b_crypt_hdr.b_ot, ==, DMU_OT_DNODE);
ASSERT(HDR_LOCK(hdr) == NULL || MUTEX_HELD(HDR_LOCK(hdr))); ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr));
ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
zio_crypt_copy_dnode_bonus(hdr->b_l1hdr.b_pabd, buf->b_data, zio_crypt_copy_dnode_bonus(hdr->b_l1hdr.b_pabd, buf->b_data,

View File

@ -1267,7 +1267,6 @@ dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, uint32_t flags)
!DMU_OT_IS_ENCRYPTED(dn->dn_bonustype)))) !DMU_OT_IS_ENCRYPTED(dn->dn_bonustype))))
err = 0; err = 0;
DB_DNODE_EXIT(db); DB_DNODE_EXIT(db);
return (err); return (err);

View File

@ -1622,6 +1622,7 @@ dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
dmu_buf_t *dst_handle; dmu_buf_t *dst_handle;
dmu_buf_impl_t *dstdb; dmu_buf_impl_t *dstdb;
dmu_buf_impl_t *srcdb = (dmu_buf_impl_t *)handle; dmu_buf_impl_t *srcdb = (dmu_buf_impl_t *)handle;
dmu_object_type_t type;
arc_buf_t *abuf; arc_buf_t *abuf;
uint64_t datalen; uint64_t datalen;
boolean_t byteorder; boolean_t byteorder;
@ -1637,11 +1638,15 @@ dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
dstdb = (dmu_buf_impl_t *)dst_handle; dstdb = (dmu_buf_impl_t *)dst_handle;
datalen = arc_buf_size(srcdb->db_buf); datalen = arc_buf_size(srcdb->db_buf);
DB_DNODE_ENTER(dstdb);
type = DB_DNODE(dstdb)->dn_type;
DB_DNODE_EXIT(dstdb);
/* allocated an arc buffer that matches the type of srcdb->db_buf */ /* allocated an arc buffer that matches the type of srcdb->db_buf */
if (arc_is_encrypted(srcdb->db_buf)) { if (arc_is_encrypted(srcdb->db_buf)) {
arc_get_raw_params(srcdb->db_buf, &byteorder, salt, iv, mac); arc_get_raw_params(srcdb->db_buf, &byteorder, salt, iv, mac);
abuf = arc_loan_raw_buf(os->os_spa, dmu_objset_id(os), abuf = arc_loan_raw_buf(os->os_spa, dmu_objset_id(os),
byteorder, salt, iv, mac, DB_DNODE(dstdb)->dn_type, byteorder, salt, iv, mac, type,
datalen, arc_buf_lsize(srcdb->db_buf), datalen, arc_buf_lsize(srcdb->db_buf),
arc_get_compression(srcdb->db_buf)); arc_get_compression(srcdb->db_buf));
} else { } else {
@ -1649,7 +1654,7 @@ dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
ASSERT3U(arc_get_compression(srcdb->db_buf), ASSERT3U(arc_get_compression(srcdb->db_buf),
==, ZIO_COMPRESS_OFF); ==, ZIO_COMPRESS_OFF);
abuf = arc_loan_buf(os->os_spa, abuf = arc_loan_buf(os->os_spa,
DMU_OT_IS_METADATA(DB_DNODE(dstdb)->dn_type), datalen); DMU_OT_IS_METADATA(type), datalen);
} }
ASSERT3U(datalen, ==, arc_buf_size(abuf)); ASSERT3U(datalen, ==, arc_buf_size(abuf));

View File

@ -2343,9 +2343,14 @@ free_guid_map_onexit(void *arg)
guid_map_entry_t *gmep; guid_map_entry_t *gmep;
while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) { while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) {
dsl_dataset_long_rele(gmep->gme_ds, gmep); ds_hold_flags_t dsflags = DS_HOLD_FLAG_DECRYPT;
dsl_dataset_rele_flags(gmep->gme_ds,
(gmep->raw) ? 0 : DS_HOLD_FLAG_DECRYPT, gmep); if (gmep->raw) {
gmep->gme_ds->ds_objset->os_raw_receive = B_FALSE;
dsflags &= ~DS_HOLD_FLAG_DECRYPT;
}
dsl_dataset_disown(gmep->gme_ds, dsflags, gmep);
kmem_free(gmep, sizeof (guid_map_entry_t)); kmem_free(gmep, sizeof (guid_map_entry_t));
} }
avl_destroy(ca); avl_destroy(ca);
@ -4237,6 +4242,7 @@ add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj,
dsl_pool_t *dp; dsl_pool_t *dp;
dsl_dataset_t *snapds; dsl_dataset_t *snapds;
guid_map_entry_t *gmep; guid_map_entry_t *gmep;
objset_t *os;
ds_hold_flags_t dsflags = (raw) ? 0 : DS_HOLD_FLAG_DECRYPT; ds_hold_flags_t dsflags = (raw) ? 0 : DS_HOLD_FLAG_DECRYPT;
int err; int err;
@ -4246,13 +4252,29 @@ add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj,
if (err != 0) if (err != 0)
return (err); return (err);
gmep = kmem_alloc(sizeof (*gmep), KM_SLEEP); gmep = kmem_alloc(sizeof (*gmep), KM_SLEEP);
err = dsl_dataset_hold_obj_flags(dp, snapobj, dsflags, gmep, &snapds); err = dsl_dataset_own_obj(dp, snapobj, dsflags, gmep, &snapds);
if (err == 0) { if (err == 0) {
gmep->guid = dsl_dataset_phys(snapds)->ds_guid; /*
* If this is a deduplicated raw send stream, we need
* to make sure that we can still read raw blocks from
* earlier datasets in the stream, so we set the
* os_raw_receive flag now.
*/
if (raw) {
err = dmu_objset_from_ds(snapds, &os);
if (err != 0) {
dsl_dataset_disown(snapds, dsflags, FTAG);
dsl_pool_rele(dp, FTAG);
kmem_free(gmep, sizeof (*gmep));
return (err);
}
os->os_raw_receive = B_TRUE;
}
gmep->raw = raw; gmep->raw = raw;
gmep->guid = dsl_dataset_phys(snapds)->ds_guid;
gmep->gme_ds = snapds; gmep->gme_ds = snapds;
avl_add(guid_map, gmep); avl_add(guid_map, gmep);
dsl_dataset_long_hold(snapds, gmep);
} else { } else {
kmem_free(gmep, sizeof (*gmep)); kmem_free(gmep, sizeof (*gmep));
} }

View File

@ -745,7 +745,7 @@ tests = ['rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos',
'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize', 'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize',
'send-c_recv_dedup', 'send_encrypted_files', 'send_encrypted_heirarchy', 'send-c_recv_dedup', 'send_encrypted_files', 'send_encrypted_heirarchy',
'send_encrypted_props', 'send_freeobjects', 'send_realloc_dnode_size', 'send_encrypted_props', 'send_freeobjects', 'send_realloc_dnode_size',
'send_hole_birth'] 'send_hole_birth', 'send-wDR_encrypted_zvol']
tags = ['functional', 'rsend'] tags = ['functional', 'rsend']
[tests/functional/scrub_mirror] [tests/functional/scrub_mirror]

View File

@ -41,7 +41,8 @@ dist_pkgdata_SCRIPTS = \
send-cpL_varied_recsize.ksh \ send-cpL_varied_recsize.ksh \
send_freeobjects.ksh \ send_freeobjects.ksh \
send_realloc_dnode_size.ksh \ send_realloc_dnode_size.ksh \
send_hole_birth.ksh send_hole_birth.ksh \
send-wDR_encrypted_zvol.ksh
dist_pkgdata_DATA = \ dist_pkgdata_DATA = \
rsend.cfg \ rsend.cfg \

View File

@ -0,0 +1,93 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#
#
# Copyright (c) 2018 by Datto Inc. All rights reserved.
#
. $STF_SUITE/tests/functional/rsend/rsend.kshlib
#
# DESCRIPTION:
# Verify that zvols with dedup=on and encryption=on can be sent and received
# with a deduplicated raw send stream.
#
# STRATEGY:
# 1. Create a zvol with dedup and encryption on and put a filesystem on it
# 2. Copy a file into the zvol a few times and take a snapshot
# 3. Repeat step 2 a few times to create more snapshots
# 4. Send all snapshots in a recursive, raw, deduplicated send stream
# 5. Mount the received zvol and verify that all of the data there is correct
#
verify_runnable "both"
function cleanup
{
ismounted $recvmnt ext4 && log_must umount $recvmnt
ismounted $mntpnt ext4 && log_must umount $mntpnt
[[ -d $recvmnt ]] && log_must rm -rf $keyfile
[[ -d $mntpnt ]] && log_must rm -rf $keyfile
destroy_dataset $TESTPOOL/recv "-r"
destroy_dataset $TESTPOOL/$TESTVOL "-r"
[[ -f $keyfile ]] && log_must rm $keyfile
[[ -f $sendfile ]] && log_must rm $sendfile
}
log_onexit cleanup
log_assert "Verify zfs can receive raw, recursive, and deduplicated send streams"
typeset keyfile=/$TESTPOOL/pkey
typeset snap_count=5
typeset zdev=$ZVOL_DEVDIR/$TESTPOOL/$TESTVOL
typeset mntpnt=$TESTDIR/$TESTVOL
typeset recvdev=$ZVOL_DEVDIR/$TESTPOOL/recv
typeset recvmnt=$TESTDIR/recvmnt
typeset sendfile=$TESTDIR/sendfile
log_must eval "echo 'password' > $keyfile"
log_must zfs create -o dedup=on -o encryption=on -o keyformat=passphrase \
-o keylocation=file://$keyfile -V 128M $TESTPOOL/$TESTVOL
log_must block_device_wait
log_must eval "echo 'y' | newfs -t ext4 -v $zdev"
log_must mkdir -p $mntpnt
log_must mkdir -p $recvmnt
log_must mount $zdev $mntpnt
for ((i = 1; i <= $snap_count; i++)); do
log_must dd if=/dev/urandom of=$mntpnt/file bs=1M count=1
for ((j = 0; j < 10; j++)); do
log_must cp $mntpnt/file $mntpnt/file$j
done
log_must sync
log_must zfs snap $TESTPOOL/$TESTVOL@snap$i
done
log_must eval "zfs send -wDR $TESTPOOL/$TESTVOL@snap$snap_count > $sendfile"
log_must eval "zfs recv $TESTPOOL/recv < $sendfile"
log_must zfs load-key $TESTPOOL/recv
log_must block_device_wait
log_must mount $recvdev $recvmnt
md5_1=$(cat $mntpnt/* | md5sum | awk '{print $1}')
md5_2=$(cat $recvmnt/* | md5sum | awk '{print $1}')
[[ "$md5_1" == "$md5_2" ]] || log_fail "md5 mismatch: $md5_1 != $md5_2"
log_pass "zfs can receive raw, recursive, and deduplicated send streams"