Wait for txg sync if the last DRR_FREEOBJECTS might result in a hole

If we receive a DRR_FREEOBJECTS as the first entry in an object range,
this might end up producing a hole if the freed objects were the
only existing objects in the block.

If the txg starts syncing before we've processed any following
DRR_OBJECT records, this leads to a possible race where the backing
arc_buf_t gets its psize set to 0 in the arc_write_ready() callback
while still being referenced from a dirty record in the open txg.

To prevent this, we insert a txg_wait_synced call if the first
record in the range was a DRR_FREEOBJECTS that actually
resulted in one or more freed objects.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: David Hedberg <david.hedberg@findity.com>
Sponsored by: Findity AB
Closes #11893
Closes #14358
This commit is contained in:
David Hedberg 2023-01-23 22:19:43 +01:00 committed by GitHub
parent 73968defdd
commit 37a27b4306
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 116 additions and 1 deletions

View File

@ -76,6 +76,12 @@ static int zfs_recv_best_effort_corrective = 0;
static const void *const dmu_recv_tag = "dmu_recv_tag"; static const void *const dmu_recv_tag = "dmu_recv_tag";
const char *const recv_clone_name = "%recv"; const char *const recv_clone_name = "%recv";
typedef enum {
ORNS_NO,
ORNS_YES,
ORNS_MAYBE
} or_need_sync_t;
static int receive_read_payload_and_next_header(dmu_recv_cookie_t *ra, int len, static int receive_read_payload_and_next_header(dmu_recv_cookie_t *ra, int len,
void *buf); void *buf);
@ -129,6 +135,9 @@ struct receive_writer_arg {
uint8_t or_mac[ZIO_DATA_MAC_LEN]; uint8_t or_mac[ZIO_DATA_MAC_LEN];
boolean_t or_byteorder; boolean_t or_byteorder;
zio_t *heal_pio; zio_t *heal_pio;
/* Keep track of DRR_FREEOBJECTS right after DRR_OBJECT_RANGE */
or_need_sync_t or_need_sync;
}; };
typedef struct dmu_recv_begin_arg { typedef struct dmu_recv_begin_arg {
@ -1914,10 +1923,22 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
/* object was freed and we are about to allocate a new one */ /* object was freed and we are about to allocate a new one */
object_to_hold = DMU_NEW_OBJECT; object_to_hold = DMU_NEW_OBJECT;
} else { } else {
/*
* If the only record in this range so far was DRR_FREEOBJECTS
* with at least one actually freed object, it's possible that
* the block will now be converted to a hole. We need to wait
* for the txg to sync to prevent races.
*/
if (rwa->or_need_sync == ORNS_YES)
txg_wait_synced(dmu_objset_pool(rwa->os), 0);
/* object is free and we are about to allocate a new one */ /* object is free and we are about to allocate a new one */
object_to_hold = DMU_NEW_OBJECT; object_to_hold = DMU_NEW_OBJECT;
} }
/* Only relevant for the first object in the range */
rwa->or_need_sync = ORNS_NO;
/* /*
* If this is a multi-slot dnode there is a chance that this * If this is a multi-slot dnode there is a chance that this
* object will expand into a slot that is already used by * object will expand into a slot that is already used by
@ -2111,6 +2132,9 @@ receive_freeobjects(struct receive_writer_arg *rwa,
if (err != 0) if (err != 0)
return (err); return (err);
if (rwa->or_need_sync == ORNS_MAYBE)
rwa->or_need_sync = ORNS_YES;
} }
if (next_err != ESRCH) if (next_err != ESRCH)
return (next_err); return (next_err);
@ -2604,6 +2628,8 @@ receive_object_range(struct receive_writer_arg *rwa,
memcpy(rwa->or_mac, drror->drr_mac, ZIO_DATA_MAC_LEN); memcpy(rwa->or_mac, drror->drr_mac, ZIO_DATA_MAC_LEN);
rwa->or_byteorder = byteorder; rwa->or_byteorder = byteorder;
rwa->or_need_sync = ORNS_MAYBE;
return (0); return (0);
} }

View File

@ -842,7 +842,7 @@ tests = ['recv_dedup', 'recv_dedup_encrypted_zvol', 'rsend_001_pos',
'send-c_recv_lz4_disabled', 'send-c_mixed_compression', 'send-c_recv_lz4_disabled', 'send-c_mixed_compression',
'send-c_stream_size_estimate', 'send-c_embedded_blocks', 'send-c_resume', 'send-c_stream_size_estimate', 'send-c_embedded_blocks', 'send-c_resume',
'send-cpL_varied_recsize', 'send-c_recv_dedup', 'send-L_toggle', 'send-cpL_varied_recsize', 'send-c_recv_dedup', 'send-L_toggle',
'send_encrypted_incremental.ksh', 'send_encrypted_incremental.ksh', 'send_encrypted_freeobjects',
'send_encrypted_hierarchy', 'send_encrypted_props', 'send_encrypted_hierarchy', 'send_encrypted_props',
'send_encrypted_truncated_files', 'send_freeobjects', 'send_realloc_files', 'send_encrypted_truncated_files', 'send_freeobjects', 'send_realloc_files',
'send_realloc_encrypted_files', 'send_spill_block', 'send_holds', 'send_realloc_encrypted_files', 'send_spill_block', 'send_holds',

View File

@ -550,6 +550,7 @@ tests = ['recv_dedup', 'recv_dedup_encrypted_zvol', 'rsend_001_pos',
'rsend_014_pos', 'rsend_016_neg', 'send-c_verify_contents', 'rsend_014_pos', 'rsend_016_neg', 'send-c_verify_contents',
'send-c_volume', 'send-c_zstreamdump', 'send-c_recv_dedup', 'send-c_volume', 'send-c_zstreamdump', 'send-c_recv_dedup',
'send-L_toggle', 'send_encrypted_hierarchy', 'send_encrypted_props', 'send-L_toggle', 'send_encrypted_hierarchy', 'send_encrypted_props',
'send_encrypted_freeobjects',
'send_encrypted_truncated_files', 'send_freeobjects', 'send_holds', 'send_encrypted_truncated_files', 'send_freeobjects', 'send_holds',
'send_mixed_raw', 'send-wR_encrypted_zvol', 'send_partial_dataset', 'send_mixed_raw', 'send-wR_encrypted_zvol', 'send_partial_dataset',
'send_invalid'] 'send_invalid']

View File

@ -1810,6 +1810,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/rsend/send_doall.ksh \ functional/rsend/send_doall.ksh \
functional/rsend/send_encrypted_incremental.ksh \ functional/rsend/send_encrypted_incremental.ksh \
functional/rsend/send_encrypted_files.ksh \ functional/rsend/send_encrypted_files.ksh \
functional/rsend/send_encrypted_freeobjects.ksh \
functional/rsend/send_encrypted_hierarchy.ksh \ functional/rsend/send_encrypted_hierarchy.ksh \
functional/rsend/send_encrypted_props.ksh \ functional/rsend/send_encrypted_props.ksh \
functional/rsend/send_encrypted_truncated_files.ksh \ functional/rsend/send_encrypted_truncated_files.ksh \

View File

@ -0,0 +1,87 @@
#!/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
# Copyright (c) 2023 by Findity AB
#
. $STF_SUITE/tests/functional/rsend/rsend.kshlib
#
# Description:
# Verify that receiving a raw encrypted stream, with a FREEOBJECTS
# removing all existing objects in a block followed by an OBJECT write
# to the same block, does not result in a panic.
#
# Strategy:
# 1. Create a new encrypted filesystem
# 2. Create file f1 as the first object in some block (here object 128)
# 3. Take snapshot A
# 4. Create file f2 as the second object in the same block (here object 129)
# 5. Delete f1
# 6. Take snapshot B
# 7. Receive a full raw encrypted send of A
# 8. Receive an incremental raw send of B
#
verify_runnable "both"
function create_object_with_num
{
file=$1
num=$2
tries=100
for ((i=0; i<$tries; i++)); do
touch $file
onum=$(ls -li $file | awk '{print $1}')
if [[ $onum -ne $num ]] ; then
rm -f $file
else
break
fi
done
if [[ $i -eq $tries ]]; then
log_fail "Failed to create object with number $num"
fi
}
log_assert "FREEOBJECTS followed by OBJECT in encrypted stream does not crash"
sendds=sendencfods
recvds=recvencfods
keyfile=/$POOL/keyencfods
f1=/$POOL/$sendds/f1
f2=/$POOL/$sendds/f2
log_must eval "echo 'password' > $keyfile"
#
# xattr=sa and dnodesize=legacy for sequential object numbers, see
# note in send_freeobjects.ksh.
#
log_must zfs create -o xattr=sa -o dnodesize=legacy -o encryption=on \
-o keyformat=passphrase -o keylocation=file://$keyfile $POOL/$sendds
create_object_with_num $f1 128
log_must zfs snap $POOL/$sendds@A
create_object_with_num $f2 129
log_must rm $f1
log_must zfs snap $POOL/$sendds@B
log_must eval "zfs send -w $POOL/$sendds@A | zfs recv $POOL/$recvds"
log_must eval "zfs send -w -i $POOL/$sendds@A $POOL/$sendds@B |" \
"zfs recv $POOL/$recvds"
log_pass "FREEOBJECTS followed by OBJECT in encrypted stream did not crash"