Allow block cloning across encrypted datasets

When two datasets share the same master encryption key, it is safe
to clone encrypted blocks between them. Currently, only snapshots and
clones of a dataset share its master encryption key.

Added tests for:
- Clone from encrypted sibling to encrypted sibling with a
  non-encrypted parent
- Clone from encrypted parent to inherited encrypted child
- Clone from child to sibling with encrypted parent
- Clone from snapshot to the original dataset
- Clone from foreign snapshot to a foreign dataset
- Clone from non-encrypted to encrypted datasets
- Clone from encrypted to non-encrypted datasets
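
A minimal sketch of that rule, assuming a pool named tank (all names below are
illustrative): snapshots and clones reuse the origin's master key, while
independently created encryption roots and newly created children derive their
own, so only the former can stay block clones.

    # A snapshot and a clone of an encrypted dataset share its master key,
    # so copy_file_range() between them can remain a block clone.
    echo "top_secret" | zfs create -o encryption=on -o keyformat=passphrase \
        -o keylocation=prompt tank/enc
    zfs snapshot tank/enc@base
    zfs clone tank/enc@base tank/enc_clone

    # A separately created encryption root (or a child created under
    # tank/enc) derives its own master key, so cloning into it is expected
    # to fall back to an ordinary copy.
    echo "top_secret" | zfs create -o encryption=on -o keyformat=passphrase \
        -o keylocation=prompt tank/other_enc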

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Original-patch-by: Pawel Jakub Dawidek <pawel@dawidek.net>
Signed-off-by: Kay Pedersen <mail@mkwg.de>
Closes #15544
Authored by oromenahar on 2023-12-05 20:03:48 +01:00; committed by Brian Behlendorf
parent e11b3eb1c6
commit 121924575e
10 changed files with 236 additions and 25 deletions

View File

@@ -206,6 +206,7 @@ void dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin,
     dmu_tx_t *tx);
 int dmu_objset_create_crypt_check(dsl_dir_t *parentdd,
     dsl_crypto_params_t *dcp, boolean_t *will_encrypt);
+boolean_t dmu_objset_crypto_key_equal(objset_t *osa, objset_t *osb);
 void dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd,
     struct dsl_dataset *origin, dsl_crypto_params_t *dcp, dmu_tx_t *tx);
 uint64_t dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey,

View File

@@ -364,9 +364,12 @@ When this feature is enabled ZFS will use block cloning for operations like
 Block cloning allows to create multiple references to a single block.
 It is much faster than copying the data (as the actual data is neither read nor
 written) and takes no additional space.
-Blocks can be cloned across datasets under some conditions (like disabled
-encryption and equal
-.Nm recordsize ) .
+Blocks can be cloned across datasets under some conditions (like equal
+.Nm recordsize ,
+the same master encryption key, etc.).
+ZFS tries its best to clone across datasets including encrypted ones.
+This is limited for various (nontrivial) reasons depending on the OS
+and/or ZFS internals.
 .Pp
 This feature becomes
 .Sy active
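
As a usage aside, one way to see whether cross-dataset clones are actually
sharing blocks is through the pool feature state and the block-cloning
accounting properties; the bcloneused/bclonesaved/bcloneratio properties are
assumed to be available on the OpenZFS release in use (pool name illustrative):

    zpool get feature@block_cloning tank
    # Space shared through the Block Reference Table (BRT).
    zpool get bcloneused,bclonesaved,bcloneratio tank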

View File

@@ -157,10 +157,8 @@
  * (copying the file content to the new dataset and removing the source file).
  * In that case Block Cloning will only be used briefly, because the BRT entries
  * will be removed when the source is removed.
- * Note: currently it is not possible to clone blocks between encrypted
- * datasets, even if those datasets use the same encryption key (this includes
- * snapshots of encrypted datasets). Cloning blocks between datasets that use
- * the same keys should be possible and should be implemented in the future.
+ * Block Cloning across encrypted datasets is supported as long as both
+ * datasets share the same master key (e.g. snapshots and clones)
  *
  * Block Cloning flow through ZFS layers.
  *
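
To illustrate the mv case mentioned in the comment above, a hedged sketch
(paths illustrative; whether the copy step clones at all depends on the
tooling using copy_file_range):

    # A cross-dataset mv on the same pool is a copy followed by an unlink;
    # if the copy step clones, the resulting BRT entries only live until
    # the source file is removed at the end of the move.
    mv /tank/ds1/bigfile /tank/ds2/bigfile
    # Shared-space accounting should be back to its previous value here.
    zpool get bcloneused tank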

View File

@@ -266,6 +266,40 @@ spa_crypto_key_compare(const void *a, const void *b)
 	return (0);
 }
 
+/*
+ * this compares a crypto key based on zk_guid. See comment on
+ * spa_crypto_key_compare for more information.
+ */
+boolean_t
+dmu_objset_crypto_key_equal(objset_t *osa, objset_t *osb)
+{
+	dsl_crypto_key_t *dcka = NULL;
+	dsl_crypto_key_t *dckb = NULL;
+	uint64_t obja, objb;
+	boolean_t equal;
+	spa_t *spa;
+
+	spa = dmu_objset_spa(osa);
+	if (spa != dmu_objset_spa(osb))
+		return (B_FALSE);
+	obja = dmu_objset_ds(osa)->ds_object;
+	objb = dmu_objset_ds(osb)->ds_object;
+
+	if (spa_keystore_lookup_key(spa, obja, FTAG, &dcka) != 0)
+		return (B_FALSE);
+	if (spa_keystore_lookup_key(spa, objb, FTAG, &dckb) != 0) {
+		spa_keystore_dsl_key_rele(spa, dcka, FTAG);
+		return (B_FALSE);
+	}
+
+	equal = (dcka->dck_key.zk_guid == dckb->dck_key.zk_guid);
+
+	spa_keystore_dsl_key_rele(spa, dcka, FTAG);
+	spa_keystore_dsl_key_rele(spa, dckb, FTAG);
+
+	return (equal);
+}
+
 static int
 spa_key_mapping_compare(const void *a, const void *b)
 {

View File

@@ -47,6 +47,7 @@
 #include <sys/fs/zfs.h>
 #include <sys/dmu.h>
 #include <sys/dmu_objset.h>
+#include <sys/dsl_crypt.h>
 #include <sys/spa.h>
 #include <sys/txg.h>
 #include <sys/dbuf.h>
@@ -1103,6 +1104,16 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
 		return (SET_ERROR(EXDEV));
 	}
 
+	/*
+	 * Cloning across encrypted datasets is possible only if they
+	 * share the same master key.
+	 */
+	if (inos != outos && inos->os_encrypted &&
+	    !dmu_objset_crypto_key_equal(inos, outos)) {
+		zfs_exit_two(inzfsvfs, outzfsvfs, FTAG);
+		return (SET_ERROR(EXDEV));
+	}
+
 	error = zfs_verify_zp(inzp);
 	if (error == 0)
 		error = zfs_verify_zp(outzp);
@@ -1286,20 +1297,6 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
 			 */
 			break;
 		}
-		/*
-		 * Encrypted data is fine as long as it comes from the same
-		 * dataset.
-		 * TODO: We want to extend it in the future to allow cloning to
-		 * datasets with the same keys, like clones or to be able to
-		 * clone a file from a snapshot of an encrypted dataset into the
-		 * dataset itself.
-		 */
-		if (BP_IS_PROTECTED(&bps[0])) {
-			if (inzfsvfs != outzfsvfs) {
-				error = SET_ERROR(EXDEV);
-				break;
-			}
-		}
 
 		/*
 		 * Start a transaction.

View File

@@ -42,6 +42,7 @@ tests = ['block_cloning_copyfilerange', 'block_cloning_copyfilerange_partial',
     'block_cloning_disabled_copyfilerange', 'block_cloning_disabled_ficlone',
     'block_cloning_disabled_ficlonerange',
     'block_cloning_copyfilerange_cross_dataset',
+    'block_cloning_cross_enc_dataset',
     'block_cloning_copyfilerange_fallback_same_txg']
 tags = ['functional', 'block_cloning']

View File

@@ -305,6 +305,8 @@ elif sys.platform.startswith('linux'):
         ['SKIP', cfr_cross_reason],
     'block_cloning/block_cloning_copyfilerange_fallback_same_txg':
         ['SKIP', cfr_cross_reason],
+    'block_cloning/block_cloning_cross_enc_dataset':
+        ['SKIP', cfr_cross_reason],
 })

View File

@@ -451,6 +451,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
 	functional/block_cloning/block_cloning_ficlone.ksh \
 	functional/block_cloning/block_cloning_ficlonerange.ksh \
 	functional/block_cloning/block_cloning_ficlonerange_partial.ksh \
+	functional/block_cloning/block_cloning_cross_enc_dataset.ksh \
 	functional/bootfs/bootfs_001_pos.ksh \
 	functional/bootfs/bootfs_002_neg.ksh \
 	functional/bootfs/bootfs_003_pos.ksh \

View File

@@ -28,8 +28,8 @@
 
 function have_same_content
 {
-	typeset hash1=$(cat $1 | md5sum)
-	typeset hash2=$(cat $2 | md5sum)
+	typeset hash1=$(md5digest $1)
+	typeset hash2=$(md5digest $2)
 
 	log_must [ "$hash1" = "$hash2" ]
 }
@@ -44,10 +44,14 @@ function have_same_content
 #
 function get_same_blocks
 {
+	KEY=$5
+	if [ ${#KEY} -gt 0 ]; then
+		KEY="--key=$KEY"
+	fi
 	typeset zdbout=${TMPDIR:-$TEST_BASE_DIR}/zdbout.$$
-	zdb -vvvvv $1 -O $2 | \
+	zdb $KEY -vvvvv $1 -O $2 | \
 	    awk '/ L0 / { print l++ " " $3 " " $7 }' > $zdbout.a
-	zdb -vvvvv $3 -O $4 | \
+	zdb $KEY -vvvvv $3 -O $4 | \
 	    awk '/ L0 / { print l++ " " $3 " " $7 }' > $zdbout.b
 	echo $(sort $zdbout.a $zdbout.b | uniq -d | cut -f1 -d' ')
 }
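
For reference, a sketch of how the extended helper might be called for
encrypted datasets (dataset and file names illustrative); the optional fifth
argument is forwarded to zdb as --key so the encrypted object trees can be
inspected:

    # Expect the four 128K L0 blocks of the original and the clone to match.
    typeset blocks=$(get_same_blocks tank/enc file tank/enc_clone clone "top_secret")
    log_must [ "$blocks" = "0 1 2 3" ]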

View File

@@ -0,0 +1,170 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023, Kay Pedersen <mail@mkwg.de>
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib
verify_runnable "global"
if [[ $(linux_version) -lt $(linux_version "5.3") ]]; then
	log_unsupported "copy_file_range can't copy cross-filesystem before Linux 5.3"
fi
claim="Block cloning across encrypted datasets."
log_assert $claim
DS1="$TESTPOOL/encrypted1"
DS2="$TESTPOOL/encrypted2"
DS1_NC="$TESTPOOL/notcrypted1"
PASSPHRASE="top_secret"
function prepare_enc
{
	log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $DISKS

	log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \
	    "-o keyformat=passphrase -o keylocation=prompt $DS1"
	log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \
	    "-o keyformat=passphrase -o keylocation=prompt $DS2"
	log_must zfs create $DS1/child1
	log_must zfs create $DS1/child2
	log_must zfs create $DS1_NC

	log_note "Create test file"
	# we must wait until the src file txg is written to the disk otherwise we
	# will fallback to normal copy. See "dmu_read_l0_bps" in
	# "zfs/module/zfs/dmu.c" and "zfs_clone_range" in
	# "zfs/module/zfs/zfs_vnops.c"
	log_must dd if=/dev/urandom of=/$DS1/file bs=128K count=4
	log_must dd if=/dev/urandom of=/$DS1/child1/file bs=128K count=4
	log_must dd if=/dev/urandom of=/$DS1_NC/file bs=128K count=4
	log_must sync_pool $TESTPOOL
}
function cleanup_enc
{
	datasetexists $TESTPOOL && destroy_pool $TESTPOOL
}
function clone_and_check
{
	I_FILE="$1"
	O_FILE=$2
	I_DS=$3
	O_DS=$4
	SAME_BLOCKS=$5
	# the CLONE option provides a choice between copy_file_range
	# which should clone and a dd which is a copy no matter what
	CLONE=$6
	SNAPSHOT=$7
	if [ ${#SNAPSHOT} -gt 0 ]; then
		I_FILE=".zfs/snapshot/$SNAPSHOT/$1"
	fi
	if [ $CLONE ]; then
		log_must clonefile -f "/$I_DS/$I_FILE" "/$O_DS/$O_FILE" 0 0 524288
	else
		log_must dd if="/$I_DS/$I_FILE" of="/$O_DS/$O_FILE" bs=128K
	fi
	log_must sync_pool $TESTPOOL

	log_must have_same_content "/$I_DS/$I_FILE" "/$O_DS/$O_FILE"

	if [ ${#SNAPSHOT} -gt 0 ]; then
		I_DS="$I_DS@$SNAPSHOT"
		I_FILE="$1"
	fi

	typeset blocks=$(get_same_blocks \
	    $I_DS $I_FILE $O_DS $O_FILE $PASSPHRASE)
	log_must [ "$blocks" = "$SAME_BLOCKS" ]
}
log_onexit cleanup_enc
prepare_enc
log_note "Cloning entire file with copy_file_range across different enc" \
"roots, should fallback"
# we are expecting no same block map.
clone_and_check "file" "clone" $DS1 $DS2 "" true
log_note "check if the file is still readable and the same after" \
"unmount and key unload, shouldn't fail"
typeset hash1=$(md5digest "/$DS1/file")
log_must zfs umount $DS1 && zfs unload-key $DS1
typeset hash2=$(md5digest "/$DS2/clone")
log_must [ "$hash1" = "$hash2" ]
cleanup_enc
prepare_enc
log_note "Cloning entire file with copy_file_range across different child datasets"
# clone shouldn't work because of deriving a new master key for the child
# we are expecting no same block map.
clone_and_check "file" "clone" $DS1 "$DS1/child1" "" true
clone_and_check "file" "clone" "$DS1/child1" "$DS1/child2" "" true
cleanup_enc
prepare_enc
log_note "Copying entire file with copy_file_range across same snapshot"
log_must zfs snapshot -r $DS1@s1
log_must sync_pool $TESTPOOL
log_must rm -f "/$DS1/file"
log_must sync_pool $TESTPOOL
clone_and_check "file" "clone" "$DS1" "$DS1" "0 1 2 3" true "s1"
cleanup_enc
prepare_enc
log_note "Copying entire file with copy_file_range across different snapshot"
clone_and_check "file" "file" $DS1 $DS2 "" true
log_must zfs snapshot -r $DS2@s1
log_must sync_pool $TESTPOOL
log_must rm -f "/$DS1/file" "/$DS2/file"
log_must sync_pool $TESTPOOL
clone_and_check "file" "clone" "$DS2" "$DS1" "" true "s1"
typeset hash1=$(md5digest "/$DS1/.zfs/snapshot/s1/file")
log_note "destroy the snapshot and check if the file is still readable and" \
"has the same content"
log_must zfs destroy -r $DS2@s1
log_must sync_pool $TESTPOOL
typeset hash2=$(md5digest "/$DS1/file")
log_must [ "$hash1" = "$hash2" ]
cleanup_enc
prepare_enc
log_note "Copying with copy_file_range from non encrypted to encrypted"
clone_and_check "file" "copy" $DS1_NC $DS1 "" true
cleanup_enc
prepare_enc
log_note "Copying with copy_file_range from encrypted to non encrypted"
clone_and_check "file" "copy" $DS1 $DS1_NC "" true
log_must sync_pool $TESTPOOL
log_pass $claim