Handle and detect #13709's unlock regression (#14161)

In #13709, as in #11294 before it, it turns out that 63a26454 still has
the same failure mode as when it first landed as d1d47691, and
fails to unlock certain datasets that formerly worked.

Rather than reverting it again, let's add handling to just throw out
the accounting metadata that failed to unlock when that happens, as
well as a test with a pre-broken pool image to ensure that we never get
bitten by this again.

Fixes: #13709

Signed-off-by: Rich Ercolani <rincebrain@gmail.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
This commit is contained in:
Rich Ercolani 2022-11-15 17:44:12 -05:00 committed by Tony Hutter
parent d9de079a4b
commit fa7d572a8a
5 changed files with 68 additions and 6 deletions

View File

@ -2672,6 +2672,7 @@ spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj,
objset_phys_t *osp = buf;
uint8_t portable_mac[ZIO_OBJSET_MAC_LEN];
uint8_t local_mac[ZIO_OBJSET_MAC_LEN];
const uint8_t zeroed_mac[ZIO_OBJSET_MAC_LEN] = {0};
/* look up the key from the spa's keystore */
ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck);
@ -2694,11 +2695,25 @@ spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj,
return (0);
}
if (bcmp(portable_mac, osp->os_portable_mac, ZIO_OBJSET_MAC_LEN) != 0 ||
bcmp(local_mac, osp->os_local_mac, ZIO_OBJSET_MAC_LEN) != 0) {
if (memcmp(portable_mac, osp->os_portable_mac,
ZIO_OBJSET_MAC_LEN) != 0 ||
memcmp(local_mac, osp->os_local_mac, ZIO_OBJSET_MAC_LEN) != 0) {
/*
* If the MAC is zeroed out, we failed to decrypt it.
* This should only arise, at least on Linux,
* if we hit edge case handling for useraccounting, since we
* shouldn't get here without bailing out on error earlier
* otherwise.
*
* So if we're in that case, we can just fall through; the code
* elsewhere that special-cases a zeroed MAC will handle it,
* since we can just regenerate it.
*/
if (memcmp(local_mac, zeroed_mac, ZIO_OBJSET_MAC_LEN) != 0) {
abd_return_buf(abd, buf, datalen);
return (SET_ERROR(ECKSUM));
}
}
abd_return_buf(abd, buf, datalen);

View File

@ -892,7 +892,7 @@ tests = [
'userquota_007_pos', 'userquota_008_pos', 'userquota_009_pos',
'userquota_010_pos', 'userquota_011_pos', 'userquota_012_neg',
'userspace_001_pos', 'userspace_002_pos', 'userspace_encrypted',
'userspace_send_encrypted']
'userspace_send_encrypted', 'userspace_encrypted_13709']
tags = ['functional', 'userquota']
[tests/functional/vdev_zaps]

View File

@ -22,8 +22,10 @@ dist_pkgdata_SCRIPTS = \
userspace_002_pos.ksh \
userspace_003_pos.ksh \
userspace_encrypted.ksh \
userspace_send_encrypted.ksh
userspace_send_encrypted.ksh \
userspace_encrypted_13709.ksh
dist_pkgdata_DATA = \
13709_reproducer.bz2 \
userquota.cfg \
userquota_common.kshlib

View File

@ -0,0 +1,45 @@
#!/bin/ksh -p
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/userquota/userquota_common.kshlib
#
# DESCRIPTION:
# Avoid allowing #11294/#13709 to recur a third time.
#
# So we hardcode a copy of a pool with this bug, try unlocking it,
# and fail on error. Simple.
#
# Tear down what this test created: destroy the imported pool if it is
# still present, then remove the file that backs it.
#
# This function is both registered with log_onexit and called explicitly
# at the end of the test, so it may run more than once; the poolexists
# guard keeps a repeated call from trying to destroy a pool that is
# already gone.
#
function cleanup
{
	poolexists "$POOLNAME" && destroy_pool "$POOLNAME"
	rm -f "$FILEDEV"
}
# Register the teardown handler before anything is created.
log_onexit cleanup

# File that will hold the unpacked pool image, and the name of the pool
# stored inside that image.
FILEDEV="$TEST_BASE_DIR/userspace_13709"
POOLNAME="testpool_13709"
IMAGE="$STF_SUITE/tests/functional/userquota/13709_reproducer.bz2"

log_assert "ZFS should be able to unlock pools with #13709's failure mode"

# Unpack the pre-broken pool image. The redirection lives inside the eval
# so that it applies to bzcat only; redirecting log_must itself would also
# capture anything log_must prints on stdout into the pool image.
log_must eval "bzcat $IMAGE > $FILEDEV"

log_must zpool import -d $FILEDEV $POOLNAME

# Supply the passphrase on stdin while mounting everything; -l asks
# zfs mount to load the keys it needs, which is the step that used to fail.
echo -e 'password\npassword\n' | log_must zfs mount -al

# Cleanup
cleanup

log_pass "#13709 not happening here"