Fix zpool status in case of unloaded keys

When scrubbing an encrypted filesystem with unloaded key still report an
error in zpool status.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alek Pinchuk <apinchuk@axcient.com>
Signed-off-by: George Amanakis <gamanakis@gmail.com>
Closes #13675
Closes #13717
This commit is contained in:
George Amanakis 2022-08-23 02:42:01 +02:00 committed by GitHub
parent 17e212652d
commit 0c4064d9a0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 190 additions and 34 deletions

View File

@ -222,5 +222,6 @@ int spa_do_crypt_abd(boolean_t encrypt, spa_t *spa, const zbookmark_phys_t *zb,
dmu_object_type_t ot, boolean_t dedup, boolean_t bswap, uint8_t *salt, dmu_object_type_t ot, boolean_t dedup, boolean_t bswap, uint8_t *salt,
uint8_t *iv, uint8_t *mac, uint_t datalen, abd_t *pabd, abd_t *cabd, uint8_t *iv, uint8_t *mac, uint_t datalen, abd_t *pabd, abd_t *cabd,
boolean_t *no_crypt); boolean_t *no_crypt);
zfs_keystatus_t dsl_dataset_get_keystatus(dsl_dir_t *dd);
#endif #endif

View File

@ -545,6 +545,9 @@ This feature enables the upgraded version of errlog, which required an on-disk
error log format change. error log format change.
Now the error log of each head dataset is stored separately in the zap object Now the error log of each head dataset is stored separately in the zap object
and keyed by the head id. and keyed by the head id.
In case of encrypted filesystems with unloaded keys or unmounted encrypted
filesystems we are unable to check their snapshots or clones for errors and
these will not be reported.
With this feature enabled, every dataset affected by an error block is listed With this feature enabled, every dataset affected by an error block is listed
in the output of in the output of
.Nm zpool Cm status . .Nm zpool Cm status .

View File

@ -1137,7 +1137,7 @@ dmu_objset_check_wkey_loaded(dsl_dir_t *dd)
return (0); return (0);
} }
static zfs_keystatus_t zfs_keystatus_t
dsl_dataset_get_keystatus(dsl_dir_t *dd) dsl_dataset_get_keystatus(dsl_dir_t *dd)
{ {
/* check if this dd has a has a dsl key */ /* check if this dd has a has a dsl key */

View File

@ -21,8 +21,8 @@
/* /*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013, 2014, Delphix. All rights reserved. * Copyright (c) 2013, 2014, Delphix. All rights reserved.
* Copyright (c) 2021, George Amanakis. All rights reserved.
* Copyright (c) 2019 Datto Inc. * Copyright (c) 2019 Datto Inc.
* Copyright (c) 2021, 2022, George Amanakis. All rights reserved.
*/ */
/* /*
@ -68,6 +68,7 @@
#include <sys/dsl_dir.h> #include <sys/dsl_dir.h>
#include <sys/dmu_objset.h> #include <sys/dmu_objset.h>
#include <sys/dbuf.h> #include <sys/dbuf.h>
#include <sys/zfs_znode.h>
#define NAME_MAX_LEN 64 #define NAME_MAX_LEN 64
@ -175,6 +176,23 @@ get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj,
return (error); return (error);
} }
/*
* If the key is not loaded dbuf_dnode_findbp() will error out with
* EACCES. However in that case dnode_hold() will eventually call
* dbuf_read()->zio_wait() which may call spa_log_error(). This will
* lead to a deadlock due to us holding the mutex spa_errlist_lock.
* Avoid this by checking here if the keys are loaded, if not return.
* If the keys are not loaded the head_errlog feature is meaningless
* as we cannot figure out the birth txg of the block pointer.
*/
if (dsl_dataset_get_keystatus(ds->ds_dir) ==
ZFS_KEYSTATUS_UNAVAILABLE) {
zep->zb_birth = 0;
dsl_dataset_rele(ds, FTAG);
dsl_pool_config_exit(dp, FTAG);
return (0);
}
dnode_t *dn; dnode_t *dn;
blkptr_t bp; blkptr_t bp;
@ -188,11 +206,22 @@ get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj,
rw_enter(&dn->dn_struct_rwlock, RW_READER); rw_enter(&dn->dn_struct_rwlock, RW_READER);
error = dbuf_dnode_findbp(dn, zep->zb_level, zep->zb_blkid, &bp, NULL, error = dbuf_dnode_findbp(dn, zep->zb_level, zep->zb_blkid, &bp, NULL,
NULL); NULL);
if (error == 0 && BP_IS_HOLE(&bp)) if (error == 0 && BP_IS_HOLE(&bp))
error = SET_ERROR(ENOENT); error = SET_ERROR(ENOENT);
zep->zb_birth = bp.blk_birth; /*
* If the key is loaded but the encrypted filesystem is unmounted when
* a scrub is run, then dbuf_dnode_findbp() will still error out with
* EACCES (possibly due to the key mapping being removed upon
* unmounting). In that case the head_errlog feature is also
* meaningless as we cannot figure out the birth txg of the block
* pointer.
*/
if (error == EACCES)
error = 0;
else if (!error)
zep->zb_birth = bp.blk_birth;
rw_exit(&dn->dn_struct_rwlock); rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG); dnode_rele(dn, FTAG);
dsl_dataset_rele(ds, FTAG); dsl_dataset_rele(ds, FTAG);
@ -264,7 +293,6 @@ find_birth_txg(dsl_dataset_t *ds, zbookmark_err_phys_t *zep,
rw_enter(&dn->dn_struct_rwlock, RW_READER); rw_enter(&dn->dn_struct_rwlock, RW_READER);
error = dbuf_dnode_findbp(dn, zep->zb_level, zep->zb_blkid, &bp, NULL, error = dbuf_dnode_findbp(dn, zep->zb_level, zep->zb_blkid, &bp, NULL,
NULL); NULL);
if (error == 0 && BP_IS_HOLE(&bp)) if (error == 0 && BP_IS_HOLE(&bp))
error = SET_ERROR(ENOENT); error = SET_ERROR(ENOENT);
@ -298,27 +326,40 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
uint64_t txg_to_consider = spa->spa_syncing_txg; uint64_t txg_to_consider = spa->spa_syncing_txg;
boolean_t check_snapshot = B_TRUE; boolean_t check_snapshot = B_TRUE;
error = find_birth_txg(ds, zep, &latest_txg); error = find_birth_txg(ds, zep, &latest_txg);
if (error == 0) {
if (zep->zb_birth == latest_txg) { /*
/* Block neither free nor rewritten. */ * If we cannot figure out the current birth txg of the block pointer
if (!only_count) { * error out. If the filesystem is encrypted and the key is not loaded
zbookmark_phys_t zb; * or the encrypted filesystem is not mounted the error will be EACCES.
zep_to_zb(head_ds, zep, &zb); * In that case do not return an error.
if (copyout(&zb, (char *)uaddr + (*count - 1) */
* sizeof (zbookmark_phys_t), if (error == EACCES) {
sizeof (zbookmark_phys_t)) != 0) { dsl_dataset_rele(ds, FTAG);
dsl_dataset_rele(ds, FTAG); return (0);
return (SET_ERROR(EFAULT)); }
} if (error) {
(*count)--; dsl_dataset_rele(ds, FTAG);
} else { return (error);
(*count)++; }
if (zep->zb_birth == latest_txg) {
/* Block neither free nor rewritten. */
if (!only_count) {
zbookmark_phys_t zb;
zep_to_zb(head_ds, zep, &zb);
if (copyout(&zb, (char *)uaddr + (*count - 1)
* sizeof (zbookmark_phys_t),
sizeof (zbookmark_phys_t)) != 0) {
dsl_dataset_rele(ds, FTAG);
return (SET_ERROR(EFAULT));
} }
check_snapshot = B_FALSE; (*count)--;
} else { } else {
ASSERT3U(zep->zb_birth, <, latest_txg); (*count)++;
txg_to_consider = latest_txg;
} }
check_snapshot = B_FALSE;
} else {
ASSERT3U(zep->zb_birth, <, latest_txg);
txg_to_consider = latest_txg;
} }
/* How many snapshots reference this block. */ /* How many snapshots reference this block. */
@ -439,9 +480,31 @@ process_error_block(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
uint64_t *count, void *uaddr, boolean_t only_count) uint64_t *count, void *uaddr, boolean_t only_count)
{ {
dsl_pool_t *dp = spa->spa_dsl_pool; dsl_pool_t *dp = spa->spa_dsl_pool;
dsl_pool_config_enter(dp, FTAG);
uint64_t top_affected_fs; uint64_t top_affected_fs;
/*
* If the zb_birth is 0 it means we failed to retrieve the birth txg
* of the block pointer. This happens when an encrypted filesystem is
* not mounted or when the key is not loaded. Do not proceed to
* check_filesystem(), instead do the accounting here.
*/
if (zep->zb_birth == 0) {
if (!only_count) {
zbookmark_phys_t zb;
zep_to_zb(head_ds, zep, &zb);
if (copyout(&zb, (char *)uaddr + (*count - 1)
* sizeof (zbookmark_phys_t),
sizeof (zbookmark_phys_t)) != 0) {
return (SET_ERROR(EFAULT));
}
(*count)--;
} else {
(*count)++;
}
return (0);
}
dsl_pool_config_enter(dp, FTAG);
int error = find_top_affected_fs(spa, head_ds, zep, &top_affected_fs); int error = find_top_affected_fs(spa, head_ds, zep, &top_affected_fs);
if (error == 0) if (error == 0)
error = check_filesystem(spa, top_affected_fs, zep, count, error = check_filesystem(spa, top_affected_fs, zep, count,
@ -497,6 +560,7 @@ get_errlist_size(spa_t *spa, avl_tree_t *tree)
zep.zb_object = se->se_bookmark.zb_object; zep.zb_object = se->se_bookmark.zb_object;
zep.zb_level = se->se_bookmark.zb_level; zep.zb_level = se->se_bookmark.zb_level;
zep.zb_blkid = se->se_bookmark.zb_blkid; zep.zb_blkid = se->se_bookmark.zb_blkid;
zep.zb_birth = 0;
/* /*
* If we cannot find out the head dataset and birth txg of * If we cannot find out the head dataset and birth txg of
@ -505,8 +569,10 @@ get_errlist_size(spa_t *spa, avl_tree_t *tree)
* sync_error_list() and written to the on-disk error log. * sync_error_list() and written to the on-disk error log.
*/ */
uint64_t head_ds_obj; uint64_t head_ds_obj;
if (get_head_and_birth_txg(spa, &zep, int error = get_head_and_birth_txg(spa, &zep,
se->se_bookmark.zb_objset, &head_ds_obj) == 0) se->se_bookmark.zb_objset, &head_ds_obj);
if (!error)
(void) process_error_block(spa, head_ds_obj, &zep, (void) process_error_block(spa, head_ds_obj, &zep,
&total, NULL, B_TRUE); &total, NULL, B_TRUE);
} }
@ -695,6 +761,7 @@ sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj,
zep.zb_object = zb.zb_object; zep.zb_object = zb.zb_object;
zep.zb_level = zb.zb_level; zep.zb_level = zb.zb_level;
zep.zb_blkid = zb.zb_blkid; zep.zb_blkid = zb.zb_blkid;
zep.zb_birth = 0;
/* /*
* We cannot use get_head_and_birth_txg() because it will * We cannot use get_head_and_birth_txg() because it will
@ -737,8 +804,11 @@ sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj,
rw_enter(&dn->dn_struct_rwlock, RW_READER); rw_enter(&dn->dn_struct_rwlock, RW_READER);
error = dbuf_dnode_findbp(dn, zep.zb_level, zep.zb_blkid, &bp, error = dbuf_dnode_findbp(dn, zep.zb_level, zep.zb_blkid, &bp,
NULL, NULL); NULL, NULL);
if (error == EACCES)
error = 0;
else if (!error)
zep.zb_birth = bp.blk_birth;
zep.zb_birth = bp.blk_birth;
rw_exit(&dn->dn_struct_rwlock); rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG); dnode_rele(dn, FTAG);
dsl_dataset_rele(ds, FTAG); dsl_dataset_rele(ds, FTAG);
@ -885,16 +955,16 @@ process_error_list(spa_t *spa, avl_tree_t *list, void *uaddr, uint64_t *count)
zep.zb_object = se->se_bookmark.zb_object; zep.zb_object = se->se_bookmark.zb_object;
zep.zb_level = se->se_bookmark.zb_level; zep.zb_level = se->se_bookmark.zb_level;
zep.zb_blkid = se->se_bookmark.zb_blkid; zep.zb_blkid = se->se_bookmark.zb_blkid;
zep.zb_birth = 0;
uint64_t head_ds_obj; uint64_t head_ds_obj;
int error = get_head_and_birth_txg(spa, &zep, int error = get_head_and_birth_txg(spa, &zep,
se->se_bookmark.zb_objset, &head_ds_obj); se->se_bookmark.zb_objset, &head_ds_obj);
if (error != 0)
return (error);
error = process_error_block(spa, head_ds_obj, &zep, count, if (!error)
uaddr, B_FALSE); error = process_error_block(spa, head_ds_obj, &zep,
if (error != 0) count, uaddr, B_FALSE);
if (error)
return (error); return (error);
} }
return (0); return (0);
@ -1013,6 +1083,7 @@ sync_error_list(spa_t *spa, avl_tree_t *t, uint64_t *obj, dmu_tx_t *tx)
zep.zb_object = se->se_bookmark.zb_object; zep.zb_object = se->se_bookmark.zb_object;
zep.zb_level = se->se_bookmark.zb_level; zep.zb_level = se->se_bookmark.zb_level;
zep.zb_blkid = se->se_bookmark.zb_blkid; zep.zb_blkid = se->se_bookmark.zb_blkid;
zep.zb_birth = 0;
/* /*
* If we cannot find out the head dataset and birth txg * If we cannot find out the head dataset and birth txg
@ -1024,7 +1095,7 @@ sync_error_list(spa_t *spa, avl_tree_t *t, uint64_t *obj, dmu_tx_t *tx)
uint64_t head_dataset_obj; uint64_t head_dataset_obj;
int error = get_head_and_birth_txg(spa, &zep, int error = get_head_and_birth_txg(spa, &zep,
se->se_bookmark.zb_objset, &head_dataset_obj); se->se_bookmark.zb_objset, &head_dataset_obj);
if (error != 0) if (error)
continue; continue;
uint64_t err_obj; uint64_t err_obj;

View File

@ -494,7 +494,7 @@ tags = ['functional', 'cli_root', 'zpool_split']
[tests/functional/cli_root/zpool_status] [tests/functional/cli_root/zpool_status]
tests = ['zpool_status_001_pos', 'zpool_status_002_pos', tests = ['zpool_status_001_pos', 'zpool_status_002_pos',
'zpool_status_003_pos', 'zpool_status_004_pos', 'zpool_status_003_pos', 'zpool_status_004_pos',
'zpool_status_features_001_pos'] 'zpool_status_005_pos', 'zpool_status_features_001_pos']
tags = ['functional', 'cli_root', 'zpool_status'] tags = ['functional', 'cli_root', 'zpool_status']
[tests/functional/cli_root/zpool_sync] [tests/functional/cli_root/zpool_sync]

View File

@ -1161,6 +1161,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zpool_status/zpool_status_002_pos.ksh \ functional/cli_root/zpool_status/zpool_status_002_pos.ksh \
functional/cli_root/zpool_status/zpool_status_003_pos.ksh \ functional/cli_root/zpool_status/zpool_status_003_pos.ksh \
functional/cli_root/zpool_status/zpool_status_004_pos.ksh \ functional/cli_root/zpool_status/zpool_status_004_pos.ksh \
functional/cli_root/zpool_status/zpool_status_005_pos.ksh \
functional/cli_root/zpool_status/zpool_status_features_001_pos.ksh \ functional/cli_root/zpool_status/zpool_status_features_001_pos.ksh \
functional/cli_root/zpool_sync/cleanup.ksh \ functional/cli_root/zpool_sync/cleanup.ksh \
functional/cli_root/zpool_sync/setup.ksh \ functional/cli_root/zpool_sync/setup.ksh \

View File

@ -63,6 +63,7 @@ log_must zfs snapshot $TESTPOOL2@snap
log_must zfs clone $TESTPOOL2@snap $TESTPOOL2/clone log_must zfs clone $TESTPOOL2@snap $TESTPOOL2/clone
# Look to see that snapshot, clone and filesystem our files report errors # Look to see that snapshot, clone and filesystem our files report errors
log_must zpool status -v $TESTPOOL2
log_must eval "zpool status -v | grep '$TESTPOOL2@snap:/10m_file'" log_must eval "zpool status -v | grep '$TESTPOOL2@snap:/10m_file'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone/10m_file'" log_must eval "zpool status -v | grep '$TESTPOOL2/clone/10m_file'"
log_must eval "zpool status -v | grep '$TESTPOOL2/10m_file'" log_must eval "zpool status -v | grep '$TESTPOOL2/10m_file'"

View File

@ -74,6 +74,7 @@ log_must eval "zpool status -v | grep '$TESTPOOL2/10m_file'"
# Check that enabling the feature reports the error properly. # Check that enabling the feature reports the error properly.
log_must zpool set feature@head_errlog=enabled $TESTPOOL2 log_must zpool set feature@head_errlog=enabled $TESTPOOL2
log_must zpool status -v $TESTPOOL2
log_must eval "zpool status -v | grep '$TESTPOOL2@snap:/10m_file'" log_must eval "zpool status -v | grep '$TESTPOOL2@snap:/10m_file'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone/10m_file'" log_must eval "zpool status -v | grep '$TESTPOOL2/clone/10m_file'"
log_must eval "zpool status -v | grep '$TESTPOOL2/10m_file'" log_must eval "zpool status -v | grep '$TESTPOOL2/10m_file'"

View File

@ -0,0 +1,78 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2022 George Amanakis. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
#
# DESCRIPTION:
# Verify correct output with 'zpool status -v' after corrupting a file
#
# STRATEGY:
# 1. Create a pool, an ancrypted filesystem and a file
# 2. zinject checksum errors
# 3. Unmount the filesystem and unload the key
# 4. Scrub the pool
# 5. Verify we report errors in the pool in 'zpool status -v'
verify_runnable "both"
DISK=${DISKS%% *}
function cleanup
{
log_must zinject -c all
destroy_pool $TESTPOOL2
rm -f $TESTDIR/vdev_a
}
log_assert "Verify reporting errors with unloaded keys works"
log_onexit cleanup
typeset passphrase="password"
typeset file="/$TESTPOOL2/$TESTFS1/$TESTFILE0"
truncate -s $MINVDEVSIZE $TESTDIR/vdev_a
log_must zpool create -f -o feature@head_errlog=enabled $TESTPOOL2 $TESTDIR/vdev_a
log_must eval "echo $passphrase > /$TESTPOOL2/pwd"
log_must zfs create -o encryption=aes-256-ccm -o keyformat=passphrase \
-o keylocation=file:///$TESTPOOL2/pwd -o primarycache=none \
$TESTPOOL2/$TESTFS1
log_must dd if=/dev/urandom of=$file bs=1024 count=1024 oflag=sync
log_must eval "echo 'aaaaaaaa' >> "$file
corrupt_blocks_at_level $file 0
log_must zfs unmount $TESTPOOL2/$TESTFS1
log_must zfs unload-key $TESTPOOL2/$TESTFS1
log_must zpool sync $TESTPOOL2
log_must zpool scrub $TESTPOOL2
log_must zpool wait -t scrub $TESTPOOL2
log_must zpool status -v $TESTPOOL2
log_must eval "zpool status -v $TESTPOOL2 | \
grep \"Permanent errors have been detected\""
log_pass "Verify reporting errors with unloaded keys works"