zdb: Report bad label checksum

If all label checksums on any vdev are invalid, the pool becomes
unimportable. However, zdb with the -l option does not provide any
useful information about why this happened. Add notifications about
corrupted label checksums.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Signed-off-by: Fedor Uporov <fuporov.vstack@gmail.com>
Closes #2509
Closes #12685
This commit is contained in:
Fedor Uporov 2021-11-10 11:22:00 -08:00 committed by GitHub
parent 6c8f03232a
commit 2a9c572059
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 143 additions and 6 deletions

View File

@ -4221,11 +4221,13 @@ print_label_numbers(char *prefix, cksum_record_t *rec)
typedef struct zdb_label { typedef struct zdb_label {
vdev_label_t label; vdev_label_t label;
uint64_t label_offset;
nvlist_t *config_nv; nvlist_t *config_nv;
cksum_record_t *config; cksum_record_t *config;
cksum_record_t *uberblocks[MAX_UBERBLOCK_COUNT]; cksum_record_t *uberblocks[MAX_UBERBLOCK_COUNT];
boolean_t header_printed; boolean_t header_printed;
boolean_t read_failed; boolean_t read_failed;
boolean_t cksum_valid;
} zdb_label_t; } zdb_label_t;
static void static void
@ -4239,7 +4241,8 @@ print_label_header(zdb_label_t *label, int l)
return; return;
(void) printf("------------------------------------\n"); (void) printf("------------------------------------\n");
(void) printf("LABEL %d\n", l); (void) printf("LABEL %d %s\n", l,
label->cksum_valid ? "" : "(Bad label cksum)");
(void) printf("------------------------------------\n"); (void) printf("------------------------------------\n");
label->header_printed = B_TRUE; label->header_printed = B_TRUE;
@ -4751,6 +4754,42 @@ zdb_copy_object(objset_t *os, uint64_t srcobj, char *destfile)
return (err); return (err);
} }
/*
 * Check the embedded checksum of the vdev_phys region of a label.
 *
 * label  - in-memory copy of the label to verify.
 * offset - offset of the label on the device.  The offset of the
 *          vdev_phys region inside the label is added to it and the
 *          result is used as the verifier that seeds the checksum.
 *
 * The checksum trailer (zio_eck_t) is the last zio_eck_t-sized piece of
 * the VDEV_PHYS_SIZE bytes starting at vl_vdev_phys.  The checksum is
 * computed with the stored checksum temporarily replaced by the verifier;
 * the stored value is put back afterwards so the caller's copy of the
 * label is left exactly as it was read.
 *
 * Returns B_TRUE when the computed checksum equals the stored one and
 * B_FALSE otherwise.
 */
static boolean_t
label_cksum_valid(vdev_label_t *label, uint64_t offset)
{
	zio_checksum_info_t *ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL];
	zio_cksum_t expected_cksum;
	zio_cksum_t actual_cksum;
	zio_cksum_t verifier;
	zio_eck_t *eck;
	int byteswap;

	void *data = (char *)label + offsetof(vdev_label_t, vl_vdev_phys);
	eck = (zio_eck_t *)((char *)(data) + VDEV_PHYS_SIZE) - 1;

	offset += offsetof(vdev_label_t, vl_vdev_phys);
	ZIO_SET_CHECKSUM(&verifier, offset, 0, 0, 0);

	/*
	 * A byte-swapped magic selects the byte-swapping checksum function
	 * and means the verifier has to be swapped to match the buffer.
	 */
	byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC));
	if (byteswap)
		byteswap_uint64_array(&verifier, sizeof (zio_cksum_t));

	/* The checksum is computed with the verifier in the checksum slot. */
	expected_cksum = eck->zec_cksum;
	eck->zec_cksum = verifier;

	abd_t *abd = abd_get_from_buf(data, VDEV_PHYS_SIZE);
	ci->ci_func[byteswap](abd, VDEV_PHYS_SIZE, NULL, &actual_cksum);
	abd_free(abd);

	/*
	 * Put the stored checksum back (still in its raw, unswapped form)
	 * so that verification does not modify the caller's label buffer.
	 */
	eck->zec_cksum = expected_cksum;

	if (byteswap)
		byteswap_uint64_array(&expected_cksum, sizeof (zio_cksum_t));

	return (ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum) ?
	    B_TRUE : B_FALSE);
}
static int static int
dump_label(const char *dev) dump_label(const char *dev)
{ {
@ -4817,8 +4856,9 @@ dump_label(const char *dev)
/* /*
* 1. Read the label from disk * 1. Read the label from disk
* 2. Unpack the configuration and insert in config tree. * 2. Verify label cksum
* 3. Traverse all uberblocks and insert in uberblock tree. * 3. Unpack the configuration and insert in config tree.
* 4. Traverse all uberblocks and insert in uberblock tree.
*/ */
for (int l = 0; l < VDEV_LABELS; l++) { for (int l = 0; l < VDEV_LABELS; l++) {
zdb_label_t *label = &labels[l]; zdb_label_t *label = &labels[l];
@ -4829,8 +4869,10 @@ dump_label(const char *dev)
zio_cksum_t cksum; zio_cksum_t cksum;
vdev_t vd; vdev_t vd;
label->label_offset = vdev_label_offset(psize, l, 0);
if (pread64(fd, &label->label, sizeof (label->label), if (pread64(fd, &label->label, sizeof (label->label),
vdev_label_offset(psize, l, 0)) != sizeof (label->label)) { label->label_offset) != sizeof (label->label)) {
if (!dump_opt['q']) if (!dump_opt['q'])
(void) printf("failed to read label %d\n", l); (void) printf("failed to read label %d\n", l);
label->read_failed = B_TRUE; label->read_failed = B_TRUE;
@ -4839,6 +4881,8 @@ dump_label(const char *dev)
} }
label->read_failed = B_FALSE; label->read_failed = B_FALSE;
label->cksum_valid = label_cksum_valid(&label->label,
label->label_offset);
if (nvlist_unpack(buf, buflen, &config, 0) == 0) { if (nvlist_unpack(buf, buflen, &config, 0) == 0) {
nvlist_t *vdev_tree = NULL; nvlist_t *vdev_tree = NULL;

View File

@ -122,8 +122,9 @@ tags = ['functional', 'clean_mirror']
tests = ['zdb_002_pos', 'zdb_003_pos', 'zdb_004_pos', 'zdb_005_pos', tests = ['zdb_002_pos', 'zdb_003_pos', 'zdb_004_pos', 'zdb_005_pos',
'zdb_006_pos', 'zdb_args_neg', 'zdb_args_pos', 'zdb_006_pos', 'zdb_args_neg', 'zdb_args_pos',
'zdb_block_size_histogram', 'zdb_checksum', 'zdb_decompress', 'zdb_block_size_histogram', 'zdb_checksum', 'zdb_decompress',
'zdb_display_block', 'zdb_object_range_neg', 'zdb_object_range_pos', 'zdb_display_block', 'zdb_label_checksum', 'zdb_object_range_neg',
'zdb_objset_id', 'zdb_decompress_zstd', 'zdb_recover', 'zdb_recover_2'] 'zdb_object_range_pos', 'zdb_objset_id', 'zdb_decompress_zstd',
'zdb_recover', 'zdb_recover_2']
pre = pre =
post = post =
tags = ['functional', 'cli_root', 'zdb'] tags = ['functional', 'cli_root', 'zdb']

View File

@ -652,3 +652,16 @@ function corrupt_blocks_at_level # input_file corrupt_level
# This is necessary for pools made of loop devices. # This is necessary for pools made of loop devices.
sync sync
} }
#
# Overwrite 32 bytes of the given vdev with random data at an offset chosen
# by label number (0-3).  The offsets are computed from a 256K label size
# and the size of the vdev as reported by stat_size; labels 0 and 1 are
# addressed from the start of the vdev, labels 2 and 3 from its end.
# NOTE(review): the 128K - 32 position inside each label is presumably
# where the label's embedded checksum is stored -- confirm against the
# on-disk label layout.
#
function corrupt_label_checksum # label_number vdev_path
{
	typeset label_number=$1
	typeset vdev_path=$2

	typeset cksum_len=32
	typeset half_label=$((128*1024))
	typeset label_size=$((256*1024))
	typeset vdev_size=$(stat_size ${vdev_path})

	typeset -a offsets
	offsets[0]=$(($half_label - $cksum_len))
	offsets[1]=$(($label_size + ($half_label - $cksum_len)))
	offsets[2]=$(($vdev_size - $label_size - ($half_label + $cksum_len)))
	offsets[3]=$(($vdev_size - ($half_label + $cksum_len)))

	# bs=1 makes seek a byte offset; notrunc keeps the rest of the vdev.
	dd if=/dev/urandom of=${vdev_path} seek=${offsets[$label_number]} \
	    bs=1 count=${cksum_len} conv=notrunc
}

View File

@ -14,6 +14,7 @@ dist_pkgdata_SCRIPTS = \
zdb_object_range_neg.ksh \ zdb_object_range_neg.ksh \
zdb_object_range_pos.ksh \ zdb_object_range_pos.ksh \
zdb_display_block.ksh \ zdb_display_block.ksh \
zdb_label_checksum.ksh \
zdb_objset_id.ksh \ zdb_objset_id.ksh \
zdb_recover.ksh \ zdb_recover.ksh \
zdb_recover_2.ksh zdb_recover_2.ksh

View File

@ -0,0 +1,78 @@
#!/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2021 by vStack. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/include/blkdev.shlib
#
# Description:
# zdb -l reports corrupted label checksums
#
# Strategy:
# 1. Create a pool on a single file vdev and export it
# 2. Corrupt label 0 and label 1, check that the corrupted labels are reported
# 3. Check that the pool can still be imported correctly
# 4. Corrupt all labels, check that all corrupted labels are reported
# 5. Check that the pool cannot be imported
#
# Sparse file that backs the single-vdev test pool.
VIRTUAL_DISK=$TEST_BASE_DIR/disk

# Destroy the test pool if it still exists and remove the backing file
# if it is present.
function cleanup
{
	poolexists $TESTPOOL && log_must destroy_pool $TESTPOOL
	[[ -f $VIRTUAL_DISK ]] && log_must rm $VIRTUAL_DISK
}

# Run zdb with the given label option against the virtual disk and fail
# the test unless the number of output lines carrying the bad-checksum
# marker equals expected_count.
function verify_bad_label_count # zdb_option expected_count
{
	typeset bad_labels

	bad_labels=$(zdb $1 $VIRTUAL_DISK | grep -c '(Bad label cksum)')
	if [ $bad_labels -ne $2 ]; then
		log_fail "zdb -l produces an incorrect number of corrupted labels."
	fi
}

log_assert "Verify zdb -l will report corrupted labels checksums"
log_onexit cleanup

verify_runnable "global"

# Build the pool on the file vdev, then export it so the labels can be
# modified while the pool is not in use.
log_must truncate -s $(($MINVDEVSIZE * 8)) $VIRTUAL_DISK
log_must zpool create $TESTPOOL $VIRTUAL_DISK
log_must zpool export $TESTPOOL

# Damage the two leading labels only.  Plain -l is expected to show one
# marker and -lll two.
for label in 0 1; do
	corrupt_label_checksum $label $VIRTUAL_DISK
done
verify_bad_label_count -l 1
verify_bad_label_count -lll 2

# With labels 2 and 3 still intact the pool must remain importable.
log_must zpool import $TESTPOOL -d $TEST_BASE_DIR
log_must zpool export $TESTPOOL

# Damage every label: all four must be reported and import must fail.
for label in 0 1 2 3; do
	corrupt_label_checksum $label $VIRTUAL_DISK
done
verify_bad_label_count -lll 4

log_mustnot zpool import $TESTPOOL -d $TEST_BASE_DIR

cleanup

log_pass "zdb -l bad cksum report is correct."