Fix reporting of CKSUM errors in indirect vdevs
When removing and subsequently reattaching a vdev, CKSUM errors may occur as vdev_indirect_read_all() reads from all children of a mirror in case of a resilver. Fix this by checking whether a child is missing the data and setting a flag (ic_error) which is then checked in vdev_indirect_repair() and suppresses incrementing the checksum counter. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: George Amanakis <gamanakis@gmail.com> Closes #11277
This commit is contained in:
parent
1ad07b01bc
commit
c76a40bfda
|
@ -239,6 +239,7 @@ typedef struct indirect_child {
|
||||||
*/
|
*/
|
||||||
struct indirect_child *ic_duplicate;
|
struct indirect_child *ic_duplicate;
|
||||||
list_node_t ic_node; /* node on is_unique_child */
|
list_node_t ic_node; /* node on is_unique_child */
|
||||||
|
int ic_error; /* set when a child does not contain the data */
|
||||||
} indirect_child_t;
|
} indirect_child_t;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1272,15 +1273,14 @@ vdev_indirect_read_all(zio_t *zio)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Note, we may read from a child whose DTL
|
* If a child is missing the data, set ic_error. Used
|
||||||
* indicates that the data may not be present here.
|
* in vdev_indirect_repair(). We perform the read
|
||||||
* While this might result in a few i/os that will
|
* nevertheless which provides the opportunity to
|
||||||
* likely return incorrect data, it simplifies the
|
* reconstruct the split block if at all possible.
|
||||||
* code since we can treat scrub and resilver
|
|
||||||
* identically. (The incorrect data will be
|
|
||||||
* detected and ignored when we verify the
|
|
||||||
* checksum.)
|
|
||||||
*/
|
*/
|
||||||
|
if (vdev_dtl_contains(ic->ic_vdev, DTL_MISSING,
|
||||||
|
zio->io_txg, 1))
|
||||||
|
ic->ic_error = SET_ERROR(ESTALE);
|
||||||
|
|
||||||
ic->ic_data = abd_alloc_sametype(zio->io_abd,
|
ic->ic_data = abd_alloc_sametype(zio->io_abd,
|
||||||
is->is_size);
|
is->is_size);
|
||||||
|
@ -1410,7 +1410,11 @@ vdev_indirect_checksum_error(zio_t *zio,
|
||||||
* Issue repair i/os for any incorrect copies. We do this by comparing
|
* Issue repair i/os for any incorrect copies. We do this by comparing
|
||||||
* each split segment's correct data (is_good_child's ic_data) with each
|
* each split segment's correct data (is_good_child's ic_data) with each
|
||||||
* other copy of the data. If they differ, then we overwrite the bad data
|
* other copy of the data. If they differ, then we overwrite the bad data
|
||||||
* with the good copy. Note that we do this without regard for the DTL's,
|
* with the good copy. The DTL is checked in vdev_indirect_read_all() and
|
||||||
|
* if a vdev is missing a copy of the data we set ic_error and the read is
|
||||||
|
* performed. This provides the opportunity to reconstruct the split block
|
||||||
|
* if at all possible. ic_error is checked here and if set it suppresses
|
||||||
|
* incrementing the checksum counter. Aside from this DTLs are not checked,
|
||||||
* which simplifies this code and also issues the optimal number of writes
|
* which simplifies this code and also issues the optimal number of writes
|
||||||
* (based on which copies actually read bad data, as opposed to which we
|
* (based on which copies actually read bad data, as opposed to which we
|
||||||
* think might be wrong). For the same reason, we always use
|
* think might be wrong). For the same reason, we always use
|
||||||
|
@ -1447,6 +1451,14 @@ vdev_indirect_repair(zio_t *zio)
|
||||||
ZIO_FLAG_IO_REPAIR | ZIO_FLAG_SELF_HEAL,
|
ZIO_FLAG_IO_REPAIR | ZIO_FLAG_SELF_HEAL,
|
||||||
NULL, NULL));
|
NULL, NULL));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If ic_error is set the current child does not have
|
||||||
|
* a copy of the data, so suppress incrementing the
|
||||||
|
* checksum counter.
|
||||||
|
*/
|
||||||
|
if (ic->ic_error == ESTALE)
|
||||||
|
continue;
|
||||||
|
|
||||||
vdev_indirect_checksum_error(zio, is, ic);
|
vdev_indirect_checksum_error(zio, is, ic);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -756,7 +756,7 @@ tests = ['removal_all_vdev', 'removal_cancel', 'removal_check_space',
|
||||||
'removal_with_send_recv', 'removal_with_snapshot',
|
'removal_with_send_recv', 'removal_with_snapshot',
|
||||||
'removal_with_write', 'removal_with_zdb', 'remove_expanded',
|
'removal_with_write', 'removal_with_zdb', 'remove_expanded',
|
||||||
'remove_mirror', 'remove_mirror_sanity', 'remove_raidz',
|
'remove_mirror', 'remove_mirror_sanity', 'remove_raidz',
|
||||||
'remove_indirect']
|
'remove_indirect', 'remove_attach_mirror']
|
||||||
tags = ['functional', 'removal']
|
tags = ['functional', 'removal']
|
||||||
|
|
||||||
[tests/functional/rename_dirs]
|
[tests/functional/rename_dirs]
|
||||||
|
|
|
@ -29,7 +29,8 @@ dist_pkgdata_SCRIPTS = \
|
||||||
removal_with_send.ksh removal_with_send_recv.ksh \
|
removal_with_send.ksh removal_with_send_recv.ksh \
|
||||||
removal_with_snapshot.ksh removal_with_write.ksh \
|
removal_with_snapshot.ksh removal_with_write.ksh \
|
||||||
removal_with_zdb.ksh remove_mirror.ksh remove_mirror_sanity.ksh \
|
removal_with_zdb.ksh remove_mirror.ksh remove_mirror_sanity.ksh \
|
||||||
remove_raidz.ksh remove_expanded.ksh remove_indirect.ksh
|
remove_raidz.ksh remove_expanded.ksh remove_indirect.ksh \
|
||||||
|
remove_attach_mirror.ksh
|
||||||
|
|
||||||
dist_pkgdata_DATA = \
|
dist_pkgdata_DATA = \
|
||||||
removal.kshlib
|
removal.kshlib
|
||||||
|
|
|
@ -0,0 +1,73 @@
|
||||||
|
#! /bin/ksh -p
|
||||||
|
#
|
||||||
|
# CDDL HEADER START
|
||||||
|
#
|
||||||
|
# This file and its contents are supplied under the terms of the
|
||||||
|
# Common Development and Distribution License ("CDDL"), version 1.0.
|
||||||
|
# You may only use this file in accordance with the terms of version
|
||||||
|
# 1.0 of the CDDL.
|
||||||
|
#
|
||||||
|
# A full copy of the text of the CDDL should have accompanied this
|
||||||
|
# source. A copy of the CDDL is also available via the Internet at
|
||||||
|
# http://www.illumos.org/license/CDDL.
|
||||||
|
#
|
||||||
|
# CDDL HEADER END
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# Copyright (c) 2020, George Amanakis. All rights reserved.
|
||||||
|
#
|
||||||
|
|
||||||
|
. $STF_SUITE/include/libtest.shlib
|
||||||
|
. $STF_SUITE/tests/functional/removal/removal.kshlib
|
||||||
|
|
||||||
|
#
|
||||||
|
# DESCRIPTION:
|
||||||
|
# Resilvering results in no CKSUM errors in pools with indirect vdevs.
|
||||||
|
#
|
||||||
|
# STRATEGY:
|
||||||
|
# 1. Create a pool with two top-vdevs
|
||||||
|
# 2. Write some files
|
||||||
|
# 3. Remove one of the top-vdevs
|
||||||
|
# 4. Reattach it to make a mirror
|
||||||
|
#
|
||||||
|
|
||||||
|
TMPDIR=${TMPDIR:-$TEST_BASE_DIR}
|
||||||
|
|
||||||
|
DISK1="$TMPDIR/dsk1"
|
||||||
|
DISK2="$TMPDIR/dsk2"
|
||||||
|
DISKS="$DISK1 $DISK2"
|
||||||
|
|
||||||
|
# fio options
|
||||||
|
export DIRECTORY=/$TESTPOOL
|
||||||
|
export NUMJOBS=16
|
||||||
|
export RUNTIME=10
|
||||||
|
export PERF_RANDSEED=1234
|
||||||
|
export PERF_COMPPERCENT=66
|
||||||
|
export PERF_COMPCHUNK=0
|
||||||
|
export BLOCKSIZE=4K
|
||||||
|
export SYNC_TYPE=0
|
||||||
|
export DIRECT=1
|
||||||
|
export FILE_SIZE=128M
|
||||||
|
|
||||||
|
log_must mkfile 4g $DISK1
|
||||||
|
log_must mkfile 4g $DISK2
|
||||||
|
|
||||||
|
function cleanup
|
||||||
|
{
|
||||||
|
default_cleanup_noexit
|
||||||
|
log_must rm -f $DISKS
|
||||||
|
}
|
||||||
|
|
||||||
|
log_must zpool create -O recordsize=4k $TESTPOOL $DISK1 $DISK2
|
||||||
|
log_onexit cleanup
|
||||||
|
|
||||||
|
log_must fio $FIO_SCRIPTS/mkfiles.fio
|
||||||
|
log_must fio $FIO_SCRIPTS/sequential_reads.fio
|
||||||
|
|
||||||
|
log_must zpool remove -w $TESTPOOL $DISK2
|
||||||
|
log_must zpool attach -w $TESTPOOL $DISK1 $DISK2
|
||||||
|
|
||||||
|
verify_pool $TESTPOOL
|
||||||
|
|
||||||
|
log_pass "Resilvering results in no CKSUM errors with indirect vdevs"
|
Loading…
Reference in New Issue