Fix dRAID self-healing short columns
When dRAID performs a normal read operation only the data columns in the raid map are read from disk. This is enough information to calculate the checksum, verify it, and return the needed data to the application. It's only in the event of a checksum failure that the additional parity and any empty columns must be read since they are required for parity reconstruction. Reading these additional columns is handled by vdev_raidz_read_all() which calls vdev_draid_map_alloc_empty() to expand the raid_map_t and submit IOs for the missing columns. This all works correctly, but it fails to account for any "short" columns. These are data columns which are padded with an empty skip sector at the end. Since that empty sector is not needed for a normal read it's not read when the column is first read from disk. However, like the parity and empty columns, the skip sector is needed to perform reconstruction. The fix is to mark any "short" columns as never being read by clearing the rc_tried flag when expanding the raid_map_t. This will cause the entire column to be re-read from disk in the event of a checksum failure allowing the self-healing functionality to repair the block. Note that this only affects the self-healing feature because when scrubbing a pool the parity, data, and empty columns are all read initially to verify their contents. Furthermore, only blocks which contain "short" columns would be affected, and only when the memory backing the skip sector wasn't already zeroed out. This change extends the existing redundancy_raidz.ksh test case to verify self-healing (as well as resilver and scrub). It then applies the same test case to dRAID with a slightly modified version of the test script called redundancy_draid.ksh. The unused variable combrec was also removed from both test cases. Reviewed-by: Matthew Ahrens <mahrens@delphix.com> Reviewed-by: Mark Maybee <mark.maybee@delphix.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #12010
This commit is contained in:
parent
b1dd6351bb
commit
2085a5f992
|
@ -812,7 +812,12 @@ vdev_draid_map_alloc_empty(zio_t *zio, raidz_row_t *rr)
|
|||
/* this is a "big column", nothing to add */
|
||||
ASSERT3P(rc->rc_abd, !=, NULL);
|
||||
} else {
|
||||
/* short data column, add a skip sector */
|
||||
/*
|
||||
* short data column, add a skip sector and clear
|
||||
* rc_tried to force the entire column to be re-read
|
||||
* thereby including the missing skip sector data
|
||||
* which is needed for reconstruction.
|
||||
*/
|
||||
ASSERT3U(rc->rc_size + skip_size, ==, parity_size);
|
||||
ASSERT3U(rr->rr_nempty, !=, 0);
|
||||
ASSERT3P(rc->rc_abd, !=, NULL);
|
||||
|
@ -823,6 +828,7 @@ vdev_draid_map_alloc_empty(zio_t *zio, raidz_row_t *rr)
|
|||
abd_gang_add(rc->rc_abd, abd_get_offset_size(
|
||||
rr->rr_abd_empty, skip_off, skip_size), B_TRUE);
|
||||
skip_off += skip_size;
|
||||
rc->rc_tried = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -741,8 +741,8 @@ tests = ['raidz_001_neg', 'raidz_002_pos', 'raidz_003_pos', 'raidz_004_pos']
|
|||
tags = ['functional', 'raidz']
|
||||
|
||||
[tests/functional/redundancy]
|
||||
tests = ['redundancy_draid1', 'redundancy_draid2', 'redundancy_draid3',
|
||||
'redundancy_draid_spare1', 'redundancy_draid_spare2',
|
||||
tests = ['redundancy_draid', 'redundancy_draid1', 'redundancy_draid2',
|
||||
'redundancy_draid3', 'redundancy_draid_spare1', 'redundancy_draid_spare2',
|
||||
'redundancy_draid_spare3', 'redundancy_mirror', 'redundancy_raidz',
|
||||
'redundancy_raidz1', 'redundancy_raidz2', 'redundancy_raidz3',
|
||||
'redundancy_stripe']
|
||||
|
|
|
@ -2,6 +2,7 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/redundancy
|
|||
dist_pkgdata_SCRIPTS = \
|
||||
setup.ksh \
|
||||
cleanup.ksh \
|
||||
redundancy_draid.ksh \
|
||||
redundancy_draid1.ksh \
|
||||
redundancy_draid2.ksh \
|
||||
redundancy_draid3.ksh \
|
||||
|
|
|
@ -0,0 +1,248 @@
|
|||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or http://www.opensolaris.org/os/licensing.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2020 by vStack. All rights reserved.
|
||||
# Copyright (c) 2021 by Delphix. All rights reserved.
|
||||
# Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# dRAID should provide redundancy
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create block device files for the test draid pool
|
||||
# 2. For each parity value [1..3]
|
||||
# - create draid pool
|
||||
# - fill it with some directories/files
|
||||
# - verify self-healing by overwriting devices
|
||||
# - verify resilver by replacing devices
|
||||
# - verify scrub by zeroing devices
|
||||
# - destroy the draid pool
|
||||
|
||||
# Number of file-backed devices handed to the pool, and the size of each
# backing file in megabytes (used as "${dev_size_mb}M" and in dd counts).
typeset -r devs=6 dev_size_mb=512

# Paths of the backing files; filled in by the device creation loop.
typeset -a disks

# Remember the current PREFETCH_DISABLE value so cleanup can restore it.
prefetch_disable=$(get_tunable PREFETCH_DISABLE)
|
||||
|
||||
#
# Exit handler: destroy $TESTPOOL if it still exists, delete the backing
# files dev-0 through dev-$devs from $TEST_BASE_DIR, and restore the
# PREFETCH_DISABLE tunable to the value saved in $prefetch_disable.
#
function cleanup
{
	if poolexists "$TESTPOOL"; then
		destroy_pool "$TESTPOOL"
	fi

	# Inclusive upper bound: dev-$devs is the extra file created
	# alongside the pool devices.
	for (( i=0; i<=$devs; i=i+1 )); do
		rm -f "$TEST_BASE_DIR/dev-$i"
	done

	set_tunable32 PREFETCH_DISABLE $prefetch_disable
}
|
||||
|
||||
#
# Verify that reads self-heal damaged devices.
#
# Runs two identical rounds.  Round one damages dev-0 .. dev-(parity-1),
# round two damages the next <parity> devices.  In each round the pool
# is exported, the chosen devices are overwritten with zeros, the pool
# is re-imported, every file under the <pool>/fs mountpoint is read with
# cksum, and the pool must report no known data errors both after the
# reads and after a follow-up scrub.
#
function test_selfheal # <pool> <parity> <dir>
{
	typeset pool=$1
	typeset nparity=$2
	typeset dir=$3
	typeset first

	for first in 0 $nparity; do
		log_must zpool export $pool

		#
		# Zero each device in this round, skipping the first 4M
		# (seek=4) -- presumably so the leading vdev labels survive
		# and the pool can still be imported; confirm if changing.
		#
		for (( i=$first; i<$first+$nparity; i=i+1 )); do
			log_must dd conv=notrunc if=/dev/zero of=$dir/dev-$i \
			    bs=1M seek=4 count=$(($dev_size_mb-4))
		done

		log_must zpool import -o cachefile=none -d $dir $pool

		# Read back every file so damaged blocks are hit by reads.
		typeset mntpnt=$(get_prop mountpoint $pool/fs)
		log_must find $mntpnt -type f -exec cksum {} + >> /dev/null 2>&1
		log_must check_pool_status $pool "errors" "No known data errors"

		#
		# The find above only self-heals blocks belonging to the
		# files it read.  Scrub so every block in the pool is
		# repaired before any further devices are overwritten.
		#
		log_must zpool scrub -w $pool
		log_must check_pool_status $pool "errors" "No known data errors"

		log_must zpool clear $pool
	done
}
|
||||
|
||||
#
# Verify resilvering onto replaced devices.
#
# Runs two identical rounds.  Round one works on dev-0 .. dev-(parity-1),
# round two on the next <parity> devices.  In each round the devices are
# taken offline, the pool is exported, the devices' labels are cleared,
# the pool is re-imported, and every device is replaced in place with
# "zpool replace -fw".  Afterwards the pool must report no known data
# errors and cksum_pool must return 0, otherwise the test fails.
#
function test_resilver # <pool> <parity> <dir>
{
	typeset pool=$1
	typeset nparity=$2
	typeset dir=$3
	typeset first

	for first in 0 $nparity; do
		typeset last=$(($first + $nparity))

		for (( i=$first; i<$last; i=i+1 )); do
			log_must zpool offline $pool $dir/dev-$i
		done

		log_must zpool export $pool

		for (( i=$first; i<$last; i=i+1 )); do
			log_must zpool labelclear -f $dir/dev-$i
		done

		log_must zpool import -o cachefile=none -d $dir $pool

		for (( i=$first; i<$last; i=i+1 )); do
			log_must zpool replace -fw $pool $dir/dev-$i
		done

		log_must check_pool_status $pool "errors" "No known data errors"

		# Dump the full pool status before failing to aid debugging.
		resilver_cksum=$(cksum_pool $pool)
		if [[ $resilver_cksum != 0 ]]; then
			log_must zpool status -v $pool
			log_fail "resilver cksum errors: $resilver_cksum"
		fi

		log_must zpool clear $pool
	done
}
|
||||
|
||||
#
# Verify that a scrub repairs damaged devices.
#
# Runs two identical rounds.  Round one damages dev-0 .. dev-(parity-1),
# round two damages the next <parity> devices.  In each round the pool
# is exported, the chosen devices are overwritten with zeros, the pool
# is re-imported and scrubbed, and it must then report no known data
# errors.
#
function test_scrub # <pool> <parity> <dir>
{
	typeset pool=$1
	typeset nparity=$2
	typeset dir=$3
	typeset first

	for first in 0 $nparity; do
		log_must zpool export $pool

		#
		# The overwrite must succeed: if dd failed silently the
		# scrub would run against undamaged devices and the test
		# would pass without exercising any repair.  The first 4M
		# (seek=4) of each device is left untouched.
		#
		for (( i=$first; i<$first+$nparity; i=i+1 )); do
			log_must dd conv=notrunc if=/dev/zero of=$dir/dev-$i \
			    bs=1M seek=4 count=$(($dev_size_mb-4))
		done

		log_must zpool import -o cachefile=none -d $dir $pool

		log_must zpool scrub -w $pool
		log_must check_pool_status $pool "errors" "No known data errors"

		log_must zpool clear $pool
	done
}
|
||||
|
||||
log_onexit cleanup

# Turn prefetch off for the duration of the test; cleanup restores it.
log_must set_tunable32 PREFETCH_DISABLE 1

# Disk files which will be used by pool
for i in {0..$(($devs - 1))}; do
	device=$TEST_BASE_DIR/dev-$i
	log_must truncate -s ${dev_size_mb}M $device
	disks[${#disks[*]}+1]=$device
done

# Extra disk file, same size as the pool devices.  Nothing in this
# script adds it to the pool; it is removed again by cleanup.
log_must truncate -s ${dev_size_mb}M $TEST_BASE_DIR/dev-$devs

for nparity in 1 2 3; do
	raid=draid$nparity
	dir=$TEST_BASE_DIR

	log_must zpool create -f -o cachefile=none $TESTPOOL $raid ${disks[@]}
	log_must zfs set primarycache=metadata $TESTPOOL

	# Three datasets: defaults, compression on, and compression on
	# with an 8k recordsize.
	log_must zfs create $TESTPOOL/fs
	log_must fill_fs /$TESTPOOL/fs 1 512 100 1024 R

	log_must zfs create -o compress=on $TESTPOOL/fs2
	log_must fill_fs /$TESTPOOL/fs2 1 512 100 1024 R

	log_must zfs create -o compress=on -o recordsize=8k $TESTPOOL/fs3
	log_must fill_fs /$TESTPOOL/fs3 1 512 100 1024 R

	# Export and re-import before the pool is checked and damaged.
	log_must zpool export $TESTPOOL
	log_must zpool import -o cachefile=none -d $dir $TESTPOOL

	log_must check_pool_status $TESTPOOL "errors" "No known data errors"

	test_selfheal $TESTPOOL $nparity $dir
	test_resilver $TESTPOOL $nparity $dir
	test_scrub $TESTPOOL $nparity $dir

	log_must zpool destroy "$TESTPOOL"
done

log_pass "draid redundancy test succeeded."
|
|
@ -23,6 +23,7 @@
|
|||
#
|
||||
# Copyright (c) 2020 by vStack. All rights reserved.
|
||||
# Copyright (c) 2021 by Delphix. All rights reserved.
|
||||
# Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
|
@ -37,6 +38,7 @@
|
|||
# 2. For each parity value [1..3]
|
||||
# - create raidz pool
|
||||
# - fill it with some directories/files
|
||||
# - verify self-healing by overwriting devices
|
||||
# - verify resilver by replacing devices
|
||||
# - verify scrub by zeroing devices
|
||||
# - destroy the raidz pool
|
||||
|
@ -59,6 +61,54 @@ function cleanup
|
|||
set_tunable32 PREFETCH_DISABLE $prefetch_disable
|
||||
}
|
||||
|
||||
#
# Verify that reads self-heal damaged devices.
#
# Runs two identical rounds.  Round one damages dev-0 .. dev-(parity-1),
# round two damages the next <parity> devices.  In each round the pool
# is exported, the chosen devices are overwritten with zeros, the pool
# is re-imported, every file under the <pool>/fs mountpoint is read with
# cksum, and the pool must report no known data errors both after the
# reads and after a follow-up scrub.
#
function test_selfheal # <pool> <parity> <dir>
{
	typeset pool=$1
	typeset nparity=$2
	typeset dir=$3
	typeset first

	for first in 0 $nparity; do
		log_must zpool export $pool

		# Zero each device in this round, skipping the first 4M.
		for (( i=$first; i<$first+$nparity; i=i+1 )); do
			log_must dd conv=notrunc if=/dev/zero of=$dir/dev-$i \
			    bs=1M seek=4 count=$(($dev_size_mb-4))
		done

		log_must zpool import -o cachefile=none -d $dir $pool

		# Read back every file so damaged blocks are hit by reads.
		typeset mntpnt=$(get_prop mountpoint $pool/fs)
		log_must find $mntpnt -type f -exec cksum {} + >> /dev/null 2>&1
		log_must check_pool_status $pool "errors" "No known data errors"

		#
		# The find above only self-heals blocks belonging to the
		# files it read.  Scrub so every block in the pool is
		# repaired before any further devices are overwritten.
		#
		log_must zpool scrub -w $pool
		log_must check_pool_status $pool "errors" "No known data errors"

		log_must zpool clear $pool
	done
}
|
||||
|
||||
function test_resilver # <pool> <parity> <dir>
|
||||
{
|
||||
typeset pool=$1
|
||||
|
@ -121,7 +171,6 @@ function test_scrub # <pool> <parity> <dir>
|
|||
typeset pool=$1
|
||||
typeset nparity=$2
|
||||
typeset dir=$3
|
||||
typeset combrec=$4
|
||||
|
||||
log_must zpool export $pool
|
||||
|
||||
|
@ -189,6 +238,7 @@ for nparity in 1 2 3; do
|
|||
|
||||
log_must check_pool_status $TESTPOOL "errors" "No known data errors"
|
||||
|
||||
test_selfheal $TESTPOOL $nparity $dir
|
||||
test_resilver $TESTPOOL $nparity $dir
|
||||
test_scrub $TESTPOOL $nparity $dir
|
||||
|
||||
|
|
Loading…
Reference in New Issue