nopwrites on dmu_sync-ed blocks can result in a panic

After device removal, performing nopwrites on a dmu_sync-ed block
will result in a panic. This panic can show up in two ways:
1. an attempt to issue an IOCTL in vdev_indirect_io_start()
2. a failed comparison of zio->io_bp and zio->io_bp_orig in
   zio_done()
To resolve both of these panics, nopwrites of blocks on indirect
vdevs should be ignored and new allocations should be performed on
concrete vdevs.

Reviewed-by: Igor Kozhukhov <igor@dilos.org>
Reviewed-by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Don Brady <don.brady@delphix.com>
Signed-off-by: George Wilson <gwilson@delphix.com>
Closes #8957
This commit is contained in:
George Wilson 2019-06-28 15:40:24 -04:00 committed by Brian Behlendorf
parent 765d1f0644
commit 681a85cb01
4 changed files with 103 additions and 2 deletions

View File

@ -2856,6 +2856,20 @@ zio_nop_write(zio_t *zio)
ASSERT(bcmp(&bp->blk_prop, &bp_orig->blk_prop,
sizeof (uint64_t)) == 0);
/*
* If we're overwriting a block that is currently on an
* indirect vdev, then ignore the nopwrite request and
* allow a new block to be allocated on a concrete vdev.
*/
spa_config_enter(zio->io_spa, SCL_VDEV, FTAG, RW_READER);
vdev_t *tvd = vdev_lookup_top(zio->io_spa,
DVA_GET_VDEV(&bp->blk_dva[0]));
if (tvd->vdev_ops == &vdev_indirect_ops) {
spa_config_exit(zio->io_spa, SCL_VDEV, FTAG);
return (zio);
}
spa_config_exit(zio->io_spa, SCL_VDEV, FTAG);
*bp = *bp_orig;
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
zio->io_flags |= ZIO_FLAG_NOPWRITE;

View File

@ -763,7 +763,7 @@ tags = ['functional', 'refreserv']
pre =
tests = ['removal_all_vdev', 'removal_check_space',
'removal_condense_export', 'removal_multiple_indirection',
'removal_remap_deadlists',
'removal_nopwrite', 'removal_remap_deadlists',
'removal_resume_export', 'removal_sanity', 'removal_with_add',
'removal_with_create_fs', 'removal_with_dedup',
'removal_with_errors', 'removal_with_export',

View File

@ -18,7 +18,7 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/removal
dist_pkgdata_SCRIPTS = \
cleanup.ksh removal_all_vdev.ksh removal_check_space.ksh \
removal_condense_export.ksh removal_multiple_indirection.ksh \
removal_remap_deadlists.ksh \
removal_nopwrite.ksh removal_remap_deadlists.ksh \
removal_reservation.ksh removal_resume_export.ksh \
removal_sanity.ksh removal_with_add.ksh removal_with_create_fs.ksh \
removal_with_dedup.ksh removal_with_errors.ksh \

View File

@ -0,0 +1,87 @@
#! /bin/ksh -p
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#
#
# Copyright (c) 2019 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/removal/removal.kshlib
. $STF_SUITE/tests/functional/nopwrite/nopwrite.shlib
default_setup_noexit "$DISKS"
log_onexit default_cleanup_noexit
BLOCKSIZE=8192
origin="$TESTPOOL/$TESTFS"
log_must zfs set compress=on $origin
log_must zfs set checksum=edonr $origin
log_must zfs set recordsize=8k $origin
dd if=/dev/urandom of=$TESTDIR/file_8k bs=1024k count=$MEGS oflag=sync \
conv=notrunc >/dev/null 2>&1 || log_fail "dd into $TESTDIR/file failed."
log_must zfs set recordsize=128k $origin
dd if=/dev/urandom of=$TESTDIR/file_128k bs=1024k count=$MEGS oflag=sync \
conv=notrunc >/dev/null 2>&1 || log_fail "dd into $TESTDIR/file failed."
zfs snapshot $origin@a || log_fail "zfs snap failed"
log_must zfs clone $origin@a $origin/clone
#
# Verify that nopwrites work prior to removal
#
log_must zfs set recordsize=8k $origin/clone
dd if=/$TESTDIR/file_8k of=/$TESTDIR/clone/file_8k bs=1024k \
oflag=sync conv=notrunc >/dev/null 2>&1 || log_fail "dd failed."
log_must verify_nopwrite $origin $origin@a $origin/clone
log_must zfs set recordsize=128k $origin/clone
dd if=/$TESTDIR/file_128k of=/$TESTDIR/clone/file_128k bs=1024k \
oflag=sync conv=notrunc >/dev/null 2>&1 || log_fail "dd failed."
log_must verify_nopwrite $origin $origin@a $origin/clone
#
# Remove a device before testing nopwrites again
#
log_must zpool remove $TESTPOOL $REMOVEDISK
log_must wait_for_removal $TESTPOOL
log_mustnot vdevs_in_pool $TESTPOOL $REMOVEDISK
#
# Normally, we expect nopwrites to avoid allocating new blocks, but
# after a device has been removed the DVAs will get remapped when
# a L0's indirect bloock is written. This will negate the effects
# of nopwrite and should result in new allocations.
#
#
# Perform a direct zil nopwrite test
#
log_must zfs set recordsize=8k $origin/clone
dd if=/$TESTDIR/file_8k of=/$TESTDIR/clone/file_8k bs=1024k \
oflag=sync conv=notrunc >/dev/null 2>&1 || log_fail "dd failed."
log_mustnot verify_nopwrite $origin $origin@a $origin/clone
#
# Perform an indirect zil nopwrite test
#
log_must zfs set recordsize=128k $origin/clone
dd if=/$TESTDIR/file_128k of=/$TESTDIR/clone/file_128k bs=1024k \
oflag=sync conv=notrunc >/dev/null 2>&1 || log_fail "dd failed."
log_mustnot verify_nopwrite $origin $origin@a $origin/clone
log_pass "Remove works with nopwrite."