OpenZFS 9591 - ms_shift can be incorrectly changed

ms_shift can be incorrectly changed in the MOS config for
indirect vdevs that have been historically expanded.

According to spa_config_update() we expect new vdevs to have
vdev_ms_array equal to 0 and then we go ahead and set their metaslab
size. The problem is that indirect vdevs also have vdev_ms_array == 0
because their metaslabs are destroyed once their removal is done.

As a result, a vdev that was expanded and then removed may have its
ms_shift changed if another vdev is added after its removal.
Fortunately this behavior does not cause any type of crash or bad
behavior in the kernel but it can confuse zdb and anyone doing any kind
of analysis of the history of the pools.

Authored by: Serapheim Dimitropoulos <serapheim@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <gwilson@zfsmail.com>
Reviewed by: John Kennedy <john.kennedy@delphix.com>
Reviewed by: Prashanth Sreenivasa <pks@delphix.com>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tim Chase <tim@chase2k.com>
Ported-by: Tim Chase <tim@chase2k.com>

OpenZFS-commit: https://github.com/openzfs/openzfs/pull/651
OpenZFS-issue: https://illumos.org/issues/9591
External-issue: DLPX-58879
Closes #7644
This commit is contained in:
Serapheim Dimitropoulos 2018-06-12 15:34:20 -07:00 committed by Brian Behlendorf
parent af43029484
commit 7637ef8d23
5 changed files with 115 additions and 13 deletions

View File

@ -22,7 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright 2017 Joyent, Inc.
*/
@ -576,6 +576,18 @@ spa_config_update(spa_t *spa, int what)
*/
for (c = 0; c < rvd->vdev_children; c++) {
vdev_t *tvd = rvd->vdev_child[c];
/*
* Explicitly skip vdevs that are indirect or
* log vdevs that are being removed. The reason
* is that both of those can have vdev_ms_array
* set to 0 and we wouldn't want to change their
* metaslab size nor call vdev_expand() on them.
*/
if (!vdev_is_concrete(tvd) ||
(tvd->vdev_islog && tvd->vdev_removing))
continue;
if (tvd->vdev_ms_array == 0)
vdev_metaslab_set_size(tvd);
vdev_expand(tvd, txg);

View File

@ -4172,11 +4172,11 @@ vdev_expand(vdev_t *vd, uint64_t txg)
{
ASSERT(vd->vdev_top == vd);
ASSERT(spa_config_held(vd->vdev_spa, SCL_ALL, RW_WRITER) == SCL_ALL);
ASSERT(vdev_is_concrete(vd));
vdev_set_deflate_ratio(vd);
if ((vd->vdev_asize >> vd->vdev_ms_shift) > vd->vdev_ms_count &&
vdev_is_concrete(vd)) {
if ((vd->vdev_asize >> vd->vdev_ms_shift) > vd->vdev_ms_count) {
VERIFY(vdev_metaslab_init(vd, txg) == 0);
vdev_config_dirty(vd);
}

View File

@ -683,15 +683,16 @@ tags = ['functional', 'refreserv']
[tests/functional/removal]
pre =
tests = ['removal_sanity', 'removal_all_vdev', 'removal_check_space',
'removal_condense_export',
'removal_multiple_indirection', 'removal_remap',
'removal_remap_deadlists',
'removal_with_add', 'removal_with_create_fs', 'removal_with_dedup',
'removal_with_export', 'removal_with_ganging', 'removal_with_remap',
'removal_with_remove', 'removal_with_scrub', 'removal_with_send',
'removal_with_send_recv', 'removal_with_snapshot', 'removal_with_write',
'removal_with_zdb', 'removal_resume_export',
tests = ['removal_all_vdev', 'removal_check_space',
'removal_condense_export', 'removal_multiple_indirection',
'removal_remap', 'removal_remap_deadlists',
'removal_resume_export', 'removal_sanity', 'removal_with_add',
'removal_with_create_fs', 'removal_with_dedup',
'removal_with_export', 'removal_with_ganging',
'removal_with_remap', 'removal_with_remove',
'removal_with_scrub', 'removal_with_send',
'removal_with_send_recv', 'removal_with_snapshot',
'removal_with_write', 'removal_with_zdb', 'remove_expanded',
'remove_mirror', 'remove_mirror_sanity', 'remove_raidz']
tags = ['functional', 'removal']

View File

@ -27,6 +27,6 @@ dist_pkgdata_SCRIPTS = \
removal_with_send.ksh removal_with_send_recv.ksh \
removal_with_snapshot.ksh removal_with_write.ksh \
removal_with_zdb.ksh remove_mirror.ksh remove_mirror_sanity.ksh \
remove_raidz.ksh removal.kshlib
remove_raidz.ksh remove_expanded.ksh removal.kshlib
pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/removal

View File

@ -0,0 +1,89 @@
#! /bin/ksh -p
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/removal/removal.kshlib
#
# BACKGROUND:
#
# ztest hit an issue where it ran zdb and zdb failed because
# it couldn't access some indirect mappings at the end of a
# vdev. The issue was that the vdev's ms_shift had changed after
# it was removed by the addition of another vdev. This test is
# a regression test for ensuring this case doesn't come up again.
#
TMPDIR=${TMPDIR:-$TEST_BASE_DIR}
DISK0=$TMPDIR/dsk0
DISK1=$TMPDIR/dsk1
DISK2=$TMPDIR/dsk2

#
# Exit handler: run the suite's default pool cleanup and then delete
# the three file-backed disks used by this test. rm -f is used so the
# handler also succeeds if some of the files were never created.
#
function cleanup
{
	default_cleanup_noexit
	log_must rm -f $DISK0 $DISK1 $DISK2
}

#
# Register the handler before creating anything, so that a failure in
# truncate or in the pool setup below does not leave the backing files
# (or a partially set up pool) behind.
#
log_onexit cleanup

#
# Create the file-backed disks. DISK0 and DISK2 are the minimum vdev
# size; DISK1 is three times that, presumably so it can hold the data
# evacuated from DISK0 once DISK0 has been expanded later in the test.
#
log_must truncate -s $MINVDEVSIZE $DISK0
log_must truncate -s $(($MINVDEVSIZE * 3)) $DISK1
log_must truncate -s $MINVDEVSIZE $DISK2

#
# Set up the pool with one disk.
#
log_must default_setup_noexit "$DISK0"
#
# Expand vdev: grow the backing file to twice the minimum vdev size,
# reopen the pool, then online the disk with -e.
# NOTE(review): -e is assumed to be what makes zpool expand the device
# into the newly available space; confirm against zpool-online(8).
#
log_must truncate -s $(($MINVDEVSIZE * 2)) $DISK0
log_must zpool reopen $TESTPOOL
log_must zpool online -e $TESTPOOL $DISK0
#
# Fill up the whole vdev. dd is given no count=, so it keeps writing
# random data until it stops on its own. It is not wrapped in log_must,
# presumably because it is expected to exit non-zero once the pool runs
# out of space; confirm before changing.
#
dd if=/dev/urandom of=$TESTDIR/$TESTFILE0 bs=8M
#
# Add another vdev and remove the first vdev creating indirect
# mappings for nearly all the allocatable space from the first
# vdev. Wait for removal to finish.
#
log_must zpool add $TESTPOOL $DISK1
log_must zpool remove $TESTPOOL $DISK0
log_must wait_for_removal $TESTPOOL
#
# Add a new vdev that will trigger a change in the config.
# Run sync once to ensure that the config actually changed.
# This is the step that used to alter ms_shift of the already
# removed (now indirect) vdev.
#
log_must zpool add $TESTPOOL $DISK2
log_must sync
#
# Ensure that zdb does not find any problems with this. Before the
# fix, zdb failed here because it could not access indirect mappings
# at the end of the removed vdev.
#
log_must zdb $TESTPOOL
log_pass "Removal of expanded vdev doesn't cause any problems."