Write label 2,3 uberblocks when vdev expands

When vdev_psize increases, the location of labels 2 and 3 changes
because their location is relative to the end of the device.

The configs for labels 2 and 3 are written during the next spa_sync()
because the vdev is added to the dirty config list.  However, the
uberblock rings are not re-written in their new location, leaving the
device vulnerable to the beginning of the device being overwritten or
damaged.

This patch copies the uberblock ring from label 0 to labels 2 and 3,
in their new locations, at the next sync after vdev_psize increases.

Also, add a test zpool_expand_004_pos.ksh to confirm the uberblocks
are copied.

Reviewed-by: BearBabyLiu <liu.huang@zte.com.cn>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Olaf Faaland <faaland1@llnl.gov>
Closes #5108
This commit is contained in:
Olaf Faaland 2017-05-02 13:55:24 -07:00 committed by Brian Behlendorf
parent e7fbeb606a
commit 9d3f7b8791
6 changed files with 174 additions and 2 deletions

View File

@ -229,6 +229,7 @@ struct vdev {
boolean_t vdev_cant_write; /* vdev is failing all writes */
boolean_t vdev_isspare; /* was a hot spare */
boolean_t vdev_isl2cache; /* was a l2cache device */
boolean_t vdev_copy_uberblocks; /* post expand copy uberblocks */
vdev_queue_t vdev_queue; /* I/O deadline schedule queue */
vdev_cache_t vdev_cache; /* physical block cache */
spa_aux_vdev_t *vdev_aux; /* for l2cache and spares vdevs */

View File

@ -1330,6 +1330,13 @@ vdev_open(vdev_t *vd)
max_asize = max_osize;
}
/*
* If the vdev was expanded, record this so that we can re-create the
* uberblock rings in labels {2,3}, during the next sync.
*/
if ((psize > vd->vdev_psize) && (vd->vdev_psize != 0))
vd->vdev_copy_uberblocks = B_TRUE;
vd->vdev_psize = psize;
/*

View File

@ -1132,6 +1132,60 @@ vdev_uberblock_load(vdev_t *rvd, uberblock_t *ub, nvlist_t **config)
spa_config_exit(spa, SCL_ALL, FTAG);
}
/*
* For use when a leaf vdev is expanded.
* The location of labels 2 and 3 changed, and at the new location the
* uberblock rings are either empty or contain garbage. The sync will write
* new configs there because the vdev is dirty, but expansion also needs the
* uberblock rings copied. Read them from label 0 which did not move.
*
* Since the point is to populate labels {2,3} with valid uberblocks,
* we zero uberblocks we fail to read or which are not valid.
*/
static void
vdev_copy_uberblocks(vdev_t *vd)
{
abd_t *ub_abd;
zio_t *write_zio;
int locks = (SCL_L2ARC | SCL_ZIO);
int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
ZIO_FLAG_SPECULATIVE;
ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_READER) ==
SCL_STATE);
ASSERT(vd->vdev_ops->vdev_op_leaf);
spa_config_enter(vd->vdev_spa, locks, FTAG, RW_READER);
ub_abd = abd_alloc(VDEV_UBERBLOCK_SIZE(vd), B_TRUE);
write_zio = zio_root(vd->vdev_spa, NULL, NULL, flags);
for (int n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) {
const int src_label = 0;
zio_t *zio;
zio = zio_root(vd->vdev_spa, NULL, NULL, flags);
vdev_label_read(zio, vd, src_label, ub_abd,
VDEV_UBERBLOCK_OFFSET(vd, n), VDEV_UBERBLOCK_SIZE(vd),
NULL, NULL, flags);
if (zio_wait(zio) || uberblock_verify(abd_to_buf(ub_abd)))
abd_zero(ub_abd, VDEV_UBERBLOCK_SIZE(vd));
for (int l = 2; l < VDEV_LABELS; l++)
vdev_label_write(write_zio, vd, l, ub_abd,
VDEV_UBERBLOCK_OFFSET(vd, n),
VDEV_UBERBLOCK_SIZE(vd), NULL, NULL,
flags | ZIO_FLAG_DONT_PROPAGATE);
}
(void) zio_wait(write_zio);
spa_config_exit(vd->vdev_spa, locks, FTAG);
abd_free(ub_abd);
}
/*
* On success, increment root zio's count of good writes.
* We only get credit for writes to known-visible vdevs; see spa_vdev_add().
@ -1163,6 +1217,13 @@ vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd, int flags)
if (!vdev_writeable(vd))
return;
/* If the vdev was expanded, need to copy uberblock rings. */
if (vd->vdev_state == VDEV_STATE_HEALTHY &&
vd->vdev_copy_uberblocks == B_TRUE) {
vdev_copy_uberblocks(vd);
vd->vdev_copy_uberblocks = B_FALSE;
}
n = ub->ub_txg & (VDEV_UBERBLOCK_COUNT(vd) - 1);
/* Copy the uberblock_t into the ABD */

View File

@ -276,7 +276,7 @@ tests = ['zpool_detach_001_neg']
# zpool_expand_001_pos - https://github.com/zfsonlinux/zfs/issues/2437
# zpool_expand_003_pos - https://github.com/zfsonlinux/zfs/issues/2437
[tests/functional/cli_root/zpool_expand]
tests = ['zpool_expand_002_pos']
tests = ['zpool_expand_002_pos', 'zpool_expand_004_pos']
# DISABLED:
# zpool_export_004_pos - https://github.com/zfsonlinux/zfs/issues/3484

View File

@ -5,4 +5,5 @@ dist_pkgdata_SCRIPTS = \
cleanup.ksh \
zpool_expand_001_pos.ksh \
zpool_expand_002_pos.ksh \
zpool_expand_003_neg.ksh
zpool_expand_003_neg.ksh \
zpool_expand_004_pos.ksh

View File

@ -0,0 +1,102 @@
#! /bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#
# Copyright (c) 2012, 2015 by Delphix. All rights reserved.
# Copyright (c) 2017 Lawrence Livermore National Security, LLC.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zpool_expand/zpool_expand.cfg
#
# DESCRIPTION:
# After vdev expansion, all 4 labels have the same set of uberblocks.
#
#
# STRATEGY:
# 1) Create 3 files
# 2) Create a pool backed by the files
# 3) Expand the files' size with truncate
# 4) Use zpool online -e to expand the vdevs
# 5) Check that for all the devices, all 4 labels have the same uberblocks
#
verify_runnable "global"
function cleanup
{
if poolexists $TESTPOOL1; then
log_must zpool destroy $TESTPOOL1
fi
for i in 1 2 3; do
[ -e ${TEMPFILE}.$i ] && log_must rm ${TEMPFILE}.$i
done
}
log_onexit cleanup
log_assert "After vdev expansion, all 4 labels have the same set of uberblocks."
for type in " " mirror raidz raidz2; do
for i in 1 2 3; do
log_must truncate -s $org_size ${TEMPFILE}.$i
done
log_must zpool create $TESTPOOL1 $type $TEMPFILE.1 \
$TEMPFILE.2 $TEMPFILE.3
sync_pool $TESTPOOL1
for i in 1 2 3; do
log_must truncate -s $exp_size ${TEMPFILE}.$i
done
for i in 1 2 3; do
log_must zpool online -e $TESTPOOL1 ${TEMPFILE}.$i
done
sync_pool $TESTPOOL1
for i in 1 2 3; do
non_uniform=$(zdb -lu ${TEMPFILE}.$i | \
grep 'labels = ' | \
grep -c -v 'labels = 0 1 2 3')
log_note "non-uniform label count: $non_uniform"
if [[ $non_uniform -ne 0 ]]; then
log_fail "After vdev expansion, all labels contents are not identical"
fi
done
log_must zpool destroy $TESTPOOL1
done
log_pass "After vdev expansion, all 4 labels have the same set of uberblocks."