Illumos 4970-4974 - extreme rewind enhancements
4970 need controls on i/o issued by zpool import -XF
4971 zpool import -T should accept hex values
4972 zpool import -T implies extreme rewind, and thus a scrub
4973 spa_load_retry retries the same txg
4974 spa_load_verify() reads all data twice

Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Robert Mustacchi <rm@joyent.com>

References:
https://www.illumos.org/issues/4970
https://www.illumos.org/issues/4971
https://www.illumos.org/issues/4972
https://www.illumos.org/issues/4973
https://www.illumos.org/issues/4974
https://github.com/illumos/illumos-gate/commit/e42d205

Notes:
This set of patches adds a set of tunable parameters for the "extreme rewind" mode of pool import that allow control over the traversal performed during such an import.

Ported by: Tim Chase <tim@chase2k.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #2598
This commit is contained in:
parent
49ddb31506
commit
dea377c0d9
|
@ -22,7 +22,7 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
|
||||||
* Copyright (c) 2012 by Frederik Wessels. All rights reserved.
|
* Copyright (c) 2012 by Frederik Wessels. All rights reserved.
|
||||||
* Copyright (c) 2012 by Cyril Plisko. All rights reserved.
|
* Copyright (c) 2012 by Cyril Plisko. All rights reserved.
|
||||||
*/
|
*/
|
||||||
|
@ -2082,7 +2082,7 @@ zpool_do_import(int argc, char **argv)
|
||||||
|
|
||||||
case 'T':
|
case 'T':
|
||||||
errno = 0;
|
errno = 0;
|
||||||
txg = strtoull(optarg, &endptr, 10);
|
txg = strtoull(optarg, &endptr, 0);
|
||||||
if (errno != 0 || *endptr != '\0') {
|
if (errno != 0 || *endptr != '\0') {
|
||||||
(void) fprintf(stderr,
|
(void) fprintf(stderr,
|
||||||
gettext("invalid txg value\n"));
|
gettext("invalid txg value\n"));
|
||||||
|
|
|
@ -230,6 +230,52 @@ they operate close to quota or capacity limits.
|
||||||
Default value: 24
|
Default value: 24
|
||||||
.RE
|
.RE
|
||||||
|
|
||||||
|
.sp
|
||||||
|
.ne 2
|
||||||
|
.na
|
||||||
|
\fBspa_load_verify_data\fR (int)
|
||||||
|
.ad
|
||||||
|
.RS 12n
|
||||||
|
Whether to traverse data blocks during an "extreme rewind" (\fB-X\fR)
|
||||||
|
import. Use 0 to disable and 1 to enable.
|
||||||
|
|
||||||
|
An extreme rewind import normally performs a full traversal of all
|
||||||
|
blocks in the pool for verification. If this parameter is set to 0,
|
||||||
|
the traversal skips non-metadata blocks. It can be toggled once the
|
||||||
|
import has started to stop or start the traversal of non-metadata blocks.
|
||||||
|
.sp
|
||||||
|
Default value: 1
|
||||||
|
.RE
|
||||||
|
|
||||||
|
.sp
|
||||||
|
.ne 2
|
||||||
|
.na
|
||||||
|
\fBspa_load_verify_metadata\fR (int)
|
||||||
|
.ad
|
||||||
|
.RS 12n
|
||||||
|
Whether to traverse blocks during an "extreme rewind" (\fB-X\fR)
|
||||||
|
pool import. Use 0 to disable and 1 to enable.
|
||||||
|
|
||||||
|
An extreme rewind import normally performs a full traversal of all
|
||||||
|
blocks in the pool for verification. If this parameter is set to 0,
|
||||||
|
the traversal is not performed. It can be toggled once the import has
|
||||||
|
started to stop or start the traversal.
|
||||||
|
.sp
|
||||||
|
Default value: 1
|
||||||
|
.RE
|
||||||
|
|
||||||
|
.sp
|
||||||
|
.ne 2
|
||||||
|
.na
|
||||||
|
\fBspa_load_verify_maxinflight\fR (int)
|
||||||
|
.ad
|
||||||
|
.RS 12n
|
||||||
|
Maximum number of concurrent I/Os issued by the traversal performed during an "extreme
|
||||||
|
rewind" (\fB-X\fR) pool import.
|
||||||
|
.sp
|
||||||
|
Default value: 10000
|
||||||
|
.RE
|
||||||
|
|
||||||
.sp
|
.sp
|
||||||
.ne 2
|
.ne 2
|
||||||
.na
|
.na
|
||||||
|
|
|
@ -1855,6 +1855,7 @@ spa_load_verify_done(zio_t *zio)
|
||||||
spa_load_error_t *sle = zio->io_private;
|
spa_load_error_t *sle = zio->io_private;
|
||||||
dmu_object_type_t type = BP_GET_TYPE(bp);
|
dmu_object_type_t type = BP_GET_TYPE(bp);
|
||||||
int error = zio->io_error;
|
int error = zio->io_error;
|
||||||
|
spa_t *spa = zio->io_spa;
|
||||||
|
|
||||||
if (error) {
|
if (error) {
|
||||||
if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) &&
|
if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) &&
|
||||||
|
@ -1864,23 +1865,56 @@ spa_load_verify_done(zio_t *zio)
|
||||||
atomic_add_64(&sle->sle_data_count, 1);
|
atomic_add_64(&sle->sle_data_count, 1);
|
||||||
}
|
}
|
||||||
zio_data_buf_free(zio->io_data, zio->io_size);
|
zio_data_buf_free(zio->io_data, zio->io_size);
|
||||||
|
|
||||||
|
mutex_enter(&spa->spa_scrub_lock);
|
||||||
|
spa->spa_scrub_inflight--;
|
||||||
|
cv_broadcast(&spa->spa_scrub_io_cv);
|
||||||
|
mutex_exit(&spa->spa_scrub_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Maximum number of concurrent scrub i/os to create while verifying
|
||||||
|
* a pool while importing it.
|
||||||
|
*/
|
||||||
|
int spa_load_verify_maxinflight = 10000;
|
||||||
|
int spa_load_verify_metadata = B_TRUE;
|
||||||
|
int spa_load_verify_data = B_TRUE;
|
||||||
|
|
||||||
/*ARGSUSED*/
|
/*ARGSUSED*/
|
||||||
static int
|
static int
|
||||||
spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||||
const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
|
const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
|
||||||
{
|
{
|
||||||
if (!BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) {
|
zio_t *rio;
|
||||||
zio_t *rio = arg;
|
size_t size;
|
||||||
size_t size = BP_GET_PSIZE(bp);
|
void *data;
|
||||||
void *data = zio_data_buf_alloc(size);
|
|
||||||
|
|
||||||
zio_nowait(zio_read(rio, spa, bp, data, size,
|
if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
|
||||||
spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB,
|
return (0);
|
||||||
ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL |
|
/*
|
||||||
ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb));
|
* Note: normally this routine will not be called if
|
||||||
}
|
* spa_load_verify_metadata is not set. However, it may be useful
|
||||||
|
* to manually set the flag after the traversal has begun.
|
||||||
|
*/
|
||||||
|
if (!spa_load_verify_metadata)
|
||||||
|
return (0);
|
||||||
|
if (BP_GET_BUFC_TYPE(bp) == ARC_BUFC_DATA && !spa_load_verify_data)
|
||||||
|
return (0);
|
||||||
|
|
||||||
|
rio = arg;
|
||||||
|
size = BP_GET_PSIZE(bp);
|
||||||
|
data = zio_data_buf_alloc(size);
|
||||||
|
|
||||||
|
mutex_enter(&spa->spa_scrub_lock);
|
||||||
|
while (spa->spa_scrub_inflight >= spa_load_verify_maxinflight)
|
||||||
|
cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
|
||||||
|
spa->spa_scrub_inflight++;
|
||||||
|
mutex_exit(&spa->spa_scrub_lock);
|
||||||
|
|
||||||
|
zio_nowait(zio_read(rio, spa, bp, data, size,
|
||||||
|
spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB,
|
||||||
|
ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL |
|
||||||
|
ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb));
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1891,7 +1925,7 @@ spa_load_verify(spa_t *spa)
|
||||||
spa_load_error_t sle = { 0 };
|
spa_load_error_t sle = { 0 };
|
||||||
zpool_rewind_policy_t policy;
|
zpool_rewind_policy_t policy;
|
||||||
boolean_t verify_ok = B_FALSE;
|
boolean_t verify_ok = B_FALSE;
|
||||||
int error;
|
int error = 0;
|
||||||
|
|
||||||
zpool_get_rewind_policy(spa->spa_config, &policy);
|
zpool_get_rewind_policy(spa->spa_config, &policy);
|
||||||
|
|
||||||
|
@ -1901,8 +1935,11 @@ spa_load_verify(spa_t *spa)
|
||||||
rio = zio_root(spa, NULL, &sle,
|
rio = zio_root(spa, NULL, &sle,
|
||||||
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);
|
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);
|
||||||
|
|
||||||
error = traverse_pool(spa, spa->spa_verify_min_txg,
|
if (spa_load_verify_metadata) {
|
||||||
TRAVERSE_PRE | TRAVERSE_PREFETCH, spa_load_verify_cb, rio);
|
error = traverse_pool(spa, spa->spa_verify_min_txg,
|
||||||
|
TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
|
||||||
|
spa_load_verify_cb, rio);
|
||||||
|
}
|
||||||
|
|
||||||
(void) zio_wait(rio);
|
(void) zio_wait(rio);
|
||||||
|
|
||||||
|
@ -2781,7 +2818,7 @@ spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig)
|
||||||
spa_unload(spa);
|
spa_unload(spa);
|
||||||
spa_deactivate(spa);
|
spa_deactivate(spa);
|
||||||
|
|
||||||
spa->spa_load_max_txg--;
|
spa->spa_load_max_txg = spa->spa_uberblock.ub_txg - 1;
|
||||||
|
|
||||||
spa_activate(spa, mode);
|
spa_activate(spa, mode);
|
||||||
spa_async_suspend(spa);
|
spa_async_suspend(spa);
|
||||||
|
@ -2811,6 +2848,8 @@ spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
|
||||||
spa_set_log_state(spa, SPA_LOG_CLEAR);
|
spa_set_log_state(spa, SPA_LOG_CLEAR);
|
||||||
} else {
|
} else {
|
||||||
spa->spa_load_max_txg = max_request;
|
spa->spa_load_max_txg = max_request;
|
||||||
|
if (max_request != UINT64_MAX)
|
||||||
|
spa->spa_extreme_rewind = B_TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
load_error = rewind_error = spa_load(spa, state, SPA_IMPORT_EXISTING,
|
load_error = rewind_error = spa_load(spa, state, SPA_IMPORT_EXISTING,
|
||||||
|
@ -6603,3 +6642,17 @@ EXPORT_SYMBOL(spa_prop_clear_bootfs);
|
||||||
/* asynchronous event notification */
|
/* asynchronous event notification */
|
||||||
EXPORT_SYMBOL(spa_event_notify);
|
EXPORT_SYMBOL(spa_event_notify);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(_KERNEL) && defined(HAVE_SPL)
|
||||||
|
module_param(spa_load_verify_maxinflight, int, 0644);
|
||||||
|
MODULE_PARM_DESC(spa_load_verify_maxinflight,
|
||||||
|
"Max concurrent traversal I/Os while verifying pool during import -X");
|
||||||
|
|
||||||
|
module_param(spa_load_verify_metadata, int, 0644);
|
||||||
|
MODULE_PARM_DESC(spa_load_verify_metadata,
|
||||||
|
"Set to traverse metadata on pool import");
|
||||||
|
|
||||||
|
module_param(spa_load_verify_data, int, 0644);
|
||||||
|
MODULE_PARM_DESC(spa_load_verify_data,
|
||||||
|
"Set to traverse data on pool import");
|
||||||
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue