From dea377c0d9d92bd7d10c0e2f006efa11ab28060a Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Tue, 15 Jul 2014 10:58:41 -0800 Subject: [PATCH] Illumos 4970-4974 - extreme rewind enhancements 4970 need controls on i/o issued by zpool import -XF 4971 zpool import -T should accept hex values 4972 zpool import -T implies extreme rewind, and thus a scrub 4973 spa_load_retry retries the same txg 4974 spa_load_verify() reads all data twice Reviewed by: Christopher Siden Reviewed by: Dan McDonald Reviewed by: George Wilson Approved by: Robert Mustacchi References: https://www.illumos.org/issues/4970 https://www.illumos.org/issues/4971 https://www.illumos.org/issues/4972 https://www.illumos.org/issues/4973 https://www.illumos.org/issues/4974 https://github.com/illumos/illumos-gate/commit/e42d205 Notes: This set of patches adds a set of tunable parameters for the "extreme rewind" mode of pool import which allows control over the traversal performed during such an import. Ported by: Tim Chase Signed-off-by: Brian Behlendorf Closes #2598 --- cmd/zpool/zpool_main.c | 4 +- man/man5/zfs-module-parameters.5 | 46 +++++++++++++++++++ module/zfs/spa.c | 79 ++++++++++++++++++++++++++------ 3 files changed, 114 insertions(+), 15 deletions(-) diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 920acc3679..952645ea51 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2011, 2014 by Delphix. All rights reserved. * Copyright (c) 2012 by Frederik Wessels. All rights reserved. * Copyright (c) 2012 by Cyril Plisko. All rights reserved. 
*/ @@ -2082,7 +2082,7 @@ zpool_do_import(int argc, char **argv) case 'T': errno = 0; - txg = strtoull(optarg, &endptr, 10); + txg = strtoull(optarg, &endptr, 0); if (errno != 0 || *endptr != '\0') { (void) fprintf(stderr, gettext("invalid txg value\n")); diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index 4671c396f3..3d7093b73a 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -230,6 +230,52 @@ they operate close to quota or capacity limits. Default value: 24 .RE +.sp +.ne 2 +.na +\fBspa_load_verify_data\fR (int) +.ad +.RS 12n +Whether to traverse data blocks during an "extreme rewind" (\fB-X\fR) +import. Use 0 to disable and 1 to enable. + +An extreme rewind import normally performs a full traversal of all +blocks in the pool for verification. If this parameter is set to 0, +the traversal skips non-metadata blocks. It can be toggled once the +import has started to stop or start the traversal of non-metadata blocks. +.sp +Default value: 1 +.RE + +.sp +.ne 2 +.na +\fBspa_load_verify_metadata\fR (int) +.ad +.RS 12n +Whether to traverse blocks during an "extreme rewind" (\fB-X\fR) +pool import. Use 0 to disable and 1 to enable. + +An extreme rewind import normally performs a full traversal of all +blocks in the pool for verification. If this parameter is set to 0, +the traversal is not performed. It can be toggled once the import has +started to stop or start the traversal. +.sp +Default value: 1 +.RE + +.sp +.ne 2 +.na +\fBspa_load_verify_maxinflight\fR (int) +.ad +.RS 12n +Maximum concurrent I/Os during the traversal performed during an "extreme +rewind" (\fB-X\fR) pool import. 
+.sp +Default value: 10000 +.RE + .sp .ne 2 .na diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 0824f9c7f8..62887122d5 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -1855,6 +1855,7 @@ spa_load_verify_done(zio_t *zio) spa_load_error_t *sle = zio->io_private; dmu_object_type_t type = BP_GET_TYPE(bp); int error = zio->io_error; + spa_t *spa = zio->io_spa; if (error) { if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) && @@ -1864,23 +1865,56 @@ spa_load_verify_done(zio_t *zio) atomic_add_64(&sle->sle_data_count, 1); } zio_data_buf_free(zio->io_data, zio->io_size); + + mutex_enter(&spa->spa_scrub_lock); + spa->spa_scrub_inflight--; + cv_broadcast(&spa->spa_scrub_io_cv); + mutex_exit(&spa->spa_scrub_lock); } +/* + * Maximum number of concurrent scrub i/os to create while verifying + * a pool while importing it. + */ +int spa_load_verify_maxinflight = 10000; +int spa_load_verify_metadata = B_TRUE; +int spa_load_verify_data = B_TRUE; + /*ARGSUSED*/ static int spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) { - if (!BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) { - zio_t *rio = arg; - size_t size = BP_GET_PSIZE(bp); - void *data = zio_data_buf_alloc(size); + zio_t *rio; + size_t size; + void *data; - zio_nowait(zio_read(rio, spa, bp, data, size, - spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB, - ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL | - ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb)); - } + if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) + return (0); + /* + * Note: normally this routine will not be called if + * spa_load_verify_metadata is not set. However, it may be useful + * to manually set the flag after the traversal has begun. 
+ */ + if (!spa_load_verify_metadata) + return (0); + if (BP_GET_BUFC_TYPE(bp) == ARC_BUFC_DATA && !spa_load_verify_data) + return (0); + + rio = arg; + size = BP_GET_PSIZE(bp); + data = zio_data_buf_alloc(size); + + mutex_enter(&spa->spa_scrub_lock); + while (spa->spa_scrub_inflight >= spa_load_verify_maxinflight) + cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); + spa->spa_scrub_inflight++; + mutex_exit(&spa->spa_scrub_lock); + + zio_nowait(zio_read(rio, spa, bp, data, size, + spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB, + ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL | + ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb)); return (0); } @@ -1891,7 +1925,7 @@ spa_load_verify(spa_t *spa) spa_load_error_t sle = { 0 }; zpool_rewind_policy_t policy; boolean_t verify_ok = B_FALSE; - int error; + int error = 0; zpool_get_rewind_policy(spa->spa_config, &policy); @@ -1901,8 +1935,11 @@ spa_load_verify(spa_t *spa) rio = zio_root(spa, NULL, &sle, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); - error = traverse_pool(spa, spa->spa_verify_min_txg, - TRAVERSE_PRE | TRAVERSE_PREFETCH, spa_load_verify_cb, rio); + if (spa_load_verify_metadata) { + error = traverse_pool(spa, spa->spa_verify_min_txg, + TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, + spa_load_verify_cb, rio); + } (void) zio_wait(rio); @@ -2781,7 +2818,7 @@ spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig) spa_unload(spa); spa_deactivate(spa); - spa->spa_load_max_txg--; + spa->spa_load_max_txg = spa->spa_uberblock.ub_txg - 1; spa_activate(spa, mode); spa_async_suspend(spa); @@ -2811,6 +2848,8 @@ spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig, spa_set_log_state(spa, SPA_LOG_CLEAR); } else { spa->spa_load_max_txg = max_request; + if (max_request != UINT64_MAX) + spa->spa_extreme_rewind = B_TRUE; } load_error = rewind_error = spa_load(spa, state, SPA_IMPORT_EXISTING, @@ -6603,3 +6642,17 @@ EXPORT_SYMBOL(spa_prop_clear_bootfs); /* asynchronous event notification */ EXPORT_SYMBOL(spa_event_notify); 
#endif + +#if defined(_KERNEL) && defined(HAVE_SPL) +module_param(spa_load_verify_maxinflight, int, 0644); +MODULE_PARM_DESC(spa_load_verify_maxinflight, + "Max concurrent traversal I/Os while verifying pool during import -X"); + +module_param(spa_load_verify_metadata, int, 0644); +MODULE_PARM_DESC(spa_load_verify_metadata, + "Set to traverse metadata on pool import"); + +module_param(spa_load_verify_data, int, 0644); +MODULE_PARM_DESC(spa_load_verify_data, + "Set to traverse data on pool import"); +#endif