From 065e76e2d265373ca0b2d13a310e6bfd03bd68ca Mon Sep 17 00:00:00 2001 From: Mariusz Zaborski Date: Mon, 21 Aug 2023 16:35:24 +0200 Subject: [PATCH 1/3] Redesign scan/scrub interface to allow scrubbing a range of TXGs Sponsored-By: Wasabi Technology, Inc. Sponsored-By: Klara Inc. Signed-off-by: Mariusz Zaborski --- include/sys/dsl_scan.h | 3 ++- include/sys/spa.h | 2 ++ module/zfs/dsl_scan.c | 50 +++++++++++++++++++++++++++++++----------- module/zfs/spa.c | 13 ++++++++++- 4 files changed, 53 insertions(+), 15 deletions(-) diff --git a/include/sys/dsl_scan.h b/include/sys/dsl_scan.h index 63734dbc17..d9f2c1be8b 100644 --- a/include/sys/dsl_scan.h +++ b/include/sys/dsl_scan.h @@ -189,7 +189,8 @@ void dsl_scan_setup_sync(void *, dmu_tx_t *); void dsl_scan_fini(struct dsl_pool *dp); void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *); int dsl_scan_cancel(struct dsl_pool *); -int dsl_scan(struct dsl_pool *, pool_scan_func_t); +int dsl_scan(struct dsl_pool *, pool_scan_func_t, uint64_t starttxg, + uint64_t txgend); void dsl_scan_assess_vdev(struct dsl_pool *dp, vdev_t *vd); boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp); boolean_t dsl_errorscrubbing(const struct dsl_pool *dp); diff --git a/include/sys/spa.h b/include/sys/spa.h index 93f381affd..385b4501c5 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -821,6 +821,8 @@ extern void spa_l2cache_drop(spa_t *spa); /* scanning */ extern int spa_scan(spa_t *spa, pool_scan_func_t func); +extern int spa_scan_range(spa_t *spa, pool_scan_func_t func, uint64_t txgstart, + uint64_t txgend); extern int spa_scan_stop(spa_t *spa); extern int spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t flag); diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 9d040e1463..f340fcd323 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -848,18 +848,24 @@ dsl_scan_setup_check(void *arg, dmu_tx_t *tx) return (0); } +typedef struct { + pool_scan_func_t func; + uint64_t txgstart; + uint64_t txgend; +} 
setup_sync_arg_t; + void dsl_scan_setup_sync(void *arg, dmu_tx_t *tx) { - (void) arg; + setup_sync_arg_t *setup_sync_arg = (setup_sync_arg_t *)arg; dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan; - pool_scan_func_t *funcp = arg; dmu_object_type_t ot = 0; dsl_pool_t *dp = scn->scn_dp; spa_t *spa = dp->dp_spa; ASSERT(!dsl_scan_is_running(scn)); - ASSERT(*funcp > POOL_SCAN_NONE && *funcp < POOL_SCAN_FUNCS); + ASSERT(setup_sync_arg->func > POOL_SCAN_NONE && + setup_sync_arg->func < POOL_SCAN_FUNCS); memset(&scn->scn_phys, 0, sizeof (scn->scn_phys)); /* @@ -869,10 +875,14 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx) memset(&scn->errorscrub_phys, 0, sizeof (scn->errorscrub_phys)); dsl_errorscrub_sync_state(scn, tx); - scn->scn_phys.scn_func = *funcp; + scn->scn_phys.scn_func = setup_sync_arg->func; scn->scn_phys.scn_state = DSS_SCANNING; - scn->scn_phys.scn_min_txg = 0; - scn->scn_phys.scn_max_txg = tx->tx_txg; + scn->scn_phys.scn_min_txg = setup_sync_arg->txgstart; + if (setup_sync_arg->txgend == 0) { + scn->scn_phys.scn_max_txg = tx->tx_txg; + } else { + scn->scn_phys.scn_max_txg = setup_sync_arg->txgend; + } scn->scn_phys.scn_ddt_class_max = DDT_CLASSES - 1; /* the entire DDT */ scn->scn_phys.scn_start_time = gethrestime_sec(); scn->scn_phys.scn_errors = 0; @@ -959,7 +969,7 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx) spa_history_log_internal(spa, "scan setup", tx, "func=%u mintxg=%llu maxtxg=%llu", - *funcp, (u_longlong_t)scn->scn_phys.scn_min_txg, + setup_sync_arg->func, (u_longlong_t)scn->scn_phys.scn_min_txg, (u_longlong_t)scn->scn_phys.scn_max_txg); } @@ -969,10 +979,16 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx) * error scrub. 
*/ int -dsl_scan(dsl_pool_t *dp, pool_scan_func_t func) +dsl_scan(dsl_pool_t *dp, pool_scan_func_t func, uint64_t txgstart, + uint64_t txgend) { spa_t *spa = dp->dp_spa; dsl_scan_t *scn = dp->dp_scan; + setup_sync_arg_t setup_sync_arg; + + if (func != POOL_SCAN_SCRUB && (txgstart != 0 || txgend != 0)) { + return (EINVAL); + } /* * Purge all vdev caches and probe all devices. We do this here @@ -1023,8 +1039,13 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func) return (SET_ERROR(err)); } + setup_sync_arg.func = func; + setup_sync_arg.txgstart = txgstart; + setup_sync_arg.txgend = txgend; + return (dsl_sync_task(spa_name(spa), dsl_scan_setup_check, - dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED)); + dsl_scan_setup_sync, &setup_sync_arg, 0, + ZFS_SPACE_CHECK_EXTRA_RESERVED)); } static void @@ -4301,13 +4322,16 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) */ if (dsl_scan_restarting(scn, tx) || (spa->spa_resilver_deferred && zfs_resilver_disable_defer)) { - pool_scan_func_t func = POOL_SCAN_SCRUB; + setup_sync_arg_t setup_sync_arg = { + .func = POOL_SCAN_SCRUB, + }; dsl_scan_done(scn, B_FALSE, tx); if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) - func = POOL_SCAN_RESILVER; + setup_sync_arg.func = POOL_SCAN_RESILVER; zfs_dbgmsg("restarting scan func=%u on %s txg=%llu", - func, dp->dp_spa->spa_name, (longlong_t)tx->tx_txg); - dsl_scan_setup_sync(&func, tx); + setup_sync_arg.func, dp->dp_spa->spa_name, + (longlong_t)tx->tx_txg); + dsl_scan_setup_sync(&setup_sync_arg, tx); } /* diff --git a/module/zfs/spa.c b/module/zfs/spa.c index d51cc4fcd0..2a37bf463e 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -8877,6 +8877,13 @@ spa_scan_stop(spa_t *spa) int spa_scan(spa_t *spa, pool_scan_func_t func) +{ + return (spa_scan_range(spa, func, 0, 0)); +} + +int +spa_scan_range(spa_t *spa, pool_scan_func_t func, uint64_t txgstart, + uint64_t txgend) { ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); @@ -8887,6 +8894,9 @@ spa_scan(spa_t *spa, 
pool_scan_func_t func) !spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) return (SET_ERROR(ENOTSUP)); + if (func != POOL_SCAN_SCRUB && (txgstart != 0 || txgend != 0)) + return (SET_ERROR(ENOTSUP)); + /* * If a resilver was requested, but there is no DTL on a * writeable leaf device, we have nothing to do. @@ -8901,7 +8911,7 @@ spa_scan(spa_t *spa, pool_scan_func_t func) !spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) return (SET_ERROR(ENOTSUP)); - return (dsl_scan(spa->spa_dsl_pool, func)); + return (dsl_scan(spa->spa_dsl_pool, func, txgstart, txgend)); } /* @@ -10982,6 +10992,7 @@ EXPORT_SYMBOL(spa_l2cache_drop); /* scanning */ EXPORT_SYMBOL(spa_scan); +EXPORT_SYMBOL(spa_scan_range); EXPORT_SYMBOL(spa_scan_stop); /* spa syncing */ From 4dde5c3f350638f1560ad19cb83d3a6d1026fc8b Mon Sep 17 00:00:00 2001 From: Mariusz Zaborski Date: Wed, 30 Aug 2023 14:22:09 +0000 Subject: [PATCH 2/3] Add last_scrubbed_txg property and option to scrub from last saved txg The `last_scrubbed_txg` property indicates the transaction group (TXG) up to which the most recent scrub operation has checked and repaired the dataset. This provides administrators with insight into the data integrity status of their pool at a specific point in time. Sponsored-By: Wasabi Technology, Inc. Sponsored-By: Klara Inc. 
Signed-off-by: Mariusz Zaborski --- cmd/zpool/zpool_main.c | 26 ++++- include/sys/dmu.h | 1 + include/sys/fs/zfs.h | 2 + include/sys/spa.h | 1 + include/sys/spa_impl.h | 1 + lib/libzfs/libzfs.abi | 6 +- lib/libzfs/libzfs_pool.c | 1 + man/man7/zpoolprops.7 | 8 ++ man/man8/zpool-scrub.8 | 5 + module/zcommon/zpool_prop.c | 4 + module/zfs/dsl_scan.c | 18 ++- module/zfs/spa.c | 9 ++ module/zfs/spa_misc.c | 6 + module/zfs/zfs_ioctl.c | 3 + tests/zfs-tests/tests/Makefile.am | 1 + .../zpool_scrub_txg_continue_from_last.ksh | 104 ++++++++++++++++++ 16 files changed, 188 insertions(+), 8 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_txg_continue_from_last.ksh diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 9cd26a8650..3de1408567 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -506,7 +506,8 @@ get_usage(zpool_help_t idx) return (gettext("\tinitialize [-c | -s | -u] [-w] " "[ ...]\n")); case HELP_SCRUB: - return (gettext("\tscrub [-s | -p] [-w] [-e] ...\n")); + return (gettext("\tscrub [-s | -p] [-w] [-e] [-C] " + " ...\n")); case HELP_RESILVER: return (gettext("\tresilver ...\n")); case HELP_TRIM: @@ -8391,8 +8392,9 @@ wait_callback(zpool_handle_t *zhp, void *data) } /* - * zpool scrub [-s | -p] [-w] [-e] ... + * zpool scrub [-s | -p] [-w] [-e] [-C] ... * + * -C Scrub from last saved txg. * -e Only scrub blocks in the error log. * -s Stop. Stops any in-progress scrub. * -p Pause. Pause in-progress scrub. 
@@ -8412,10 +8414,14 @@ zpool_do_scrub(int argc, char **argv) boolean_t is_error_scrub = B_FALSE; boolean_t is_pause = B_FALSE; boolean_t is_stop = B_FALSE; + boolean_t is_txg_continue = B_FALSE; /* check options */ - while ((c = getopt(argc, argv, "spwe")) != -1) { + while ((c = getopt(argc, argv, "spweC")) != -1) { switch (c) { + case 'C': + is_txg_continue = B_TRUE; + break; case 'e': is_error_scrub = B_TRUE; break; @@ -8439,6 +8445,18 @@ zpool_do_scrub(int argc, char **argv) (void) fprintf(stderr, gettext("invalid option " "combination :-s and -p are mutually exclusive\n")); usage(B_FALSE); + } else if (is_pause && is_txg_continue) { + (void) fprintf(stderr, gettext("invalid option " + "combination :-p and -C are mutually exclusive\n")); + usage(B_FALSE); + } else if (is_stop && is_txg_continue) { + (void) fprintf(stderr, gettext("invalid option " + "combination :-s and -C are mutually exclusive\n")); + usage(B_FALSE); + } else if (is_error_scrub && is_txg_continue) { + (void) fprintf(stderr, gettext("invalid option " + "combination :-e and -C are mutually exclusive\n")); + usage(B_FALSE); } else { if (is_error_scrub) cb.cb_type = POOL_SCAN_ERRORSCRUB; @@ -8447,6 +8465,8 @@ zpool_do_scrub(int argc, char **argv) cb.cb_scrub_cmd = POOL_SCRUB_PAUSE; } else if (is_stop) { cb.cb_type = POOL_SCAN_NONE; + } else if (is_txg_continue) { + cb.cb_scrub_cmd = POOL_SCRUB_FROM_LAST_TXG; } else { cb.cb_scrub_cmd = POOL_SCRUB_NORMAL; } diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 928f5f2b4f..6921f5bdb9 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -381,6 +381,7 @@ typedef struct dmu_buf { #define DMU_POOL_CREATION_VERSION "creation_version" #define DMU_POOL_SCAN "scan" #define DMU_POOL_ERRORSCRUB "error_scrub" +#define DMU_POOL_LAST_SCRUBBED_TXG "last_scrubbed_txg" #define DMU_POOL_FREE_BPOBJ "free_bpobj" #define DMU_POOL_BPTREE_OBJ "bptree_obj" #define DMU_POOL_EMPTY_BPOBJ "empty_bpobj" diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 
73d686a002..69441066e6 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -261,6 +261,7 @@ typedef enum { ZPOOL_PROP_DEDUP_TABLE_SIZE, ZPOOL_PROP_DEDUP_TABLE_QUOTA, ZPOOL_PROP_DEDUPCACHED, + ZPOOL_PROP_LAST_SCRUBBED_TXG, ZPOOL_NUM_PROPS } zpool_prop_t; @@ -1075,6 +1076,7 @@ typedef enum pool_scan_func { typedef enum pool_scrub_cmd { POOL_SCRUB_NORMAL = 0, POOL_SCRUB_PAUSE, + POOL_SCRUB_FROM_LAST_TXG, POOL_SCRUB_FLAGS_END } pool_scrub_cmd_t; diff --git a/include/sys/spa.h b/include/sys/spa.h index 385b4501c5..1bcc84ed8c 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -1069,6 +1069,7 @@ extern uint64_t spa_get_deadman_failmode(spa_t *spa); extern void spa_set_deadman_failmode(spa_t *spa, const char *failmode); extern boolean_t spa_suspended(spa_t *spa); extern uint64_t spa_bootfs(spa_t *spa); +extern uint64_t spa_get_last_scrubbed_txg(spa_t *spa); extern uint64_t spa_delegation(spa_t *spa); extern objset_t *spa_meta_objset(spa_t *spa); extern space_map_t *spa_syncing_log_sm(spa_t *spa); diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 4fc6f22fcb..a3eedcc791 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -318,6 +318,7 @@ struct spa { uint64_t spa_scan_pass_scrub_spent_paused; /* total paused */ uint64_t spa_scan_pass_exam; /* examined bytes per pass */ uint64_t spa_scan_pass_issued; /* issued bytes per pass */ + uint64_t spa_scrubbed_last_txg; /* last txg scrubbed */ /* error scrub pause time in milliseconds */ uint64_t spa_scan_pass_errorscrub_pause; diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index 87c5c4380b..0edcc8adf7 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -2955,7 +2955,8 @@ - + + @@ -5807,7 +5808,8 @@ - + + diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index dfa7c4db68..670106f717 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -378,6 +378,7 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, case 
ZPOOL_PROP_BCLONEUSED: case ZPOOL_PROP_DEDUP_TABLE_SIZE: case ZPOOL_PROP_DEDUPCACHED: + case ZPOOL_PROP_LAST_SCRUBBED_TXG: if (literal) (void) snprintf(buf, len, "%llu", (u_longlong_t)intval); diff --git a/man/man7/zpoolprops.7 b/man/man7/zpoolprops.7 index f4fcc620e4..9185fb10e7 100644 --- a/man/man7/zpoolprops.7 +++ b/man/man7/zpoolprops.7 @@ -135,6 +135,14 @@ A unique identifier for the pool. The current health of the pool. Health can be one of .Sy ONLINE , DEGRADED , FAULTED , OFFLINE, REMOVED , UNAVAIL . +.It Sy last_scrubbed_txg +Indicates the transaction group (TXG) up to which the most recent scrub +operation has checked and repaired the pool. +This provides insight into the data integrity status of the pool at +a specific point in time. +The +.Xr zpool-scrub 8 +command can use this property to continue scrubbing from that TXG. .It Sy leaked Space not released while .Sy freeing diff --git a/man/man8/zpool-scrub.8 b/man/man8/zpool-scrub.8 index 03f3ad4991..023ec33fb3 100644 --- a/man/man8/zpool-scrub.8 +++ b/man/man8/zpool-scrub.8 @@ -39,6 +39,7 @@ .Op Fl s Ns | Ns Fl p .Op Fl w .Op Fl e +.Op Fl C .Ar pool Ns … . .Sh DESCRIPTION @@ -114,6 +115,10 @@ The pool must have been scrubbed at least once with the feature enabled to use this option. Error scrubbing cannot be run simultaneously with regular scrubbing or resilvering, nor can it be run when a regular scrub is paused. +.It Fl C +Continue scrub from last saved txg (see zpool +.Sy last_scrubbed_txg +property). 
.El .Sh EXAMPLES .Ss Example 1 diff --git a/module/zcommon/zpool_prop.c b/module/zcommon/zpool_prop.c index afdbb6f15e..2785e1d321 100644 --- a/module/zcommon/zpool_prop.c +++ b/module/zcommon/zpool_prop.c @@ -129,6 +129,10 @@ zpool_prop_init(void) 0, PROP_READONLY, ZFS_TYPE_POOL, "", "DDTSIZE", B_FALSE, sfeatures); + zprop_register_number(ZPOOL_PROP_LAST_SCRUBBED_TXG, + "last_scrubbed_txg", 0, PROP_READONLY, ZFS_TYPE_POOL, "", + "LAST_SCRUBBED_TXG", B_FALSE, sfeatures); + /* default number properties */ zprop_register_number(ZPOOL_PROP_VERSION, "version", SPA_VERSION, PROP_DEFAULT, ZFS_TYPE_POOL, "", "VERSION", B_FALSE, diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index f340fcd323..76db3b55c6 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -228,6 +228,9 @@ static int zfs_resilver_disable_defer = B_FALSE; ((scn)->scn_phys.scn_func == POOL_SCAN_SCRUB || \ (scn)->scn_phys.scn_func == POOL_SCAN_RESILVER) +#define DSL_SCAN_IS_SCRUB(scn) \ + ((scn)->scn_phys.scn_func == POOL_SCAN_SCRUB) + /* * Enable/disable the processing of the free_bpobj object. 
*/ @@ -1133,15 +1136,24 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) spa_notify_waiters(spa); - if (dsl_scan_restarting(scn, tx)) + if (dsl_scan_restarting(scn, tx)) { spa_history_log_internal(spa, "scan aborted, restarting", tx, "errors=%llu", (u_longlong_t)spa_approx_errlog_size(spa)); - else if (!complete) + } else if (!complete) { spa_history_log_internal(spa, "scan cancelled", tx, "errors=%llu", (u_longlong_t)spa_approx_errlog_size(spa)); - else + } else { spa_history_log_internal(spa, "scan done", tx, "errors=%llu", (u_longlong_t)spa_approx_errlog_size(spa)); + if (DSL_SCAN_IS_SCRUB(scn)) { + VERIFY0(zap_update(dp->dp_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_LAST_SCRUBBED_TXG, + sizeof (uint64_t), 1, + &scn->scn_phys.scn_max_txg, tx)); + spa->spa_scrubbed_last_txg = scn->scn_phys.scn_max_txg; + } + } if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { spa->spa_scrub_active = B_FALSE; diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 2a37bf463e..ed3defbe0d 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -458,6 +458,9 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp) spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUP_TABLE_SIZE, NULL, ddt_get_ddt_dsize(spa), src); + spa_prop_add_list(*nvp, ZPOOL_PROP_LAST_SCRUBBED_TXG, NULL, + spa_get_last_scrubbed_txg(spa), src); + spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, rvd->vdev_state, src); @@ -4737,6 +4740,12 @@ spa_ld_get_props(spa_t *spa) if (error != 0 && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + /* Load the last scrubbed txg. */ + error = spa_dir_prop(spa, DMU_POOL_LAST_SCRUBBED_TXG, + &spa->spa_scrubbed_last_txg, B_FALSE); + if (error != 0 && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + /* * Load the livelist deletion field. 
If a livelist is queued for * deletion, indicate that in the spa diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 97191e7685..cdf5992b46 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -2679,6 +2679,12 @@ spa_mode(spa_t *spa) return (spa->spa_mode); } +uint64_t +spa_get_last_scrubbed_txg(spa_t *spa) +{ + return (spa->spa_scrubbed_last_txg); +} + uint64_t spa_bootfs(spa_t *spa) { diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 7ce2d91961..0b6683ffb9 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -1719,6 +1719,9 @@ zfs_ioc_pool_scrub(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE); } else if (scan_type == POOL_SCAN_NONE) { error = spa_scan_stop(spa); + } else if (scan_cmd == POOL_SCRUB_FROM_LAST_TXG) { + error = spa_scan_range(spa, scan_type, + spa_get_last_scrubbed_txg(spa), 0); } else { error = spa_scan(spa, scan_type); } diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index bbeabc6dfb..35b6ba81f5 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -1218,6 +1218,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zpool_scrub/zpool_scrub_multiple_copies.ksh \ functional/cli_root/zpool_scrub/zpool_scrub_offline_device.ksh \ functional/cli_root/zpool_scrub/zpool_scrub_print_repairing.ksh \ + functional/cli_root/zpool_scrub/zpool_scrub_txg_continue_from_last.ksh \ functional/cli_root/zpool_scrub/zpool_error_scrub_001_pos.ksh \ functional/cli_root/zpool_scrub/zpool_error_scrub_002_pos.ksh \ functional/cli_root/zpool_scrub/zpool_error_scrub_003_pos.ksh \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_txg_continue_from_last.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_txg_continue_from_last.ksh new file mode 100755 index 0000000000..b28a8d2cf7 --- /dev/null +++ 
b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_txg_continue_from_last.ksh @@ -0,0 +1,104 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# Copyright (c) 2023, Klara Inc. +# +# This software was developed by +# Mariusz Zaborski +# under sponsorship from Wasabi Technology, Inc. and Klara Inc. + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_scrub/zpool_scrub.cfg +. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib + +# +# DESCRIPTION: +# Verify scrub -C +# +# STRATEGY: +# 1. Create a pool and create one file. +# 2. Verify that the last_scrubbed_txg is 0. +# 3. Run scrub. +# 4. Verify that the last_scrubbed_txg is set. +# 5. Create second file. +# 6. Invalidate both files. +# 7. Run scrub only from last point. +# 8. Verify that only one file, that was created with newer txg, +# was detected. +# + +verify_runnable "global" + +function cleanup +{ + log_must zinject -c all + log_must rm -f $mntpnt/f1 + log_must rm -f $mntpnt/f2 +} + +log_onexit cleanup + +log_assert "Verify scrub -C." + +# Create one file. 
+mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) + +log_must file_write -b 1048576 -c 10 -o create -d 0 -f $mntpnt/f1 +log_must sync_pool $TESTPOOL true +f1txg=$(get_last_txg_synced $TESTPOOL) + +# Verify that last_scrubbed_txg isn't set. +zpoollasttxg=$(zpool get -H -o value last_scrubbed_txg $TESTPOOL) +log_must [ $zpoollasttxg -eq 0 ] + +# Run scrub. +log_must zpool scrub -w $TESTPOOL + +# Verify that last_scrubbed_txg is set. +zpoollasttxg=$(zpool get -H -o value last_scrubbed_txg $TESTPOOL) +log_must [ $zpoollasttxg -ne 0 ] + +# Create second file. +log_must file_write -b 1048576 -c 10 -o create -d 0 -f $mntpnt/f2 +log_must sync_pool $TESTPOOL true +f2txg=$(get_last_txg_synced $TESTPOOL) + +# Make sure that the sync txg are different. +log_must [ $f1txg -ne $f2txg ] + +# Insert faults. +log_must zinject -a -t data -e io -T read $mntpnt/f1 +log_must zinject -a -t data -e io -T read $mntpnt/f2 + +# Run scrub from last saved point. +log_must zpool scrub -w -C $TESTPOOL + +# Verify that only newer file was detected. +log_mustnot eval "zpool status -v $TESTPOOL | grep '$mntpnt/f1'" +log_must eval "zpool status -v $TESTPOOL | grep '$mntpnt/f2'" + +# Verify that both files are corrupted. +log_must zpool scrub -w $TESTPOOL +log_must eval "zpool status -v $TESTPOOL | grep '$mntpnt/f1'" +log_must eval "zpool status -v $TESTPOOL | grep '$mntpnt/f2'" + +log_pass "Verified scrub -C show expected status." From 4823cc2492f501a3a5c0de04507592b9e494c1cc Mon Sep 17 00:00:00 2001 From: Mariusz Zaborski Date: Thu, 29 Aug 2024 09:59:07 +0000 Subject: [PATCH 3/3] scrub: add option to scrub only recent data Sponsored-By: Wasabi Technology, Inc. Sponsored-By: Klara Inc. 
Signed-off-by: Mariusz Zaborski --- cmd/zpool/zpool_main.c | 28 ++++++++++++++++++++---- include/sys/dsl_scan.h | 1 + include/sys/fs/zfs.h | 1 + lib/libzfs/libzfs.abi | 49 +++++++++++++++++++++++++++++++----------- man/man4/zfs.4 | 3 +++ man/man8/zpool-scrub.8 | 5 +++++ module/zfs/dsl_scan.c | 6 ++++++ module/zfs/zfs_ioctl.c | 10 +++++++++ 8 files changed, 86 insertions(+), 17 deletions(-) diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 3de1408567..7859b04191 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -506,7 +506,7 @@ get_usage(zpool_help_t idx) return (gettext("\tinitialize [-c | -s | -u] [-w] " "[ ...]\n")); case HELP_SCRUB: - return (gettext("\tscrub [-s | -p] [-w] [-e] [-C] " + return (gettext("\tscrub [-s | -p] [-w] [-e] [-C] [-R] " " ...\n")); case HELP_RESILVER: return (gettext("\tresilver ...\n")); @@ -8398,6 +8398,7 @@ wait_callback(zpool_handle_t *zhp, void *data) * -e Only scrub blocks in the error log. * -s Stop. Stops any in-progress scrub. * -p Pause. Pause in-progress scrub. + * -R Scrub only recent data. * -w Wait. Blocks until scrub has completed. 
*/ int @@ -8415,9 +8416,10 @@ zpool_do_scrub(int argc, char **argv) boolean_t is_pause = B_FALSE; boolean_t is_stop = B_FALSE; boolean_t is_txg_continue = B_FALSE; + boolean_t is_recent_scrub = B_FALSE; /* check options */ - while ((c = getopt(argc, argv, "spweC")) != -1) { + while ((c = getopt(argc, argv, "spweCR")) != -1) { switch (c) { case 'C': is_txg_continue = B_TRUE; @@ -8431,6 +8433,9 @@ zpool_do_scrub(int argc, char **argv) case 'p': is_pause = B_TRUE; break; + case 'R': + is_recent_scrub = B_TRUE; + break; case 'w': wait = B_TRUE; break; @@ -8457,11 +8462,26 @@ zpool_do_scrub(int argc, char **argv) (void) fprintf(stderr, gettext("invalid option " "combination :-e and -C are mutually exclusive\n")); usage(B_FALSE); + } else if (is_pause && is_recent_scrub) { + (void) fprintf(stderr, gettext("invalid option " + "combination :-p and -R are mutually exclusive\n")); + usage(B_FALSE); + } else if (is_stop && is_recent_scrub) { + (void) fprintf(stderr, gettext("invalid option " + "combination :-s and -R are mutually exclusive\n")); + usage(B_FALSE); + } else if (is_error_scrub && is_recent_scrub) { + (void) fprintf(stderr, gettext("invalid option " + "combination :-e and -R are mutually exclusive\n")); + usage(B_FALSE); } else { - if (is_error_scrub) + if (is_error_scrub) { cb.cb_type = POOL_SCAN_ERRORSCRUB; + } - if (is_pause) { + if (is_recent_scrub) { + cb.cb_scrub_cmd = POOL_SCRUB_RECENT_TXGS; + } else if (is_pause) { cb.cb_scrub_cmd = POOL_SCRUB_PAUSE; } else if (is_stop) { cb.cb_type = POOL_SCAN_NONE; diff --git a/include/sys/dsl_scan.h b/include/sys/dsl_scan.h index d9f2c1be8b..5cc99ad9d1 100644 --- a/include/sys/dsl_scan.h +++ b/include/sys/dsl_scan.h @@ -44,6 +44,7 @@ struct dsl_pool; struct dmu_tx; extern int zfs_scan_suspend_progress; +extern uint_t zfs_scrub_recent_txgs; /* * All members of this structure must be uint64_t, for byteswap diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 69441066e6..78133d01a0 100644 --- 
a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -1077,6 +1077,7 @@ typedef enum pool_scrub_cmd { POOL_SCRUB_NORMAL = 0, POOL_SCRUB_PAUSE, POOL_SCRUB_FROM_LAST_TXG, + POOL_SCRUB_RECENT_TXGS, POOL_SCRUB_FLAGS_END } pool_scrub_cmd_t; diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index 0edcc8adf7..873234733d 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -183,8 +183,8 @@ - + @@ -466,6 +466,7 @@ + @@ -485,8 +486,8 @@ - + @@ -529,7 +530,6 @@ - @@ -5809,7 +5809,8 @@ - + + @@ -7839,7 +7840,7 @@ - + @@ -7858,6 +7859,9 @@ + + + @@ -7867,6 +7871,15 @@ + + + + + + + + + @@ -7970,6 +7983,11 @@ + + + + + @@ -8077,6 +8095,11 @@ + + + + + @@ -8095,6 +8118,11 @@ + + + + + @@ -8294,12 +8322,12 @@ - - - + + + @@ -8804,11 +8832,6 @@ - - - - - diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index 20bb95c1ae..f09b90665d 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -1986,6 +1986,9 @@ working on a scrub between TXG flushes. .It Sy zfs_scrub_error_blocks_per_txg Ns = Ns Sy 4096 Pq uint Error blocks to be scrubbed in one txg. . +.It Sy zfs_scrub_recent_txgs Ns = Ns Sy 256 Pq uint +Number of txgs to be considered as recent when performing a recent data scrub. +. .It Sy zfs_scan_checkpoint_intval Ns = Ns Sy 7200 Ns s Po 2 hour Pc Pq uint To preserve progress across reboots, the sequential scan algorithm periodically needs to stop metadata scanning and issue all the verification I/O to disk. diff --git a/man/man8/zpool-scrub.8 b/man/man8/zpool-scrub.8 index 023ec33fb3..39d14eecbc 100644 --- a/man/man8/zpool-scrub.8 +++ b/man/man8/zpool-scrub.8 @@ -40,6 +40,7 @@ .Op Fl w .Op Fl e .Op Fl C +.Op Fl R .Ar pool Ns … . .Sh DESCRIPTION @@ -119,6 +120,10 @@ resilvering, nor can it be run when a regular scrub is paused. Continue scrub from last saved txg (see zpool .Sy last_scrubbed_txg property). +.It Fl R +Scrub only recent data (this can be controlled by the +.Sy zfs_scrub_recent_txgs +parameter). 
.El .Sh EXAMPLES .Ss Example 1 diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 76db3b55c6..6f90011893 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -239,6 +239,9 @@ static int zfs_free_bpobj_enabled = 1; /* Error blocks to be scrubbed in one txg. */ static uint_t zfs_scrub_error_blocks_per_txg = 1 << 12; +/* Number of most recent TXGs scrubbed when scrubbing recent data. */ +uint_t zfs_scrub_recent_txgs = 256; + /* the order has to match pool_scan_type */ static scan_cb_t *scan_funcs[POOL_SCAN_FUNCS] = { NULL, @@ -5323,4 +5326,7 @@ ZFS_MODULE_PARAM(zfs, zfs_, resilver_disable_defer, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, scrub_error_blocks_per_txg, UINT, ZMOD_RW, "Error blocks to be scrubbed in one txg"); + +ZFS_MODULE_PARAM(zfs, zfs_, scrub_recent_txgs, UINT, ZMOD_RW, + "The number of TXGs should be scrubbed while scrubbing recent data"); /* END CSTYLED */ diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 0b6683ffb9..6eddf2d742 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -1722,6 +1722,16 @@ zfs_ioc_pool_scrub(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) } else if (scan_cmd == POOL_SCRUB_FROM_LAST_TXG) { error = spa_scan_range(spa, scan_type, spa_get_last_scrubbed_txg(spa), 0); + } else if (scan_cmd == POOL_SCRUB_RECENT_TXGS) { + uint64_t start; + + start = 0; + if (spa_last_synced_txg(spa) > zfs_scrub_recent_txgs) { + start = spa_last_synced_txg(spa) - + zfs_scrub_recent_txgs; + } + + error = spa_scan_range(spa, scan_type, start, 0); } else { error = spa_scan(spa, scan_type); }