ztest: split block reconstruction
Increase the default allowed number of reconstruction attempts. There's not an exact right number for this setting. It needs to be set large enough to cover any realistic failure scenario and small enough to avoid stalling the I/O pipeline and triggering the deadman detection. The current value of 256 was empirically determined to be too low based on multi-day runs of ztest. The fault injection code would inject more damage than could be reconstructed given the relatively small number of attempts. However, in all observed cases, the block could be reconstructed using a slightly higher limit. Based on local testing, increasing the default value to 4096 was determined to strike the best balance. Checking that many combinations takes less than 10s in the worst case, and has so far eliminated the vast majority of false positives detected by ztest. This delay is roughly on par with how long retries may be performed against a misbehaving HDD and was deemed to be reasonable. It is better to err on the side of a brief delay than to fail to reconstruct the data. Lastly, the -Y flag has been added to zdb to make it easy to try all possible combinations when performing split block reconstruction. For badly damaged blocks with 18 splits, all combinations can be fully enumerated within a few minutes. This has been done to ensure permanent errors are never incorrectly reported when ztest verifies the pool with zdb. Reviewed by: Tom Caputi <tcaputi@datto.com> Reviewed by: Matt Ahrens <mahrens@delphix.com> Reviewed by: Serapheim Dimitropoulos <serapheim@delphix.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #8271
This commit is contained in:
parent
db587941c5
commit
64bdf63f5c
|
@ -100,6 +100,7 @@ extern int zfs_recover;
|
||||||
extern uint64_t zfs_arc_max, zfs_arc_meta_limit;
|
extern uint64_t zfs_arc_max, zfs_arc_meta_limit;
|
||||||
extern int zfs_vdev_async_read_max_active;
|
extern int zfs_vdev_async_read_max_active;
|
||||||
extern boolean_t spa_load_verify_dryrun;
|
extern boolean_t spa_load_verify_dryrun;
|
||||||
|
extern int zfs_reconstruct_indirect_combinations_max;
|
||||||
|
|
||||||
static const char cmdname[] = "zdb";
|
static const char cmdname[] = "zdb";
|
||||||
uint8_t dump_opt[256];
|
uint8_t dump_opt[256];
|
||||||
|
@ -215,6 +216,8 @@ usage(void)
|
||||||
"dump all read blocks into specified directory\n");
|
"dump all read blocks into specified directory\n");
|
||||||
(void) fprintf(stderr, " -X attempt extreme rewind (does not "
|
(void) fprintf(stderr, " -X attempt extreme rewind (does not "
|
||||||
"work with dataset)\n");
|
"work with dataset)\n");
|
||||||
|
(void) fprintf(stderr, " -Y attempt all reconstruction "
|
||||||
|
"combinations for split blocks\n");
|
||||||
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
|
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
|
||||||
"to make only that option verbose\n");
|
"to make only that option verbose\n");
|
||||||
(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
|
(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
|
||||||
|
@ -5871,7 +5874,7 @@ main(int argc, char **argv)
|
||||||
spa_config_path = spa_config_path_env;
|
spa_config_path = spa_config_path_env;
|
||||||
|
|
||||||
while ((c = getopt(argc, argv,
|
while ((c = getopt(argc, argv,
|
||||||
"AbcCdDeEFGhiI:klLmMo:Op:PqRsSt:uU:vVx:X")) != -1) {
|
"AbcCdDeEFGhiI:klLmMo:Op:PqRsSt:uU:vVx:XY")) != -1) {
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case 'b':
|
case 'b':
|
||||||
case 'c':
|
case 'c':
|
||||||
|
@ -5903,6 +5906,10 @@ main(int argc, char **argv)
|
||||||
case 'X':
|
case 'X':
|
||||||
dump_opt[c]++;
|
dump_opt[c]++;
|
||||||
break;
|
break;
|
||||||
|
case 'Y':
|
||||||
|
zfs_reconstruct_indirect_combinations_max = INT_MAX;
|
||||||
|
zfs_deadman_enabled = 0;
|
||||||
|
break;
|
||||||
/* NB: Sort single match options below. */
|
/* NB: Sort single match options below. */
|
||||||
case 'I':
|
case 'I':
|
||||||
max_inflight = strtoull(optarg, NULL, 0);
|
max_inflight = strtoull(optarg, NULL, 0);
|
||||||
|
|
|
@ -6447,8 +6447,7 @@ ztest_run_zdb(char *pool)
|
||||||
ztest_get_zdb_bin(bin, len);
|
ztest_get_zdb_bin(bin, len);
|
||||||
|
|
||||||
(void) sprintf(zdb,
|
(void) sprintf(zdb,
|
||||||
"%s -bcc%s%s -G -d -U %s "
|
"%s -bcc%s%s -G -d -Y -U %s %s",
|
||||||
"-o zfs_reconstruct_indirect_combinations_max=65536 %s",
|
|
||||||
bin,
|
bin,
|
||||||
ztest_opts.zo_verbose >= 3 ? "s" : "",
|
ztest_opts.zo_verbose >= 3 ? "s" : "",
|
||||||
ztest_opts.zo_verbose >= 4 ? "v" : "",
|
ztest_opts.zo_verbose >= 4 ? "v" : "",
|
||||||
|
|
|
@ -2025,7 +2025,7 @@ combinations each time the block is accessed. This allows all segment
|
||||||
copies to participate fairly in the reconstruction when all combinations
|
copies to participate fairly in the reconstruction when all combinations
|
||||||
cannot be checked and prevents repeated use of one bad copy.
|
cannot be checked and prevents repeated use of one bad copy.
|
||||||
.sp
|
.sp
|
||||||
Default value: \fB256\fR.
|
Default value: \fB4096\fR.
|
||||||
.RE
|
.RE
|
||||||
|
|
||||||
.sp
|
.sp
|
||||||
|
|
|
@ -23,7 +23,7 @@
|
||||||
.Nd display zpool debugging and consistency information
|
.Nd display zpool debugging and consistency information
|
||||||
.Sh SYNOPSIS
|
.Sh SYNOPSIS
|
||||||
.Nm
|
.Nm
|
||||||
.Op Fl AbcdDFGhikLMPsvX
|
.Op Fl AbcdDFGhikLMPsvXY
|
||||||
.Op Fl e Oo Fl V Oc Op Fl p Ar path ...
|
.Op Fl e Oo Fl V Oc Op Fl p Ar path ...
|
||||||
.Op Fl I Ar inflight I/Os
|
.Op Fl I Ar inflight I/Os
|
||||||
.Oo Fl o Ar var Ns = Ns Ar value Oc Ns ...
|
.Oo Fl o Ar var Ns = Ns Ar value Oc Ns ...
|
||||||
|
@ -50,7 +50,7 @@
|
||||||
.Ar device
|
.Ar device
|
||||||
.Nm
|
.Nm
|
||||||
.Fl m
|
.Fl m
|
||||||
.Op Fl AFLPX
|
.Op Fl AFLPXY
|
||||||
.Op Fl e Oo Fl V Oc Op Fl p Ar path ...
|
.Op Fl e Oo Fl V Oc Op Fl p Ar path ...
|
||||||
.Op Fl t Ar txg
|
.Op Fl t Ar txg
|
||||||
.Op Fl U Ar cache
|
.Op Fl U Ar cache
|
||||||
|
@ -349,6 +349,10 @@ Attempt
|
||||||
transaction rewind, that is attempt the same recovery as
|
transaction rewind, that is attempt the same recovery as
|
||||||
.Fl F
|
.Fl F
|
||||||
but read transactions otherwise deemed too old.
|
but read transactions otherwise deemed too old.
|
||||||
|
.It Fl Y
|
||||||
|
Attempt all possible combinations when reconstructing indirect split blocks.
|
||||||
|
This flag disables the individual I/O deadman timer in order to allow as
|
||||||
|
much time as required for the attempted reconstruction.
|
||||||
.El
|
.El
|
||||||
.Pp
|
.Pp
|
||||||
Specifying a display option more than once enables verbosity for only that
|
Specifying a display option more than once enables verbosity for only that
|
||||||
|
|
|
@ -213,8 +213,7 @@ int zfs_condense_indirect_commit_entry_delay_ms = 0;
|
||||||
* copies to participate fairly in the reconstruction when all combinations
|
* copies to participate fairly in the reconstruction when all combinations
|
||||||
* cannot be checked and prevents repeated use of one bad copy.
|
* cannot be checked and prevents repeated use of one bad copy.
|
||||||
*/
|
*/
|
||||||
int zfs_reconstruct_indirect_combinations_max = 256;
|
int zfs_reconstruct_indirect_combinations_max = 4096;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Enable to simulate damaged segments and validate reconstruction. This
|
* Enable to simulate damaged segments and validate reconstruction. This
|
||||||
|
|
Loading…
Reference in New Issue