Add a "zstream decompress" subcommand
It can be used to repair a ZFS file system corrupted by ZFS bug #12762. Use it like this: zfs send -c <DS> | \ zstream decompress <OBJECT>,<OFFSET>[,<COMPRESSION_ALGO>] ... | \ zfs recv <DST_DS> Reviewed-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Allan Jude <allan@klarasystems.com> Signed-off-by: Alan Somers <asomers@gmail.com> Sponsored-by: Axcient Workaround for #12762 Closes #13256
This commit is contained in:
parent
1c0c729ab4
commit
ccf89b39fe
|
@ -4,6 +4,7 @@ CPPCHECKTARGETS += zstream
|
|||
zstream_SOURCES = \
|
||||
%D%/zstream.c \
|
||||
%D%/zstream.h \
|
||||
%D%/zstream_decompress.c \
|
||||
%D%/zstream_dump.c \
|
||||
%D%/zstream_redup.c \
|
||||
%D%/zstream_token.c
|
||||
|
@ -11,6 +12,7 @@ zstream_SOURCES = \
|
|||
zstream_LDADD = \
|
||||
libzfs.la \
|
||||
libzfs_core.la \
|
||||
libzpool.la \
|
||||
libnvpair.la
|
||||
|
||||
PHONY += install-exec-hook
|
||||
|
|
|
@ -40,6 +40,8 @@ zstream_usage(void)
|
|||
"\tzstream dump [-vCd] FILE\n"
|
||||
"\t... | zstream dump [-vCd]\n"
|
||||
"\n"
|
||||
"\tzstream decompress [-v] [OBJECT,OFFSET[,TYPE]] ...\n"
|
||||
"\n"
|
||||
"\tzstream token resume_token\n"
|
||||
"\n"
|
||||
"\tzstream redup [-v] FILE | ...\n");
|
||||
|
@ -61,6 +63,8 @@ main(int argc, char *argv[])
|
|||
|
||||
if (strcmp(subcommand, "dump") == 0) {
|
||||
return (zstream_do_dump(argc - 1, argv + 1));
|
||||
} else if (strcmp(subcommand, "decompress") == 0) {
|
||||
return (zstream_do_decompress(argc - 1, argv + 1));
|
||||
} else if (strcmp(subcommand, "token") == 0) {
|
||||
return (zstream_do_token(argc - 1, argv + 1));
|
||||
} else if (strcmp(subcommand, "redup") == 0) {
|
||||
|
|
|
@ -24,8 +24,12 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern void *safe_calloc(size_t n);
|
||||
extern int sfread(void *buf, size_t size, FILE *fp);
|
||||
extern void *safe_malloc(size_t size);
|
||||
extern int zstream_do_redup(int, char *[]);
|
||||
extern int zstream_do_dump(int, char *[]);
|
||||
extern int zstream_do_decompress(int argc, char *argv[]);
|
||||
extern int zstream_do_token(int, char *[]);
|
||||
extern void zstream_usage(void);
|
||||
|
||||
|
|
|
@ -0,0 +1,359 @@
|
|||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2022 Axcient. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#include <err.h>
|
||||
#include <search.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/zfs_ioctl.h>
|
||||
#include <sys/zio_checksum.h>
|
||||
#include <sys/zstd/zstd.h>
|
||||
#include "zfs_fletcher.h"
|
||||
#include "zstream.h"
|
||||
|
||||
static int
|
||||
dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
|
||||
zio_cksum_t *zc, int outfd)
|
||||
{
|
||||
assert(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum)
|
||||
== sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
|
||||
fletcher_4_incremental_native(drr,
|
||||
offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
|
||||
if (drr->drr_type != DRR_BEGIN) {
|
||||
assert(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
|
||||
drr_checksum.drr_checksum));
|
||||
drr->drr_u.drr_checksum.drr_checksum = *zc;
|
||||
}
|
||||
fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
|
||||
sizeof (zio_cksum_t), zc);
|
||||
if (write(outfd, drr, sizeof (*drr)) == -1)
|
||||
return (errno);
|
||||
if (payload_len != 0) {
|
||||
fletcher_4_incremental_native(payload, payload_len, zc);
|
||||
if (write(outfd, payload, payload_len) == -1)
|
||||
return (errno);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
zstream_do_decompress(int argc, char *argv[])
|
||||
{
|
||||
const int KEYSIZE = 64;
|
||||
int bufsz = SPA_MAXBLOCKSIZE;
|
||||
char *buf = safe_malloc(bufsz);
|
||||
dmu_replay_record_t thedrr;
|
||||
dmu_replay_record_t *drr = &thedrr;
|
||||
zio_cksum_t stream_cksum;
|
||||
int c;
|
||||
boolean_t verbose = B_FALSE;
|
||||
|
||||
while ((c = getopt(argc, argv, "v")) != -1) {
|
||||
switch (c) {
|
||||
case 'v':
|
||||
verbose = B_TRUE;
|
||||
break;
|
||||
case '?':
|
||||
(void) fprintf(stderr, "invalid option '%c'\n",
|
||||
optopt);
|
||||
zstream_usage();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
argc -= optind;
|
||||
argv += optind;
|
||||
|
||||
if (argc < 0)
|
||||
zstream_usage();
|
||||
|
||||
if (hcreate(argc) == 0)
|
||||
errx(1, "hcreate");
|
||||
for (int i = 0; i < argc; i++) {
|
||||
uint64_t object, offset;
|
||||
char *obj_str;
|
||||
char *offset_str;
|
||||
char *key;
|
||||
char *end;
|
||||
enum zio_compress type = ZIO_COMPRESS_LZ4;
|
||||
|
||||
obj_str = strsep(&argv[i], ",");
|
||||
if (argv[i] == NULL) {
|
||||
zstream_usage();
|
||||
exit(2);
|
||||
}
|
||||
errno = 0;
|
||||
object = strtoull(obj_str, &end, 0);
|
||||
if (errno || *end != '\0')
|
||||
errx(1, "invalid value for object");
|
||||
offset_str = strsep(&argv[i], ",");
|
||||
offset = strtoull(offset_str, &end, 0);
|
||||
if (errno || *end != '\0')
|
||||
errx(1, "invalid value for offset");
|
||||
if (argv[i]) {
|
||||
if (0 == strcmp("lz4", argv[i]))
|
||||
type = ZIO_COMPRESS_LZ4;
|
||||
else if (0 == strcmp("lzjb", argv[i]))
|
||||
type = ZIO_COMPRESS_LZJB;
|
||||
else if (0 == strcmp("gzip", argv[i]))
|
||||
type = ZIO_COMPRESS_GZIP_1;
|
||||
else if (0 == strcmp("zle", argv[i]))
|
||||
type = ZIO_COMPRESS_ZLE;
|
||||
else if (0 == strcmp("zstd", argv[i]))
|
||||
type = ZIO_COMPRESS_ZSTD;
|
||||
else {
|
||||
fprintf(stderr, "Invalid compression type %s.\n"
|
||||
"Supported types are lz4, lzjb, gzip, zle, "
|
||||
"and zstd\n",
|
||||
argv[i]);
|
||||
exit(2);
|
||||
}
|
||||
}
|
||||
|
||||
if (asprintf(&key, "%llu,%llu", (u_longlong_t)object,
|
||||
(u_longlong_t)offset) < 0) {
|
||||
err(1, "asprintf");
|
||||
}
|
||||
ENTRY e = {.key = key};
|
||||
ENTRY *p;
|
||||
|
||||
p = hsearch(e, ENTER);
|
||||
if (p == NULL)
|
||||
errx(1, "hsearch");
|
||||
p->data = (void*)type;
|
||||
}
|
||||
|
||||
if (isatty(STDIN_FILENO)) {
|
||||
(void) fprintf(stderr,
|
||||
"Error: The send stream is a binary format "
|
||||
"and can not be read from a\n"
|
||||
"terminal. Standard input must be redirected.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
fletcher_4_init();
|
||||
while (sfread(drr, sizeof (*drr), stdin) != 0) {
|
||||
struct drr_write *drrw;
|
||||
uint64_t payload_size = 0;
|
||||
|
||||
/*
|
||||
* We need to regenerate the checksum.
|
||||
*/
|
||||
if (drr->drr_type != DRR_BEGIN) {
|
||||
memset(&drr->drr_u.drr_checksum.drr_checksum, 0,
|
||||
sizeof (drr->drr_u.drr_checksum.drr_checksum));
|
||||
}
|
||||
|
||||
switch (drr->drr_type) {
|
||||
case DRR_BEGIN:
|
||||
{
|
||||
ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
|
||||
|
||||
int sz = drr->drr_payloadlen;
|
||||
if (sz != 0) {
|
||||
if (sz > bufsz) {
|
||||
buf = realloc(buf, sz);
|
||||
if (buf == NULL)
|
||||
err(1, "realloc");
|
||||
bufsz = sz;
|
||||
}
|
||||
(void) sfread(buf, sz, stdin);
|
||||
}
|
||||
payload_size = sz;
|
||||
break;
|
||||
}
|
||||
case DRR_END:
|
||||
{
|
||||
struct drr_end *drre = &drr->drr_u.drr_end;
|
||||
/*
|
||||
* Use the recalculated checksum, unless this is
|
||||
* the END record of a stream package, which has
|
||||
* no checksum.
|
||||
*/
|
||||
if (!ZIO_CHECKSUM_IS_ZERO(&drre->drr_checksum))
|
||||
drre->drr_checksum = stream_cksum;
|
||||
break;
|
||||
}
|
||||
|
||||
case DRR_OBJECT:
|
||||
{
|
||||
struct drr_object *drro = &drr->drr_u.drr_object;
|
||||
|
||||
if (drro->drr_bonuslen > 0) {
|
||||
payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro);
|
||||
(void) sfread(buf, payload_size, stdin);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case DRR_SPILL:
|
||||
{
|
||||
struct drr_spill *drrs = &drr->drr_u.drr_spill;
|
||||
payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs);
|
||||
(void) sfread(buf, payload_size, stdin);
|
||||
break;
|
||||
}
|
||||
|
||||
case DRR_WRITE_BYREF:
|
||||
fprintf(stderr,
|
||||
"Deduplicated streams are not supported\n");
|
||||
exit(1);
|
||||
break;
|
||||
|
||||
case DRR_WRITE:
|
||||
{
|
||||
drrw = &thedrr.drr_u.drr_write;
|
||||
payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
|
||||
ENTRY *p;
|
||||
char key[KEYSIZE];
|
||||
|
||||
snprintf(key, KEYSIZE, "%llu,%llu",
|
||||
(u_longlong_t)drrw->drr_object,
|
||||
(u_longlong_t)drrw->drr_offset);
|
||||
ENTRY e = {.key = key};
|
||||
|
||||
p = hsearch(e, FIND);
|
||||
if (p != NULL) {
|
||||
zio_decompress_func_t *xfunc = NULL;
|
||||
switch ((enum zio_compress)(intptr_t)p->data) {
|
||||
case ZIO_COMPRESS_LZJB:
|
||||
xfunc = lzjb_decompress;
|
||||
break;
|
||||
case ZIO_COMPRESS_GZIP_1:
|
||||
xfunc = gzip_decompress;
|
||||
break;
|
||||
case ZIO_COMPRESS_ZLE:
|
||||
xfunc = zle_decompress;
|
||||
break;
|
||||
case ZIO_COMPRESS_LZ4:
|
||||
xfunc = lz4_decompress_zfs;
|
||||
break;
|
||||
case ZIO_COMPRESS_ZSTD:
|
||||
xfunc = zfs_zstd_decompress;
|
||||
break;
|
||||
default:
|
||||
assert(B_FALSE);
|
||||
}
|
||||
assert(xfunc != NULL);
|
||||
|
||||
|
||||
/*
|
||||
* Read and decompress the block
|
||||
*/
|
||||
char *lzbuf = safe_calloc(payload_size);
|
||||
(void) sfread(lzbuf, payload_size, stdin);
|
||||
if (0 != xfunc(lzbuf, buf,
|
||||
payload_size, payload_size, 0)) {
|
||||
/*
|
||||
* The block must not be compressed,
|
||||
* possibly because it gets written
|
||||
* multiple times in this stream.
|
||||
*/
|
||||
warnx("decompression failed for "
|
||||
"ino %llu offset %llu",
|
||||
(u_longlong_t)drrw->drr_object,
|
||||
(u_longlong_t)drrw->drr_offset);
|
||||
memcpy(buf, lzbuf, payload_size);
|
||||
} else if (verbose) {
|
||||
fprintf(stderr, "successfully "
|
||||
"decompressed ino %llu "
|
||||
"offset %llu\n",
|
||||
(u_longlong_t)drrw->drr_object,
|
||||
(u_longlong_t)drrw->drr_offset);
|
||||
}
|
||||
free(lzbuf);
|
||||
} else {
|
||||
/*
|
||||
* Read the contents of the block unaltered
|
||||
*/
|
||||
(void) sfread(buf, payload_size, stdin);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case DRR_WRITE_EMBEDDED:
|
||||
{
|
||||
struct drr_write_embedded *drrwe =
|
||||
&drr->drr_u.drr_write_embedded;
|
||||
payload_size =
|
||||
P2ROUNDUP((uint64_t)drrwe->drr_psize, 8);
|
||||
(void) sfread(buf, payload_size, stdin);
|
||||
break;
|
||||
}
|
||||
|
||||
case DRR_FREEOBJECTS:
|
||||
case DRR_FREE:
|
||||
case DRR_OBJECT_RANGE:
|
||||
break;
|
||||
|
||||
default:
|
||||
(void) fprintf(stderr, "INVALID record type 0x%x\n",
|
||||
drr->drr_type);
|
||||
/* should never happen, so assert */
|
||||
assert(B_FALSE);
|
||||
}
|
||||
|
||||
if (feof(stdout)) {
|
||||
fprintf(stderr, "Error: unexpected end-of-file\n");
|
||||
exit(1);
|
||||
}
|
||||
if (ferror(stdout)) {
|
||||
fprintf(stderr, "Error while reading file: %s\n",
|
||||
strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* We need to recalculate the checksum, and it needs to be
|
||||
* initially zero to do that. BEGIN records don't have
|
||||
* a checksum.
|
||||
*/
|
||||
if (drr->drr_type != DRR_BEGIN) {
|
||||
memset(&drr->drr_u.drr_checksum.drr_checksum, 0,
|
||||
sizeof (drr->drr_u.drr_checksum.drr_checksum));
|
||||
}
|
||||
if (dump_record(drr, buf, payload_size,
|
||||
&stream_cksum, STDOUT_FILENO) != 0)
|
||||
break;
|
||||
if (drr->drr_type == DRR_END) {
|
||||
/*
|
||||
* Typically the END record is either the last
|
||||
* thing in the stream, or it is followed
|
||||
* by a BEGIN record (which also zeros the checksum).
|
||||
* However, a stream package ends with two END
|
||||
* records. The last END record's checksum starts
|
||||
* from zero.
|
||||
*/
|
||||
ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
|
||||
}
|
||||
}
|
||||
free(buf);
|
||||
fletcher_4_fini();
|
||||
hdestroy();
|
||||
|
||||
return (0);
|
||||
}
|
|
@ -59,7 +59,7 @@ FILE *send_stream = 0;
|
|||
boolean_t do_byteswap = B_FALSE;
|
||||
boolean_t do_cksum = B_TRUE;
|
||||
|
||||
static void *
|
||||
void *
|
||||
safe_malloc(size_t size)
|
||||
{
|
||||
void *rv = malloc(size);
|
||||
|
|
|
@ -65,7 +65,7 @@ highbit64(uint64_t i)
|
|||
return (NBBY * sizeof (uint64_t) - __builtin_clzll(i));
|
||||
}
|
||||
|
||||
static void *
|
||||
void *
|
||||
safe_calloc(size_t n)
|
||||
{
|
||||
void *rv = calloc(1, n);
|
||||
|
@ -81,7 +81,7 @@ safe_calloc(size_t n)
|
|||
/*
|
||||
* Safe version of fread(), exits on error.
|
||||
*/
|
||||
static int
|
||||
int
|
||||
sfread(void *buf, size_t size, FILE *fp)
|
||||
{
|
||||
int rv = fread(buf, size, 1, fp);
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
.\"
|
||||
.\" Copyright (c) 2020 by Delphix. All rights reserved.
|
||||
.\"
|
||||
.Dd May 8, 2021
|
||||
.Dd March 25, 2022
|
||||
.Dt ZSTREAM 8
|
||||
.Os
|
||||
.
|
||||
|
@ -33,6 +33,10 @@
|
|||
.Op Fl Cvd
|
||||
.Op Ar file
|
||||
.Nm
|
||||
.Cm decompress
|
||||
.Op Fl v
|
||||
.Op Ar object Ns Sy \&, Ns Ar offset Ns Op Sy \&, Ns Ar type Ns ...
|
||||
.Nm
|
||||
.Cm redup
|
||||
.Op Fl v
|
||||
.Ar file
|
||||
|
@ -82,6 +86,36 @@ alias is provided for compatibility and is equivalent to running
|
|||
Dumps zfs resume token information
|
||||
.It Xo
|
||||
.Nm
|
||||
.Cm decompress
|
||||
.Op Fl v
|
||||
.Op Ar object Ns Sy \&, Ns Ar offset Ns Op Sy \&, Ns Ar type Ns ...
|
||||
.Xc
|
||||
Decompress selected records in a ZFS send stream provided on standard input,
|
||||
when the compression type recorded in ZFS metadata may be incorrect.
|
||||
Specify the object number and byte offset of each record that you wish to
|
||||
decompress.
|
||||
Optionally specify the compression type.
|
||||
Valid compression types include
|
||||
.Sy gzip ,
|
||||
.Sy lz4 ,
|
||||
.Sy lzjb ,
|
||||
.Sy zstd ,
|
||||
and
|
||||
.Sy zle .
|
||||
The default is
|
||||
.Sy lz4 .
|
||||
Every record for that object beginning at that offset will be decompressed, if
|
||||
possible.
|
||||
It may not be possible, because the record may be corrupted in some but not
|
||||
all of the stream's snapshots.
|
||||
The repaired stream will be written to standard output.
|
||||
.Bl -tag -width "-v"
|
||||
.It Fl v
|
||||
Verbose.
|
||||
Print summary of decompressed records.
|
||||
.El
|
||||
.It Xo
|
||||
.Nm
|
||||
.Cm redup
|
||||
.Op Fl v
|
||||
.Ar file
|
||||
|
@ -111,7 +145,24 @@ Print summary of converted records.
|
|||
.El
|
||||
.El
|
||||
.
|
||||
.Sh EXAMPLES
|
||||
Heal a dataset that was corrupted due to OpenZFS bug #12762.
|
||||
First, determine which records are corrupt.
|
||||
That cannot be done automatically; it requires information beyond ZFS's
|
||||
metadata.
|
||||
If object
|
||||
.Sy 128
|
||||
is corrupted at offset
|
||||
.Sy 0
|
||||
and is compressed using
|
||||
.Sy lz4 ,
|
||||
then run this command:
|
||||
.Bd -literal
|
||||
.No # Nm zfs Ar send Fl c Ar … | Nm zstream decompress Ar 128,0,lz4 | \
|
||||
Nm zfs recv Ar …
|
||||
.Ed
|
||||
.Sh SEE ALSO
|
||||
.Xr zfs 8 ,
|
||||
.Xr zfs-receive 8 ,
|
||||
.Xr zfs-send 8
|
||||
.Xr zfs-send 8 ,
|
||||
.Lk https://github.com/openzfs/zfs/issues/12762
|
||||
|
|
Loading…
Reference in New Issue