Illumos 4891 - want zdb option to dump all metadata
4891 want zdb option to dump all metadata

Reviewed by: Sonu Pillai <sonu.pillai@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Reviewed by: Richard Lowe <richlowe@richlowe.net>
Approved by: Garrett D'Amore <garrett@damore.org>

We'd like a way for zdb to dump metadata in a machine-readable format, so that we can bring that back from a customer site for in-house diagnosis. Think of it as a crash dump for zpools, which can be used for post-mortem analysis of a malfunctioning pool.

References:
  https://www.illumos.org/issues/4891
  https://github.com/illumos/illumos-gate/commit/df15e41

Porting notes:
- [cmd/zdb/zdb.c]
  - a5778ea zdb: Introduce -V for verbatim import
  - In main() getopt 'opt' variable removed and the code was brought back in line with illumos.
- [lib/libzpool/kernel.c]
  - 1e33ac1 Fix Solaris thread dependency by using pthreads
  - f0e324f Update utsname support
  - 4d58b69 Fix vn_open/vn_rdwr error handling
  - In vn_open() allocate 'dumppath' on heap instead of stack
  - Properly handle 'dump_fd == -1' error path
  - Free 'realpath' after added vn_dumpdir_code block

Ported-by: kernelOfTruth <kerneloftruth@gmail.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
parent
f3c9dca093
commit
9867e8be2a
|
@ -118,7 +118,7 @@ usage(void)
|
||||||
{
|
{
|
||||||
(void) fprintf(stderr,
|
(void) fprintf(stderr,
|
||||||
"Usage: %s [-CumMdibcsDvhLXFPA] [-t txg] [-e [-p path...]] "
|
"Usage: %s [-CumMdibcsDvhLXFPA] [-t txg] [-e [-p path...]] "
|
||||||
"[-U config] [-I inflight I/Os] poolname [object...]\n"
|
"[-U config] [-I inflight I/Os] [-x dumpdir] poolname [object...]\n"
|
||||||
" %s [-divPA] [-e -p path...] [-U config] dataset "
|
" %s [-divPA] [-e -p path...] [-U config] dataset "
|
||||||
"[object...]\n"
|
"[object...]\n"
|
||||||
" %s -mM [-LXFPA] [-t txg] [-e [-p path...]] [-U config] "
|
" %s -mM [-LXFPA] [-t txg] [-e [-p path...]] [-U config] "
|
||||||
|
@ -157,7 +157,7 @@ usage(void)
|
||||||
(void) fprintf(stderr, " -R read and display block from a "
|
(void) fprintf(stderr, " -R read and display block from a "
|
||||||
"device\n\n");
|
"device\n\n");
|
||||||
(void) fprintf(stderr, " Below options are intended for use "
|
(void) fprintf(stderr, " Below options are intended for use "
|
||||||
"with other options (except -l):\n");
|
"with other options:\n");
|
||||||
(void) fprintf(stderr, " -A ignore assertions (-A), enable "
|
(void) fprintf(stderr, " -A ignore assertions (-A), enable "
|
||||||
"panic recovery (-AA) or both (-AAA)\n");
|
"panic recovery (-AA) or both (-AAA)\n");
|
||||||
(void) fprintf(stderr, " -F attempt automatic rewind within "
|
(void) fprintf(stderr, " -F attempt automatic rewind within "
|
||||||
|
@ -170,12 +170,14 @@ usage(void)
|
||||||
"has altroot/not in a cachefile\n");
|
"has altroot/not in a cachefile\n");
|
||||||
(void) fprintf(stderr, " -p <path> -- use one or more with "
|
(void) fprintf(stderr, " -p <path> -- use one or more with "
|
||||||
"-e to specify path to vdev dir\n");
|
"-e to specify path to vdev dir\n");
|
||||||
|
(void) fprintf(stderr, " -x <dumpdir> -- "
|
||||||
|
"dump all read blocks into specified directory\n");
|
||||||
(void) fprintf(stderr, " -P print numbers in parseable form\n");
|
(void) fprintf(stderr, " -P print numbers in parseable form\n");
|
||||||
(void) fprintf(stderr, " -t <txg> -- highest txg to use when "
|
(void) fprintf(stderr, " -t <txg> -- highest txg to use when "
|
||||||
"searching for uberblocks\n");
|
"searching for uberblocks\n");
|
||||||
(void) fprintf(stderr, " -I <number of inflight I/Os> -- "
|
(void) fprintf(stderr, " -I <number of inflight I/Os> -- "
|
||||||
"specify the maximum number of checksumming I/Os "
|
"specify the maximum number of "
|
||||||
"[default is 200]\n");
|
"checksumming I/Os [default is 200]\n");
|
||||||
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
|
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
|
||||||
"to make only that option verbose\n");
|
"to make only that option verbose\n");
|
||||||
(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
|
(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
|
||||||
|
@ -3626,7 +3628,6 @@ main(int argc, char **argv)
|
||||||
int flags = ZFS_IMPORT_MISSING_LOG;
|
int flags = ZFS_IMPORT_MISSING_LOG;
|
||||||
int rewind = ZPOOL_NEVER_REWIND;
|
int rewind = ZPOOL_NEVER_REWIND;
|
||||||
char *spa_config_path_env;
|
char *spa_config_path_env;
|
||||||
const char *opts = "bcdhilmMI:suCDRSAFLXevp:t:U:PV";
|
|
||||||
boolean_t target_is_spa = B_TRUE;
|
boolean_t target_is_spa = B_TRUE;
|
||||||
|
|
||||||
(void) setrlimit(RLIMIT_NOFILE, &rl);
|
(void) setrlimit(RLIMIT_NOFILE, &rl);
|
||||||
|
@ -3643,7 +3644,8 @@ main(int argc, char **argv)
|
||||||
if (spa_config_path_env != NULL)
|
if (spa_config_path_env != NULL)
|
||||||
spa_config_path = spa_config_path_env;
|
spa_config_path = spa_config_path_env;
|
||||||
|
|
||||||
while ((c = getopt(argc, argv, opts)) != -1) {
|
while ((c = getopt(argc, argv,
|
||||||
|
"bcdhilmMI:suCDRSAFLXx:evp:t:U:PV")) != -1) {
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case 'b':
|
case 'b':
|
||||||
case 'c':
|
case 'c':
|
||||||
|
@ -3697,6 +3699,9 @@ main(int argc, char **argv)
|
||||||
}
|
}
|
||||||
searchdirs[nsearch++] = optarg;
|
searchdirs[nsearch++] = optarg;
|
||||||
break;
|
break;
|
||||||
|
case 'x':
|
||||||
|
vn_dumpdir = optarg;
|
||||||
|
break;
|
||||||
case 't':
|
case 't':
|
||||||
max_txg = strtoull(optarg, NULL, 0);
|
max_txg = strtoull(optarg, NULL, 0);
|
||||||
if (max_txg < TXG_INITIAL) {
|
if (max_txg < TXG_INITIAL) {
|
||||||
|
|
|
@ -500,8 +500,10 @@ typedef struct vnode {
|
||||||
uint64_t v_size;
|
uint64_t v_size;
|
||||||
int v_fd;
|
int v_fd;
|
||||||
char *v_path;
|
char *v_path;
|
||||||
|
int v_dump_fd;
|
||||||
} vnode_t;
|
} vnode_t;
|
||||||
|
|
||||||
|
extern char *vn_dumpdir;
|
||||||
#define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */
|
#define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */
|
||||||
|
|
||||||
typedef struct xoptattr {
|
typedef struct xoptattr {
|
||||||
|
|
|
@ -29,6 +29,7 @@
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <zlib.h>
|
#include <zlib.h>
|
||||||
|
#include <libgen.h>
|
||||||
#include <sys/signal.h>
|
#include <sys/signal.h>
|
||||||
#include <sys/spa.h>
|
#include <sys/spa.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
|
@ -50,6 +51,9 @@ char hw_serial[HW_HOSTID_LEN];
|
||||||
struct utsname hw_utsname;
|
struct utsname hw_utsname;
|
||||||
vmem_t *zio_arena = NULL;
|
vmem_t *zio_arena = NULL;
|
||||||
|
|
||||||
|
/* If set, all blocks read will be copied to the specified directory. */
|
||||||
|
char *vn_dumpdir = NULL;
|
||||||
|
|
||||||
/* this only exists to have its address taken */
|
/* this only exists to have its address taken */
|
||||||
struct proc p0;
|
struct proc p0;
|
||||||
|
|
||||||
|
@ -588,6 +592,7 @@ int
|
||||||
vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
|
vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
|
||||||
{
|
{
|
||||||
int fd;
|
int fd;
|
||||||
|
int dump_fd;
|
||||||
vnode_t *vp;
|
vnode_t *vp;
|
||||||
int old_umask = 0;
|
int old_umask = 0;
|
||||||
char *realpath;
|
char *realpath;
|
||||||
|
@ -655,13 +660,31 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
|
||||||
* FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
|
* FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
|
||||||
*/
|
*/
|
||||||
fd = open64(realpath, flags - FREAD, mode);
|
fd = open64(realpath, flags - FREAD, mode);
|
||||||
free(realpath);
|
err = errno;
|
||||||
|
|
||||||
if (flags & FCREAT)
|
if (flags & FCREAT)
|
||||||
(void) umask(old_umask);
|
(void) umask(old_umask);
|
||||||
|
|
||||||
|
if (vn_dumpdir != NULL) {
|
||||||
|
char *dumppath = umem_zalloc(MAXPATHLEN, UMEM_NOFAIL);
|
||||||
|
(void) snprintf(dumppath, MAXPATHLEN,
|
||||||
|
"%s/%s", vn_dumpdir, basename(realpath));
|
||||||
|
dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
|
||||||
|
umem_free(dumppath, MAXPATHLEN);
|
||||||
|
if (dump_fd == -1) {
|
||||||
|
err = errno;
|
||||||
|
free(realpath);
|
||||||
|
close(fd);
|
||||||
|
return (err);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
dump_fd = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
free(realpath);
|
||||||
|
|
||||||
if (fd == -1)
|
if (fd == -1)
|
||||||
return (errno);
|
return (err);
|
||||||
|
|
||||||
if (fstat64_blk(fd, &st) == -1) {
|
if (fstat64_blk(fd, &st) == -1) {
|
||||||
err = errno;
|
err = errno;
|
||||||
|
@ -676,6 +699,7 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
|
||||||
vp->v_fd = fd;
|
vp->v_fd = fd;
|
||||||
vp->v_size = st.st_size;
|
vp->v_size = st.st_size;
|
||||||
vp->v_path = spa_strdup(path);
|
vp->v_path = spa_strdup(path);
|
||||||
|
vp->v_dump_fd = dump_fd;
|
||||||
|
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
@ -708,6 +732,11 @@ vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
|
||||||
|
|
||||||
if (uio == UIO_READ) {
|
if (uio == UIO_READ) {
|
||||||
rc = pread64(vp->v_fd, addr, len, offset);
|
rc = pread64(vp->v_fd, addr, len, offset);
|
||||||
|
if (vp->v_dump_fd != -1) {
|
||||||
|
int status =
|
||||||
|
pwrite64(vp->v_dump_fd, addr, rc, offset);
|
||||||
|
ASSERT(status != -1);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
* To simulate partial disk writes, we split writes into two
|
* To simulate partial disk writes, we split writes into two
|
||||||
|
@ -750,6 +779,8 @@ void
|
||||||
vn_close(vnode_t *vp)
|
vn_close(vnode_t *vp)
|
||||||
{
|
{
|
||||||
close(vp->v_fd);
|
close(vp->v_fd);
|
||||||
|
if (vp->v_dump_fd != -1)
|
||||||
|
close(vp->v_dump_fd);
|
||||||
spa_strfree(vp->v_path);
|
spa_strfree(vp->v_path);
|
||||||
umem_free(vp, sizeof (vnode_t));
|
umem_free(vp, sizeof (vnode_t));
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
.\"
|
.\"
|
||||||
.\"
|
.\"
|
||||||
.\" Copyright 2012, Richard Lowe.
|
.\" Copyright 2012, Richard Lowe.
|
||||||
.\" Copyright (c) 2012 by Delphix. All rights reserved.
|
.\" Copyright (c) 2012, 2014 by Delphix. All rights reserved.
|
||||||
.\"
|
.\"
|
||||||
.TH "ZDB" "8" "February 15, 2012" "" ""
|
.TH "ZDB" "8" "February 15, 2012" "" ""
|
||||||
|
|
||||||
|
@ -20,7 +20,7 @@
|
||||||
|
|
||||||
.SH "SYNOPSIS"
|
.SH "SYNOPSIS"
|
||||||
\fBzdb\fR [-CumdibcsDvhLMXFPA] [-e [-p \fIpath\fR...]] [-t \fItxg\fR]
|
\fBzdb\fR [-CumdibcsDvhLMXFPA] [-e [-p \fIpath\fR...]] [-t \fItxg\fR]
|
||||||
[-U \fIcache\fR] [-I \fIinflight I/Os\fR]
|
[-U \fIcache\fR] [-I \fIinflight I/Os\fR] [-x \fIdumpdir\fR]
|
||||||
[\fIpoolname\fR [\fIobject\fR ...]]
|
[\fIpoolname\fR [\fIobject\fR ...]]
|
||||||
|
|
||||||
.P
|
.P
|
||||||
|
@ -372,6 +372,20 @@ Operate on an exported pool, not present in \fB/etc/zfs/zpool.cache\fR. The
|
||||||
\fB-p\fR flag specifies the path under which devices are to be searched.
|
\fB-p\fR flag specifies the path under which devices are to be searched.
|
||||||
.RE
|
.RE
|
||||||
|
|
||||||
|
.sp
|
||||||
|
.ne 2
|
||||||
|
.na
|
||||||
|
\fB-x\fR \fIdumpdir\fR
|
||||||
|
.ad
|
||||||
|
.sp .6
|
||||||
|
.RS 4n
|
||||||
|
All blocks accessed will be copied to files in the specified directory.
|
||||||
|
The blocks will be placed in sparse files whose name is the same as
|
||||||
|
that of the file or device read. zdb can then be run on the generated files.
|
||||||
|
Note that the \fB-bbc\fR flags are sufficient to access (and thus copy)
|
||||||
|
all metadata on the pool.
|
||||||
|
.RE
|
||||||
|
|
||||||
.sp
|
.sp
|
||||||
.ne 2
|
.ne 2
|
||||||
.na
|
.na
|
||||||
|
|
Loading…
Reference in New Issue