Illumos 4891 - want zdb option to dump all metadata

4891 want zdb option to dump all metadata
Reviewed by: Sonu Pillai <sonu.pillai@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Reviewed by: Richard Lowe <richlowe@richlowe.net>
Approved by: Garrett D'Amore <garrett@damore.org>

We'd like a way for zdb to dump metadata in a machine-readable
format, so that we can bring that back from a customer site for
in-house diagnosis.  Think of it as a crash dump for zpools,
which can be used for post-mortem analysis of a malfunctioning
pool.

References:
  https://www.illumos.org/issues/4891
  https://github.com/illumos/illumos-gate/commit/df15e41

Porting notes:
- [cmd/zdb/zdb.c]
  - a5778ea zdb: Introduce -V for verbatim import
  - In main(), the getopt 'opts' variable was removed and the code
    was brought back in line with illumos.
- [lib/libzpool/kernel.c]
  - 1e33ac1 Fix Solaris thread dependency by using pthreads
  - f0e324f Update utsname support
  - 4d58b69 Fix vn_open/vn_rdwr error handling
  - In vn_open() allocate 'dumppath' on heap instead of stack
  - Properly handle 'dump_fd == -1' error path
  - Free 'realpath' after the added vn_dumpdir code block

Ported-by: kernelOfTruth <kerneloftruth@gmail.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
Matthew Ahrens 2016-01-01 14:42:58 +01:00 committed by Brian Behlendorf
parent f3c9dca093
commit 9867e8be2a
4 changed files with 62 additions and 10 deletions

View File

@ -118,7 +118,7 @@ usage(void)
{ {
(void) fprintf(stderr, (void) fprintf(stderr,
"Usage: %s [-CumMdibcsDvhLXFPA] [-t txg] [-e [-p path...]] " "Usage: %s [-CumMdibcsDvhLXFPA] [-t txg] [-e [-p path...]] "
"[-U config] [-I inflight I/Os] poolname [object...]\n" "[-U config] [-I inflight I/Os] [-x dumpdir] poolname [object...]\n"
" %s [-divPA] [-e -p path...] [-U config] dataset " " %s [-divPA] [-e -p path...] [-U config] dataset "
"[object...]\n" "[object...]\n"
" %s -mM [-LXFPA] [-t txg] [-e [-p path...]] [-U config] " " %s -mM [-LXFPA] [-t txg] [-e [-p path...]] [-U config] "
@ -157,7 +157,7 @@ usage(void)
(void) fprintf(stderr, " -R read and display block from a " (void) fprintf(stderr, " -R read and display block from a "
"device\n\n"); "device\n\n");
(void) fprintf(stderr, " Below options are intended for use " (void) fprintf(stderr, " Below options are intended for use "
"with other options (except -l):\n"); "with other options:\n");
(void) fprintf(stderr, " -A ignore assertions (-A), enable " (void) fprintf(stderr, " -A ignore assertions (-A), enable "
"panic recovery (-AA) or both (-AAA)\n"); "panic recovery (-AA) or both (-AAA)\n");
(void) fprintf(stderr, " -F attempt automatic rewind within " (void) fprintf(stderr, " -F attempt automatic rewind within "
@ -170,12 +170,14 @@ usage(void)
"has altroot/not in a cachefile\n"); "has altroot/not in a cachefile\n");
(void) fprintf(stderr, " -p <path> -- use one or more with " (void) fprintf(stderr, " -p <path> -- use one or more with "
"-e to specify path to vdev dir\n"); "-e to specify path to vdev dir\n");
(void) fprintf(stderr, " -x <dumpdir> -- "
"dump all read blocks into specified directory\n");
(void) fprintf(stderr, " -P print numbers in parseable form\n"); (void) fprintf(stderr, " -P print numbers in parseable form\n");
(void) fprintf(stderr, " -t <txg> -- highest txg to use when " (void) fprintf(stderr, " -t <txg> -- highest txg to use when "
"searching for uberblocks\n"); "searching for uberblocks\n");
(void) fprintf(stderr, " -I <number of inflight I/Os> -- " (void) fprintf(stderr, " -I <number of inflight I/Os> -- "
"specify the maximum number of checksumming I/Os " "specify the maximum number of "
"[default is 200]\n"); "checksumming I/Os [default is 200]\n");
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) " (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
"to make only that option verbose\n"); "to make only that option verbose\n");
(void) fprintf(stderr, "Default is to dump everything non-verbosely\n"); (void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
@ -3626,7 +3628,6 @@ main(int argc, char **argv)
int flags = ZFS_IMPORT_MISSING_LOG; int flags = ZFS_IMPORT_MISSING_LOG;
int rewind = ZPOOL_NEVER_REWIND; int rewind = ZPOOL_NEVER_REWIND;
char *spa_config_path_env; char *spa_config_path_env;
const char *opts = "bcdhilmMI:suCDRSAFLXevp:t:U:PV";
boolean_t target_is_spa = B_TRUE; boolean_t target_is_spa = B_TRUE;
(void) setrlimit(RLIMIT_NOFILE, &rl); (void) setrlimit(RLIMIT_NOFILE, &rl);
@ -3643,7 +3644,8 @@ main(int argc, char **argv)
if (spa_config_path_env != NULL) if (spa_config_path_env != NULL)
spa_config_path = spa_config_path_env; spa_config_path = spa_config_path_env;
while ((c = getopt(argc, argv, opts)) != -1) { while ((c = getopt(argc, argv,
"bcdhilmMI:suCDRSAFLXx:evp:t:U:PV")) != -1) {
switch (c) { switch (c) {
case 'b': case 'b':
case 'c': case 'c':
@ -3697,6 +3699,9 @@ main(int argc, char **argv)
} }
searchdirs[nsearch++] = optarg; searchdirs[nsearch++] = optarg;
break; break;
case 'x':
vn_dumpdir = optarg;
break;
case 't': case 't':
max_txg = strtoull(optarg, NULL, 0); max_txg = strtoull(optarg, NULL, 0);
if (max_txg < TXG_INITIAL) { if (max_txg < TXG_INITIAL) {

View File

@ -500,8 +500,10 @@ typedef struct vnode {
uint64_t v_size; uint64_t v_size;
int v_fd; int v_fd;
char *v_path; char *v_path;
int v_dump_fd;
} vnode_t; } vnode_t;
extern char *vn_dumpdir;
#define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */ #define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */
typedef struct xoptattr { typedef struct xoptattr {

View File

@ -29,6 +29,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <zlib.h> #include <zlib.h>
#include <libgen.h>
#include <sys/signal.h> #include <sys/signal.h>
#include <sys/spa.h> #include <sys/spa.h>
#include <sys/stat.h> #include <sys/stat.h>
@ -50,6 +51,9 @@ char hw_serial[HW_HOSTID_LEN];
struct utsname hw_utsname; struct utsname hw_utsname;
vmem_t *zio_arena = NULL; vmem_t *zio_arena = NULL;
/* If set, all blocks read will be copied to the specified directory. */
char *vn_dumpdir = NULL;
/* this only exists to have its address taken */ /* this only exists to have its address taken */
struct proc p0; struct proc p0;
@ -588,6 +592,7 @@ int
vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
{ {
int fd; int fd;
int dump_fd;
vnode_t *vp; vnode_t *vp;
int old_umask = 0; int old_umask = 0;
char *realpath; char *realpath;
@ -655,13 +660,31 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
* FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR. * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
*/ */
fd = open64(realpath, flags - FREAD, mode); fd = open64(realpath, flags - FREAD, mode);
free(realpath); err = errno;
if (flags & FCREAT) if (flags & FCREAT)
(void) umask(old_umask); (void) umask(old_umask);
if (vn_dumpdir != NULL) {
char *dumppath = umem_zalloc(MAXPATHLEN, UMEM_NOFAIL);
(void) snprintf(dumppath, MAXPATHLEN,
"%s/%s", vn_dumpdir, basename(realpath));
dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
umem_free(dumppath, MAXPATHLEN);
if (dump_fd == -1) {
err = errno;
free(realpath);
close(fd);
return (err);
}
} else {
dump_fd = -1;
}
free(realpath);
if (fd == -1) if (fd == -1)
return (errno); return (err);
if (fstat64_blk(fd, &st) == -1) { if (fstat64_blk(fd, &st) == -1) {
err = errno; err = errno;
@ -676,6 +699,7 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
vp->v_fd = fd; vp->v_fd = fd;
vp->v_size = st.st_size; vp->v_size = st.st_size;
vp->v_path = spa_strdup(path); vp->v_path = spa_strdup(path);
vp->v_dump_fd = dump_fd;
return (0); return (0);
} }
@ -708,6 +732,11 @@ vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
if (uio == UIO_READ) { if (uio == UIO_READ) {
rc = pread64(vp->v_fd, addr, len, offset); rc = pread64(vp->v_fd, addr, len, offset);
if (vp->v_dump_fd != -1) {
int status =
pwrite64(vp->v_dump_fd, addr, rc, offset);
ASSERT(status != -1);
}
} else { } else {
/* /*
* To simulate partial disk writes, we split writes into two * To simulate partial disk writes, we split writes into two
@ -750,6 +779,8 @@ void
vn_close(vnode_t *vp) vn_close(vnode_t *vp)
{ {
close(vp->v_fd); close(vp->v_fd);
if (vp->v_dump_fd != -1)
close(vp->v_dump_fd);
spa_strfree(vp->v_path); spa_strfree(vp->v_path);
umem_free(vp, sizeof (vnode_t)); umem_free(vp, sizeof (vnode_t));
} }

View File

@ -11,7 +11,7 @@
.\" .\"
.\" .\"
.\" Copyright 2012, Richard Lowe. .\" Copyright 2012, Richard Lowe.
.\" Copyright (c) 2012 by Delphix. All rights reserved. .\" Copyright (c) 2012, 2014 by Delphix. All rights reserved.
.\" .\"
.TH "ZDB" "8" "February 15, 2012" "" "" .TH "ZDB" "8" "February 15, 2012" "" ""
@ -20,7 +20,7 @@
.SH "SYNOPSIS" .SH "SYNOPSIS"
\fBzdb\fR [-CumdibcsDvhLMXFPA] [-e [-p \fIpath\fR...]] [-t \fItxg\fR] \fBzdb\fR [-CumdibcsDvhLMXFPA] [-e [-p \fIpath\fR...]] [-t \fItxg\fR]
[-U \fIcache\fR] [-I \fIinflight I/Os\fR] [-U \fIcache\fR] [-I \fIinflight I/Os\fR] [-x \fIdumpdir\fR]
[\fIpoolname\fR [\fIobject\fR ...]] [\fIpoolname\fR [\fIobject\fR ...]]
.P .P
@ -372,6 +372,20 @@ Operate on an exported pool, not present in \fB/etc/zfs/zpool.cache\fR. The
\fB-p\fR flag specifies the path under which devices are to be searched. \fB-p\fR flag specifies the path under which devices are to be searched.
.RE .RE
.sp
.ne 2
.na
\fB-x\fR \fIdumpdir\fR
.ad
.sp .6
.RS 4n
All blocks accessed will be copied to files in the specified directory.
The blocks will be placed in sparse files whose name is the same as
that of the file or device read. zdb can then be run on the generated files.
Note that the \fB-bbc\fR flags are sufficient to access (and thus copy)
all metadata on the pool.
.RE
.sp .sp
.ne 2 .ne 2
.na .na