From 9867e8be2a7182ce9b5eb28ed1d142e2ee60d69d Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Fri, 1 Jan 2016 14:42:58 +0100 Subject: [PATCH] Illumos 4891 - want zdb option to dump all metadata 4891 want zdb option to dump all metadata Reviewed by: Sonu Pillai Reviewed by: George Wilson Reviewed by: Christopher Siden Reviewed by: Dan McDonald Reviewed by: Richard Lowe Approved by: Garrett D'Amore We'd like a way for zdb to dump metadata in a machine-readable format, so that we can bring that back from a customer site for in-house diagnosis. Think of it as a crash dump for zpools, which can be used for post-mortem analysis of a malfunctioning pool References: https://www.illumos.org/issues/4891 https://github.com/illumos/illumos-gate/commit/df15e41 Porting notes: - [cmd/zdb/zdb.c] - a5778ea zdb: Introduce -V for verbatim import - In main() getopt 'opt' variable removed and the code was brought back in line with illumos. - [lib/libzpool/kernel.c] - 1e33ac1 Fix Solaris thread dependency by using pthreads - f0e324f Update utsname support - 4d58b69 Fix vn_open/vn_rdwr error handling - In vn_open() allocate 'dumppath' on heap instead of stack - Properly handle 'dump_fd == -1' error path - Free 'realpath' after added vn_dumpdir_code block Ported-by: kernelOfTruth kerneloftruth@gmail.com Signed-off-by: Brian Behlendorf --- cmd/zdb/zdb.c | 17 +++++++++++------ include/sys/zfs_context.h | 2 ++ lib/libzpool/kernel.c | 35 +++++++++++++++++++++++++++++++++-- man/man8/zdb.8 | 18 ++++++++++++++++-- 4 files changed, 62 insertions(+), 10 deletions(-) diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index efac66c223..c43d9d0bd6 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -118,7 +118,7 @@ usage(void) { (void) fprintf(stderr, "Usage: %s [-CumMdibcsDvhLXFPA] [-t txg] [-e [-p path...]] " - "[-U config] [-I inflight I/Os] poolname [object...]\n" + "[-U config] [-I inflight I/Os] [-x dumpdir] poolname [object...]\n" " %s [-divPA] [-e -p path...] 
[-U config] dataset " "[object...]\n" " %s -mM [-LXFPA] [-t txg] [-e [-p path...]] [-U config] " @@ -157,7 +157,7 @@ usage(void) (void) fprintf(stderr, " -R read and display block from a " "device\n\n"); (void) fprintf(stderr, " Below options are intended for use " - "with other options (except -l):\n"); + "with other options:\n"); (void) fprintf(stderr, " -A ignore assertions (-A), enable " "panic recovery (-AA) or both (-AAA)\n"); (void) fprintf(stderr, " -F attempt automatic rewind within " @@ -170,12 +170,14 @@ usage(void) "has altroot/not in a cachefile\n"); (void) fprintf(stderr, " -p -- use one or more with " "-e to specify path to vdev dir\n"); + (void) fprintf(stderr, " -x -- " + "dump all read blocks into specified directory\n"); (void) fprintf(stderr, " -P print numbers in parseable form\n"); (void) fprintf(stderr, " -t -- highest txg to use when " "searching for uberblocks\n"); (void) fprintf(stderr, " -I -- " - "specify the maximum number of checksumming I/Os " - "[default is 200]\n"); + "specify the maximum number of " + "checksumming I/Os [default is 200]\n"); (void) fprintf(stderr, "Specify an option more than once (e.g. 
-bb) " "to make only that option verbose\n"); (void) fprintf(stderr, "Default is to dump everything non-verbosely\n"); @@ -3626,7 +3628,6 @@ main(int argc, char **argv) int flags = ZFS_IMPORT_MISSING_LOG; int rewind = ZPOOL_NEVER_REWIND; char *spa_config_path_env; - const char *opts = "bcdhilmMI:suCDRSAFLXevp:t:U:PV"; boolean_t target_is_spa = B_TRUE; (void) setrlimit(RLIMIT_NOFILE, &rl); @@ -3643,7 +3644,8 @@ main(int argc, char **argv) if (spa_config_path_env != NULL) spa_config_path = spa_config_path_env; - while ((c = getopt(argc, argv, opts)) != -1) { + while ((c = getopt(argc, argv, + "bcdhilmMI:suCDRSAFLXx:evp:t:U:PV")) != -1) { switch (c) { case 'b': case 'c': @@ -3697,6 +3699,9 @@ main(int argc, char **argv) } searchdirs[nsearch++] = optarg; break; + case 'x': + vn_dumpdir = optarg; + break; case 't': max_txg = strtoull(optarg, NULL, 0); if (max_txg < TXG_INITIAL) { diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index 036235e2b4..a967c05679 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -500,8 +500,10 @@ typedef struct vnode { uint64_t v_size; int v_fd; char *v_path; + int v_dump_fd; } vnode_t; +extern char *vn_dumpdir; #define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */ typedef struct xoptattr { diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index a451026999..31d3336a89 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -50,6 +51,9 @@ char hw_serial[HW_HOSTID_LEN]; struct utsname hw_utsname; vmem_t *zio_arena = NULL; +/* If set, all blocks read will be copied to the specified directory. 
*/ +char *vn_dumpdir = NULL; + /* this only exists to have its address taken */ struct proc p0; @@ -588,6 +592,7 @@ int vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) { int fd; + int dump_fd; vnode_t *vp; int old_umask = 0; char *realpath; @@ -655,13 +660,31 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR. */ fd = open64(realpath, flags - FREAD, mode); - free(realpath); + err = errno; if (flags & FCREAT) (void) umask(old_umask); + if (vn_dumpdir != NULL) { + char *dumppath = umem_zalloc(MAXPATHLEN, UMEM_NOFAIL); + (void) snprintf(dumppath, MAXPATHLEN, + "%s/%s", vn_dumpdir, basename(realpath)); + dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666); + umem_free(dumppath, MAXPATHLEN); + if (dump_fd == -1) { + err = errno; + free(realpath); + close(fd); + return (err); + } + } else { + dump_fd = -1; + } + + free(realpath); + if (fd == -1) - return (errno); + return (err); if (fstat64_blk(fd, &st) == -1) { err = errno; @@ -676,6 +699,7 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) vp->v_fd = fd; vp->v_size = st.st_size; vp->v_path = spa_strdup(path); + vp->v_dump_fd = dump_fd; return (0); } @@ -708,6 +732,11 @@ vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, if (uio == UIO_READ) { rc = pread64(vp->v_fd, addr, len, offset); + if (vp->v_dump_fd != -1) { + int status = + pwrite64(vp->v_dump_fd, addr, rc, offset); + ASSERT(status != -1); + } } else { /* * To simulate partial disk writes, we split writes into two @@ -750,6 +779,8 @@ void vn_close(vnode_t *vp) { close(vp->v_fd); + if (vp->v_dump_fd != -1) + close(vp->v_dump_fd); spa_strfree(vp->v_path); umem_free(vp, sizeof (vnode_t)); } diff --git a/man/man8/zdb.8 b/man/man8/zdb.8 index faae3aa541..efa1f41eb5 100644 --- a/man/man8/zdb.8 +++ b/man/man8/zdb.8 @@ -11,7 +11,7 @@ .\" .\" .\" Copyright 2012, Richard Lowe. 
-.\" Copyright (c) 2012 by Delphix. All rights reserved. +.\" Copyright (c) 2012, 2014 by Delphix. All rights reserved. .\" .TH "ZDB" "8" "February 15, 2012" "" "" @@ -20,7 +20,7 @@ .SH "SYNOPSIS" \fBzdb\fR [-CumdibcsDvhLMXFPA] [-e [-p \fIpath\fR...]] [-t \fItxg\fR] - [-U \fIcache\fR] [-I \fIinflight I/Os\fR] + [-U \fIcache\fR] [-I \fIinflight I/Os\fR] [-x \fIdumpdir\fR] [\fIpoolname\fR [\fIobject\fR ...]] .P @@ -372,6 +372,20 @@ Operate on an exported pool, not present in \fB/etc/zfs/zpool.cache\fR. The \fB-p\fR flag specifies the path under which devices are to be searched. .RE +.sp +.ne 2 +.na +\fB-x\fR \fIdumpdir\fR +.ad +.sp .6 +.RS 4n +All blocks accessed will be copied to files in the specified directory. +The blocks will be placed in sparse files whose names are the same as +those of the files or devices read. zdb can then be run on the generated files. +Note that the \fB-bbc\fR flags are sufficient to access (and thus copy) +all metadata on the pool. +.RE + .sp .ne 2 .na