Add zdb -r <dataset> <object-id | file> <output>

While you can use zdb -R poolname vdev:offset:[<lsize>/]<psize>[:flags] to extract individual DVAs from a vdev, it would be handy for be able copy an entire file out of the pool. Given a file or object number, add support to copy the contents to a file. Useful for debugging and recovery. Reviewed-by: Jorgen Lundman <lundman@lundman.net> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Allan Jude <allan@klarasystems.com> Closes #11027
2021-01-28 00:36:01 -05:00 · 2021-01-28 00:36:01 -05:00 · 393e69241e
parent b2c5904a78
commit 393e69241e
7 changed files with 240 additions and 15 deletions
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@ -31,6 +31,7 @@
 *
 * [1] Portions of this software were developed by Allan Jude
 *     under sponsorship from the FreeBSD Foundation.
+ * Copyright (c) 2021 Allan Jude
 */

 #include <stdio.h>
@ -755,13 +756,14 @@ usage(void)
 	    "\t%s -m [-AFLPX] [-e [-V] [-p <path> ...]] [-t <txg>] "
 	    "[-U <cache>]\n\t\t<poolname> [<vdev> [<metaslab> ...]]\n"
 	    "\t%s -O <dataset> <path>\n"
+	    "\t%s -r <dataset> <path> <destination>\n"
 	    "\t%s -R [-A] [-e [-V] [-p <path> ...]] [-U <cache>]\n"
 	    "\t\t<poolname> <vdev>:<offset>:<size>[:<flags>]\n"
 	    "\t%s -E [-A] word0:word1:...:word15\n"
 	    "\t%s -S [-AP] [-e [-V] [-p <path> ...]] [-U <cache>] "
 	    "<poolname>\n\n",
 	    cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname,
-	    cmdname, cmdname, cmdname);
+	    cmdname, cmdname, cmdname, cmdname);

 	(void) fprintf(stderr, "    Dataset name must include at least one "
 	    "separator character '/' or '@'\n");
@ -800,6 +802,7 @@ usage(void)
 	(void) fprintf(stderr, "        -m metaslabs\n");
 	(void) fprintf(stderr, "        -M metaslab groups\n");
 	(void) fprintf(stderr, "        -O perform object lookups by path\n");
+	(void) fprintf(stderr, "        -r copy an object by path to file\n");
 	(void) fprintf(stderr, "        -R read and display block from a "
 	    "device\n");
 	(void) fprintf(stderr, "        -s report stats on zdb's I/O\n");
@ -4490,7 +4493,7 @@ static char curpath[PATH_MAX];
 * for the last one.
 */
 static int
-dump_path_impl(objset_t *os, uint64_t obj, char *name)
+dump_path_impl(objset_t *os, uint64_t obj, char *name, uint64_t *retobj)
 {
 	int err;
 	boolean_t header = B_TRUE;
@ -4540,10 +4543,15 @@ dump_path_impl(objset_t *os, uint64_t obj, char *name)
 	switch (doi.doi_type) {
 	case DMU_OT_DIRECTORY_CONTENTS:
 		if (s != NULL && *(s + 1) != '\0')
-			return (dump_path_impl(os, child_obj, s + 1));
+			return (dump_path_impl(os, child_obj, s + 1, retobj));
 		/*FALLTHROUGH*/
 	case DMU_OT_PLAIN_FILE_CONTENTS:
-		dump_object(os, child_obj, dump_opt['v'], &header, NULL, 0);
+		if (retobj != NULL) {
+			*retobj = child_obj;
+		} else {
+			dump_object(os, child_obj, dump_opt['v'], &header,
+			    NULL, 0);
+		}
 		return (0);
 	default:
 		(void) fprintf(stderr, "object %llu has non-file/directory "
@ -4558,7 +4566,7 @@ dump_path_impl(objset_t *os, uint64_t obj, char *name)
 * Dump the blocks for the object specified by path inside the dataset.
 */
 static int
-dump_path(char *ds, char *path)
+dump_path(char *ds, char *path, uint64_t *retobj)
 {
 	int err;
 	objset_t *os;
@ -4578,12 +4586,89 @@ dump_path(char *ds, char *path)

 	(void) snprintf(curpath, sizeof (curpath), "dataset=%s path=/", ds);

-	err = dump_path_impl(os, root_obj, path);
+	err = dump_path_impl(os, root_obj, path, retobj);

 	close_objset(os, FTAG);
 	return (err);
 }

+static int
+zdb_copy_object(objset_t *os, uint64_t srcobj, char *destfile)
+{
+	int err = 0;
+	uint64_t size, readsize, oursize, offset;
+	ssize_t writesize;
+	sa_handle_t *hdl;
+
+	(void) printf("Copying object %" PRIu64 " to file %s\n", srcobj,
+	    destfile);
+
+	VERIFY3P(os, ==, sa_os);
+	if ((err = sa_handle_get(os, srcobj, NULL, SA_HDL_PRIVATE, &hdl))) {
+		(void) printf("Failed to get handle for SA znode\n");
+		return (err);
+	}
+	if ((err = sa_lookup(hdl, sa_attr_table[ZPL_SIZE], &size, 8))) {
+		(void) sa_handle_destroy(hdl);
+		return (err);
+	}
+	(void) sa_handle_destroy(hdl);
+
+	(void) printf("Object %" PRIu64 " is %" PRIu64 " bytes\n", srcobj,
+	    size);
+	if (size == 0) {
+		return (EINVAL);
+	}
+
+	int fd = open(destfile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+	/*
+	 * We cap the size at 1 mebibyte here to prevent
+	 * allocation failures and nigh-infinite printing if the
+	 * object is extremely large.
+	 */
+	oursize = MIN(size, 1 << 20);
+	offset = 0;
+	char *buf = kmem_alloc(oursize, KM_NOSLEEP);
+	if (buf == NULL) {
+		return (ENOMEM);
+	}
+
+	while (offset < size) {
+		readsize = MIN(size - offset, 1 << 20);
+		err = dmu_read(os, srcobj, offset, readsize, buf, 0);
+		if (err != 0) {
+			(void) printf("got error %u from dmu_read\n", err);
+			kmem_free(buf, oursize);
+			return (err);
+		}
+		if (dump_opt['v'] > 3) {
+			(void) printf("Read offset=%" PRIu64 " size=%" PRIu64
+			    " error=%d\n", offset, readsize, err);
+		}
+
+		writesize = write(fd, buf, readsize);
+		if (writesize < 0) {
+			err = errno;
+			break;
+		} else if (writesize != readsize) {
+			/* Incomplete write */
+			(void) fprintf(stderr, "Short write, only wrote %llu of"
+			    " %" PRIu64 " bytes, exiting...\n",
+			    (u_longlong_t)writesize, readsize);
+			break;
+		}
+
+		offset += readsize;
+	}
+
+	(void) close(fd);
+
+	if (buf != NULL)
+		kmem_free(buf, oursize);
+
+	return (err);
+}
+
 static int
 dump_label(const char *dev)
 {
@ -8167,6 +8252,7 @@ main(int argc, char **argv)
 	nvlist_t *policy = NULL;
 	uint64_t max_txg = UINT64_MAX;
 	int64_t objset_id = -1;
+	uint64_t object;
 	int flags = ZFS_IMPORT_MISSING_LOG;
 	int rewind = ZPOOL_NEVER_REWIND;
 	char *spa_config_path_env, *objset_str;
@ -8195,7 +8281,7 @@ main(int argc, char **argv)
 	zfs_btree_verify_intensity = 3;

 	while ((c = getopt(argc, argv,
-	    "AbcCdDeEFGhiI:klLmMo:Op:PqRsSt:uU:vVx:XYyZ")) != -1) {
+	    "AbcCdDeEFGhiI:klLmMo:Op:PqrRsSt:uU:vVx:XYyZ")) != -1) {
 		switch (c) {
 		case 'b':
 		case 'c':
@ -8210,6 +8296,7 @@ main(int argc, char **argv)
 		case 'm':
 		case 'M':
 		case 'O':
+		case 'r':
 		case 'R':
 		case 's':
 		case 'S':
@ -8299,7 +8386,7 @@ main(int argc, char **argv)
 		(void) fprintf(stderr, "-p option requires use of -e\n");
 		usage();
 	}
-	if (dump_opt['d']) {
+	if (dump_opt['d'] || dump_opt['r']) {
 		/* <pool>[/<dataset | objset id> is accepted */
 		if (argv[2] && (objset_str = strchr(argv[2], '/')) != NULL &&
 		    objset_str++ != NULL) {
@ -8358,7 +8445,7 @@ main(int argc, char **argv)
 		verbose = MAX(verbose, 1);

 	for (c = 0; c < 256; c++) {
-		if (dump_all && strchr("AeEFklLOPRSXy", c) == NULL)
+		if (dump_all && strchr("AeEFklLOPrRSXy", c) == NULL)
 			dump_opt[c] = 1;
 		if (dump_opt[c])
 			dump_opt[c] += verbose;
@ -8394,7 +8481,13 @@ main(int argc, char **argv)
 		if (argc != 2)
 			usage();
 		dump_opt['v'] = verbose + 3;
-		return (dump_path(argv[0], argv[1]));
+		return (dump_path(argv[0], argv[1], NULL));
+	}
+	if (dump_opt['r']) {
+		if (argc != 3)
+			usage();
+		dump_opt['v'] = verbose;
+		error = dump_path(argv[0], argv[1], &object);
 	}

 	if (dump_opt['X'] || dump_opt['F'])
@ -8572,7 +8665,9 @@ main(int argc, char **argv)

 	argv++;
 	argc--;
-	if (!dump_opt['R']) {
+	if (dump_opt['r']) {
+		error = zdb_copy_object(os, object, argv[1]);
+	} else if (!dump_opt['R']) {
 		flagbits['d'] = ZOR_FLAG_DIRECTORY;
 		flagbits['f'] = ZOR_FLAG_PLAIN_FILE;
 		flagbits['m'] = ZOR_FLAG_SPACE_MAP;
--- a/man/man8/zdb.8
+++ b/man/man8/zdb.8
@ -15,7 +15,7 @@
 .\" Copyright (c) 2017 Lawrence Livermore National Security, LLC.
 .\" Copyright (c) 2017 Intel Corporation.
 .\"
-.Dd April 14, 2019
+.Dd October 7, 2020
 .Dt ZDB 8 SMM
 .Os
 .Sh NAME
@ -60,6 +60,9 @@
 .Fl O
 .Ar dataset path
 .Nm
+.Fl r
+.Ar dataset path destination
+.Nm
 .Fl R
 .Op Fl A
 .Op Fl e Oo Fl V Oc Op Fl p Ar path ...
@ -274,6 +277,19 @@ must be relative to the root of
 This option can be combined with
 .Fl v
 for increasing verbosity.
+.It Fl r Ar dataset path destination
+Copy the specified
+.Ar path
+inside of the
+.Ar dataset
+to the specified destination.
+Specified
+.Ar path
+must be relative to the root of
+.Ar dataset .
+This option can be combined with
+.Fl v
+for increasing verbosity.
 .It Xo
 .Fl R Ar poolname vdev Ns \&: Ns Ar offset Ns \&: Ns Ar [<lsize>/]<psize> Ns Op : Ns Ar flags
 .Xc
--- a/tests/runfiles/common.run
+++ b/tests/runfiles/common.run
@ -119,7 +119,7 @@ tests = ['zdb_002_pos', 'zdb_003_pos', 'zdb_004_pos', 'zdb_005_pos',
    'zdb_006_pos', 'zdb_args_neg', 'zdb_args_pos',
    'zdb_block_size_histogram', 'zdb_checksum', 'zdb_decompress',
    'zdb_display_block', 'zdb_object_range_neg', 'zdb_object_range_pos',
-    'zdb_objset_id', 'zdb_decompress_zstd']
+    'zdb_objset_id', 'zdb_decompress_zstd', 'zdb_recover', 'zdb_recover_2']
 pre =
 post =
 tags = ['functional', 'cli_root', 'zdb']
--- a/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am
+++ b/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am
@ -14,4 +14,6 @@ dist_pkgdata_SCRIPTS = \
 	zdb_object_range_neg.ksh \
 	zdb_object_range_pos.ksh \
 	zdb_display_block.ksh \
-	zdb_objset_id.ksh
+	zdb_objset_id.ksh \
+	zdb_recover.ksh \
+	zdb_recover_2.ksh
--- a/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_args_neg.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_args_neg.ksh
@ -56,7 +56,7 @@ set -A args "create" "add" "destroy" "import fakepool" \
    "add mirror fakepool" "add raidz fakepool" \
    "add raidz1 fakepool" "add raidz2 fakepool" \
    "setvprop" "blah blah" "-%" "--?" "-*" "-=" \
-    "-a" "-f" "-g" "-j" "-n" "-o" "-p" "-p /tmp" "-r" \
+    "-a" "-f" "-g" "-j" "-n" "-o" "-p" "-p /tmp" \
    "-t" "-w" "-z" "-E" "-H" "-I" "-J" "-K" \
    "-N" "-Q" "-R" "-T" "-W"

--- a/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_recover.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_recover.ksh
@ -0,0 +1,55 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2021 by Allan Jude.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# Description:
+# zdb -r <dataset> <path> <destination>
+# Will extract <path> (relative to <dataset>) to the file <destination>
+# Similar to -R, except it does the work for you to find each record
+#
+# Strategy:
+# 1. Create a pool
+# 2. Write some data to a file
+# 3. Extract the file
+# 4. Compare the file to the original
+#
+
+function cleanup
+{
+	datasetexists $TESTPOOL && destroy_pool $TESTPOOL
+	rm $tmpfile
+}
+
+log_assert "Verify zdb -r <dataset> <path> <dest> extract the correct data."
+log_onexit cleanup
+init_data=$TESTDIR/file1
+tmpfile="$TEST_BASE_DIR/zdb-recover"
+write_count=8
+blksize=131072
+verify_runnable "global"
+verify_disk_count "$DISKS" 2
+
+default_mirror_setup_noexit $DISKS
+file_write -o create -w -f $init_data -b $blksize -c $write_count
+log_must zpool sync $TESTPOOL
+
+output=$(zdb -r $TESTPOOL/$TESTFS file1 $tmpfile)
+log_must cmp $init_data $tmpfile
+
+log_pass "zdb -r <dataset> <path> <dest> extracts the correct data."
--- a/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_recover_2.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_recover_2.ksh
@ -0,0 +1,57 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2021 by Allan Jude.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# Description:
+# zdb -r <dataset> <path> <destination>
+# Will extract <path> (relative to <dataset>) to the file <destination>
+# Similar to -R, except it does the work for you to find each record
+#
+# Strategy:
+# 1. Create a pool
+# 2. Write some data to a file
+# 3. Append to the file so it isn't an divisible by 2
+# 4. Extract the file
+# 5. Compare the file to the original
+#
+
+function cleanup
+{
+	datasetexists $TESTPOOL && destroy_pool $TESTPOOL
+	rm $tmpfile
+}
+
+log_assert "Verify zdb -r <dataset> <path> <dest> extract the correct data."
+log_onexit cleanup
+init_data=$TESTDIR/file1
+tmpfile="$TEST_BASE_DIR/zdb-recover"
+write_count=8
+blksize=131072
+verify_runnable "global"
+verify_disk_count "$DISKS" 2
+
+default_mirror_setup_noexit $DISKS
+file_write -o create -w -f $init_data -b $blksize -c $write_count
+log_must echo "zfs" >> $init_data
+log_must zpool sync $TESTPOOL
+
+output=$(zdb -r $TESTPOOL/$TESTFS file1 $tmpfile)
+log_must cmp $init_data $tmpfile
+
+log_pass "zdb -r <dataset> <path> <dest> extracts the correct data."