Distributed Spare (dRAID) Feature

This patch adds a new top-level vdev type called dRAID, which stands for Distributed parity RAID. This pool configuration allows all dRAID vdevs to participate when rebuilding to a distributed hot spare device. This can substantially reduce the total time required to restore full parity to a pool with a failed device. A dRAID pool can be created using the new top-level `draid` type. Like `raidz`, the desired redundancy is specified after the type: `draid[1,2,3]`. No additional information is required to create the pool and reasonable default values will be chosen based on the number of child vdevs in the dRAID vdev. zpool create <pool> draid[1,2,3] <vdevs...> Unlike raidz, additional optional dRAID configuration values can be provided as part of the draid type as colon separated values. This allows administrators to fully specify a layout for either performance or capacity reasons. The supported options include: zpool create <pool> \ draid[<parity>][:<data>d][:<children>c][:<spares>s] \ <vdevs...> - draid[parity] - Parity level (default 1) - draid[:<data>d] - Data devices per group (default 8) - draid[:<children>c] - Expected number of child vdevs - draid[:<spares>s] - Distributed hot spares (default 0) Abbreviated example `zpool status` output for a 68 disk dRAID pool with two distributed spares using special allocation classes. ``` pool: tank state: ONLINE config: NAME STATE READ WRITE CKSUM slag7 ONLINE 0 0 0 draid2:8d:68c:2s-0 ONLINE 0 0 0 L0 ONLINE 0 0 0 L1 ONLINE 0 0 0 ... U25 ONLINE 0 0 0 U26 ONLINE 0 0 0 spare-53 ONLINE 0 0 0 U27 ONLINE 0 0 0 draid2-0-0 ONLINE 0 0 0 U28 ONLINE 0 0 0 U29 ONLINE 0 0 0 ... U42 ONLINE 0 0 0 U43 ONLINE 0 0 0 special mirror-1 ONLINE 0 0 0 L5 ONLINE 0 0 0 U5 ONLINE 0 0 0 mirror-2 ONLINE 0 0 0 L6 ONLINE 0 0 0 U6 ONLINE 0 0 0 spares draid2-0-0 INUSE currently in use draid2-0-1 AVAIL ``` When adding test coverage for the new dRAID vdev type the following options were added to the ztest command. 
These options are leveraged by zloop.sh to test a wide range of dRAID configurations. -K draid|raidz|random - kind of RAID to test -D <value> - dRAID data drives per group -S <value> - dRAID distributed hot spares -R <value> - RAID parity (raidz or dRAID) The zpool_create, zpool_import, redundancy, replacement and fault test groups have all been updated to provide test coverage for the dRAID feature. Co-authored-by: Isaac Huang <he.huang@intel.com> Co-authored-by: Mark Maybee <mmaybee@cray.com> Co-authored-by: Don Brady <don.brady@delphix.com> Co-authored-by: Matthew Ahrens <mahrens@delphix.com> Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Mark Maybee <mmaybee@cray.com> Reviewed-by: Matt Ahrens <matt@delphix.com> Reviewed-by: Tony Hutter <hutter2@llnl.gov> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #10102
2020-11-13 13:51:51 -08:00 · 2020-11-13 13:51:51 -08:00 · b2255edcc0
parent a724db0374
commit b2255edcc0
153 changed files with 10203 additions and 1882 deletions
--- a/cmd/raidz_test/raidz_bench.c
+++ b/cmd/raidz_test/raidz_bench.c
@ -83,8 +83,17 @@ run_gen_bench_impl(const char *impl)
 			/* create suitable raidz_map */
 			ncols = rto_opts.rto_dcols + fn + 1;
 			zio_bench.io_size = 1ULL << ds;
 			if (rto_opts.rto_expand) {
 				rm_bench = vdev_raidz_map_alloc_expanded(
 				    zio_bench.io_abd,
 				    zio_bench.io_size, zio_bench.io_offset,
 				    rto_opts.rto_ashift, ncols+1, ncols,
 				    fn+1, rto_opts.rto_expand_offset);
 			} else {
 				rm_bench = vdev_raidz_map_alloc(&zio_bench,
 				    BENCH_ASHIFT, ncols, fn+1);
 			}
 			/* estimate iteration count */
 			iter_cnt = GEN_BENCH_MEMORY;
@ -163,8 +172,16 @@ run_rec_bench_impl(const char *impl)
 			    (1ULL << BENCH_ASHIFT))
 				continue;
 			if (rto_opts.rto_expand) {
 				rm_bench = vdev_raidz_map_alloc_expanded(
 				    zio_bench.io_abd,
 				    zio_bench.io_size, zio_bench.io_offset,
 				    BENCH_ASHIFT, ncols+1, ncols,
 				    PARITY_PQR, rto_opts.rto_expand_offset);
 			} else {
 				rm_bench = vdev_raidz_map_alloc(&zio_bench,
 				    BENCH_ASHIFT, ncols, PARITY_PQR);
 			}
 			/* estimate iteration count */
 			iter_cnt = (REC_BENCH_MEMORY);
--- a/cmd/raidz_test/raidz_test.c
+++ b/cmd/raidz_test/raidz_test.c
@ -77,12 +77,16 @@ static void print_opts(raidz_test_opts_t *opts, boolean_t force)
 		(void) fprintf(stdout, DBLSEP "Running with options:\n"
 		    "  (-a) zio ashift                   : %zu\n"
 		    "  (-o) zio offset                   : 1 << %zu\n"
 		    "  (-e) expanded map                 : %s\n"
 		    "  (-r) reflow offset                : %llx\n"
 		    "  (-d) number of raidz data columns : %zu\n"
 		    "  (-s) size of DATA                 : 1 << %zu\n"
 		    "  (-S) sweep parameters             : %s \n"
 		    "  (-v) verbose                      : %s \n\n",
 		    opts->rto_ashift,				/* -a */
 		    ilog2(opts->rto_offset),			/* -o */
 		    opts->rto_expand ? "yes" : "no",		/* -e */
 		    (u_longlong_t)opts->rto_expand_offset,	/* -r */
 		    opts->rto_dcols,				/* -d */
 		    ilog2(opts->rto_dsize),			/* -s */
 		    opts->rto_sweep ? "yes" : "no",		/* -S */
@ -104,6 +108,8 @@ static void usage(boolean_t requested)
 	    "\t[-S parameter sweep (default: %s)]\n"
 	    "\t[-t timeout for parameter sweep test]\n"
 	    "\t[-B benchmark all raidz implementations]\n"
 	    "\t[-e use expanded raidz map (default: %s)]\n"
 	    "\t[-r expanded raidz map reflow offset (default: %llx)]\n"
 	    "\t[-v increase verbosity (default: %zu)]\n"
 	    "\t[-h (print help)]\n"
 	    "\t[-T test the test, see if failure would be detected]\n"
@ -114,6 +120,8 @@ static void usage(boolean_t requested)
 	    o->rto_dcols,				/* -d */
 	    ilog2(o->rto_dsize),			/* -s */
 	    rto_opts.rto_sweep ? "yes" : "no",		/* -S */
 	    rto_opts.rto_expand ? "yes" : "no",		/* -e */
 	    (u_longlong_t)o->rto_expand_offset,		/* -r */
 	    o->rto_v);					/* -d */
 	exit(requested ? 0 : 1);
@ -128,7 +136,7 @@ static void process_options(int argc, char **argv)
 	bcopy(&rto_opts_defaults, o, sizeof (*o));
-	while ((opt = getopt(argc, argv, "TDBSvha:o:d:s:t:")) != -1) {
+	while ((opt = getopt(argc, argv, "TDBSvha:er:o:d:s:t:")) != -1) {
 		value = 0;
 		switch (opt) {
@ -136,6 +144,12 @@ static void process_options(int argc, char **argv)
 			value = strtoull(optarg, NULL, 0);
 			o->rto_ashift = MIN(13, MAX(9, value));
 			break;
 		case 'e':
 			o->rto_expand = 1;
 			break;
 		case 'r':
 			o->rto_expand_offset = strtoull(optarg, NULL, 0);
 			break;
 		case 'o':
 			value = strtoull(optarg, NULL, 0);
 			o->rto_offset = ((1ULL << MIN(12, value)) >> 9) << 9;
@ -179,45 +193,64 @@ static void process_options(int argc, char **argv)
 	}
 }
-#define	DATA_COL(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_abd)
+#define	DATA_COL(rr, i) ((rr)->rr_col[rr->rr_firstdatacol + (i)].rc_abd)
-#define	DATA_COL_SIZE(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_size)
+#define	DATA_COL_SIZE(rr, i) ((rr)->rr_col[rr->rr_firstdatacol + (i)].rc_size)
-#define	CODE_COL(rm, i) ((rm)->rm_col[(i)].rc_abd)
+#define	CODE_COL(rr, i) ((rr)->rr_col[(i)].rc_abd)
-#define	CODE_COL_SIZE(rm, i) ((rm)->rm_col[(i)].rc_size)
+#define	CODE_COL_SIZE(rr, i) ((rr)->rr_col[(i)].rc_size)
 static int
 cmp_code(raidz_test_opts_t *opts, const raidz_map_t *rm, const int parity)
 {
-	int i, ret = 0;
+	int r, i, ret = 0;
 	VERIFY(parity >= 1 && parity <= 3);
 	for (r = 0; r < rm->rm_nrows; r++) {
 		raidz_row_t * const rr = rm->rm_row[r];
 		raidz_row_t * const rrg = opts->rm_golden->rm_row[r];
 		for (i = 0; i < parity; i++) {
-		if (abd_cmp(CODE_COL(rm, i), CODE_COL(opts->rm_golden, i))
+			if (CODE_COL_SIZE(rrg, i) == 0) {
-		    != 0) {
+				VERIFY0(CODE_COL_SIZE(rr, i));
 				continue;
 			}
 			if (abd_cmp(CODE_COL(rr, i),
 			    CODE_COL(rrg, i)) != 0) {
 				ret++;
 				LOG_OPT(D_DEBUG, opts,
 				    "\nParity block [%d] different!\n", i);
 			}
 		}
 	}
 	return (ret);
 }
 static int
 cmp_data(raidz_test_opts_t *opts, raidz_map_t *rm)
 {
-	int i, ret = 0;
+	int r, i, dcols, ret = 0;
 	int dcols = opts->rm_golden->rm_cols - raidz_parity(opts->rm_golden);
 	for (r = 0; r < rm->rm_nrows; r++) {
 		raidz_row_t *rr = rm->rm_row[r];
 		raidz_row_t *rrg = opts->rm_golden->rm_row[r];
 		dcols = opts->rm_golden->rm_row[0]->rr_cols -
 		    raidz_parity(opts->rm_golden);
 		for (i = 0; i < dcols; i++) {
-		if (abd_cmp(DATA_COL(opts->rm_golden, i), DATA_COL(rm, i))
+			if (DATA_COL_SIZE(rrg, i) == 0) {
-		    != 0) {
+				VERIFY0(DATA_COL_SIZE(rr, i));
 				continue;
 			}
 			if (abd_cmp(DATA_COL(rrg, i),
 			    DATA_COL(rr, i)) != 0) {
 				ret++;
 				LOG_OPT(D_DEBUG, opts,
 				    "\nData block [%d] different!\n", i);
 			}
 		}
 	}
 	return (ret);
 }
@ -236,12 +269,13 @@ init_rand(void *data, size_t size, void *private)
 static void
 corrupt_colums(raidz_map_t *rm, const int *tgts, const int cnt)
 {
-	int i;
+	for (int r = 0; r < rm->rm_nrows; r++) {
-	raidz_col_t *col;
+		raidz_row_t *rr = rm->rm_row[r];
-
+		for (int i = 0; i < cnt; i++) {
-	for (i = 0; i < cnt; i++) {
+			raidz_col_t *col = &rr->rr_col[tgts[i]];
-		col = &rm->rm_col[tgts[i]];
+			abd_iterate_func(col->rc_abd, 0, col->rc_size,
-		abd_iterate_func(col->rc_abd, 0, col->rc_size, init_rand, NULL);
+			    init_rand, NULL);
 		}
 	}
 }
@ -288,10 +322,22 @@ init_raidz_golden_map(raidz_test_opts_t *opts, const int parity)
 	VERIFY0(vdev_raidz_impl_set("original"));
 	if (opts->rto_expand) {
 		opts->rm_golden =
 		    vdev_raidz_map_alloc_expanded(opts->zio_golden->io_abd,
 		    opts->zio_golden->io_size, opts->zio_golden->io_offset,
 		    opts->rto_ashift, total_ncols+1, total_ncols,
 		    parity, opts->rto_expand_offset);
 		rm_test = vdev_raidz_map_alloc_expanded(zio_test->io_abd,
 		    zio_test->io_size, zio_test->io_offset,
 		    opts->rto_ashift, total_ncols+1, total_ncols,
 		    parity, opts->rto_expand_offset);
 	} else {
 		opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden,
 		    opts->rto_ashift, total_ncols, parity);
 		rm_test = vdev_raidz_map_alloc(zio_test,
 		    opts->rto_ashift, total_ncols, parity);
 	}
 	VERIFY(opts->zio_golden);
 	VERIFY(opts->rm_golden);
@ -312,6 +358,188 @@ init_raidz_golden_map(raidz_test_opts_t *opts, const int parity)
 	return (err);
 }
 /*
 * Build a multi-row raidz_map_t describing how a block of 'size' bytes at
 * 'offset' is laid out across the children of a raidz vdev, one
 * raidz_row_t per row of 'logical_cols' sectors.
 *
 * If reflow is not in progress, reflow_offset should be UINT64_MAX.
 * For each row, if the row is entirely before reflow_offset, it will
 * come from the new location.  Otherwise this row will come from the
 * old location.  Therefore, rows that straddle the reflow_offset will
 * come from the old location.
 *
 * Parameters:
 *   abd            buffer holding the block's data; each data column
 *                  references one sector of it via abd_get_offset()
 *   size           size of the I/O (converted to sectors with 'ashift')
 *   offset         offset of the I/O on the raidz (parent) vdev
 *   ashift         log2 of the sector size
 *   physical_cols  child count used for rows in the new location; rows
 *                  in the old location use physical_cols - 1
 *   logical_cols   width, in sectors, of one row (data plus parity)
 *   nparity        number of parity columns per row
 *   reflow_offset  boundary between the new and old locations
 *
 * Returns the newly allocated map with rm_ops already initialized.
 *
 * NOTE: Until raidz expansion is implemented this function is only
 * needed by raidz_test.c to test the multi-row raidz_map_t functionality.
 */
 raidz_map_t *
 vdev_raidz_map_alloc_expanded(abd_t *abd, uint64_t size, uint64_t offset,
    uint64_t ashift, uint64_t physical_cols, uint64_t logical_cols,
    uint64_t nparity, uint64_t reflow_offset)
 {
 	/* The zio's size in units of the vdev's minimum sector size. */
 	uint64_t s = size >> ashift;
 	uint64_t q, r, bc, devidx, asize = 0, tot;
 	/*
 	 * "Quotient": The number of data sectors for this stripe on all but
 	 * the "big column" child vdevs that also contain "remainder" data.
 	 * AKA "full rows"
 	 */
 	q = s / (logical_cols - nparity);
 	/*
 	 * "Remainder": The number of partial stripe data sectors in this I/O.
 	 * This will add a sector to some, but not all, child vdevs.
 	 */
 	r = s - q * (logical_cols - nparity);
 	/* The number of "big columns" - those which contain remainder data. */
 	bc = (r == 0 ? 0 : r + nparity);
 	/*
 	 * The total number of data and parity sectors associated with
 	 * this I/O.
 	 */
 	tot = s + nparity * (q + (r == 0 ? 0 : 1));
 	/* How many rows contain data (not skip) */
 	uint64_t rows = howmany(tot, logical_cols);
 	/* Every row is given this width, even a short final row (see below). */
 	int cols = MIN(tot, logical_cols);
 	raidz_map_t *rm = kmem_zalloc(offsetof(raidz_map_t, rm_row[rows]),
 	    KM_SLEEP);
 	rm->rm_nrows = rows;
 	for (uint64_t row = 0; row < rows; row++) {
 		/* Not zeroed: every field used below is assigned explicitly. */
 		raidz_row_t *rr = kmem_alloc(offsetof(raidz_row_t,
 		    rr_col[cols]), KM_SLEEP);
 		rm->rm_row[row] = rr;
 		/* The starting RAIDZ (parent) vdev sector of the row. */
 		uint64_t b = (offset >> ashift) + row * logical_cols;
 		/*
 		 * If we are in the middle of a reflow, and any part of this
 		 * row has not been copied, then use the old location of
 		 * this row.
 		 */
 		int row_phys_cols = physical_cols;
 		if (b + (logical_cols - nparity) > reflow_offset >> ashift)
 			row_phys_cols--;
 		/* starting child of this row */
 		uint64_t child_id = b % row_phys_cols;
 		/* The starting byte offset on each child vdev. */
 		uint64_t child_offset = (b / row_phys_cols) << ashift;
 		/*
 		 * We set cols to the entire width of the block, even
 		 * if this row is shorter.  This is needed because parity
 		 * generation (for Q and R) needs to know the entire width,
 		 * because it treats the short row as though it was
 		 * full-width (and the "phantom" sectors were zero-filled).
 		 *
 		 * Another approach to this would be to set cols shorter
 		 * (to just the number of columns that we might do i/o to)
 		 * and have another mechanism to tell the parity generation
 		 * about the "entire width".  Reconstruction (at least
 		 * vdev_raidz_reconstruct_general()) would also need to
 		 * know about the "entire width".
 		 */
 		rr->rr_cols = cols;
 		rr->rr_bigcols = bc;
 		rr->rr_missingdata = 0;
 		rr->rr_missingparity = 0;
 		rr->rr_firstdatacol = nparity;
 		rr->rr_abd_copy = NULL;
 		rr->rr_abd_empty = NULL;
 		rr->rr_nempty = 0;
 		for (int c = 0; c < rr->rr_cols; c++, child_id++) {
 			/* Wrap to the first child, one sector further in. */
 			if (child_id >= row_phys_cols) {
 				child_id -= row_phys_cols;
 				child_offset += 1ULL << ashift;
 			}
 			rr->rr_col[c].rc_devidx = child_id;
 			rr->rr_col[c].rc_offset = child_offset;
 			rr->rr_col[c].rc_gdata = NULL;
 			rr->rr_col[c].rc_orig_data = NULL;
 			rr->rr_col[c].rc_error = 0;
 			rr->rr_col[c].rc_tried = 0;
 			rr->rr_col[c].rc_skipped = 0;
 			rr->rr_col[c].rc_need_orig_restore = B_FALSE;
 			/*
 			 * Index among the data columns.  Only used in the
 			 * final branch below, where c is a data column.
 			 */
 			uint64_t dc = c - rr->rr_firstdatacol;
 			if (c < rr->rr_firstdatacol) {
 				/* Parity column: one separately allocated sector. */
 				rr->rr_col[c].rc_size = 1ULL << ashift;
 				rr->rr_col[c].rc_abd =
 				    abd_alloc_linear(rr->rr_col[c].rc_size,
 				    B_TRUE);
 			} else if (row == rows - 1 && bc != 0 && c >= bc) {
 				/*
 				 * Past the end of the data in the final row;
 				 * this empty column exists only for parity
 				 * generation.
 				 */
 				rr->rr_col[c].rc_size = 0;
 				rr->rr_col[c].rc_abd = NULL;
 			} else {
 				/*
 				 * "data column" (col excluding parity)
 				 * TODO: add an ASCII art diagram here.
 				 *
 				 * 'off' is the sector index within 'abd' for
 				 * this row of data column 'dc'.  The first r
 				 * data columns (the big columns) are counted
 				 * as 'rows' sectors each, the remaining ones
 				 * as 'rows - 1' sectors each.
 				 */
 				uint64_t off;
 				if (c < bc || r == 0) {
 					off = dc * rows + row;
 				} else {
 					off = r * rows +
 					    (dc - r) * (rows - 1) + row;
 				}
 				rr->rr_col[c].rc_size = 1ULL << ashift;
 				rr->rr_col[c].rc_abd =
 				    abd_get_offset(abd, off << ashift);
 			}
 			/* Accumulated for the sanity check after the loop. */
 			asize += rr->rr_col[c].rc_size;
 		}
 		/*
 		 * If all data stored spans all columns, there's a danger that
 		 * parity will always be on the same device and, since parity
 		 * isn't read during normal operation, that that device's I/O
 		 * bandwidth won't be used effectively. We therefore switch
 		 * the parity every 1MB.
 		 *
 		 * ...at least that was, ostensibly, the theory. As a practical
 		 * matter unless we juggle the parity between all devices
 		 * evenly, we won't see any benefit. Further, occasional writes
 		 * that aren't a multiple of the LCM of the number of children
 		 * and the minimum stripe width are sufficient to avoid pessimal
 		 * behavior. Unfortunately, this decision created an implicit
 		 * on-disk format requirement that we need to support for all
 		 * eternity, but only for single-parity RAID-Z.
 		 *
 		 * If we intend to skip a sector in the zeroth column for
 		 * padding we must make sure to note this swap. We will never
 		 * intend to skip the first column since at least one data and
 		 * one parity column must appear in each row.
 		 */
 		if (rr->rr_firstdatacol == 1 && rr->rr_cols > 1 &&
 		    (offset & (1ULL << 20))) {
 			ASSERT(rr->rr_cols >= 2);
 			ASSERT(rr->rr_col[0].rc_size == rr->rr_col[1].rc_size);
 			/* Swap the device index and offset of columns 0 and 1. */
 			devidx = rr->rr_col[0].rc_devidx;
 			uint64_t o = rr->rr_col[0].rc_offset;
 			rr->rr_col[0].rc_devidx = rr->rr_col[1].rc_devidx;
 			rr->rr_col[0].rc_offset = rr->rr_col[1].rc_offset;
 			rr->rr_col[1].rc_devidx = devidx;
 			rr->rr_col[1].rc_offset = o;
 		}
 	}
 	/* Every data and parity sector must have been assigned to a column. */
 	ASSERT3U(asize, ==, tot << ashift);
 	/* init RAIDZ parity ops */
 	rm->rm_ops = vdev_raidz_math_get_ops();
 	return (rm);
 }
 static raidz_map_t *
 init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity)
 {
@ -330,8 +558,15 @@ init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity)
 	(*zio)->io_abd = raidz_alloc(alloc_dsize);
 	init_zio_abd(*zio);
 	if (opts->rto_expand) {
 		rm = vdev_raidz_map_alloc_expanded((*zio)->io_abd,
 		    (*zio)->io_size, (*zio)->io_offset,
 		    opts->rto_ashift, total_ncols+1, total_ncols,
 		    parity, opts->rto_expand_offset);
 	} else {
 		rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift,
 		    total_ncols, parity);
 	}
 	VERIFY(rm);
 	/* Make sure code columns are destroyed */
@ -420,7 +655,7 @@ run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, const int fn)
 	if (fn < RAIDZ_REC_PQ) {
 		/* can reconstruct 1 failed data disk */
 		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
-			if (x0 >= rm->rm_cols - raidz_parity(rm))
+			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
 				continue;
 			/* Check if should stop */
@ -445,10 +680,11 @@ run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, const int fn)
 	} else if (fn < RAIDZ_REC_PQR) {
 		/* can reconstruct 2 failed data disk */
 		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
-			if (x0 >= rm->rm_cols - raidz_parity(rm))
+			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
 				continue;
 			for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
-				if (x1 >= rm->rm_cols - raidz_parity(rm))
+				if (x1 >= rm->rm_row[0]->rr_cols -
 				    raidz_parity(rm))
 					continue;
 				/* Check if should stop */
@ -475,14 +711,15 @@ run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, const int fn)
 	} else {
 		/* can reconstruct 3 failed data disk */
 		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
-			if (x0 >= rm->rm_cols - raidz_parity(rm))
+			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
 				continue;
 			for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
-				if (x1 >= rm->rm_cols - raidz_parity(rm))
+				if (x1 >= rm->rm_row[0]->rr_cols -
 				    raidz_parity(rm))
 					continue;
 				for (x2 = x1 + 1; x2 < opts->rto_dcols; x2++) {
-					if (x2 >=
+					if (x2 >= rm->rm_row[0]->rr_cols -
-					    rm->rm_cols - raidz_parity(rm))
+					    raidz_parity(rm))
 						continue;
 					/* Check if should stop */
@ -700,6 +937,8 @@ run_sweep(void)
 		opts->rto_dcols = dcols_v[d];
 		opts->rto_offset = (1 << ashift_v[a]) * rand();
 		opts->rto_dsize = size_v[s];
 		opts->rto_expand = rto_opts.rto_expand;
 		opts->rto_expand_offset = rto_opts.rto_expand_offset;
 		opts->rto_v = 0; /* be quiet */
 		VERIFY3P(thread_create(NULL, 0, sweep_thread, (void *) opts,
@ -732,6 +971,7 @@ exit:
 	return (sweep_state == SWEEP_ERROR ? SWEEP_ERROR : 0);
 }
 int
 main(int argc, char **argv)
 {
--- a/cmd/raidz_test/raidz_test.h
+++ b/cmd/raidz_test/raidz_test.h
@ -44,13 +44,15 @@ static const char *raidz_impl_names[] = {
 typedef struct raidz_test_opts {
 	size_t rto_ashift;
-	size_t rto_offset;
+	uint64_t rto_offset;
 	size_t rto_dcols;
 	size_t rto_dsize;
 	size_t rto_v;
 	size_t rto_sweep;
 	size_t rto_sweep_timeout;
 	size_t rto_benchmark;
 	size_t rto_expand;
 	uint64_t rto_expand_offset;
 	size_t rto_sanity;
 	size_t rto_gdb;
@ -69,6 +71,8 @@ static const raidz_test_opts_t rto_opts_defaults = {
 	.rto_v = 0,
 	.rto_sweep = 0,
 	.rto_benchmark = 0,
 	.rto_expand = 0,
 	.rto_expand_offset = -1ULL,
 	.rto_sanity = 0,
 	.rto_gdb = 0,
 	.rto_should_stop = B_FALSE
@ -113,4 +117,7 @@ void init_zio_abd(zio_t *zio);
 void run_raidz_benchmark(void);
 struct raidz_map *vdev_raidz_map_alloc_expanded(abd_t *, uint64_t, uint64_t,
    uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
 #endif /* RAIDZ_TEST_H */
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@ -1642,7 +1642,11 @@ dump_metaslab(metaslab_t *msp)
 		    SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
 	}
-	ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift));
+	if (vd->vdev_ops == &vdev_draid_ops)
 		ASSERT3U(msp->ms_size, <=, 1ULL << vd->vdev_ms_shift);
 	else
 		ASSERT3U(msp->ms_size, ==, 1ULL << vd->vdev_ms_shift);
 	dump_spacemap(spa->spa_meta_objset, msp->ms_sm);
 	if (spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) {
@ -5203,8 +5207,6 @@ zdb_blkptr_done(zio_t *zio)
 	zdb_cb_t *zcb = zio->io_private;
 	zbookmark_phys_t *zb = &zio->io_bookmark;
 	abd_free(zio->io_abd);
 	mutex_enter(&spa->spa_scrub_lock);
 	spa->spa_load_verify_bytes -= BP_GET_PSIZE(bp);
 	cv_broadcast(&spa->spa_scrub_io_cv);
@ -5231,6 +5233,8 @@ zdb_blkptr_done(zio_t *zio)
 		    blkbuf);
 	}
 	mutex_exit(&spa->spa_scrub_lock);
 	abd_free(zio->io_abd);
 }
 static int
--- a/cmd/zed/agents/zfs_mod.c
+++ b/cmd/zed/agents/zfs_mod.c
@ -435,7 +435,15 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
 		return;
 	}
-	ret = zpool_vdev_attach(zhp, fullpath, path, nvroot, B_TRUE, B_FALSE);
+	/*
 	 * Prefer sequential resilvering when supported (mirrors and dRAID),
 	 * otherwise fallback to a traditional healing resilver.
 	 */
 	ret = zpool_vdev_attach(zhp, fullpath, path, nvroot, B_TRUE, B_TRUE);
 	if (ret != 0) {
 		ret = zpool_vdev_attach(zhp, fullpath, path, nvroot,
 		    B_TRUE, B_FALSE);
 	}
 	zed_log_msg(LOG_INFO, "  zpool_vdev_replace: %s with %s (%s)",
 	    fullpath, path, (ret == 0) ? "no errors" :
--- a/cmd/zed/agents/zfs_retire.c
+++ b/cmd/zed/agents/zfs_retire.c
@ -219,12 +219,18 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
 	 * replace it.
 	 */
 	for (s = 0; s < nspares; s++) {
-		char *spare_name;
+		boolean_t rebuild = B_FALSE;
 		char *spare_name, *type;
 		if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
 		    &spare_name) != 0)
 			continue;
 		/* prefer sequential resilvering for distributed spares */
 		if ((nvlist_lookup_string(spares[s], ZPOOL_CONFIG_TYPE,
 		    &type) == 0) && strcmp(type, VDEV_TYPE_DRAID_SPARE) == 0)
 			rebuild = B_TRUE;
 		/* if set, add the "ashift" pool property to the spare nvlist */
 		if (source != ZPROP_SRC_DEFAULT)
 			(void) nvlist_add_uint64(spares[s],
@ -237,7 +243,7 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
 		    dev_name, basename(spare_name));
 		if (zpool_vdev_attach(zhp, dev_name, spare_name,
-		    replacement, B_TRUE, B_FALSE) == 0) {
+		    replacement, B_TRUE, rebuild) == 0) {
 			free(dev_name);
 			nvlist_free(replacement);
 			return (B_TRUE);
@ -499,6 +505,7 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
 		 * Attempt to substitute a hot spare.
 		 */
 		(void) replace_with_spare(hdl, zhp, vdev);
 		zpool_close(zhp);
 	}
--- a/cmd/zfs/zfs_main.c
+++ b/cmd/zfs/zfs_main.c
@ -892,6 +892,107 @@ usage:
 	return (-1);
 }
 /*
 * Return a default volblocksize for the pool which always uses more than
 * half of the data sectors.  This primarily applies to dRAID which always
 * writes full stripe widths.
 *
 * zhp   - handle of the pool the volume is being created in
 * props - properties for the new volume
 *
 * When 'props' already contains a volblocksize that value is returned
 * unchanged, after warning on stderr if it is smaller than the computed
 * target.  Otherwise the target is added to 'props' and returned.  If the
 * pool's vdev tree cannot be read, ZVOL_DEFAULT_BLOCKSIZE is returned and
 * 'props' is left untouched.
 */
 static uint64_t
 default_volblocksize(zpool_handle_t *zhp, nvlist_t *props)
 {
 	uint64_t volblocksize, asize = SPA_MINBLOCKSIZE;
 	nvlist_t *tree, **vdevs;
 	uint_t nvdevs;
 	nvlist_t *config = zpool_get_config(zhp, NULL);
 	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree) != 0 ||
 	    nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN,
 	    &vdevs, &nvdevs) != 0) {
 		return (ZVOL_DEFAULT_BLOCKSIZE);
 	}
 	/*
 	 * Find the largest minimum allocation width over the top-level
 	 * vdevs.  The index is unsigned to match the type of nvdevs.
 	 */
 	for (uint_t i = 0; i < nvdevs; i++) {
 		nvlist_t *nv = vdevs[i];
 		uint64_t ashift, ndata, nparity;
 		/* Entries without an ashift do not contribute. */
 		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT, &ashift) != 0)
 			continue;
 		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DRAID_NDATA,
 		    &ndata) == 0) {
 			/* dRAID minimum allocation width */
 			asize = MAX(asize, ndata * (1ULL << ashift));
 		} else if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
 		    &nparity) == 0) {
 			/* raidz minimum allocation width */
 			if (nparity == 1)
 				asize = MAX(asize, 2 * (1ULL << ashift));
 			else
 				asize = MAX(asize, 4 * (1ULL << ashift));
 		} else {
 			/* mirror or (non-redundant) leaf vdev */
 			asize = MAX(asize, 1ULL << ashift);
 		}
 	}
 	/*
 	 * Calculate the target volblocksize such that more than half
 	 * of the asize is used. The following table is for 4k sectors.
 	 *
 	 * n   asize   blksz  used  |   n   asize   blksz  used
 	 * -------------------------+---------------------------------
 	 * 1   4,096   8,192  100%  |   9  36,864  32,768   88%
 	 * 2   8,192   8,192  100%  |  10  40,960  32,768   80%
 	 * 3  12,288   8,192   66%  |  11  45,056  32,768   72%
 	 * 4  16,384  16,384  100%  |  12  49,152  32,768   66%
 	 * 5  20,480  16,384   80%  |  13  53,248  32,768   61%
 	 * 6  24,576  16,384   66%  |  14  57,344  32,768   57%
 	 * 7  28,672  16,384   57%  |  15  61,440  32,768   53%
 	 * 8  32,768  32,768  100%  |  16  65,536  65,536  100%
 	 *
 	 * This is primarily a concern for dRAID which always allocates
 	 * a full stripe width.  For dRAID the default stripe width is
 	 * n=8 in which case the volblocksize is set to 32k. Ignoring
 	 * compression there are no unused sectors.  This same reasoning
 	 * applies to raidz[2,3] so target 4 sectors to minimize waste.
 	 */
 	uint64_t tgt_volblocksize = ZVOL_DEFAULT_BLOCKSIZE;
 	/* Keep doubling from the default while the result still fits asize. */
 	while (tgt_volblocksize * 2 <= asize)
 		tgt_volblocksize *= 2;
 	const char *prop = zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE);
 	if (nvlist_lookup_uint64(props, prop, &volblocksize) == 0) {
 		/* Issue a warning when a non-optimal size is requested. */
 		if (volblocksize < ZVOL_DEFAULT_BLOCKSIZE) {
 			(void) fprintf(stderr, gettext("Warning: "
 			    "volblocksize (%llu) is less than the default "
 			    "minimum block size (%llu).\nTo reduce wasted "
 			    "space a volblocksize of %llu is recommended.\n"),
 			    (u_longlong_t)volblocksize,
 			    (u_longlong_t)ZVOL_DEFAULT_BLOCKSIZE,
 			    (u_longlong_t)tgt_volblocksize);
 		} else if (volblocksize < tgt_volblocksize) {
 			/*
 			 * tgt_volblocksize never exceeds asize, so the
 			 * subtraction below cannot wrap.
 			 */
 			(void) fprintf(stderr, gettext("Warning: "
 			    "volblocksize (%llu) is much less than the "
 			    "minimum allocation\nunit (%llu), which wastes "
 			    "at least %llu%% of space. To reduce wasted "
 			    "space,\nuse a larger volblocksize (%llu is "
 			    "recommended), fewer dRAID data disks\n"
 			    "per group, or smaller sector size (ashift).\n"),
 			    (u_longlong_t)volblocksize, (u_longlong_t)asize,
 			    (u_longlong_t)((100 * (asize - volblocksize)) /
 			    asize), (u_longlong_t)tgt_volblocksize);
 		}
 	} else {
 		/* Nothing requested: record the computed target in props. */
 		volblocksize = tgt_volblocksize;
 		fnvlist_add_uint64(props, prop, volblocksize);
 	}
 	return (volblocksize);
 }
 /*
 * zfs create [-Pnpv] [-o prop=value] ... fs
 * zfs create [-Pnpsv] [-b blocksize] [-o prop=value] ... -V vol size
@ -932,6 +1033,7 @@ zfs_do_create(int argc, char **argv)
 	int ret = 1;
 	nvlist_t *props;
 	uint64_t intval;
 	char *strval;
 	if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0)
 		nomem();
@ -1018,7 +1120,7 @@ zfs_do_create(int argc, char **argv)
 		goto badusage;
 	}
-	if (dryrun || (type == ZFS_TYPE_VOLUME && !noreserve)) {
+	if (dryrun || type == ZFS_TYPE_VOLUME) {
 		char msg[ZFS_MAX_DATASET_NAME_LEN * 2];
 		char *p;
@ -1040,18 +1142,24 @@ zfs_do_create(int argc, char **argv)
 		}
 	}
 	/*
 	 * if volsize is not a multiple of volblocksize, round it up to the
 	 * nearest multiple of the volblocksize
 	 */
 	if (type == ZFS_TYPE_VOLUME) {
-		uint64_t volblocksize;
+		const char *prop = zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE);
 		uint64_t volblocksize = default_volblocksize(zpool_handle,
 		    real_props);
-		if (nvlist_lookup_uint64(props,
+		if (volblocksize != ZVOL_DEFAULT_BLOCKSIZE &&
-		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
+		    nvlist_lookup_string(props, prop, &strval) != 0) {
-		    &volblocksize) != 0)
+			if (asprintf(&strval, "%llu",
-			volblocksize = ZVOL_DEFAULT_BLOCKSIZE;
+			    (u_longlong_t)volblocksize) == -1)
 				nomem();
 			nvlist_add_string(props, prop, strval);
 			free(strval);
 		}
 		/*
 		 * If volsize is not a multiple of volblocksize, round it
 		 * up to the nearest multiple of the volblocksize.
 		 */
 		if (volsize % volblocksize) {
 			volsize = P2ROUNDUP_TYPED(volsize, volblocksize,
 			    uint64_t);
@ -1064,11 +1172,9 @@ zfs_do_create(int argc, char **argv)
 		}
 	}
 	if (type == ZFS_TYPE_VOLUME && !noreserve) {
 		uint64_t spa_version;
 		zfs_prop_t resv_prop;
 		char *strval;
 		spa_version = zpool_get_prop_int(zpool_handle,
 		    ZPOOL_PROP_VERSION, NULL);
--- a/cmd/zpool/zpool_main.c
+++ b/cmd/zpool/zpool_main.c
@ -2294,7 +2294,7 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
 		}
 	}
-	/* Display vdev initialization and trim status for leaves */
+	/* Display vdev initialization and trim status for leaves. */
 	if (children == 0) {
 		print_status_initialize(vs, cb->cb_print_vdev_init);
 		print_status_trim(vs, cb->cb_print_vdev_trim);
@ -9849,7 +9849,8 @@ vdev_any_spare_replacing(nvlist_t *nv)
 	(void) nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &vdev_type);
 	if (strcmp(vdev_type, VDEV_TYPE_REPLACING) == 0 ||
-	    strcmp(vdev_type, VDEV_TYPE_SPARE) == 0) {
+	    strcmp(vdev_type, VDEV_TYPE_SPARE) == 0 ||
 	    strcmp(vdev_type, VDEV_TYPE_DRAID_SPARE) == 0) {
 		return (B_TRUE);
 	}
--- a/cmd/zpool/zpool_vdev.c
+++ b/cmd/zpool/zpool_vdev.c
@ -86,9 +86,6 @@
 boolean_t error_seen;
 boolean_t is_force;
 /*PRINTFLIKE1*/
 void
 vdev_error(const char *fmt, ...)
@ -222,6 +219,9 @@ is_spare(nvlist_t *config, const char *path)
 	uint_t i, nspares;
 	boolean_t inuse;
 	if (zpool_is_draid_spare(path))
 		return (B_TRUE);
 	if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0)
 		return (B_FALSE);
@ -267,9 +267,10 @@ is_spare(nvlist_t *config, const char *path)
 *	/dev/xxx	Complete disk path
 *	/xxx		Full path to file
 *	xxx		Shorthand for <zfs_vdev_paths>/xxx
 *	draid*		Virtual dRAID spare
 */
 static nvlist_t *
-make_leaf_vdev(nvlist_t *props, const char *arg, uint64_t is_log)
+make_leaf_vdev(nvlist_t *props, const char *arg, boolean_t is_primary)
 {
 	char path[MAXPATHLEN];
 	struct stat64 statbuf;
@ -309,6 +310,17 @@ make_leaf_vdev(nvlist_t *props, const char *arg, uint64_t is_log)
 		/* After whole disk check restore original passed path */
 		strlcpy(path, arg, sizeof (path));
 	} else if (zpool_is_draid_spare(arg)) {
 		if (!is_primary) {
 			(void) fprintf(stderr,
 			    gettext("cannot open '%s': dRAID spares can only "
 			    "be used to replace primary vdevs\n"), arg);
 			return (NULL);
 		}
 		wholedisk = B_TRUE;
 		strlcpy(path, arg, sizeof (path));
 		type = VDEV_TYPE_DRAID_SPARE;
 	} else {
 		err = is_shorthand_path(arg, path, sizeof (path),
 		    &statbuf, &wholedisk);
@ -337,6 +349,7 @@ make_leaf_vdev(nvlist_t *props, const char *arg, uint64_t is_log)
 		}
 	}
 	if (type == NULL) {
 		/*
 		 * Determine whether this is a device or a file.
 		 */
@ -345,10 +358,11 @@ make_leaf_vdev(nvlist_t *props, const char *arg, uint64_t is_log)
 		} else if (S_ISREG(statbuf.st_mode)) {
 			type = VDEV_TYPE_FILE;
 		} else {
-		(void) fprintf(stderr, gettext("cannot use '%s': must be a "
+			fprintf(stderr, gettext("cannot use '%s': must "
-		    "block device or regular file\n"), path);
+			    "be a block device or regular file\n"), path);
 			return (NULL);
 		}
 	}
 	/*
 	 * Finally, we have the complete device or file, and we know that it is
@ -358,10 +372,7 @@ make_leaf_vdev(nvlist_t *props, const char *arg, uint64_t is_log)
 	verify(nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) == 0);
 	verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0);
 	verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0);
-	verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_LOG, is_log) == 0);
+
 	if (is_log)
 		verify(nvlist_add_string(vdev, ZPOOL_CONFIG_ALLOCATION_BIAS,
 		    VDEV_ALLOC_BIAS_LOG) == 0);
 	if (strcmp(type, VDEV_TYPE_DISK) == 0)
 		verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
 		    (uint64_t)wholedisk) == 0);
@ -432,11 +443,16 @@ typedef struct replication_level {
 #define	ZPOOL_FUZZ	(16 * 1024 * 1024)
 /*
 * N.B. For the purposes of comparing replication levels dRAID can be
 * considered functionally equivalent to raidz.
 */
 static boolean_t
 is_raidz_mirror(replication_level_t *a, replication_level_t *b,
    replication_level_t **raidz, replication_level_t **mirror)
 {
-	if (strcmp(a->zprl_type, "raidz") == 0 &&
+	if ((strcmp(a->zprl_type, "raidz") == 0 ||
 	    strcmp(a->zprl_type, "draid") == 0) &&
 	    strcmp(b->zprl_type, "mirror") == 0) {
 		*raidz = a;
 		*mirror = b;
@ -445,6 +461,22 @@ is_raidz_mirror(replication_level_t *a, replication_level_t *b,
 	return (B_FALSE);
 }
 /*
 * Report whether both replication levels are of a parity RAID type, i.e.
 * each of 'a' and 'b' has a zprl_type of either "raidz" or "draid".  The
 * two levels may be supplied in either order.
 *
 * Returns B_TRUE when both match, B_FALSE otherwise.
 */
 static boolean_t
 is_raidz_draid(replication_level_t *a, replication_level_t *b)
 {
 	/* 'a' is inspected first; 'b' is only looked at when 'a' qualifies */
 	if (strcmp(a->zprl_type, "raidz") != 0 &&
 	    strcmp(a->zprl_type, "draid") != 0)
 		return (B_FALSE);
 	if (strcmp(b->zprl_type, "raidz") != 0 &&
 	    strcmp(b->zprl_type, "draid") != 0)
 		return (B_FALSE);
 	return (B_TRUE);
 }
 /*
 * Given a list of toplevel vdevs, return the current replication level.  If
 * the config is inconsistent, then NULL is returned.  If 'fatal' is set, then
@ -511,7 +543,8 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
 			rep.zprl_type = type;
 			rep.zprl_children = 0;
-			if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
+			if (strcmp(type, VDEV_TYPE_RAIDZ) == 0 ||
 			    strcmp(type, VDEV_TYPE_DRAID) == 0) {
 				verify(nvlist_lookup_uint64(nv,
 				    ZPOOL_CONFIG_NPARITY,
 				    &rep.zprl_parity) == 0);
@ -677,6 +710,29 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
 					else
 						return (NULL);
 				}
 			} else if (is_raidz_draid(&lastrep, &rep)) {
 				/*
 				 * Accepted raidz and draid when they can
 				 * handle the same number of disk failures.
 				 */
 				if (lastrep.zprl_parity != rep.zprl_parity) {
 					if (ret != NULL)
 						free(ret);
 					ret = NULL;
 					if (fatal)
 						vdev_error(gettext(
 						    "mismatched replication "
 						    "level: %s and %s vdevs "
 						    "with different "
 						    "redundancy, %llu vs. "
 						    "%llu are present\n"),
 						    lastrep.zprl_type,
 						    rep.zprl_type,
 						    lastrep.zprl_parity,
 						    rep.zprl_parity);
 					else
 						return (NULL);
 				}
 			} else if (strcmp(lastrep.zprl_type, rep.zprl_type) !=
 			    0) {
 				if (ret != NULL)
@ -1103,31 +1159,87 @@ is_device_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force,
 	return (anyinuse);
 }
 /*
 * Extract the parity level from a raidz or draid type string.
 *
 * The parity is the decimal number which immediately follows the
 * VDEV_TYPE_RAIDZ or VDEV_TYPE_DRAID prefix.  When nothing follows the
 * prefix (or, for draid only, when a ':' follows it) single parity is
 * assumed.  For raidz the number must end the string; for draid it may
 * also be followed by ':'.
 *
 * Returns the parity level, or zero when 'type' has neither prefix, the
 * number starts with '0', or it falls outside 1..VDEV_RAIDZ_MAXPARITY
 * (raidz) or 1..VDEV_DRAID_MAXPARITY (draid).
 */
 static int
 get_parity(const char *type)
 {
 	const char *suffix;
 	char *end;
 	long level, max_level;
 	int colon_ok;
 	/* Select the per-type rules, raidz is tested before draid. */
 	if (strncmp(type, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0) {
 		suffix = type + strlen(VDEV_TYPE_RAIDZ);
 		max_level = VDEV_RAIDZ_MAXPARITY;
 		colon_ok = 0;
 	} else if (strncmp(type, VDEV_TYPE_DRAID,
 	    strlen(VDEV_TYPE_DRAID)) == 0) {
 		suffix = type + strlen(VDEV_TYPE_DRAID);
 		max_level = VDEV_DRAID_MAXPARITY;
 		colon_ok = 1;
 	} else {
 		return (0);
 	}
 	/* when unspecified default to single parity */
 	if (*suffix == '\0' || (colon_ok && *suffix == ':'))
 		return (1);
 	/* no zero prefixes allowed */
 	if (*suffix == '0')
 		return (0);
 	errno = 0;
 	level = strtol(suffix, &end, 10);
 	if (errno != 0 || level < 1 || level > max_level)
 		return (0);
 	/* only the end of string, or ':' for draid, may follow the number */
 	if (*end != '\0' && !(colon_ok && *end == ':'))
 		return (0);
 	return ((int)level);
 }
 /*
 * Assign the minimum and maximum number of devices allowed for
 * the specified type.  On error NULL is returned, otherwise the
 * type prefix is returned (raidz, mirror, etc).
 */
 static const char *
 is_grouping(const char *type, int *mindev, int *maxdev)
 {
-	if (strncmp(type, "raidz", 5) == 0) {
+	int nparity;
 		const char *p = type + 5;
 		char *end;
 		long nparity;
-		if (*p == '\0') {
+	if (strncmp(type, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 ||
-			nparity = 1;
+	    strncmp(type, VDEV_TYPE_DRAID, strlen(VDEV_TYPE_DRAID)) == 0) {
-		} else if (*p == '0') {
+		nparity = get_parity(type);
-			return (NULL); /* no zero prefixes allowed */
+		if (nparity == 0)
 		} else {
 			errno = 0;
 			nparity = strtol(p, &end, 10);
 			if (errno != 0 || nparity < 1 || nparity >= 255 ||
 			    *end != '\0')
 			return (NULL);
 		}
 		if (mindev != NULL)
 			*mindev = nparity + 1;
 		if (maxdev != NULL)
 			*maxdev = 255;
 		if (strncmp(type, VDEV_TYPE_RAIDZ,
 		    strlen(VDEV_TYPE_RAIDZ)) == 0) {
 			return (VDEV_TYPE_RAIDZ);
 		} else {
 			return (VDEV_TYPE_DRAID);
 		}
 	}
 	if (maxdev != NULL)
@ -1167,6 +1279,163 @@ is_grouping(const char *type, int *mindev, int *maxdev)
 	return (NULL);
 }
 /*
 * Extract the configuration parameters encoded in the dRAID type and
 * use them to generate a dRAID configuration.  The expected format is:
 *
 * draid[<parity>][:<data><d|D>][:<children><c|C>][:<spares><s|S>]
 *
 * The intent is to be able to generate a good configuration when no
 * additional information is provided.  The only mandatory component
 * of the 'type' is the 'draid' prefix.  If a value is not provided
 * then reasonable defaults are used.  The optional components may
 * appear in any order but the d/s/c suffix is required.
 *
 * Valid inputs:
 * - data:     number of data devices per group (1-255)
 * - parity:   number of parity blocks per group (1-3)
 * - spares:   number of distributed spare (0-100)
 * - children: total number of devices (1-255)
 *
 * Examples:
 * - zpool create tank draid <devices...>
 * - zpool create tank draid2:8d:51c:2s <devices...>
 *
 * Arguments:
 * - nv:       nvlist which receives the resulting configuration
 * - type:     the full dRAID type string as described above
 * - children: number of child vdevs actually provided for this vdev
 *
 * On success zero is returned and ZPOOL_CONFIG_NPARITY,
 * ZPOOL_CONFIG_DRAID_NDATA, ZPOOL_CONFIG_DRAID_NSPARES and
 * ZPOOL_CONFIG_DRAID_NGROUPS are added to 'nv'.  On any invalid input
 * EINVAL is returned and 'nv' is left unmodified; except for a missing
 * 'draid' prefix or an unparsable parity a message is printed to stderr.
 */
 static int
 draid_config_by_type(nvlist_t *nv, const char *type, uint64_t children)
 {
 	uint64_t nparity = 1;
 	uint64_t nspares = 0;
 	uint64_t ndata = UINT64_MAX;
 	uint64_t ngroups = 1;
 	long value;
 	if (strncmp(type, VDEV_TYPE_DRAID, strlen(VDEV_TYPE_DRAID)) != 0)
 		return (EINVAL);
 	nparity = (uint64_t)get_parity(type);
 	if (nparity == 0)
 		return (EINVAL);
 	/* Walk each ':' separated component; 'type' itself is never modified */
 	const char *p = type;
 	while ((p = strchr(p, ':')) != NULL) {
 		char *end;
 		p = p + 1;
 		errno = 0;
 		/*
 		 * Requiring a leading digit also rejects the sign and
 		 * whitespace which strtol() would otherwise accept.  The
 		 * cast avoids undefined behavior for negative char values.
 		 */
 		if (!isdigit((unsigned char)p[0])) {
 			(void) fprintf(stderr, gettext("invalid dRAID "
 			    "syntax; expected [:<number><c|d|s>] not '%s'\n"),
 			    type);
 			return (EINVAL);
 		}
 		/* Expected a number followed by a c/d/s suffix */
 		value = strtol(p, &end, 10);
 		char suffix = tolower((unsigned char)*end);
 		if (errno != 0 ||
 		    (suffix != 'c' && suffix != 'd' && suffix != 's')) {
 			(void) fprintf(stderr, gettext("invalid dRAID "
 			    "syntax; expected [:<number><c|d|s>] not '%s'\n"),
 			    type);
 			return (EINVAL);
 		}
 		if (suffix == 'c') {
 			/* The 'c' value only cross-checks the device count */
 			if ((uint64_t)value != children) {
 				fprintf(stderr,
 				    gettext("invalid number of dRAID children; "
 				    "%llu required but %llu provided\n"),
 				    (u_longlong_t)value,
 				    (u_longlong_t)children);
 				return (EINVAL);
 			}
 		} else if (suffix == 'd') {
 			ndata = (uint64_t)value;
 		} else if (suffix == 's') {
 			nspares = (uint64_t)value;
 		} else {
 			verify(0); /* Unreachable */
 		}
 	}
 	/*
 	 * When a specific number of data disks is not provided limit a
 	 * redundancy group to 8 data disks.  This value was selected to
 	 * provide a reasonable tradeoff between capacity and performance.
 	 */
 	if (ndata == UINT64_MAX) {
 		if (children > nspares + nparity) {
 			ndata = MIN(children - nspares - nparity, 8);
 		} else {
 			fprintf(stderr, gettext("request number of "
 			    "distributed spares %llu and parity level %llu\n"
 			    "leaves no disks available for data\n"),
 			    (u_longlong_t)nspares, (u_longlong_t)nparity);
 			return (EINVAL);
 		}
 	}
 	/* Verify the maximum allowed group size is never exceeded. */
 	if (ndata == 0 || (ndata + nparity > children - nspares)) {
 		fprintf(stderr, gettext("requested number of dRAID data "
 		    "disks per group %llu is too high,\nat most %llu disks "
 		    "are available for data\n"), (u_longlong_t)ndata,
 		    (u_longlong_t)(children - nspares - nparity));
 		return (EINVAL);
 	}
 	if (nparity == 0 || nparity > VDEV_DRAID_MAXPARITY) {
 		fprintf(stderr,
 		    gettext("invalid dRAID parity level %llu; must be "
 		    "between 1 and %d\n"), (u_longlong_t)nparity,
 		    VDEV_DRAID_MAXPARITY);
 		return (EINVAL);
 	}
 	/*
 	 * Verify the requested number of spares can be satisfied.
 	 * An arbitrary limit of 100 distributed spares is applied.
 	 */
 	if (nspares > 100 || nspares > (children - (ndata + nparity))) {
 		fprintf(stderr,
 		    gettext("invalid number of dRAID spares %llu; additional "
 		    "disks would be required\n"), (u_longlong_t)nspares);
 		return (EINVAL);
 	}
 	/*
 	 * Verify the requested number children is sufficient.  The unsigned
 	 * subtractions in the checks above wrap when more spares than
 	 * children were requested, so this comparison (which cannot wrap)
 	 * is what guarantees children - nspares is a sane, non-zero value
 	 * by the time it is used as a modulus below.  It must fail the
 	 * request rather than only warn.
 	 */
 	if (children < (ndata + nparity + nspares)) {
 		fprintf(stderr, gettext("%llu disks were provided, but at "
 		    "least %llu disks are required for this config\n"),
 		    (u_longlong_t)children,
 		    (u_longlong_t)(ndata + nparity + nspares));
 		return (EINVAL);
 	}
 	if (children > VDEV_DRAID_MAX_CHILDREN) {
 		fprintf(stderr, gettext("%llu disks were provided, but "
 		    "dRAID only supports up to %u disks\n"),
 		    (u_longlong_t)children, VDEV_DRAID_MAX_CHILDREN);
 		return (EINVAL);
 	}
 	/*
 	 * Calculate the minimum number of groups required to fill a slice:
 	 * the smallest ngroups for which ngroups stripes of width
 	 * (ndata + nparity) are an exact multiple of the number of
 	 * non-spare drives (children - nspares).
 	 */
 	while (ngroups * (ndata + nparity) % (children - nspares) != 0)
 		ngroups++;
 	/* Store the basic dRAID configuration. */
 	fnvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, nparity);
 	fnvlist_add_uint64(nv, ZPOOL_CONFIG_DRAID_NDATA, ndata);
 	fnvlist_add_uint64(nv, ZPOOL_CONFIG_DRAID_NSPARES, nspares);
 	fnvlist_add_uint64(nv, ZPOOL_CONFIG_DRAID_NGROUPS, ngroups);
 	return (0);
 }
 /*
 * Construct a syntactically valid vdev specification,
 * and ensure that all devices and files exist and can be opened.
@ -1178,8 +1447,8 @@ construct_spec(nvlist_t *props, int argc, char **argv)
 {
 	nvlist_t *nvroot, *nv, **top, **spares, **l2cache;
 	int t, toplevels, mindev, maxdev, nspares, nlogs, nl2cache;
-	const char *type;
+	const char *type, *fulltype;
-	uint64_t is_log, is_special, is_dedup;
+	boolean_t is_log, is_special, is_dedup, is_spare;
 	boolean_t seen_logs;
 	top = NULL;
@ -1189,18 +1458,20 @@ construct_spec(nvlist_t *props, int argc, char **argv)
 	nspares = 0;
 	nlogs = 0;
 	nl2cache = 0;
-	is_log = is_special = is_dedup = B_FALSE;
+	is_log = is_special = is_dedup = is_spare = B_FALSE;
 	seen_logs = B_FALSE;
 	nvroot = NULL;
 	while (argc > 0) {
 		fulltype = argv[0];
 		nv = NULL;
 		/*
-		 * If it's a mirror or raidz, the subsequent arguments are
+		 * If it's a mirror, raidz, or draid the subsequent arguments
-		 * its leaves -- until we encounter the next mirror or raidz.
+		 * are its leaves -- until we encounter the next mirror,
 		 * raidz or draid.
 		 */
-		if ((type = is_grouping(argv[0], &mindev, &maxdev)) != NULL) {
+		if ((type = is_grouping(fulltype, &mindev, &maxdev)) != NULL) {
 			nvlist_t **child = NULL;
 			int c, children = 0;
@ -1212,6 +1483,7 @@ construct_spec(nvlist_t *props, int argc, char **argv)
 					    "specified only once\n"));
 					goto spec_out;
 				}
 				is_spare = B_TRUE;
 				is_log = is_special = is_dedup = B_FALSE;
 			}
@ -1225,8 +1497,7 @@ construct_spec(nvlist_t *props, int argc, char **argv)
 				}
 				seen_logs = B_TRUE;
 				is_log = B_TRUE;
-				is_special = B_FALSE;
+				is_special = is_dedup = is_spare = B_FALSE;
 				is_dedup = B_FALSE;
 				argc--;
 				argv++;
 				/*
@ -1238,8 +1509,7 @@ construct_spec(nvlist_t *props, int argc, char **argv)
 			if (strcmp(type, VDEV_ALLOC_BIAS_SPECIAL) == 0) {
 				is_special = B_TRUE;
-				is_log = B_FALSE;
+				is_log = is_dedup = is_spare = B_FALSE;
 				is_dedup = B_FALSE;
 				argc--;
 				argv++;
 				continue;
@ -1247,8 +1517,7 @@ construct_spec(nvlist_t *props, int argc, char **argv)
 			if (strcmp(type, VDEV_ALLOC_BIAS_DEDUP) == 0) {
 				is_dedup = B_TRUE;
-				is_log = B_FALSE;
+				is_log = is_special = is_spare = B_FALSE;
 				is_special = B_FALSE;
 				argc--;
 				argv++;
 				continue;
@ -1262,7 +1531,8 @@ construct_spec(nvlist_t *props, int argc, char **argv)
 					    "specified only once\n"));
 					goto spec_out;
 				}
-				is_log = is_special = is_dedup = B_FALSE;
+				is_log = is_special = B_FALSE;
 				is_dedup = is_spare = B_FALSE;
 			}
 			if (is_log || is_special || is_dedup) {
@ -1280,13 +1550,15 @@ construct_spec(nvlist_t *props, int argc, char **argv)
 			for (c = 1; c < argc; c++) {
 				if (is_grouping(argv[c], NULL, NULL) != NULL)
 					break;
 				children++;
 				child = realloc(child,
 				    children * sizeof (nvlist_t *));
 				if (child == NULL)
 					zpool_no_memory();
 				if ((nv = make_leaf_vdev(props, argv[c],
-				    B_FALSE)) == NULL) {
+				    !(is_log || is_special || is_dedup ||
 				    is_spare))) == NULL) {
 					for (c = 0; c < children - 1; c++)
 						nvlist_free(child[c]);
 					free(child);
@ -1335,10 +1607,11 @@ construct_spec(nvlist_t *props, int argc, char **argv)
 				    type) == 0);
 				verify(nvlist_add_uint64(nv,
 				    ZPOOL_CONFIG_IS_LOG, is_log) == 0);
-				if (is_log)
+				if (is_log) {
 					verify(nvlist_add_string(nv,
 					    ZPOOL_CONFIG_ALLOCATION_BIAS,
 					    VDEV_ALLOC_BIAS_LOG) == 0);
 				}
 				if (is_special) {
 					verify(nvlist_add_string(nv,
 					    ZPOOL_CONFIG_ALLOCATION_BIAS,
@ -1354,6 +1627,15 @@ construct_spec(nvlist_t *props, int argc, char **argv)
 					    ZPOOL_CONFIG_NPARITY,
 					    mindev - 1) == 0);
 				}
 				if (strcmp(type, VDEV_TYPE_DRAID) == 0) {
 					if (draid_config_by_type(nv,
 					    fulltype, children) != 0) {
 						for (c = 0; c < children; c++)
 							nvlist_free(child[c]);
 						free(child);
 						goto spec_out;
 					}
 				}
 				verify(nvlist_add_nvlist_array(nv,
 				    ZPOOL_CONFIG_CHILDREN, child,
 				    children) == 0);
@ -1367,12 +1649,19 @@ construct_spec(nvlist_t *props, int argc, char **argv)
 			 * We have a device.  Pass off to make_leaf_vdev() to
 			 * construct the appropriate nvlist describing the vdev.
 			 */
-			if ((nv = make_leaf_vdev(props, argv[0],
+			if ((nv = make_leaf_vdev(props, argv[0], !(is_log ||
-			    is_log)) == NULL)
+			    is_special || is_dedup || is_spare))) == NULL)
 				goto spec_out;
-			if (is_log)
+			verify(nvlist_add_uint64(nv,
 			    ZPOOL_CONFIG_IS_LOG, is_log) == 0);
 			if (is_log) {
 				verify(nvlist_add_string(nv,
 				    ZPOOL_CONFIG_ALLOCATION_BIAS,
 				    VDEV_ALLOC_BIAS_LOG) == 0);
 				nlogs++;
 			}
 			if (is_special) {
 				verify(nvlist_add_string(nv,
 				    ZPOOL_CONFIG_ALLOCATION_BIAS,
--- a/cmd/ztest/ztest.c
+++ b/cmd/ztest/ztest.c
@ -104,6 +104,7 @@
 #include <sys/zio.h>
 #include <sys/zil.h>
 #include <sys/zil_impl.h>
 #include <sys/vdev_draid.h>
 #include <sys/vdev_impl.h>
 #include <sys/vdev_file.h>
 #include <sys/vdev_initialize.h>
@ -167,8 +168,11 @@ typedef struct ztest_shared_opts {
 	size_t zo_vdev_size;
 	int zo_ashift;
 	int zo_mirrors;
-	int zo_raidz;
+	int zo_raid_children;
-	int zo_raidz_parity;
+	int zo_raid_parity;
 	char zo_raid_type[8];
 	int zo_draid_data;
 	int zo_draid_spares;
 	int zo_datasets;
 	int zo_threads;
 	uint64_t zo_passtime;
@ -191,9 +195,12 @@ static const ztest_shared_opts_t ztest_opts_defaults = {
 	.zo_vdevs = 5,
 	.zo_ashift = SPA_MINBLOCKSHIFT,
 	.zo_mirrors = 2,
-	.zo_raidz = 4,
+	.zo_raid_children = 4,
-	.zo_raidz_parity = 1,
+	.zo_raid_parity = 1,
 	.zo_raid_type = VDEV_TYPE_RAIDZ,
 	.zo_vdev_size = SPA_MINDEVSIZE * 4,	/* 256m default size */
 	.zo_draid_data = 4,		/* data drives */
 	.zo_draid_spares = 1,		/* distributed spares */
 	.zo_datasets = 7,
 	.zo_threads = 23,
 	.zo_passtime = 60,		/* 60 seconds */
@ -232,7 +239,7 @@ static ztest_shared_ds_t *ztest_shared_ds;
 #define	BT_MAGIC	0x123456789abcdefULL
 #define	MAXFAULTS(zs) \
-	(MAX((zs)->zs_mirrors, 1) * (ztest_opts.zo_raidz_parity + 1) - 1)
+	(MAX((zs)->zs_mirrors, 1) * (ztest_opts.zo_raid_parity + 1) - 1)
 enum ztest_io_type {
 	ZTEST_IO_WRITE_TAG,
@ -689,8 +696,11 @@ usage(boolean_t requested)
 	    "\t[-s size_of_each_vdev (default: %s)]\n"
 	    "\t[-a alignment_shift (default: %d)] use 0 for random\n"
 	    "\t[-m mirror_copies (default: %d)]\n"
-	    "\t[-r raidz_disks (default: %d)]\n"
+	    "\t[-r raidz_disks / draid_disks (default: %d)]\n"
-	    "\t[-R raidz_parity (default: %d)]\n"
+	    "\t[-R raid_parity (default: %d)]\n"
 	    "\t[-K raid_kind (default: random)] raidz|draid|random\n"
 	    "\t[-D draid_data (default: %d)] in config\n"
 	    "\t[-S draid_spares (default: %d)]\n"
 	    "\t[-d datasets (default: %d)]\n"
 	    "\t[-t threads (default: %d)]\n"
 	    "\t[-g gang_block_threshold (default: %s)]\n"
@ -716,8 +726,10 @@ usage(boolean_t requested)
 	    nice_vdev_size,				/* -s */
 	    zo->zo_ashift,				/* -a */
 	    zo->zo_mirrors,				/* -m */
-	    zo->zo_raidz,				/* -r */
+	    zo->zo_raid_children,			/* -r */
-	    zo->zo_raidz_parity,			/* -R */
+	    zo->zo_raid_parity,				/* -R */
 	    zo->zo_draid_data,				/* -D */
 	    zo->zo_draid_spares,			/* -S */
 	    zo->zo_datasets,				/* -d */
 	    zo->zo_threads,				/* -t */
 	    nice_force_ganging,				/* -g */
@ -731,6 +743,21 @@ usage(boolean_t requested)
 	exit(requested ? 0 : 1);
 }
 /*
 * Return a value in the half-open interval [0, range) computed from
 * eight bytes read from the ztest_fd_rand descriptor.  A range of zero
 * yields zero without performing a read.  A failed or short read is
 * fatal.
 */
 static uint64_t
 ztest_random(uint64_t range)
 {
 	uint64_t sample;
 	ASSERT3S(ztest_fd_rand, >=, 0);
 	if (range != 0) {
 		if (read(ztest_fd_rand, &sample, sizeof (sample)) !=
 		    sizeof (sample))
 			fatal(1, "short read from /dev/urandom");
 		return (sample % range);
 	}
 	return (0);
 }
 static void
 ztest_parse_name_value(const char *input, ztest_shared_opts_t *zo)
@ -780,11 +807,12 @@ process_options(int argc, char **argv)
 	int opt;
 	uint64_t value;
 	char altdir[MAXNAMELEN] = { 0 };
 	char raid_kind[8] = { "random" };
 	bcopy(&ztest_opts_defaults, zo, sizeof (*zo));
 	while ((opt = getopt(argc, argv,
-	    "v:s:a:m:r:R:d:t:g:i:k:p:f:MVET:P:hF:B:C:o:G")) != EOF) {
+	    "v:s:a:m:r:R:K:D:S:d:t:g:i:k:p:f:MVET:P:hF:B:C:o:G")) != EOF) {
 		value = 0;
 		switch (opt) {
 		case 'v':
@ -793,6 +821,8 @@ process_options(int argc, char **argv)
 		case 'm':
 		case 'r':
 		case 'R':
 		case 'D':
 		case 'S':
 		case 'd':
 		case 't':
 		case 'g':
@ -817,10 +847,19 @@ process_options(int argc, char **argv)
 			zo->zo_mirrors = value;
 			break;
 		case 'r':
-			zo->zo_raidz = MAX(1, value);
+			zo->zo_raid_children = MAX(1, value);
 			break;
 		case 'R':
-			zo->zo_raidz_parity = MIN(MAX(value, 1), 3);
+			zo->zo_raid_parity = MIN(MAX(value, 1), 3);
 			break;
 		case 'K':
 			(void) strlcpy(raid_kind, optarg, sizeof (raid_kind));
 			break;
 		case 'D':
 			zo->zo_draid_data = MAX(1, value);
 			break;
 		case 'S':
 			zo->zo_draid_spares = MAX(1, value);
 			break;
 		case 'd':
 			zo->zo_datasets = MAX(1, value);
@ -895,7 +934,54 @@ process_options(int argc, char **argv)
 		}
 	}
-	zo->zo_raidz_parity = MIN(zo->zo_raidz_parity, zo->zo_raidz - 1);
+	/* When raid choice is 'random' add a draid pool 50% of the time */
 	if (strcmp(raid_kind, "random") == 0) {
 		(void) strlcpy(raid_kind, (ztest_random(2) == 0) ?
 		    "draid" : "raidz", sizeof (raid_kind));
 		if (ztest_opts.zo_verbose >= 3)
 			(void) printf("choosing RAID type '%s'\n", raid_kind);
 	}
 	if (strcmp(raid_kind, "draid") == 0) {
 		uint64_t min_devsize;
 		/* With fewer disk use 256M, otherwise 128M is OK */
 		min_devsize = (ztest_opts.zo_raid_children < 16) ?
 		    (256ULL << 20) : (128ULL << 20);
 		/* No top-level mirrors with dRAID for now */
 		zo->zo_mirrors = 0;
 		/* Use more appropriate defaults for dRAID */
 		if (zo->zo_vdevs == ztest_opts_defaults.zo_vdevs)
 			zo->zo_vdevs = 1;
 		if (zo->zo_raid_children ==
 		    ztest_opts_defaults.zo_raid_children)
 			zo->zo_raid_children = 16;
 		if (zo->zo_ashift < 12)
 			zo->zo_ashift = 12;
 		if (zo->zo_vdev_size < min_devsize)
 			zo->zo_vdev_size = min_devsize;
 		if (zo->zo_draid_data + zo->zo_raid_parity >
 		    zo->zo_raid_children - zo->zo_draid_spares) {
 			(void) fprintf(stderr, "error: too few draid "
 			    "children (%d) for stripe width (%d)\n",
 			    zo->zo_raid_children,
 			    zo->zo_draid_data + zo->zo_raid_parity);
 			usage(B_FALSE);
 		}
 		(void) strlcpy(zo->zo_raid_type, VDEV_TYPE_DRAID,
 		    sizeof (zo->zo_raid_type));
 	} else /* using raidz */ {
 		ASSERT0(strcmp(raid_kind, "raidz"));
 		zo->zo_raid_parity = MIN(zo->zo_raid_parity,
 		    zo->zo_raid_children - 1);
 	}
 	zo->zo_vdevtime =
 	    (zo->zo_vdevs > 0 ? zo->zo_time * NANOSEC / zo->zo_vdevs :
@ -966,22 +1052,6 @@ ztest_kill(ztest_shared_t *zs)
 	(void) kill(getpid(), SIGKILL);
 }
 /*
 * Return a value in the half-open interval [0, range) computed from
 * eight bytes read from the ztest_fd_rand descriptor.  A range of zero
 * yields zero without performing a read.
 */
 static uint64_t
 ztest_random(uint64_t range)
 {
 	uint64_t r;
 	/* The random source must already have been opened. */
 	ASSERT3S(ztest_fd_rand, >=, 0);
 	if (range == 0)
 		return (0);
 	/* Anything other than a full sizeof (r) read is treated as fatal. */
 	if (read(ztest_fd_rand, &r, sizeof (r)) != sizeof (r))
 		fatal(1, "short read from /dev/urandom");
 	return (r % range);
 }
 /* ARGSUSED */
 static void
 ztest_record_enospc(const char *s)
@ -997,12 +1067,27 @@ ztest_get_ashift(void)
 	return (ztest_opts.zo_ashift);
 }
 /*
 * Determine whether 'name' looks like a distributed spare name, that is
 * it begins with VDEV_TYPE_DRAID followed by three unsigned numbers
 * separated by dashes: <parity>-<vdev_id>-<spare_id>.  The parsed values
 * themselves are discarded; only the number of fields matched matters.
 */
 static boolean_t
 ztest_is_draid_spare(const char *name)
 {
 	uint64_t parity = 0, vdev_id = 0, spare_id = 0;
 	int nfields;
 	nfields = sscanf(name, VDEV_TYPE_DRAID "%llu-%llu-%llu",
 	    (u_longlong_t *)&parity, (u_longlong_t *)&vdev_id,
 	    (u_longlong_t *)&spare_id);
 	return ((nfields == 3) ? B_TRUE : B_FALSE);
 }
 static nvlist_t *
 make_vdev_file(char *path, char *aux, char *pool, size_t size, uint64_t ashift)
 {
 	char *pathbuf;
 	uint64_t vdev;
 	nvlist_t *file;
 	boolean_t draid_spare = B_FALSE;
 	pathbuf = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
@ -1024,9 +1109,11 @@ make_vdev_file(char *path, char *aux, char *pool, size_t size, uint64_t ashift)
 			    ztest_dev_template, ztest_opts.zo_dir,
 			    pool == NULL ? ztest_opts.zo_pool : pool, vdev);
 		}
 	} else {
 		draid_spare = ztest_is_draid_spare(path);
 	}
-	if (size != 0) {
+	if (size != 0 && !draid_spare) {
 		int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0666);
 		if (fd == -1)
 			fatal(1, "can't open %s", path);
@ -1035,20 +1122,21 @@ make_vdev_file(char *path, char *aux, char *pool, size_t size, uint64_t ashift)
 		(void) close(fd);
 	}
-	VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0);
+	VERIFY0(nvlist_alloc(&file, NV_UNIQUE_NAME, 0));
-	VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0);
+	VERIFY0(nvlist_add_string(file, ZPOOL_CONFIG_TYPE,
-	VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, path) == 0);
+	    draid_spare ? VDEV_TYPE_DRAID_SPARE : VDEV_TYPE_FILE));
-	VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0);
+	VERIFY0(nvlist_add_string(file, ZPOOL_CONFIG_PATH, path));
 	VERIFY0(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift));
 	umem_free(pathbuf, MAXPATHLEN);
 	return (file);
 }
 static nvlist_t *
-make_vdev_raidz(char *path, char *aux, char *pool, size_t size,
+make_vdev_raid(char *path, char *aux, char *pool, size_t size,
    uint64_t ashift, int r)
 {
-	nvlist_t *raidz, **child;
+	nvlist_t *raid, **child;
 	int c;
 	if (r < 2)
@ -1058,20 +1146,41 @@ make_vdev_raidz(char *path, char *aux, char *pool, size_t size,
 	for (c = 0; c < r; c++)
 		child[c] = make_vdev_file(path, aux, pool, size, ashift);
-	VERIFY(nvlist_alloc(&raidz, NV_UNIQUE_NAME, 0) == 0);
+	VERIFY0(nvlist_alloc(&raid, NV_UNIQUE_NAME, 0));
-	VERIFY(nvlist_add_string(raidz, ZPOOL_CONFIG_TYPE,
+	VERIFY0(nvlist_add_string(raid, ZPOOL_CONFIG_TYPE,
-	    VDEV_TYPE_RAIDZ) == 0);
+	    ztest_opts.zo_raid_type));
-	VERIFY(nvlist_add_uint64(raidz, ZPOOL_CONFIG_NPARITY,
+	VERIFY0(nvlist_add_uint64(raid, ZPOOL_CONFIG_NPARITY,
-	    ztest_opts.zo_raidz_parity) == 0);
+	    ztest_opts.zo_raid_parity));
-	VERIFY(nvlist_add_nvlist_array(raidz, ZPOOL_CONFIG_CHILDREN,
+	VERIFY0(nvlist_add_nvlist_array(raid, ZPOOL_CONFIG_CHILDREN,
-	    child, r) == 0);
+	    child, r));
 	if (strcmp(ztest_opts.zo_raid_type, VDEV_TYPE_DRAID) == 0) {
 		uint64_t ndata = ztest_opts.zo_draid_data;
 		uint64_t nparity = ztest_opts.zo_raid_parity;
 		uint64_t nspares = ztest_opts.zo_draid_spares;
 		uint64_t children = ztest_opts.zo_raid_children;
 		uint64_t ngroups = 1;
 		/*
 		 * Calculate the minimum number of groups required to fill a
 		 * slice. This is the LCM of the stripe width (data + parity)
 		 * and the number of data drives (children - spares).
 		 */
 		while (ngroups * (ndata + nparity) % (children - nspares) != 0)
 			ngroups++;
 		/* Store the basic dRAID configuration. */
 		fnvlist_add_uint64(raid, ZPOOL_CONFIG_DRAID_NDATA, ndata);
 		fnvlist_add_uint64(raid, ZPOOL_CONFIG_DRAID_NSPARES, nspares);
 		fnvlist_add_uint64(raid, ZPOOL_CONFIG_DRAID_NGROUPS, ngroups);
 	}
 	for (c = 0; c < r; c++)
 		nvlist_free(child[c]);
 	umem_free(child, r * sizeof (nvlist_t *));
-	return (raidz);
+	return (raid);
 }
 static nvlist_t *
@ -1082,12 +1191,12 @@ make_vdev_mirror(char *path, char *aux, char *pool, size_t size,
 	int c;
 	if (m < 1)
-		return (make_vdev_raidz(path, aux, pool, size, ashift, r));
+		return (make_vdev_raid(path, aux, pool, size, ashift, r));
 	child = umem_alloc(m * sizeof (nvlist_t *), UMEM_NOFAIL);
 	for (c = 0; c < m; c++)
-		child[c] = make_vdev_raidz(path, aux, pool, size, ashift, r);
+		child[c] = make_vdev_raid(path, aux, pool, size, ashift, r);
 	VERIFY(nvlist_alloc(&mirror, NV_UNIQUE_NAME, 0) == 0);
 	VERIFY(nvlist_add_string(mirror, ZPOOL_CONFIG_TYPE,
@ -2809,6 +2918,10 @@ ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id)
 	if (ztest_opts.zo_mmp_test)
 		return;
 	/* dRAID added after feature flags, skip upgrade test. */
 	if (strcmp(ztest_opts.zo_raid_type, VDEV_TYPE_DRAID) == 0)
 		return;
 	mutex_enter(&ztest_vdev_lock);
 	name = kmem_asprintf("%s_upgrade", ztest_opts.zo_pool);
@ -2818,13 +2931,13 @@ ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id)
 	(void) spa_destroy(name);
 	nvroot = make_vdev_root(NULL, NULL, name, ztest_opts.zo_vdev_size, 0,
-	    NULL, ztest_opts.zo_raidz, ztest_opts.zo_mirrors, 1);
+	    NULL, ztest_opts.zo_raid_children, ztest_opts.zo_mirrors, 1);
 	/*
 	 * If we're configuring a RAIDZ device then make sure that the
 	 * initial version is capable of supporting that feature.
 	 */
-	switch (ztest_opts.zo_raidz_parity) {
+	switch (ztest_opts.zo_raid_parity) {
 	case 0:
 	case 1:
 		initial_version = SPA_VERSION_INITIAL;
@ -2970,7 +3083,8 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id)
 		return;
 	mutex_enter(&ztest_vdev_lock);
-	leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz;
+	leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) *
 	    ztest_opts.zo_raid_children;
 	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
@ -3024,7 +3138,8 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id)
 		 */
 		nvroot = make_vdev_root(NULL, NULL, NULL,
 		    ztest_opts.zo_vdev_size, 0, (ztest_random(4) == 0) ?
-		    "log" : NULL, ztest_opts.zo_raidz, zs->zs_mirrors, 1);
+		    "log" : NULL, ztest_opts.zo_raid_children, zs->zs_mirrors,
 		    1);
 		error = spa_vdev_add(spa, nvroot);
 		nvlist_free(nvroot);
@ -3078,14 +3193,15 @@ ztest_vdev_class_add(ztest_ds_t *zd, uint64_t id)
 		return;
 	}
-	leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz;
+	leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) *
 	    ztest_opts.zo_raid_children;
 	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
 	ztest_shared->zs_vdev_next_leaf = spa_num_top_vdevs(spa) * leaves;
 	spa_config_exit(spa, SCL_VDEV, FTAG);
 	nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0,
-	    class, ztest_opts.zo_raidz, zs->zs_mirrors, 1);
+	    class, ztest_opts.zo_raid_children, zs->zs_mirrors, 1);
 	error = spa_vdev_add(spa, nvroot);
 	nvlist_free(nvroot);
@ -3134,7 +3250,7 @@ ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id)
 	char *aux;
 	char *path;
 	uint64_t guid = 0;
-	int error;
+	int error, ignore_err = 0;
 	if (ztest_opts.zo_mmp_test)
 		return;
@ -3157,7 +3273,13 @@ ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id)
 		/*
 		 * Pick a random device to remove.
 		 */
-		guid = sav->sav_vdevs[ztest_random(sav->sav_count)]->vdev_guid;
+		vdev_t *svd = sav->sav_vdevs[ztest_random(sav->sav_count)];
 		/* dRAID spares cannot be removed; try anyways to see ENOTSUP */
 		if (strstr(svd->vdev_path, VDEV_TYPE_DRAID) != NULL)
 			ignore_err = ENOTSUP;
 		guid = svd->vdev_guid;
 	} else {
 		/*
 		 * Find an unused device we can add.
@ -3214,7 +3336,9 @@ ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id)
 		case ZFS_ERR_DISCARDING_CHECKPOINT:
 			break;
 		default:
-			fatal(0, "spa_vdev_remove(%llu) = %d", guid, error);
+			if (error != ignore_err)
 				fatal(0, "spa_vdev_remove(%llu) = %d", guid,
 				    error);
 		}
 	}
@ -3243,7 +3367,7 @@ ztest_split_pool(ztest_ds_t *zd, uint64_t id)
 	mutex_enter(&ztest_vdev_lock);
 	/* ensure we have a usable config; mirrors of raidz aren't supported */
-	if (zs->zs_mirrors < 3 || ztest_opts.zo_raidz > 1) {
+	if (zs->zs_mirrors < 3 || ztest_opts.zo_raid_children > 1) {
 		mutex_exit(&ztest_vdev_lock);
 		return;
 	}
@ -3343,6 +3467,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
 	int replacing;
 	int oldvd_has_siblings = B_FALSE;
 	int newvd_is_spare = B_FALSE;
 	int newvd_is_dspare = B_FALSE;
 	int oldvd_is_log;
 	int error, expected_error;
@ -3353,7 +3478,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
 	newpath = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
 	mutex_enter(&ztest_vdev_lock);
-	leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz;
+	leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raid_children;
 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
@ -3393,14 +3518,17 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
 	if (zs->zs_mirrors >= 1) {
 		ASSERT(oldvd->vdev_ops == &vdev_mirror_ops);
 		ASSERT(oldvd->vdev_children >= zs->zs_mirrors);
-		oldvd = oldvd->vdev_child[leaf / ztest_opts.zo_raidz];
+		oldvd = oldvd->vdev_child[leaf / ztest_opts.zo_raid_children];
 	}
 	/* pick a child out of the raidz group */
-	if (ztest_opts.zo_raidz > 1) {
+	if (ztest_opts.zo_raid_children > 1) {
 		if (strcmp(oldvd->vdev_ops->vdev_op_type, "raidz") == 0)
 			ASSERT(oldvd->vdev_ops == &vdev_raidz_ops);
-		ASSERT(oldvd->vdev_children == ztest_opts.zo_raidz);
+		else
-		oldvd = oldvd->vdev_child[leaf % ztest_opts.zo_raidz];
+			ASSERT(oldvd->vdev_ops == &vdev_draid_ops);
 		ASSERT(oldvd->vdev_children == ztest_opts.zo_raid_children);
 		oldvd = oldvd->vdev_child[leaf % ztest_opts.zo_raid_children];
 	}
 	/*
@ -3447,6 +3575,10 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
 	if (sav->sav_count != 0 && ztest_random(3) == 0) {
 		newvd = sav->sav_vdevs[ztest_random(sav->sav_count)];
 		newvd_is_spare = B_TRUE;
 		if (newvd->vdev_ops == &vdev_draid_spare_ops)
 			newvd_is_dspare = B_TRUE;
 		(void) strcpy(newpath, newvd->vdev_path);
 	} else {
 		(void) snprintf(newpath, MAXPATHLEN, ztest_dev_template,
@ -3480,6 +3612,9 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
 	 * If newvd is already part of the pool, it should fail with EBUSY.
 	 *
 	 * If newvd is too small, it should fail with EOVERFLOW.
 	 *
 	 * If newvd is a distributed spare and it's being attached to a
 	 * dRAID which is not its parent it should fail with EINVAL.
 	 */
 	if (pvd->vdev_ops != &vdev_mirror_ops &&
 	    pvd->vdev_ops != &vdev_root_ops && (!replacing ||
@ -3492,10 +3627,12 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
 		expected_error = replacing ? 0 : EBUSY;
 	else if (vdev_lookup_by_path(rvd, newpath) != NULL)
 		expected_error = EBUSY;
-	else if (newsize < oldsize)
+	else if (!newvd_is_dspare && newsize < oldsize)
 		expected_error = EOVERFLOW;
 	else if (ashift > oldvd->vdev_top->vdev_ashift)
 		expected_error = EDOM;
 	else if (newvd_is_dspare && pvd != vdev_draid_spare_get_parent(newvd))
 		expected_error = ENOTSUP;
 	else
 		expected_error = 0;
@ -4880,13 +5017,13 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
 			void *packcheck = umem_alloc(packsize, UMEM_NOFAIL);
 			void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL);
-			VERIFY(0 == dmu_read(os, packobj, packoff,
+			VERIFY0(dmu_read(os, packobj, packoff,
 			    packsize, packcheck, DMU_READ_PREFETCH));
-			VERIFY(0 == dmu_read(os, bigobj, bigoff,
+			VERIFY0(dmu_read(os, bigobj, bigoff,
 			    bigsize, bigcheck, DMU_READ_PREFETCH));
-			ASSERT(bcmp(packbuf, packcheck, packsize) == 0);
+			ASSERT0(bcmp(packbuf, packcheck, packsize));
-			ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0);
+			ASSERT0(bcmp(bigbuf, bigcheck, bigsize));
 			umem_free(packcheck, packsize);
 			umem_free(bigcheck, bigsize);
@ -5761,7 +5898,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
 	}
 	maxfaults = MAXFAULTS(zs);
-	leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz;
+	leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raid_children;
 	mirror_save = zs->zs_mirrors;
 	mutex_exit(&ztest_vdev_lock);
@ -6011,7 +6148,7 @@ out:
 /*
 * By design ztest will never inject uncorrectable damage in to the pool.
 * Issue a scrub, wait for it to complete, and verify there is never any
- * any persistent damage.
+ * persistent damage.
 *
 * Only after a full scrub has been completed is it safe to start injecting
 * data corruption.  See the comment in zfs_fault_inject().
@ -7347,7 +7484,7 @@ ztest_init(ztest_shared_t *zs)
 	zs->zs_splits = 0;
 	zs->zs_mirrors = ztest_opts.zo_mirrors;
 	nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0,
-	    NULL, ztest_opts.zo_raidz, zs->zs_mirrors, 1);
+	    NULL, ztest_opts.zo_raid_children, zs->zs_mirrors, 1);
 	props = make_random_props();
 	/*
@ -7683,10 +7820,12 @@ main(int argc, char **argv)
 	if (ztest_opts.zo_verbose >= 1) {
 		(void) printf("%llu vdevs, %d datasets, %d threads,"
-		    " %llu seconds...\n",
+		    "%d %s disks, %llu seconds...\n\n",
 		    (u_longlong_t)ztest_opts.zo_vdevs,
 		    ztest_opts.zo_datasets,
 		    ztest_opts.zo_threads,
 		    ztest_opts.zo_raid_children,
 		    ztest_opts.zo_raid_type,
 		    (u_longlong_t)ztest_opts.zo_time);
 	}
--- a/configure.ac
+++ b/configure.ac
@ -209,6 +209,7 @@ AC_CONFIG_FILES([
 	tests/zfs-tests/cmd/btree_test/Makefile
 	tests/zfs-tests/cmd/chg_usr_exec/Makefile
 	tests/zfs-tests/cmd/devname2devid/Makefile
 	tests/zfs-tests/cmd/draid/Makefile
 	tests/zfs-tests/cmd/dir_rd_update/Makefile
 	tests/zfs-tests/cmd/file_check/Makefile
 	tests/zfs-tests/cmd/file_trunc/Makefile
--- a/include/libzfs.h
+++ b/include/libzfs.h
@ -455,6 +455,7 @@ extern void zpool_explain_recover(libzfs_handle_t *, const char *, int,
    nvlist_t *);
 extern int zpool_checkpoint(zpool_handle_t *);
 extern int zpool_discard_checkpoint(zpool_handle_t *);
 extern boolean_t zpool_is_draid_spare(const char *);
 /*
 * Basic handle manipulations.  These functions do not create or destroy the
--- a/include/sys/Makefile.am
+++ b/include/sys/Makefile.am
@ -82,6 +82,7 @@ COMMON_H = \
 	vdev_disk.h \
 	vdev_file.h \
 	vdev.h \
 	vdev_draid.h \
 	vdev_impl.h \
 	vdev_indirect_births.h \
 	vdev_indirect_mapping.h \
--- a/include/sys/dsl_scan.h
+++ b/include/sys/dsl_scan.h
@ -163,6 +163,7 @@ typedef struct dsl_scan_io_queue dsl_scan_io_queue_t;
 void scan_init(void);
 void scan_fini(void);
 int dsl_scan_init(struct dsl_pool *dp, uint64_t txg);
 void dsl_scan_setup_sync(void *, dmu_tx_t *);
 void dsl_scan_fini(struct dsl_pool *dp);
 void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *);
 int dsl_scan_cancel(struct dsl_pool *);
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@ -617,6 +617,7 @@ typedef struct zpool_load_policy {
 #define	ZPOOL_CONFIG_PREV_INDIRECT_VDEV	"com.delphix:prev_indirect_vdev"
 #define	ZPOOL_CONFIG_PATH		"path"
 #define	ZPOOL_CONFIG_DEVID		"devid"
 #define	ZPOOL_CONFIG_SPARE_ID		"spareid"
 #define	ZPOOL_CONFIG_METASLAB_ARRAY	"metaslab_array"
 #define	ZPOOL_CONFIG_METASLAB_SHIFT	"metaslab_shift"
 #define	ZPOOL_CONFIG_ASHIFT		"ashift"
@ -757,10 +758,17 @@ typedef struct zpool_load_policy {
 #define	ZPOOL_CONFIG_LOAD_DATA_ERRORS	"verify_data_errors"
 #define	ZPOOL_CONFIG_REWIND_TIME	"seconds_of_rewind"
 /* dRAID configuration */
 #define	ZPOOL_CONFIG_DRAID_NDATA	"draid_ndata"
 #define	ZPOOL_CONFIG_DRAID_NSPARES	"draid_nspares"
 #define	ZPOOL_CONFIG_DRAID_NGROUPS	"draid_ngroups"
 #define	VDEV_TYPE_ROOT			"root"
 #define	VDEV_TYPE_MIRROR		"mirror"
 #define	VDEV_TYPE_REPLACING		"replacing"
 #define	VDEV_TYPE_RAIDZ			"raidz"
 #define	VDEV_TYPE_DRAID			"draid"
 #define	VDEV_TYPE_DRAID_SPARE		"dspare"
 #define	VDEV_TYPE_DISK			"disk"
 #define	VDEV_TYPE_FILE			"file"
 #define	VDEV_TYPE_MISSING		"missing"
@ -770,6 +778,12 @@ typedef struct zpool_load_policy {
 #define	VDEV_TYPE_L2CACHE		"l2cache"
 #define	VDEV_TYPE_INDIRECT		"indirect"
 #define	VDEV_RAIDZ_MAXPARITY		3
 #define	VDEV_DRAID_MAXPARITY		3
 #define	VDEV_DRAID_MIN_CHILDREN		2
 #define	VDEV_DRAID_MAX_CHILDREN		UINT8_MAX
 /* VDEV_TOP_ZAP_* are used in top-level vdev ZAP objects. */
 #define	VDEV_TOP_ZAP_INDIRECT_OBSOLETE_SM \
 	"com.delphix:indirect_obsolete_sm"
--- a/include/sys/spa_impl.h
+++ b/include/sys/spa_impl.h
@ -240,8 +240,9 @@ struct spa {
 	kcondvar_t	spa_evicting_os_cv;	/* Objset Eviction Completion */
 	txg_list_t	spa_vdev_txg_list;	/* per-txg dirty vdev list */
 	vdev_t		*spa_root_vdev;		/* top-level vdev container */
-	int		spa_min_ashift;		/* of vdevs in normal class */
+	uint64_t	spa_min_ashift;		/* of vdevs in normal class */
-	int		spa_max_ashift;		/* of vdevs in normal class */
+	uint64_t	spa_max_ashift;		/* of vdevs in normal class */
 	uint64_t	spa_min_alloc;		/* of vdevs in normal class */
 	uint64_t	spa_config_guid;	/* config pool guid */
 	uint64_t	spa_load_guid;		/* spa_load initialized guid */
 	uint64_t	spa_last_synced_guid;	/* last synced guid */
--- a/include/sys/txg.h
+++ b/include/sys/txg.h
@ -41,6 +41,7 @@ extern "C" {
 #define	TXG_MASK		(TXG_SIZE - 1)	/* mask for size	*/
 #define	TXG_INITIAL		TXG_SIZE	/* initial txg 		*/
 #define	TXG_IDX			(txg & TXG_MASK)
 #define	TXG_UNKNOWN		0
 /* Number of txgs worth of frees we defer adding to in-core spacemaps */
 #define	TXG_DEFER_SIZE		2
--- a/include/sys/vdev.h
+++ b/include/sys/vdev.h
@ -49,10 +49,13 @@ typedef enum vdev_dtl_type {
 extern int zfs_nocacheflush;
 typedef boolean_t vdev_open_children_func_t(vdev_t *vd);
 extern void vdev_dbgmsg(vdev_t *vd, const char *fmt, ...);
 extern void vdev_dbgmsg_print_tree(vdev_t *, int);
 extern int vdev_open(vdev_t *);
 extern void vdev_open_children(vdev_t *);
 extern void vdev_open_children_subset(vdev_t *, vdev_open_children_func_t *);
 extern int vdev_validate(vdev_t *);
 extern int vdev_copy_path_strict(vdev_t *, vdev_t *);
 extern void vdev_copy_path_relaxed(vdev_t *, vdev_t *);
@ -71,7 +74,10 @@ extern void vdev_dtl_dirty(vdev_t *vd, vdev_dtl_type_t d,
 extern boolean_t vdev_dtl_contains(vdev_t *vd, vdev_dtl_type_t d,
    uint64_t txg, uint64_t size);
 extern boolean_t vdev_dtl_empty(vdev_t *vd, vdev_dtl_type_t d);
-extern boolean_t vdev_dtl_need_resilver(vdev_t *vd, uint64_t off, size_t size);
+extern boolean_t vdev_default_need_resilver(vdev_t *vd, const dva_t *dva,
    size_t psize, uint64_t phys_birth);
 extern boolean_t vdev_dtl_need_resilver(vdev_t *vd, const dva_t *dva,
    size_t psize, uint64_t phys_birth);
 extern void vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
    boolean_t scrub_done, boolean_t rebuild_done);
 extern boolean_t vdev_dtl_required(vdev_t *vd);
@ -97,8 +103,14 @@ extern void vdev_metaslab_set_size(vdev_t *);
 extern void vdev_expand(vdev_t *vd, uint64_t txg);
 extern void vdev_split(vdev_t *vd);
 extern void vdev_deadman(vdev_t *vd, char *tag);
 typedef void vdev_xlate_func_t(void *arg, range_seg64_t *physical_rs);
 extern boolean_t vdev_xlate_is_empty(range_seg64_t *rs);
 extern void vdev_xlate(vdev_t *vd, const range_seg64_t *logical_rs,
-    range_seg64_t *physical_rs);
+    range_seg64_t *physical_rs, range_seg64_t *remain_rs);
 extern void vdev_xlate_walk(vdev_t *vd, const range_seg64_t *logical_rs,
    vdev_xlate_func_t *func, void *arg);
 extern void vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx);
 extern void vdev_get_stats(vdev_t *vd, vdev_stat_t *vs);
--- a/include/sys/vdev_draid.h
+++ b/include/sys/vdev_draid.h
@ -0,0 +1,110 @@
 /*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
 /*
 * Copyright (c) 2016, Intel Corporation.
 * Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
 */
 #ifndef _SYS_VDEV_DRAID_H
 #define	_SYS_VDEV_DRAID_H
 #include <sys/types.h>
 #include <sys/abd.h>
 #include <sys/nvpair.h>
 #include <sys/zio.h>
 #include <sys/vdev_impl.h>
 #include <sys/vdev_raidz_impl.h>
 #include <sys/vdev.h>
 #ifdef  __cplusplus
 extern "C" {
 #endif
 /*
 * Constants required to generate and use dRAID permutations.
 */
 #define	VDEV_DRAID_SEED			0xd7a1d5eed
 #define	VDEV_DRAID_MAX_MAPS		254
 #define	VDEV_DRAID_ROWSHIFT		SPA_MAXBLOCKSHIFT
 #define	VDEV_DRAID_ROWHEIGHT		(1ULL << VDEV_DRAID_ROWSHIFT)
 #define	VDEV_DRAID_REFLOW_RESERVE	(2 * VDEV_DRAID_ROWHEIGHT)
 /*
 * dRAID permutation map.
 *
 * Describes a single permutation map: its dimensions (dm_children columns
 * by dm_nperms rows), the seed and checksum recorded for it, and a pointer
 * to the base permutation array.
 */
 typedef struct draid_map {
 	uint64_t dm_children;	/* # of permutation columns */
 	uint64_t dm_nperms;	/* # of permutation rows */
 	uint64_t dm_seed;	/* dRAID map seed */
 	uint64_t dm_checksum;	/* Checksum of generated map */
 	uint8_t *dm_perms;	/* base permutation array */
 } draid_map_t;
 /*
 * dRAID configuration.
 *
 * The first group of fields holds the values read from the dRAID nvlist
 * configuration; the second group holds constants derived from them, as
 * described by the formula in each field's comment.
 *
 * NOTE(review): the vdc_groupsz comment refers to DRAID_ROWSIZE, which is
 * not defined in this header; presumably VDEV_DRAID_ROWHEIGHT is meant --
 * confirm against the code that fills in this structure.
 */
 typedef struct vdev_draid_config {
 	/*
 	 * Values read from the dRAID nvlist configuration.
 	 */
 	uint64_t vdc_ndata;		/* # of data devices in group */
 	uint64_t vdc_nparity;		/* # of parity devices in group */
 	uint64_t vdc_nspares;		/* # of distributed spares */
 	uint64_t vdc_children;		/* # of children */
 	uint64_t vdc_ngroups;		/* # groups per slice */
 	/*
 	 * Immutable derived constants.
 	 */
 	uint8_t *vdc_perms;		/* permutation array */
 	uint64_t vdc_nperms;		/* # of permutations */
 	uint64_t vdc_groupwidth;	/* = data + parity */
 	uint64_t vdc_ndisks;		/* = children - spares */
 	uint64_t vdc_groupsz;		/* = groupwidth * DRAID_ROWSIZE */
 	uint64_t vdc_devslicesz;	/* = (groupsz * groups) / ndisks */
 } vdev_draid_config_t;
 /*
 * Functions for handling dRAID permutation maps.
 */
 extern uint64_t vdev_draid_rand(uint64_t *);
 extern int vdev_draid_lookup_map(uint64_t, const draid_map_t **);
 extern int vdev_draid_generate_perms(const draid_map_t *, uint8_t **);
 /*
 * General dRAID support functions.
 */
 extern boolean_t vdev_draid_readable(vdev_t *, uint64_t);
 extern boolean_t vdev_draid_missing(vdev_t *, uint64_t, uint64_t, uint64_t);
 extern uint64_t vdev_draid_asize_to_psize(vdev_t *, uint64_t);
 extern void vdev_draid_map_alloc_empty(zio_t *, struct raidz_row *);
 extern nvlist_t *vdev_draid_read_config_spare(vdev_t *);
 /* Functions for dRAID distributed spares. */
 extern vdev_t *vdev_draid_spare_get_child(vdev_t *, uint64_t);
 extern vdev_t *vdev_draid_spare_get_parent(vdev_t *);
 extern int vdev_draid_spare_create(nvlist_t *, vdev_t *, uint64_t *, uint64_t);
 #ifdef  __cplusplus
 }
 #endif
 #endif /* _SYS_VDEV_DRAID_H */
--- a/include/sys/vdev_impl.h
+++ b/include/sys/vdev_impl.h
@ -68,14 +68,19 @@ extern uint32_t zfs_vdev_async_write_max_active;
 /*
 * Virtual device operations
 */
 typedef int	vdev_init_func_t(spa_t *spa, nvlist_t *nv, void **tsd);
 typedef void	vdev_fini_func_t(vdev_t *vd);
 typedef int	vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *max_size,
    uint64_t *ashift, uint64_t *pshift);
 typedef void	vdev_close_func_t(vdev_t *vd);
 typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize);
 typedef uint64_t vdev_min_asize_func_t(vdev_t *vd);
 typedef uint64_t vdev_min_alloc_func_t(vdev_t *vd);
 typedef void	vdev_io_start_func_t(zio_t *zio);
 typedef void	vdev_io_done_func_t(zio_t *zio);
 typedef void	vdev_state_change_func_t(vdev_t *vd, int, int);
-typedef boolean_t vdev_need_resilver_func_t(vdev_t *vd, uint64_t, size_t);
+typedef boolean_t vdev_need_resilver_func_t(vdev_t *vd, const dva_t *dva,
    size_t psize, uint64_t phys_birth);
 typedef void	vdev_hold_func_t(vdev_t *vd);
 typedef void	vdev_rele_func_t(vdev_t *vd);
@ -87,13 +92,24 @@ typedef void	vdev_remap_func_t(vdev_t *vd, uint64_t offset, uint64_t size,
 * Given a target vdev, translates the logical range "in" to the physical
 * range "res"
 */
-typedef void vdev_xlation_func_t(vdev_t *cvd, const range_seg64_t *in,
+typedef void vdev_xlation_func_t(vdev_t *cvd, const range_seg64_t *logical,
-    range_seg64_t *res);
+    range_seg64_t *physical, range_seg64_t *remain);
 typedef uint64_t vdev_rebuild_asize_func_t(vdev_t *vd, uint64_t start,
    uint64_t size, uint64_t max_segment);
 typedef void vdev_metaslab_init_func_t(vdev_t *vd, uint64_t *startp,
    uint64_t *sizep);
 typedef void vdev_config_generate_func_t(vdev_t *vd, nvlist_t *nv);
 typedef uint64_t vdev_nparity_func_t(vdev_t *vd);
 typedef uint64_t vdev_ndisks_func_t(vdev_t *vd);
 typedef const struct vdev_ops {
 	vdev_init_func_t		*vdev_op_init;
 	vdev_fini_func_t		*vdev_op_fini;
 	vdev_open_func_t		*vdev_op_open;
 	vdev_close_func_t		*vdev_op_close;
 	vdev_asize_func_t		*vdev_op_asize;
 	vdev_min_asize_func_t		*vdev_op_min_asize;
 	vdev_min_alloc_func_t		*vdev_op_min_alloc;
 	vdev_io_start_func_t		*vdev_op_io_start;
 	vdev_io_done_func_t		*vdev_op_io_done;
 	vdev_state_change_func_t	*vdev_op_state_change;
@ -101,11 +117,12 @@ typedef const struct vdev_ops {
 	vdev_hold_func_t		*vdev_op_hold;
 	vdev_rele_func_t		*vdev_op_rele;
 	vdev_remap_func_t		*vdev_op_remap;
 	/*
 	 * For translating ranges from non-leaf vdevs (e.g. raidz) to leaves.
 	 * Used when initializing vdevs. Isn't used by leaf ops.
 	 */
 	vdev_xlation_func_t		*vdev_op_xlate;
 	vdev_rebuild_asize_func_t	*vdev_op_rebuild_asize;
 	vdev_metaslab_init_func_t	*vdev_op_metaslab_init;
 	vdev_config_generate_func_t	*vdev_op_config_generate;
 	vdev_nparity_func_t		*vdev_op_nparity;
 	vdev_ndisks_func_t		*vdev_op_ndisks;
 	char				vdev_op_type[16];
 	boolean_t			vdev_op_leaf;
 } vdev_ops_t;
@ -325,16 +342,13 @@ struct vdev {
 	kthread_t	*vdev_rebuild_thread;
 	vdev_rebuild_t	vdev_rebuild_config;
-	/* For limiting outstanding I/Os (initialize, TRIM, rebuild) */
+	/* For limiting outstanding I/Os (initialize, TRIM) */
 	kmutex_t	vdev_initialize_io_lock;
 	kcondvar_t	vdev_initialize_io_cv;
 	uint64_t	vdev_initialize_inflight;
 	kmutex_t	vdev_trim_io_lock;
 	kcondvar_t	vdev_trim_io_cv;
 	uint64_t	vdev_trim_inflight[3];
 	kmutex_t	vdev_rebuild_io_lock;
 	kcondvar_t	vdev_rebuild_io_cv;
 	uint64_t	vdev_rebuild_inflight;
 	/*
 	 * Values stored in the config for an indirect or removing vdev.
@ -392,7 +406,6 @@ struct vdev {
 	uint64_t	vdev_removed;	/* persistent removed state	*/
 	uint64_t	vdev_resilver_txg; /* persistent resilvering state */
 	uint64_t	vdev_rebuild_txg; /* persistent rebuilding state */
 	uint64_t	vdev_nparity;	/* number of parity devices for raidz */
 	char		*vdev_path;	/* vdev path (if any)		*/
 	char		*vdev_devid;	/* vdev devid (if any)		*/
 	char		*vdev_physpath;	/* vdev device path (if any)	*/
@ -445,8 +458,6 @@ struct vdev {
 	zfs_ratelimit_t vdev_checksum_rl;
 };
 #define	VDEV_RAIDZ_MAXPARITY	3
 #define	VDEV_PAD_SIZE		(8 << 10)
 /* 2 padding areas (vl_pad1 and vl_be) to skip */
 #define	VDEV_SKIP_SIZE		VDEV_PAD_SIZE * 2
@ -532,6 +543,9 @@ typedef struct vdev_label {
 #define	VDEV_LABEL_END_SIZE	(2 * sizeof (vdev_label_t))
 #define	VDEV_LABELS		4
 #define	VDEV_BEST_LABEL		VDEV_LABELS
 /*
 * Evaluates to true when the offset 'off' lies either below
 * VDEV_LABEL_START_SIZE or at/above ((vd)->vdev_psize - VDEV_LABEL_END_SIZE),
 * i.e. within the leading or trailing label area of 'vd'.  'off' may be
 * evaluated twice, so it must not have side effects.
 */
 #define	VDEV_OFFSET_IS_LABEL(vd, off)                           \
 	(((off) < VDEV_LABEL_START_SIZE) ||                     \
 	((off) >= ((vd)->vdev_psize - VDEV_LABEL_END_SIZE)))
 #define	VDEV_ALLOC_LOAD		0
 #define	VDEV_ALLOC_ADD		1
@ -577,6 +591,8 @@ extern vdev_ops_t vdev_root_ops;
 extern vdev_ops_t vdev_mirror_ops;
 extern vdev_ops_t vdev_replacing_ops;
 extern vdev_ops_t vdev_raidz_ops;
 extern vdev_ops_t vdev_draid_ops;
 extern vdev_ops_t vdev_draid_spare_ops;
 extern vdev_ops_t vdev_disk_ops;
 extern vdev_ops_t vdev_file_ops;
 extern vdev_ops_t vdev_missing_ops;
@ -587,11 +603,15 @@ extern vdev_ops_t vdev_indirect_ops;
 /*
 * Common size functions
 */
-extern void vdev_default_xlate(vdev_t *vd, const range_seg64_t *in,
+extern void vdev_default_xlate(vdev_t *vd, const range_seg64_t *logical_rs,
-    range_seg64_t *out);
+    range_seg64_t *physical_rs, range_seg64_t *remain_rs);
 extern uint64_t vdev_default_asize(vdev_t *vd, uint64_t psize);
 extern uint64_t vdev_default_min_asize(vdev_t *vd);
 extern uint64_t vdev_get_min_asize(vdev_t *vd);
 extern void vdev_set_min_asize(vdev_t *vd);
 extern uint64_t vdev_get_min_alloc(vdev_t *vd);
 extern uint64_t vdev_get_nparity(vdev_t *vd);
 extern uint64_t vdev_get_ndisks(vdev_t *vd);
 /*
 * Global variables
--- a/include/sys/vdev_raidz.h
+++ b/include/sys/vdev_raidz.h
@ -32,6 +32,7 @@ extern "C" {
 #endif
 struct zio;
 struct raidz_row;
 struct raidz_map;
 #if !defined(_KERNEL)
 struct kernel_param {};
@ -43,8 +44,11 @@ struct kernel_param {};
 struct raidz_map *vdev_raidz_map_alloc(struct zio *, uint64_t, uint64_t,
    uint64_t);
 void vdev_raidz_map_free(struct raidz_map *);
 void vdev_raidz_generate_parity_row(struct raidz_map *, struct raidz_row *);
 void vdev_raidz_generate_parity(struct raidz_map *);
-int vdev_raidz_reconstruct(struct raidz_map *, const int *, int);
+void vdev_raidz_reconstruct(struct raidz_map *, const int *, int);
 void vdev_raidz_child_done(zio_t *);
 void vdev_raidz_io_done(zio_t *);
 /*
 * vdev_raidz_math interface
@ -52,11 +56,16 @@ int vdev_raidz_reconstruct(struct raidz_map *, const int *, int);
 void vdev_raidz_math_init(void);
 void vdev_raidz_math_fini(void);
 const struct raidz_impl_ops *vdev_raidz_math_get_ops(void);
-int vdev_raidz_math_generate(struct raidz_map *);
+int vdev_raidz_math_generate(struct raidz_map *, struct raidz_row *);
-int vdev_raidz_math_reconstruct(struct raidz_map *, const int *, const int *,
+int vdev_raidz_math_reconstruct(struct raidz_map *, struct raidz_row *,
-    const int);
+    const int *, const int *, const int);
 int vdev_raidz_impl_set(const char *);
 typedef struct vdev_raidz {
 	int vd_logical_width;
 	int vd_nparity;
 } vdev_raidz_t;
 #ifdef	__cplusplus
 }
 #endif
--- a/include/sys/vdev_raidz_impl.h
+++ b/include/sys/vdev_raidz_impl.h
@ -29,6 +29,7 @@
 #include <sys/debug.h>
 #include <sys/kstat.h>
 #include <sys/abd.h>
 #include <sys/vdev_impl.h>
 #ifdef  __cplusplus
 extern "C" {
@ -106,30 +107,45 @@ typedef struct raidz_col {
 	uint64_t rc_offset;		/* device offset */
 	uint64_t rc_size;		/* I/O size */
 	abd_t *rc_abd;			/* I/O data */
-	void *rc_gdata;			/* used to store the "good" version */
+	void *rc_orig_data;		/* pre-reconstruction */
 	abd_t *rc_gdata;		/* used to store the "good" version */
 	int rc_error;			/* I/O error for this device */
 	uint8_t rc_tried;		/* Did we attempt this I/O column? */
 	uint8_t rc_skipped;		/* Did we skip this I/O column? */
 	uint8_t rc_need_orig_restore;	/* need to restore from orig_data? */
 	uint8_t rc_repair;		/* Write good data to this column */
 } raidz_col_t;
 /*
 * A single row of a raidz_map_t.  The map references its rows through the
 * rm_row[] pointer array (see rm_nrows); each row carries its own column
 * counts and ends with a trailing array of per-column I/O state (rr_col).
 */
 typedef struct raidz_row {
 	uint64_t rr_cols;		/* Regular column count */
 	uint64_t rr_scols;		/* Count including skipped columns */
 	uint64_t rr_bigcols;		/* Remainder data column count */
 	uint64_t rr_missingdata;	/* Count of missing data devices */
 	uint64_t rr_missingparity;	/* Count of missing parity devices */
 	uint64_t rr_firstdatacol;	/* First data column/parity count */
 	abd_t *rr_abd_copy;		/* rm_asize-buffer of copied data */
 	abd_t *rr_abd_empty;		/* dRAID empty sector buffer */
 	int rr_nempty;			/* empty sectors included in parity */
 	int rr_code;			/* reconstruction code (unused) */
 #ifdef ZFS_DEBUG
 	uint64_t rr_offset;		/* Logical offset for *_io_verify() */
 	uint64_t rr_size;		/* Physical size for *_io_verify() */
 #endif
 	raidz_col_t rr_col[0];		/* Flexible array of I/O columns */
 } raidz_row_t;
 typedef struct raidz_map {
 	uint64_t rm_cols;		/* Regular column count */
 	uint64_t rm_scols;		/* Count including skipped columns */
 	uint64_t rm_bigcols;		/* Number of oversized columns */
 	uint64_t rm_asize;		/* Actual total I/O size */
 	uint64_t rm_missingdata;	/* Count of missing data devices */
 	uint64_t rm_missingparity;	/* Count of missing parity devices */
 	uint64_t rm_firstdatacol;	/* First data column/parity count */
 	uint64_t rm_nskip;		/* Skipped sectors for padding */
 	uint64_t rm_skipstart;		/* Column index of padding start */
 	abd_t *rm_abd_copy;		/* rm_asize-buffer of copied data */
 	uintptr_t rm_reports;		/* # of referencing checksum reports */
-	uint8_t	rm_freed;		/* map no longer has referencing ZIO */
+	boolean_t rm_freed;		/* map no longer has referencing ZIO */
-	uint8_t	rm_ecksuminjected;	/* checksum error was injected */
+	boolean_t rm_ecksuminjected;	/* checksum error was injected */
 	int rm_nrows;			/* Regular row count */
 	int rm_nskip;			/* RAIDZ sectors skipped for padding */
 	int rm_skipstart;		/* Column index of padding start */
 	const raidz_impl_ops_t *rm_ops;	/* RAIDZ math operations */
-	raidz_col_t rm_col[1];		/* Flexible array of I/O columns */
+	raidz_row_t *rm_row[0];		/* flexible array of rows */
 } raidz_map_t;
 #define	RAIDZ_ORIGINAL_IMPL	(INT_MAX)
 extern const raidz_impl_ops_t vdev_raidz_scalar_impl;
@ -163,14 +179,15 @@ extern const raidz_impl_ops_t vdev_raidz_powerpc_altivec_impl;
 *
 * raidz_parity		Returns parity of the RAIDZ block
 * raidz_ncols		Returns number of columns the block spans
 *			Note, all rows have the same number of columns.
 * raidz_nbigcols	Returns number of big columns
 * raidz_col_p		Returns pointer to a column
 * raidz_col_size	Returns size of a column
 * raidz_big_size	Returns size of big columns
 * raidz_short_size	Returns size of short columns
 */
-#define	raidz_parity(rm)	((rm)->rm_firstdatacol)
+#define	raidz_parity(rm)	((rm)->rm_row[0]->rr_firstdatacol)
-#define	raidz_ncols(rm)		((rm)->rm_cols)
+#define	raidz_ncols(rm)		((rm)->rm_row[0]->rr_cols)
 #define	raidz_nbigcols(rm)	((rm)->rm_bigcols)
 #define	raidz_col_p(rm, c)	((rm)->rm_col + (c))
 #define	raidz_col_size(rm, c)	((rm)->rm_col[c].rc_size)
@ -185,10 +202,10 @@ extern const raidz_impl_ops_t vdev_raidz_powerpc_altivec_impl;
 */
 #define	_RAIDZ_GEN_WRAP(code, impl)					\
 static void								\
-impl ## _gen_ ## code(void *rmp)					\
+impl ## _gen_ ## code(void *rrp)					\
 {									\
-	raidz_map_t *rm = (raidz_map_t *)rmp;				\
+	raidz_row_t *rr = (raidz_row_t *)rrp;				\
-	raidz_generate_## code ## _impl(rm);				\
+	raidz_generate_## code ## _impl(rr);				\
 }
 /*
@ -199,10 +216,10 @@ impl ## _gen_ ## code(void *rmp)					\
 */
 #define	_RAIDZ_REC_WRAP(code, impl)					\
 static int								\
-impl ## _rec_ ## code(void *rmp, const int *tgtidx)			\
+impl ## _rec_ ## code(void *rrp, const int *tgtidx)			\
 {									\
-	raidz_map_t *rm = (raidz_map_t *)rmp;				\
+	raidz_row_t *rr = (raidz_row_t *)rrp;				\
-	return (raidz_reconstruct_## code ## _impl(rm, tgtidx));	\
+	return (raidz_reconstruct_## code ## _impl(rr, tgtidx));	\
 }
 /*
--- a/include/sys/vdev_rebuild.h
+++ b/include/sys/vdev_rebuild.h
@ -66,10 +66,14 @@ typedef struct vdev_rebuild {
 	vdev_t		*vr_top_vdev;		/* top-level vdev to rebuild */
 	metaslab_t	*vr_scan_msp;		/* scanning disabled metaslab */
 	range_tree_t	*vr_scan_tree;		/* scan ranges (in metaslab) */
 	kmutex_t	vr_io_lock;		/* inflight IO lock */
 	kcondvar_t	vr_io_cv;		/* inflight IO cv */
 	/* In-core state and progress */
 	uint64_t	vr_scan_offset[TXG_SIZE];
 	uint64_t	vr_prev_scan_time_ms;	/* any previous scan time */
 	uint64_t	vr_bytes_inflight_max;	/* maximum bytes inflight */
 	uint64_t	vr_bytes_inflight;	/* current bytes inflight */
 	/* Per-rebuild pass statistics for calculating bandwidth */
 	uint64_t	vr_pass_start_time;
--- a/include/sys/zio.h
+++ b/include/sys/zio.h
@ -372,6 +372,7 @@ struct zio_cksum_report {
 	nvlist_t		*zcr_detector;
 	void			*zcr_cbdata;
 	size_t			zcr_cbinfo;	/* passed to zcr_free() */
 	uint64_t		zcr_sector;
 	uint64_t		zcr_align;
 	uint64_t		zcr_length;
 	zio_cksum_finish_f	*zcr_finish;
--- a/include/zfeature_common.h
+++ b/include/zfeature_common.h
@ -76,6 +76,7 @@ typedef enum spa_feature {
 	SPA_FEATURE_LIVELIST,
 	SPA_FEATURE_DEVICE_REBUILD,
 	SPA_FEATURE_ZSTD_COMPRESS,
 	SPA_FEATURE_DRAID,
 	SPA_FEATURES
 } spa_feature_t;
--- a/lib/libzfs/libzfs_dataset.c
+++ b/lib/libzfs/libzfs_dataset.c
@ -5336,6 +5336,16 @@ zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl)
 * 160k.  Again, 128k is from SPA_OLD_MAXBLOCKSIZE and 160k is as calculated in
 * the 128k block example above.
 *
 * The situation is slightly different for dRAID since the minimum allocation
 * size is the full group width.  The same 8K block above would be written as
 * follows in a dRAID group:
 *
 * +-------+-------+-------+-------+-------+
 * | disk1 | disk2 | disk3 | disk4 | disk5 |
 * +-------+-------+-------+-------+-------+
 * |  P0   |  D0   |  D1   |  S0   |  S1   |
 * +-------+-------+-------+-------+-------+
 *
 * Compression may lead to a variety of block sizes being written for the same
 * volume or file.  There is no clear way to reserve just the amount of space
 * that will be required, so the worst case (no compression) is assumed.
@ -5365,6 +5375,23 @@ vdev_raidz_asize(uint64_t ndisks, uint64_t nparity, uint64_t ashift,
 	return (asize);
 }
 /*
 * Derived from the function of the same name in module/zfs/vdev_draid.c.
 * Computes the number of bytes allocated for a block of blksize bytes: the
 * block is split into rows that each hold (ndisks - nparity) data sectors
 * of (1 << ashift) bytes, and every row occupies ndisks sectors.
 */
 static uint64_t
 vdev_draid_asize(uint64_t ndisks, uint64_t nparity, uint64_t ashift,
     uint64_t blksize)
 {
 	ASSERT3U(ndisks, >, nparity);
 	/* Data bytes stored by one full row. */
 	const uint64_t row_bytes = (ndisks - nparity) << ashift;
 	/* Rows required, counting a partially filled row as a whole one. */
 	const uint64_t nrows = 1 + (blksize - 1) / row_bytes;
 	return ((nrows * ndisks) << ashift);
 }
 /*
 * Determine how much space will be allocated if it lands on the most space-
 * inefficient top-level vdev.  Returns the size in bytes required to store one
@ -5374,7 +5401,7 @@ static uint64_t
 volsize_from_vdevs(zpool_handle_t *zhp, uint64_t nblocks, uint64_t blksize)
 {
 	nvlist_t *config, *tree, **vdevs;
-	uint_t nvdevs, v;
+	uint_t nvdevs;
 	uint64_t ret = 0;
 	config = zpool_get_config(zhp, NULL);
@ -5384,33 +5411,61 @@ volsize_from_vdevs(zpool_handle_t *zhp, uint64_t nblocks, uint64_t blksize)
 		return (nblocks * blksize);
 	}
-	for (v = 0; v < nvdevs; v++) {
+	for (int v = 0; v < nvdevs; v++) {
 		char *type;
 		uint64_t nparity, ashift, asize, tsize;
 		nvlist_t **disks;
 		uint_t ndisks;
 		uint64_t volsize;
 		if (nvlist_lookup_string(vdevs[v], ZPOOL_CONFIG_TYPE,
-		    &type) != 0 || strcmp(type, VDEV_TYPE_RAIDZ) != 0 ||
+		    &type) != 0)
-		    nvlist_lookup_uint64(vdevs[v], ZPOOL_CONFIG_NPARITY,
+			continue;
-		    &nparity) != 0 ||
+
-		    nvlist_lookup_uint64(vdevs[v], ZPOOL_CONFIG_ASHIFT,
+		if (strcmp(type, VDEV_TYPE_RAIDZ) != 0 &&
-		    &ashift) != 0 ||
+		    strcmp(type, VDEV_TYPE_DRAID) != 0)
-		    nvlist_lookup_nvlist_array(vdevs[v], ZPOOL_CONFIG_CHILDREN,
+			continue;
-		    &disks, &ndisks) != 0) {
+
 		if (nvlist_lookup_uint64(vdevs[v],
 		    ZPOOL_CONFIG_NPARITY, &nparity) != 0)
 			continue;
 		if (nvlist_lookup_uint64(vdevs[v],
 		    ZPOOL_CONFIG_ASHIFT, &ashift) != 0)
 			continue;
 		if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
 			nvlist_t **disks;
 			uint_t ndisks;
 			if (nvlist_lookup_nvlist_array(vdevs[v],
 			    ZPOOL_CONFIG_CHILDREN, &disks, &ndisks) != 0)
 				continue;
 		}
 			/* allocation size for the "typical" 128k block */
 			tsize = vdev_raidz_asize(ndisks, nparity, ashift,
 			    SPA_OLD_MAXBLOCKSIZE);
 			/* allocation size for the blksize block */
-		asize = vdev_raidz_asize(ndisks, nparity, ashift, blksize);
+			asize = vdev_raidz_asize(ndisks, nparity, ashift,
 			    blksize);
 		} else {
 			uint64_t ndata;
 			if (nvlist_lookup_uint64(vdevs[v],
 			    ZPOOL_CONFIG_DRAID_NDATA, &ndata) != 0)
 				continue;
 			/* allocation size for the "typical" 128k block */
 			tsize = vdev_draid_asize(ndata + nparity, nparity,
 			    ashift, SPA_OLD_MAXBLOCKSIZE);
 			/* allocation size for the blksize block */
 			asize = vdev_draid_asize(ndata + nparity, nparity,
 			    ashift, blksize);
 		}
 		/*
-		 * Scale this size down as a ratio of 128k / tsize.  See theory
+		 * Scale this size down as a ratio of 128k / tsize.
-		 * statement above.
+		 * See theory statement above.
 		 */
 		volsize = nblocks * asize * SPA_OLD_MAXBLOCKSIZE / tsize;
 		if (volsize > ret) {
--- a/lib/libzfs/libzfs_import.c
+++ b/lib/libzfs/libzfs_import.c
@ -112,7 +112,6 @@ refresh_config_libzfs(void *handle, nvlist_t *tryconfig)
 	return (refresh_config((libzfs_handle_t *)handle, tryconfig));
 }
 static int
 pool_active_libzfs(void *handle, const char *name, uint64_t guid,
    boolean_t *isactive)
--- a/lib/libzfs/libzfs_pool.c
+++ b/lib/libzfs/libzfs_pool.c
@ -42,10 +42,10 @@
 #include <sys/efi_partition.h>
 #include <sys/systeminfo.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/zfs_sysfs.h>
 #include <sys/vdev_disk.h>
 #include <dlfcn.h>
 #include <libzutil.h>
 #include "zfs_namecheck.h"
 #include "zfs_prop.h"
 #include "libzfs_impl.h"
@ -481,7 +481,8 @@ zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
 			if (err != 0) {
 				ASSERT3U(err, ==, ENOENT);
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-				    "invalid feature '%s'"), fname);
+				    "feature '%s' unsupported by kernel"),
 				    fname);
 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
 				goto error;
 			}
@ -960,6 +961,7 @@ zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
 	if (ret == 0 && !isopen &&
 	    (strncmp(pool, "mirror", 6) == 0 ||
 	    strncmp(pool, "raidz", 5) == 0 ||
 	    strncmp(pool, "draid", 5) == 0 ||
 	    strncmp(pool, "spare", 5) == 0 ||
 	    strcmp(pool, "log") == 0)) {
 		if (hdl != NULL)
@ -1186,6 +1188,37 @@ zpool_has_special_vdev(nvlist_t *nvroot)
 	return (B_FALSE);
 }
 /*
 * Output a dRAID top-level vdev name in to the provided buffer.
 */
 static char *
 zpool_draid_name(char *name, int len, uint64_t data, uint64_t parity,
    uint64_t spares, uint64_t children)
 {
 	snprintf(name, len, "%s%llu:%llud:%lluc:%llus",
 	    VDEV_TYPE_DRAID, (u_longlong_t)parity, (u_longlong_t)data,
 	    (u_longlong_t)children, (u_longlong_t)spares);
 	return (name);
 }
 /*
 * Return B_TRUE if the provided name is a dRAID spare name.
 */
 boolean_t
 zpool_is_draid_spare(const char *name)
 {
 	uint64_t spare_id, parity, vdev_id;
 	if (sscanf(name, VDEV_TYPE_DRAID "%llu-%llu-%llu",
 	    (u_longlong_t *)&parity, (u_longlong_t *)&vdev_id,
 	    (u_longlong_t *)&spare_id) == 3) {
 		return (B_TRUE);
 	}
 	return (B_FALSE);
 }
 /*
 * Create the named pool, using the provided vdev list.  It is assumed
 * that the consumer has already validated the contents of the nvlist, so we
@ -2668,6 +2701,11 @@ zpool_vdev_is_interior(const char *name)
 	    VDEV_TYPE_REPLACING, strlen(VDEV_TYPE_REPLACING)) == 0 ||
 	    strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0)
 		return (B_TRUE);
 	if (strncmp(name, VDEV_TYPE_DRAID, strlen(VDEV_TYPE_DRAID)) == 0 &&
 	    !zpool_is_draid_spare(name))
 		return (B_TRUE);
 	return (B_FALSE);
 }
@ -3101,7 +3139,8 @@ is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
 		verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
 		    &type) == 0);
-		if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
+		if ((strcmp(type, VDEV_TYPE_SPARE) == 0 ||
 		    strcmp(type, VDEV_TYPE_DRAID_SPARE) == 0) &&
 		    children == 2 && child[which] == tgt)
 			return (B_TRUE);
@ -3216,8 +3255,12 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
 				    "cannot replace a log with a spare"));
 			} else if (rebuild) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-				    "only mirror vdevs support sequential "
+				    "only mirror and dRAID vdevs support "
-				    "reconstruction"));
+				    "sequential reconstruction"));
 			} else if (zpool_is_draid_spare(new_disk)) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "dRAID spares can only replace child "
 				    "devices in their parent's dRAID vdev"));
 			} else if (version >= SPA_VERSION_MULTI_REPLACE) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "already in replacing/spare config; wait "
@ -3618,6 +3661,12 @@ zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
 	(void) snprintf(msg, sizeof (msg),
 	    dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
 	if (zpool_is_draid_spare(path)) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "dRAID spares cannot be removed"));
 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
 	}
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
 	    &islog)) == NULL)
@ -3955,9 +4004,10 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
 		}
 		/*
-		 * Remove the partition from the path it this is a whole disk.
+		 * Remove the partition from the path if this is a whole disk.
 		 */
-		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &value)
+		if (strcmp(type, VDEV_TYPE_DRAID_SPARE) != 0 &&
 		    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &value)
 		    == 0 && value && !(name_flags & VDEV_NAME_PATH)) {
 			return (zfs_strip_partition(path));
 		}
@ -3975,6 +4025,27 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
 			path = buf;
 		}
 		/*
 		 * If it's a dRAID device, we add parity, groups, and spares.
 		 */
 		if (strcmp(path, VDEV_TYPE_DRAID) == 0) {
 			uint64_t ndata, nparity, nspares;
 			nvlist_t **child;
 			uint_t children;
 			verify(nvlist_lookup_nvlist_array(nv,
 			    ZPOOL_CONFIG_CHILDREN, &child, &children) == 0);
 			verify(nvlist_lookup_uint64(nv,
 			    ZPOOL_CONFIG_NPARITY, &nparity) == 0);
 			verify(nvlist_lookup_uint64(nv,
 			    ZPOOL_CONFIG_DRAID_NDATA, &ndata) == 0);
 			verify(nvlist_lookup_uint64(nv,
 			    ZPOOL_CONFIG_DRAID_NSPARES, &nspares) == 0);
 			path = zpool_draid_name(buf, sizeof (buf), ndata,
 			    nparity, nspares, children);
 		}
 		/*
 		 * We identify each top-level vdev by using a <type-id>
 		 * naming convention.
--- a/lib/libzpool/Makefile.am
+++ b/lib/libzpool/Makefile.am
@ -124,6 +124,8 @@ KERNEL_C = \
 	unique.c \
 	vdev.c \
 	vdev_cache.c \
 	vdev_draid.c \
 	vdev_draid_rand.c \
 	vdev_file.c \
 	vdev_indirect_births.c \
 	vdev_indirect.c \
@ -216,7 +218,7 @@ libzpool_la_LIBADD = \
 	$(abs_top_builddir)/lib/libnvpair/libnvpair.la \
 	$(abs_top_builddir)/lib/libzstd/libzstd.la
-libzpool_la_LIBADD += $(LIBCLOCK_GETTIME) $(ZLIB_LIBS) -ldl
+libzpool_la_LIBADD += $(LIBCLOCK_GETTIME) $(ZLIB_LIBS) -ldl -lm
 libzpool_la_LDFLAGS = -pthread
--- a/man/man1/raidz_test.1
+++ b/man/man1/raidz_test.1
@ -61,6 +61,11 @@ during testing.
 .IP
 Size of data for raidz block. Size is 1 << (zio_size_shift).
 .HP
 .BI "\-r" " reflow_offset" " (default: uint max)"
 .IP
 Set raidz expansion offset. The expanded raidz map allocation function will
 produce different map configurations depending on this value.
 .HP
 .BI "\-S(weep)"
 .IP
 Sweep parameter space while verifying the raidz implementations. This option
@ -77,6 +82,10 @@ This options starts the benchmark mode. All implementations are benchmarked
 using increasing per disk data size. Results are given as throughput per disk,
 measured in MiB/s.
 .HP
 .BI "\-e(xpansion)"
 .IP
 Use expanded raidz map allocation function.
 .HP
 .BI "\-v(erbose)"
 .IP
 Increase verbosity.
--- a/man/man1/ztest.1
+++ b/man/man1/ztest.1
@ -23,6 +23,7 @@
 .\" Copyright (c) 2009 Oracle and/or its affiliates. All rights reserved.
 .\" Copyright (c) 2009 Michael Gebetsroither <michael.geb@gmx.at>. All rights
 .\" reserved.
 .\" Copyright (c) 2017, Intel Corporation.
 .\"
 .TH ZTEST 1 "Aug 24, 2020" OpenZFS
@ -82,13 +83,29 @@ Used alignment in test.
 .IP
 Number of mirror copies.
 .HP
-.BI "\-r" " raidz_disks" " (default: 4)"
+.BI "\-r" " raidz_disks / draid_disks" " (default: 4 / 16)"
 .IP
 Number of raidz disks.
 .HP
-.BI "\-R" " raidz_parity" " (default: 1)"
+.BI "\-R" " raid_parity" " (default: 1)"
 .IP
-Raidz parity.
+Raid parity (raidz & draid).
 .HP
 .BI "\-K" " raid_kind" " (default: 'random') raidz|draid|random"
 .IP
 The kind of RAID config to use. With 'random' the kind alternates between raidz and draid.
 .HP
 .BI "\-D" " draid_data" " (default: 4)"
 .IP
 Number of data disks in a dRAID redundancy group.
 .HP
 .BI "\-S" " draid_spares" " (default: 1)"
 .IP
 Number of dRAID distributed spare disks.
 .HP
 .BI "\-C" " vdev_class_state" " (default: random)"
 .IP
 The vdev allocation class state: special=on|off|random.
 .HP
 .BI "\-d" " datasets" " (default: 7)"
 .IP
--- a/man/man5/zfs-module-parameters.5
+++ b/man/man5/zfs-module-parameters.5
@ -2902,6 +2902,31 @@ top-level vdev.
 Default value: \fB1,048,576\fR.
 .RE
 .sp
 .ne 2
 .na
 \fBzfs_rebuild_scrub_enabled\fR (int)
 .ad
 .RS 12n
 Automatically start a pool scrub when the last active sequential resilver
 completes in order to verify the checksums of all blocks which have been
 resilvered. This option is enabled by default and is strongly recommended.
 .sp
 Default value: \fB1\fR.
 .RE
 .sp
 .ne 2
 .na
 \fBzfs_rebuild_vdev_limit\fR (ulong)
 .ad
 .RS 12n
 Maximum amount of i/o that can be concurrently issued for a sequential
 resilver per leaf device, given in bytes.
 .sp
 Default value: \fB33,554,432\fR.
 .RE
 .sp
 .ne 2
 .na
--- a/man/man5/zpool-features.5
+++ b/man/man5/zpool-features.5
@ -306,6 +306,30 @@ This feature becomes \fBactive\fR when the \fBzpool remove\fR subcommand is used
 on a top-level vdev, and will never return to being \fBenabled\fR.
 .RE
 .sp
 .ne 2
 .na
 \fBdraid\fR
 .ad
 .RS 4n
 .TS
 l l .
 GUID	org.openzfs:draid
 READ\-ONLY COMPATIBLE	no
 DEPENDENCIES	none
 .TE
 This feature enables use of the \fBdraid\fR vdev type.  dRAID is a variant
 of raidz which provides integrated distributed hot spares that allow faster
 resilvering while retaining the benefits of raidz.  Data, parity, and spare
 space are organized in redundancy groups and distributed evenly over all of
 the devices.
 This feature becomes \fBactive\fR when creating a pool which uses the
 \fBdraid\fR vdev type, or when adding a new \fBdraid\fR vdev to an
 existing pool.
 .RE
 .sp
 .ne 2
 .na
--- a/man/man8/zpool-create.8
+++ b/man/man8/zpool-create.8
@ -73,12 +73,14 @@ and period
 The pool names
 .Sy mirror ,
 .Sy raidz ,
 .Sy draid ,
 .Sy spare
 and
 .Sy log
 are reserved, as are names beginning with
 .Sy mirror ,
 .Sy raidz ,
 .Sy draid ,
 .Sy spare ,
 and the pattern
 .Sy c[0-9] .
--- a/man/man8/zpool-scrub.8
+++ b/man/man8/zpool-scrub.8
@ -52,7 +52,7 @@ Begins a scrub or resumes a paused scrub.
 The scrub examines all data in the specified pools to verify that it checksums
 correctly.
 For replicated
-.Pq mirror or raidz
+.Pq mirror, raidz, or draid
 devices, ZFS automatically repairs any damage discovered during the scrub.
 The
 .Nm zpool Cm status
--- a/man/man8/zpoolconcepts.8
+++ b/man/man8/zpoolconcepts.8
@ -64,7 +64,7 @@ A file must be specified by a full path.
 A mirror of two or more devices.
 Data is replicated in an identical fashion across all components of a mirror.
 A mirror with N disks of size X can hold X bytes and can withstand (N-1) devices
-failing before data integrity is compromised.
+failing without losing data.
 .It Sy raidz , raidz1 , raidz2 , raidz3
 A variation on RAID-5 that allows for better distribution of parity and
 eliminates the RAID-5
@ -88,11 +88,75 @@ vdev type is an alias for
 .Sy raidz1 .
 .Pp
 A raidz group with N disks of size X with P parity disks can hold approximately
-(N-P)*X bytes and can withstand P device(s) failing before data integrity is
+(N-P)*X bytes and can withstand P device(s) failing without losing data.
-compromised.
 The minimum number of devices in a raidz group is one more than the number of
 parity disks.
 The recommended number is between 3 and 9 to help increase performance.
 .It Sy draid , draid1 , draid2 , draid3
 A variant of raidz that provides integrated distributed hot spares which
 allows for faster resilvering while retaining the benefits of raidz.
 A dRAID vdev is constructed from multiple internal raidz groups, each with D
 data devices and P parity devices.
 These groups are distributed over all of the children in order to fully
 utilize the available disk performance.
 .Pp
 Unlike raidz, dRAID uses a fixed stripe width (padding as necessary with
 zeros) to allow fully sequential resilvering.
 This fixed stripe width significantly affects both usable capacity and IOPS.
 For example, with the default D=8 and 4k disk sectors the minimum allocation
 size is 32k.
 If using compression, this relatively large allocation size can reduce the
 effective compression ratio.
 When using ZFS volumes and dRAID the default volblocksize property is increased
 to account for the allocation size.
 If a dRAID pool will hold a significant amount of small blocks, it is
 recommended to also add a mirrored
 .Sy special
 vdev to store those blocks.
 .Pp
 In regards to IO/s, performance is similar to raidz since for any read all D
 data disks must be accessed.
 Delivered random IOPS can be reasonably approximated as
 floor((N-S)/(D+P))*<single-drive-IOPS>.
 .Pp
 Like raidz a dRAID can have single-, double-, or triple-parity.  The
 .Sy draid1 ,
 .Sy draid2 ,
 and
 .Sy draid3
 types can be used to specify the parity level.
 The
 .Sy draid
 vdev type is an alias for
 .Sy draid1 .
 .Pp
 A dRAID with N disks of size X, D data disks per redundancy group, P parity
 level, and S distributed hot spares can hold approximately (N-S)*(D/(D+P))*X
 bytes and can withstand P device(s) failing without losing data.
 .It Sy draid[<parity>][:<data>d][:<children>c][:<spares>s]
 A non-default dRAID configuration can be specified by appending one or more
 of the following optional arguments to the
 .Sy draid
 keyword.
 .Pp
 .Em parity
 - The parity level (1-3).
 .Pp
 .Em data
 - The number of data devices per redundancy group.
 In general a smaller value of D will increase IOPS, improve the compression ratio, and speed up resilvering at the expense of total usable capacity.
 Defaults to 8, unless N-P-S is less than 8.
 .Pp
 .Em children
 - The expected number of children.
 Useful as a cross-check when listing a large number of devices.
 An error is returned when the provided number of children differs.
 .Pp
 .Em spares
 - The number of distributed hot spares.
 Defaults to zero.
 .Pp
 .Pp
 .It Sy spare
 A pseudo-vdev which keeps track of available hot spares for a pool.
 For more information, see the
@ -273,6 +337,14 @@ If the original faulted device is detached, then the hot spare assumes its
 place in the configuration, and is removed from the spare list of all active
 pools.
 .Pp
 The
 .Sy draid
 vdev type provides distributed hot spares.
 These hot spares are named after the dRAID vdev they're a part of
 .Po Qq draid1-2-3
 specifies spare 3 of vdev 2, which is a single parity dRAID
 .Pc
 and may only be used by that dRAID vdev.
 Otherwise, they behave the same as normal hot spares.
 .Pp
 Spares cannot replace log devices.
 .Ss Intent Log
 The ZFS Intent Log (ZIL) satisfies POSIX requirements for synchronous
--- a/module/Makefile.bsd
+++ b/module/Makefile.bsd
@ -243,6 +243,8 @@ SRCS+=	abd.c \
 	unique.c \
 	vdev.c \
 	vdev_cache.c \
 	vdev_draid.c \
 	vdev_draid_rand.c \
 	vdev_indirect.c \
 	vdev_indirect_births.c \
 	vdev_indirect_mapping.c \
@ -341,6 +343,7 @@ CFLAGS.lz4.c= -Wno-cast-qual
 CFLAGS.spa.c= -Wno-cast-qual
 CFLAGS.spa_misc.c= -Wno-cast-qual
 CFLAGS.sysctl_os.c= -include ../zfs_config.h
 CFLAGS.vdev_draid.c= -Wno-cast-qual
 CFLAGS.vdev_raidz.c= -Wno-cast-qual
 CFLAGS.vdev_raidz_math.c= -Wno-cast-qual
 CFLAGS.vdev_raidz_math_scalar.c= -Wno-cast-qual
--- a/module/os/freebsd/zfs/vdev_file.c
+++ b/module/os/freebsd/zfs/vdev_file.c
@ -292,19 +292,28 @@ vdev_file_io_done(zio_t *zio)
 }
 vdev_ops_t vdev_file_ops = {
-	vdev_file_open,
+	.vdev_op_init = NULL,
-	vdev_file_close,
+	.vdev_op_fini = NULL,
-	vdev_default_asize,
+	.vdev_op_open = vdev_file_open,
-	vdev_file_io_start,
+	.vdev_op_close = vdev_file_close,
-	vdev_file_io_done,
+	.vdev_op_asize = vdev_default_asize,
-	NULL,
+	.vdev_op_min_asize = vdev_default_min_asize,
-	NULL,
+	.vdev_op_min_alloc = NULL,
-	vdev_file_hold,
+	.vdev_op_io_start = vdev_file_io_start,
-	vdev_file_rele,
+	.vdev_op_io_done = vdev_file_io_done,
-	NULL,
+	.vdev_op_state_change = NULL,
-	vdev_default_xlate,
+	.vdev_op_need_resilver = NULL,
-	VDEV_TYPE_FILE,		/* name of this vdev type */
+	.vdev_op_hold = vdev_file_hold,
-	B_TRUE			/* leaf vdev */
+	.vdev_op_rele = vdev_file_rele,
 	.vdev_op_remap = NULL,
 	.vdev_op_xlate = vdev_default_xlate,
 	.vdev_op_rebuild_asize = NULL,
 	.vdev_op_metaslab_init = NULL,
 	.vdev_op_config_generate = NULL,
 	.vdev_op_nparity = NULL,
 	.vdev_op_ndisks = NULL,
 	.vdev_op_type = VDEV_TYPE_FILE,		/* name of this vdev type */
 	.vdev_op_leaf = B_TRUE			/* leaf vdev */
 };
 /*
@ -313,19 +322,28 @@ vdev_ops_t vdev_file_ops = {
 #ifndef _KERNEL
 vdev_ops_t vdev_disk_ops = {
-	vdev_file_open,
+	.vdev_op_init = NULL,
-	vdev_file_close,
+	.vdev_op_fini = NULL,
-	vdev_default_asize,
+	.vdev_op_open = vdev_file_open,
-	vdev_file_io_start,
+	.vdev_op_close = vdev_file_close,
-	vdev_file_io_done,
+	.vdev_op_asize = vdev_default_asize,
-	NULL,
+	.vdev_op_min_asize = vdev_default_min_asize,
-	NULL,
+	.vdev_op_min_alloc = NULL,
-	vdev_file_hold,
+	.vdev_op_io_start = vdev_file_io_start,
-	vdev_file_rele,
+	.vdev_op_io_done = vdev_file_io_done,
-	NULL,
+	.vdev_op_state_change = NULL,
-	vdev_default_xlate,
+	.vdev_op_need_resilver = NULL,
-	VDEV_TYPE_DISK,		/* name of this vdev type */
+	.vdev_op_hold = vdev_file_hold,
-	B_TRUE			/* leaf vdev */
+	.vdev_op_rele = vdev_file_rele,
 	.vdev_op_remap = NULL,
 	.vdev_op_xlate = vdev_default_xlate,
 	.vdev_op_rebuild_asize = NULL,
 	.vdev_op_metaslab_init = NULL,
 	.vdev_op_config_generate = NULL,
 	.vdev_op_nparity = NULL,
 	.vdev_op_ndisks = NULL,
 	.vdev_op_type = VDEV_TYPE_DISK,		/* name of this vdev type */
 	.vdev_op_leaf = B_TRUE			/* leaf vdev */
 };
 #endif
--- a/module/os/freebsd/zfs/vdev_geom.c
+++ b/module/os/freebsd/zfs/vdev_geom.c
@ -1189,17 +1189,26 @@ vdev_geom_rele(vdev_t *vd)
 }
 vdev_ops_t vdev_disk_ops = {
-	vdev_geom_open,
+	.vdev_op_init = NULL,
-	vdev_geom_close,
+	.vdev_op_fini = NULL,
-	vdev_default_asize,
+	.vdev_op_open = vdev_geom_open,
-	vdev_geom_io_start,
+	.vdev_op_close = vdev_geom_close,
-	vdev_geom_io_done,
+	.vdev_op_asize = vdev_default_asize,
-	NULL,
+	.vdev_op_min_asize = vdev_default_min_asize,
-	NULL,
+	.vdev_op_min_alloc = NULL,
-	vdev_geom_hold,
+	.vdev_op_io_start = vdev_geom_io_start,
-	vdev_geom_rele,
+	.vdev_op_io_done = vdev_geom_io_done,
-	NULL,
+	.vdev_op_state_change = NULL,
-	vdev_default_xlate,
+	.vdev_op_need_resilver = NULL,
-	VDEV_TYPE_DISK,		/* name of this vdev type */
+	.vdev_op_hold = vdev_geom_hold,
-	B_TRUE			/* leaf vdev */
+	.vdev_op_rele = vdev_geom_rele,
 	.vdev_op_remap = NULL,
 	.vdev_op_xlate = vdev_default_xlate,
 	.vdev_op_rebuild_asize = NULL,
 	.vdev_op_metaslab_init = NULL,
 	.vdev_op_config_generate = NULL,
 	.vdev_op_nparity = NULL,
 	.vdev_op_ndisks = NULL,
 	.vdev_op_type = VDEV_TYPE_DISK,		/* name of this vdev type */
 	.vdev_op_leaf = B_TRUE			/* leaf vdev */
 };
--- a/module/os/linux/zfs/vdev_disk.c
+++ b/module/os/linux/zfs/vdev_disk.c
@ -826,9 +826,13 @@ vdev_disk_rele(vdev_t *vd)
 }
 vdev_ops_t vdev_disk_ops = {
 	.vdev_op_init = NULL,
 	.vdev_op_fini = NULL,
 	.vdev_op_open = vdev_disk_open,
 	.vdev_op_close = vdev_disk_close,
 	.vdev_op_asize = vdev_default_asize,
 	.vdev_op_min_asize = vdev_default_min_asize,
 	.vdev_op_min_alloc = NULL,
 	.vdev_op_io_start = vdev_disk_io_start,
 	.vdev_op_io_done = vdev_disk_io_done,
 	.vdev_op_state_change = NULL,
@ -837,6 +841,11 @@ vdev_ops_t vdev_disk_ops = {
 	.vdev_op_rele = vdev_disk_rele,
 	.vdev_op_remap = NULL,
 	.vdev_op_xlate = vdev_default_xlate,
 	.vdev_op_rebuild_asize = NULL,
 	.vdev_op_metaslab_init = NULL,
 	.vdev_op_config_generate = NULL,
 	.vdev_op_nparity = NULL,
 	.vdev_op_ndisks = NULL,
 	.vdev_op_type = VDEV_TYPE_DISK,		/* name of this vdev type */
 	.vdev_op_leaf = B_TRUE			/* leaf vdev */
 };
--- a/module/os/linux/zfs/vdev_file.c
+++ b/module/os/linux/zfs/vdev_file.c
@ -305,9 +305,13 @@ vdev_file_io_done(zio_t *zio)
 }
 vdev_ops_t vdev_file_ops = {
 	.vdev_op_init = NULL,
 	.vdev_op_fini = NULL,
 	.vdev_op_open = vdev_file_open,
 	.vdev_op_close = vdev_file_close,
 	.vdev_op_asize = vdev_default_asize,
 	.vdev_op_min_asize = vdev_default_min_asize,
 	.vdev_op_min_alloc = NULL,
 	.vdev_op_io_start = vdev_file_io_start,
 	.vdev_op_io_done = vdev_file_io_done,
 	.vdev_op_state_change = NULL,
@ -316,6 +320,11 @@ vdev_ops_t vdev_file_ops = {
 	.vdev_op_rele = vdev_file_rele,
 	.vdev_op_remap = NULL,
 	.vdev_op_xlate = vdev_default_xlate,
 	.vdev_op_rebuild_asize = NULL,
 	.vdev_op_metaslab_init = NULL,
 	.vdev_op_config_generate = NULL,
 	.vdev_op_nparity = NULL,
 	.vdev_op_ndisks = NULL,
 	.vdev_op_type = VDEV_TYPE_FILE,		/* name of this vdev type */
 	.vdev_op_leaf = B_TRUE			/* leaf vdev */
 };
@ -341,9 +350,13 @@ vdev_file_fini(void)
 #ifndef _KERNEL
 vdev_ops_t vdev_disk_ops = {
 	.vdev_op_init = NULL,
 	.vdev_op_fini = NULL,
 	.vdev_op_open = vdev_file_open,
 	.vdev_op_close = vdev_file_close,
 	.vdev_op_asize = vdev_default_asize,
 	.vdev_op_min_asize = vdev_default_min_asize,
 	.vdev_op_min_alloc = NULL,
 	.vdev_op_io_start = vdev_file_io_start,
 	.vdev_op_io_done = vdev_file_io_done,
 	.vdev_op_state_change = NULL,
@ -352,6 +365,11 @@ vdev_ops_t vdev_disk_ops = {
 	.vdev_op_rele = vdev_file_rele,
 	.vdev_op_remap = NULL,
 	.vdev_op_xlate = vdev_default_xlate,
 	.vdev_op_rebuild_asize = NULL,
 	.vdev_op_metaslab_init = NULL,
 	.vdev_op_config_generate = NULL,
 	.vdev_op_nparity = NULL,
 	.vdev_op_ndisks = NULL,
 	.vdev_op_type = VDEV_TYPE_DISK,		/* name of this vdev type */
 	.vdev_op_leaf = B_TRUE			/* leaf vdev */
 };
--- a/module/zcommon/zfeature_common.c
+++ b/module/zcommon/zfeature_common.c
@ -576,7 +576,7 @@ zpool_feature_init(void)
 	zfeature_register(SPA_FEATURE_DEVICE_REBUILD,
 	    "org.openzfs:device_rebuild", "device_rebuild",
-	    "Support for sequential device rebuilds",
+	    "Support for sequential mirror/dRAID device rebuilds",
 	    ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN, NULL);
 	{
@ -589,6 +589,10 @@ zpool_feature_init(void)
 	    "zstd compression algorithm support.",
 	    ZFEATURE_FLAG_PER_DATASET, ZFEATURE_TYPE_BOOLEAN, zstd_deps);
 	}
 	zfeature_register(SPA_FEATURE_DRAID,
 	    "org.openzfs:draid", "draid", "Support for distributed parity RAID",
 	    ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL);
 }
 #if defined(_KERNEL)
--- a/module/zcommon/zfs_namecheck.c
+++ b/module/zcommon/zfs_namecheck.c
@ -442,7 +442,9 @@ pool_namecheck(const char *pool, namecheck_err_t *why, char *what)
 		return (-1);
 	}
-	if (strcmp(pool, "mirror") == 0 || strcmp(pool, "raidz") == 0) {
+	if (strcmp(pool, "mirror") == 0 ||
 	    strcmp(pool, "raidz") == 0 ||
 	    strcmp(pool, "draid") == 0) {
 		if (why)
 			*why = NAME_ERR_RESERVED;
 		return (-1);
--- a/module/zfs/Makefile.in
+++ b/module/zfs/Makefile.in
@ -84,6 +84,8 @@ $(MODULE)-objs += uberblock.o
 $(MODULE)-objs += unique.o
 $(MODULE)-objs += vdev.o
 $(MODULE)-objs += vdev_cache.o
 $(MODULE)-objs += vdev_draid.o
 $(MODULE)-objs += vdev_draid_rand.o
 $(MODULE)-objs += vdev_indirect.o
 $(MODULE)-objs += vdev_indirect_births.o
 $(MODULE)-objs += vdev_indirect_mapping.o
--- a/module/zfs/abd.c
+++ b/module/zfs/abd.c
@ -781,16 +781,17 @@ int
 abd_iterate_func(abd_t *abd, size_t off, size_t size,
    abd_iter_func_t *func, void *private)
 {
 	int ret = 0;
 	struct abd_iter aiter;
-	boolean_t abd_multi;
+	int ret = 0;
-	abd_t *c_abd;
+
 	if (size == 0)
 		return (0);
 	abd_verify(abd);
 	ASSERT3U(off + size, <=, abd->abd_size);
-	abd_multi = abd_is_gang(abd);
+	boolean_t abd_multi = abd_is_gang(abd);
-	c_abd = abd_init_abd_iter(abd, &aiter, off);
+	abd_t *c_abd = abd_init_abd_iter(abd, &aiter, off);
 	while (size > 0) {
 		/* If we are at the end of the gang ABD we are done */
@ -920,6 +921,9 @@ abd_iterate_func2(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff,
 	boolean_t dabd_is_gang_abd, sabd_is_gang_abd;
 	abd_t *c_dabd, *c_sabd;
 	if (size == 0)
 		return (0);
 	abd_verify(dabd);
 	abd_verify(sabd);
--- a/module/zfs/dsl_scan.c
+++ b/module/zfs/dsl_scan.c
@ -713,7 +713,7 @@ dsl_scan_setup_check(void *arg, dmu_tx_t *tx)
 	return (0);
 }
-static void
+void
 dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
 {
 	dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
@ -3327,20 +3327,13 @@ dsl_scan_need_resilver(spa_t *spa, const dva_t *dva, size_t psize,
 		return (B_TRUE);
 	}
 	/*
 	 * Check if the txg falls within the range which must be
 	 * resilvered.  DVAs outside this range can always be skipped.
 	 */
 	if (!vdev_dtl_contains(vd, DTL_PARTIAL, phys_birth, 1))
 		return (B_FALSE);
 	/*
 	 * Check if the top-level vdev must resilver this offset.
 	 * When the offset does not intersect with a dirty leaf DTL
 	 * then it may be possible to skip the resilver IO.  The psize
 	 * is provided instead of asize to simplify the check for RAIDZ.
 	 */
-	if (!vdev_dtl_need_resilver(vd, DVA_GET_OFFSET(dva), psize))
+	if (!vdev_dtl_need_resilver(vd, dva, psize, phys_birth))
 		return (B_FALSE);
 	/*
--- a/module/zfs/metaslab.c
+++ b/module/zfs/metaslab.c
@ -32,6 +32,7 @@
 #include <sys/space_map.h>
 #include <sys/metaslab_impl.h>
 #include <sys/vdev_impl.h>
 #include <sys/vdev_draid.h>
 #include <sys/zio.h>
 #include <sys/spa_impl.h>
 #include <sys/zfeature.h>
@ -1563,6 +1564,7 @@ metaslab_block_find(zfs_btree_t *t, range_tree_t *rt, uint64_t start,
 #if defined(WITH_DF_BLOCK_ALLOCATOR) || \
    defined(WITH_CF_BLOCK_ALLOCATOR)
 /*
 * This is a helper function that can be used by the allocator to find a
 * suitable block to allocate. This will search the specified B-tree looking
@ -1654,6 +1656,7 @@ metaslab_df_alloc(metaslab_t *msp, uint64_t size)
 		range_seg_t *rs;
 		if (zfs_btree_numnodes(&msp->ms_allocatable_by_size) == 0)
 			metaslab_size_tree_full_load(msp->ms_allocatable);
 		if (metaslab_df_use_largest_segment) {
 			/* use largest free segment */
 			rs = zfs_btree_last(&msp->ms_allocatable_by_size, NULL);
@ -2616,6 +2619,10 @@ metaslab_init(metaslab_group_t *mg, uint64_t id, uint64_t object,
 	ms->ms_allocator = -1;
 	ms->ms_new = B_TRUE;
 	vdev_ops_t *ops = vd->vdev_ops;
 	if (ops->vdev_op_metaslab_init != NULL)
 		ops->vdev_op_metaslab_init(vd, &ms->ms_start, &ms->ms_size);
 	/*
 	 * We only open space map objects that already exist. All others
 	 * will be opened when we finally allocate an object for it.
@ -5813,7 +5820,6 @@ metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp,
 			metaslab_group_alloc_increment(spa,
 			    DVA_GET_VDEV(&dva[d]), zio, flags, allocator);
 		}
 	}
 	ASSERT(error == 0);
 	ASSERT(BP_GET_NDVAS(bp) == ndvas);
--- a/module/zfs/mmp.c
+++ b/module/zfs/mmp.c
@ -307,8 +307,17 @@ mmp_next_leaf(spa_t *spa)
 		if (leaf == NULL)
 			leaf = list_head(&spa->spa_leaf_list);
-		if (!vdev_writeable(leaf)) {
+		/*
 		 * We skip unwritable, offline, detached, and dRAID spare
 		 * devices as they are either not legal targets or the write
 		 * may fail or not be seen by other hosts.  Skipped dRAID
 		 * spares can never be written so the fail mask is not set.
 		 */
 		if (!vdev_writeable(leaf) || leaf->vdev_offline ||
 		    leaf->vdev_detached) {
 			fail_mask |= MMP_FAIL_NOT_WRITABLE;
 		} else if (leaf->vdev_ops == &vdev_draid_spare_ops) {
 			continue;
 		} else if (leaf->vdev_mmp_pending != 0) {
 			fail_mask |= MMP_FAIL_WRITE_PENDING;
 		} else {
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@ -60,6 +60,7 @@
 #include <sys/vdev_rebuild.h>
 #include <sys/vdev_trim.h>
 #include <sys/vdev_disk.h>
 #include <sys/vdev_draid.h>
 #include <sys/metaslab.h>
 #include <sys/metaslab_impl.h>
 #include <sys/mmp.h>
@ -3681,7 +3682,14 @@ spa_ld_trusted_config(spa_t *spa, spa_import_type_t type,
 	/*
 	 * Build a new vdev tree from the trusted config
 	 */
-	VERIFY(spa_config_parse(spa, &mrvd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0);
+	error = spa_config_parse(spa, &mrvd, nv, NULL, 0, VDEV_ALLOC_LOAD);
 	if (error != 0) {
 		nvlist_free(mos_config);
 		spa_config_exit(spa, SCL_ALL, FTAG);
 		spa_load_failed(spa, "spa_config_parse failed [error=%d]",
 		    error);
 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error));
 	}
 	/*
 	 * Vdev paths in the MOS may be obsolete. If the untrusted config was
@ -5631,7 +5639,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
 	uint64_t txg = TXG_INITIAL;
 	nvlist_t **spares, **l2cache;
 	uint_t nspares, nl2cache;
-	uint64_t version, obj;
+	uint64_t version, obj, ndraid = 0;
 	boolean_t has_features;
 	boolean_t has_encryption;
 	boolean_t has_allocclass;
@ -5753,8 +5761,8 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
 	if (error == 0 &&
 	    (error = vdev_create(rvd, txg, B_FALSE)) == 0 &&
-	    (error = spa_validate_aux(spa, nvroot, txg,
+	    (error = vdev_draid_spare_create(nvroot, rvd, &ndraid, 0)) == 0 &&
-	    VDEV_ALLOC_ADD)) == 0) {
+	    (error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) == 0) {
 		/*
 		 * instantiate the metaslab groups (this will dirty the vdevs)
 		 * we can no longer error exit past this point
@ -5895,6 +5903,9 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
 		spa_sync_props(props, tx);
 	}
 	for (int i = 0; i < ndraid; i++)
 		spa_feature_incr(spa, SPA_FEATURE_DRAID, tx);
 	dmu_tx_commit(tx);
 	spa->spa_sync_on = B_TRUE;
@ -6403,13 +6414,26 @@ spa_reset(const char *pool)
 * ==========================================================================
 */
 /*
  * Sync task callback used to increment the draid feature flag.
  *
  * 'arg' carries an integer count encoded in the pointer value rather than
  * pointing at memory; spa_feature_incr() is called for SPA_FEATURE_DRAID
  * that many times within the provided transaction.
  */
 static void
 spa_draid_feature_incr(void *arg, dmu_tx_t *tx)
 {
 	int remaining = (int)(uintptr_t)arg;
 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
 	/* Count the requested number of increments down to zero. */
 	for (; remaining > 0; remaining--)
 		spa_feature_incr(spa, SPA_FEATURE_DRAID, tx);
 }
 /*
 * Add a device to a storage pool.
 */
 int
 spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
 {
-	uint64_t txg;
+	uint64_t txg, ndraid = 0;
 	int error;
 	vdev_t *rvd = spa->spa_root_vdev;
 	vdev_t *vd, *tvd;
@ -6438,8 +6462,23 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
 		return (spa_vdev_exit(spa, vd, txg, EINVAL));
 	if (vd->vdev_children != 0 &&
-	    (error = vdev_create(vd, txg, B_FALSE)) != 0)
+	    (error = vdev_create(vd, txg, B_FALSE)) != 0) {
 		return (spa_vdev_exit(spa, vd, txg, error));
 	}
 	/*
 	 * The virtual dRAID spares must be added after vdev tree is created
 	 * and the vdev guids are generated.  The guid of their associated
 	 * dRAID is stored in the config and used when opening the spare.
 	 */
 	if ((error = vdev_draid_spare_create(nvroot, vd, &ndraid,
 	    rvd->vdev_children)) == 0) {
 		if (ndraid > 0 && nvlist_lookup_nvlist_array(nvroot,
 		    ZPOOL_CONFIG_SPARES, &spares, &nspares) != 0)
 			nspares = 0;
 	} else {
 		return (spa_vdev_exit(spa, vd, txg, error));
 	}
 	/*
 	 * We must validate the spares and l2cache devices after checking the
@ -6452,7 +6491,7 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
 	 * If we are in the middle of a device removal, we can only add
 	 * devices which match the existing devices in the pool.
 	 * If we are in the middle of a removal, or have some indirect
-	 * vdevs, we can not add raidz toplevels.
+	 * vdevs, we can not add raidz or dRAID top levels.
 	 */
 	if (spa->spa_vdev_removal != NULL ||
 	    spa->spa_removing_phys.sr_prev_indirect_vdev != -1) {
@ -6462,10 +6501,10 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
 			    tvd->vdev_ashift != spa->spa_max_ashift) {
 				return (spa_vdev_exit(spa, vd, txg, EINVAL));
 			}
-			/* Fail if top level vdev is raidz */
+			/* Fail if top level vdev is raidz or a dRAID */
-			if (tvd->vdev_ops == &vdev_raidz_ops) {
+			if (vdev_get_nparity(tvd) != 0)
 				return (spa_vdev_exit(spa, vd, txg, EINVAL));
-			}
+
 			/*
 			 * Need the top level mirror to be
 			 * a mirror of leaf vdevs only
@ -6505,6 +6544,19 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
 		spa->spa_l2cache.sav_sync = B_TRUE;
 	}
 	/*
 	 * We can't increment a feature while holding spa_vdev so we
 	 * have to do it in a synctask.
 	 */
 	if (ndraid != 0) {
 		dmu_tx_t *tx;
 		tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);
 		dsl_sync_task_nowait(spa->spa_dsl_pool, spa_draid_feature_incr,
 		    (void *)(uintptr_t)ndraid, tx);
 		dmu_tx_commit(tx);
 	}
 	/*
 	 * We have to be careful when adding new vdevs to an existing pool.
 	 * If other threads start allocating from these vdevs before we
@ -6615,14 +6667,27 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
 	if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare)
 		return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
 	/*
 	 * A dRAID spare can only replace a child of its parent dRAID vdev.
 	 */
 	if (newvd->vdev_ops == &vdev_draid_spare_ops &&
 	    oldvd->vdev_top != vdev_draid_spare_get_parent(newvd)) {
 		return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
 	}
 	if (rebuild) {
 		/*
-		 * For rebuilds, the parent vdev must support reconstruction
+		 * For rebuilds, the top vdev must support reconstruction
 		 * using only space maps.  This means the only allowable
-		 * parents are the root vdev or a mirror vdev.
+		 * vdev types are the root vdev, a mirror, or dRAID.
 		 */
-		if (pvd->vdev_ops != &vdev_mirror_ops &&
+		tvd = pvd;
-		    pvd->vdev_ops != &vdev_root_ops) {
+		if (pvd->vdev_top != NULL)
 			tvd = pvd->vdev_top;
 		if (tvd->vdev_ops != &vdev_mirror_ops &&
 		    tvd->vdev_ops != &vdev_root_ops &&
 		    tvd->vdev_ops != &vdev_draid_ops) {
 			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
 		}
 	}
@ -6915,14 +6980,20 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
 	}
 	/*
-	 * If we are detaching the original disk from a spare, then it implies
+	 * If we are detaching the original disk from a normal spare, then it
-	 * that the spare should become a real disk, and be removed from the
+	 * implies that the spare should become a real disk, and be removed
-	 * active spare list for the pool.
+	 * from the active spare list for the pool.  dRAID spares on the
 	 * other hand are coupled to the pool and thus should never be removed
 	 * from the spares list.
 	 */
-	if (pvd->vdev_ops == &vdev_spare_ops &&
+	if (pvd->vdev_ops == &vdev_spare_ops && vd->vdev_id == 0) {
-	    vd->vdev_id == 0 &&
+		vdev_t *last_cvd = pvd->vdev_child[pvd->vdev_children - 1];
-	    pvd->vdev_child[pvd->vdev_children - 1]->vdev_isspare)
+
 		if (last_cvd->vdev_isspare &&
 		    last_cvd->vdev_ops != &vdev_draid_spare_ops) {
 			unspare = B_TRUE;
 		}
 	}
 	/*
 	 * Erase the disk labels so the disk can be used for other things.
@ -8013,18 +8084,9 @@ spa_async_thread(void *arg)
 	/*
 	 * If any devices are done replacing, detach them.
 	 */
-	if (tasks & SPA_ASYNC_RESILVER_DONE)
+	if (tasks & SPA_ASYNC_RESILVER_DONE ||
 	    tasks & SPA_ASYNC_REBUILD_DONE) {
 		spa_vdev_resilver_done(spa);
 	/*
 	 * If any devices are done replacing, detach them.  Then if no
 	 * top-level vdevs are rebuilding attempt to kick off a scrub.
 	 */
 	if (tasks & SPA_ASYNC_REBUILD_DONE) {
 		spa_vdev_resilver_done(spa);
 		if (!vdev_rebuild_active(spa->spa_root_vdev))
 			(void) dsl_scan(spa->spa_dsl_pool, POOL_SCAN_SCRUB);
 	}
 	/*
--- a/module/zfs/spa_misc.c
+++ b/module/zfs/spa_misc.c
@ -741,6 +741,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
 	spa->spa_min_ashift = INT_MAX;
 	spa->spa_max_ashift = 0;
 	spa->spa_min_alloc = INT_MAX;
 	/* Reset cached value */
 	spa->spa_dedup_dspace = ~0ULL;
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@ -40,6 +40,7 @@
 #include <sys/dsl_dir.h>
 #include <sys/vdev_impl.h>
 #include <sys/vdev_rebuild.h>
 #include <sys/vdev_draid.h>
 #include <sys/uberblock_impl.h>
 #include <sys/metaslab.h>
 #include <sys/metaslab_impl.h>
@ -51,6 +52,7 @@
 #include <sys/arc.h>
 #include <sys/zil.h>
 #include <sys/dsl_scan.h>
 #include <sys/vdev_raidz.h>
 #include <sys/abd.h>
 #include <sys/vdev_initialize.h>
 #include <sys/vdev_trim.h>
@ -193,6 +195,8 @@ vdev_dbgmsg_print_tree(vdev_t *vd, int indent)
 static vdev_ops_t *vdev_ops_table[] = {
 	&vdev_root_ops,
 	&vdev_raidz_ops,
 	&vdev_draid_ops,
 	&vdev_draid_spare_ops,
 	&vdev_mirror_ops,
 	&vdev_replacing_ops,
 	&vdev_spare_ops,
@ -221,10 +225,11 @@ vdev_getops(const char *type)
 /*
 * Default range translation.  With the updated signature the physical
 * range is set equal to the logical range; remain_rs is accepted but is
 * not written by this function.
 */
 /* ARGSUSED */
 void
-vdev_default_xlate(vdev_t *vd, const range_seg64_t *in, range_seg64_t *res)
+vdev_default_xlate(vdev_t *vd, const range_seg64_t *logical_rs,
    range_seg64_t *physical_rs, range_seg64_t *remain_rs)
 {
-	res->rs_start = in->rs_start;
+	physical_rs->rs_start = logical_rs->rs_start;
-	res->rs_end = in->rs_end;
+	physical_rs->rs_end = logical_rs->rs_end;
 }
 /*
@ -264,6 +269,12 @@ vdev_default_asize(vdev_t *vd, uint64_t psize)
 	return (asize);
 }
 /*
 * Default vdev_op_min_asize implementation: report the vdev's own
 * vdev_min_asize without any adjustment.
 */
 uint64_t
 vdev_default_min_asize(vdev_t *vd)
 {
 	uint64_t min_asize = vd->vdev_min_asize;
 	return (min_asize);
 }
 /*
 * Get the minimum allocatable size. We define the allocatable size as
 * the vdev's asize rounded to the nearest metaslab. This allows us to
@ -289,15 +300,7 @@ vdev_get_min_asize(vdev_t *vd)
 	if (vd == vd->vdev_top)
 		return (P2ALIGN(vd->vdev_asize, 1ULL << vd->vdev_ms_shift));
-	/*
+	return (pvd->vdev_ops->vdev_op_min_asize(pvd));
 	 * The allocatable space for a raidz vdev is N * sizeof(smallest child),
 	 * so each child must provide at least 1/Nth of its asize.
 	 */
 	if (pvd->vdev_ops == &vdev_raidz_ops)
 		return ((pvd->vdev_min_asize + pvd->vdev_children - 1) /
 		    pvd->vdev_children);
 	return (pvd->vdev_min_asize);
 }
 void
@ -309,6 +312,48 @@ vdev_set_min_asize(vdev_t *vd)
 		vdev_set_min_asize(vd->vdev_child[c]);
 }
 /*
 * Return the minimal allocation size for the top-level vdev.  A vdev type
 * may provide its own value through vdev_op_min_alloc; when it does not,
 * the size is 1 << vdev_ashift.
 */
 uint64_t
 vdev_get_min_alloc(vdev_t *vd)
 {
 	if (vd->vdev_ops->vdev_op_min_alloc == NULL)
 		return (1ULL << vd->vdev_ashift);
 	return (vd->vdev_ops->vdev_op_min_alloc(vd));
 }
 /*
 * Return the parity level of a top-level vdev.  Vdev types which do not
 * implement vdev_op_nparity are reported as having a parity of zero.
 */
 uint64_t
 vdev_get_nparity(vdev_t *vd)
 {
 	if (vd->vdev_ops->vdev_op_nparity == NULL)
 		return (0);
 	return (vd->vdev_ops->vdev_op_nparity(vd));
 }
 /*
 * Return the number of data disks for a top-level vdev.  Vdev types which
 * do not implement vdev_op_ndisks are reported as having a single disk.
 */
 uint64_t
 vdev_get_ndisks(vdev_t *vd)
 {
 	uint64_t count;
 	if (vd->vdev_ops->vdev_op_ndisks == NULL)
 		count = 1;
 	else
 		count = vd->vdev_ops->vdev_op_ndisks(vd);
 	return (count);
 }
 vdev_t *
 vdev_lookup_top(spa_t *spa, uint64_t vdev)
 {
@ -551,6 +596,7 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
 	list_link_init(&vd->vdev_initialize_node);
 	list_link_init(&vd->vdev_leaf_node);
 	list_link_init(&vd->vdev_trim_node);
 	mutex_init(&vd->vdev_dtl_lock, NULL, MUTEX_NOLOCKDEP, NULL);
 	mutex_init(&vd->vdev_stat_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&vd->vdev_probe_lock, NULL, MUTEX_DEFAULT, NULL);
@ -569,9 +615,7 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
 	cv_init(&vd->vdev_trim_io_cv, NULL, CV_DEFAULT, NULL);
 	mutex_init(&vd->vdev_rebuild_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&vd->vdev_rebuild_io_lock, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&vd->vdev_rebuild_cv, NULL, CV_DEFAULT, NULL);
 	cv_init(&vd->vdev_rebuild_io_cv, NULL, CV_DEFAULT, NULL);
 	for (int t = 0; t < DTL_TYPES; t++) {
 		vd->vdev_dtl[t] = range_tree_create(NULL, RANGE_SEG64, NULL, 0,
@ -600,7 +644,7 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
 {
 	vdev_ops_t *ops;
 	char *type;
-	uint64_t guid = 0, islog, nparity;
+	uint64_t guid = 0, islog;
 	vdev_t *vd;
 	vdev_indirect_config_t *vic;
 	char *tmp = NULL;
@ -657,48 +701,13 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
 	if (ops == &vdev_hole_ops && spa_version(spa) < SPA_VERSION_HOLES)
 		return (SET_ERROR(ENOTSUP));
 	/*
 	 * Set the nparity property for RAID-Z vdevs.
 	 */
 	nparity = -1ULL;
 	if (ops == &vdev_raidz_ops) {
 		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
 		    &nparity) == 0) {
 			if (nparity == 0 || nparity > VDEV_RAIDZ_MAXPARITY)
 				return (SET_ERROR(EINVAL));
 			/*
 			 * Previous versions could only support 1 or 2 parity
 			 * device.
 			 */
 			if (nparity > 1 &&
 			    spa_version(spa) < SPA_VERSION_RAIDZ2)
 				return (SET_ERROR(ENOTSUP));
 			if (nparity > 2 &&
 			    spa_version(spa) < SPA_VERSION_RAIDZ3)
 				return (SET_ERROR(ENOTSUP));
 		} else {
 			/*
 			 * We require the parity to be specified for SPAs that
 			 * support multiple parity levels.
 			 */
 			if (spa_version(spa) >= SPA_VERSION_RAIDZ2)
 				return (SET_ERROR(EINVAL));
 			/*
 			 * Otherwise, we default to 1 parity device for RAID-Z.
 			 */
 			nparity = 1;
 		}
 	} else {
 		nparity = 0;
 	}
 	ASSERT(nparity != -1ULL);
 	/*
 	 * If creating a top-level vdev, check for allocation classes input
 	 */
 	if (top_level && alloctype == VDEV_ALLOC_ADD) {
 		char *bias;
 		/*
 		 * If creating a top-level vdev, check for allocation
 		 * classes input.
 		 */
 		if (nvlist_lookup_string(nv, ZPOOL_CONFIG_ALLOCATION_BIAS,
 		    &bias) == 0) {
 			alloc_bias = vdev_derive_alloc_bias(bias);
@ -710,13 +719,32 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
 				return (SET_ERROR(ENOTSUP));
 			}
 		}
 		/* spa_vdev_add() expects feature to be enabled */
 		if (ops == &vdev_draid_ops &&
 		    spa->spa_load_state != SPA_LOAD_CREATE &&
 		    !spa_feature_is_enabled(spa, SPA_FEATURE_DRAID)) {
 			return (SET_ERROR(ENOTSUP));
 		}
 	}
 	/*
 	 * Initialize the vdev specific data.  This is done before calling
 	 * vdev_alloc_common() since it may fail and this simplifies the
 	 * error reporting and cleanup code paths.
 	 */
 	void *tsd = NULL;
 	if (ops->vdev_op_init != NULL) {
 		rc = ops->vdev_op_init(spa, nv, &tsd);
 		if (rc != 0) {
 			return (rc);
 		}
 	}
 	vd = vdev_alloc_common(spa, id, guid, ops);
-	vic = &vd->vdev_indirect_config;
+	vd->vdev_tsd = tsd;
 	vd->vdev_islog = islog;
-	vd->vdev_nparity = nparity;
+
 	if (top_level && alloc_bias != VDEV_BIAS_NONE)
 		vd->vdev_alloc_bias = alloc_bias;
@ -756,6 +784,8 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
 	    &vd->vdev_wholedisk) != 0)
 		vd->vdev_wholedisk = -1ULL;
 	vic = &vd->vdev_indirect_config;
 	ASSERT0(vic->vic_mapping_object);
 	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_INDIRECT_OBJECT,
 	    &vic->vic_mapping_object);
@ -937,6 +967,9 @@ vdev_free(vdev_t *vd)
 	ASSERT(vd->vdev_child == NULL);
 	ASSERT(vd->vdev_guid_sum == vd->vdev_guid);
 	if (vd->vdev_ops->vdev_op_fini != NULL)
 		vd->vdev_ops->vdev_op_fini(vd);
 	/*
 	 * Discard allocation state.
 	 */
@ -1028,9 +1061,7 @@ vdev_free(vdev_t *vd)
 	cv_destroy(&vd->vdev_trim_io_cv);
 	mutex_destroy(&vd->vdev_rebuild_lock);
 	mutex_destroy(&vd->vdev_rebuild_io_lock);
 	cv_destroy(&vd->vdev_rebuild_cv);
 	cv_destroy(&vd->vdev_rebuild_io_cv);
 	zfs_ratelimit_fini(&vd->vdev_delay_rl);
 	zfs_ratelimit_fini(&vd->vdev_checksum_rl);
@ -1161,7 +1192,8 @@ vdev_top_update(vdev_t *tvd, vdev_t *vd)
 }
 /*
- * Add a mirror/replacing vdev above an existing vdev.
+ * Add a mirror/replacing vdev above an existing vdev.  There is no need to
 * call .vdev_op_init() since mirror/replacing vdevs do not have private state.
 */
 vdev_t *
 vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops)
@ -1296,6 +1328,10 @@ vdev_metaslab_group_create(vdev_t *vd)
 				spa->spa_max_ashift = vd->vdev_ashift;
 			if (vd->vdev_ashift < spa->spa_min_ashift)
 				spa->spa_min_ashift = vd->vdev_ashift;
 			uint64_t min_alloc = vdev_get_min_alloc(vd);
 			if (min_alloc < spa->spa_min_alloc)
 				spa->spa_min_alloc = min_alloc;
 		}
 	}
 }
@ -1622,39 +1658,67 @@ vdev_uses_zvols(vdev_t *vd)
 	return (B_FALSE);
 }
 /*
 * Default child filter used when opening children: every child passed in
 * is accepted, so B_TRUE is returned unconditionally.
 */
 static boolean_t
 vdev_default_open_children_func(vdev_t *vd)
 {
 	const boolean_t should_open = B_TRUE;
 	return (should_open);
 }
 /*
 * Open the requested child vdevs.  If any of the leaf vdevs are using
 * a ZFS volume then do the opens in a single thread.  This avoids a
 * deadlock when the current thread is holding the spa_namespace_lock.
 *
 * open_func is invoked for each child and only the children for which it
 * returns B_TRUE are opened.  On return vd->vdev_nonrot is B_TRUE only if
 * every selected child has vdev_nonrot set.
 */
 static void
 vdev_open_children_impl(vdev_t *vd, vdev_open_children_func_t *open_func)
 {
 	int children = vd->vdev_children;
 	taskq_t *tq = taskq_create("vdev_open", children, minclsyspri,
 	    children, children, TASKQ_PREPOPULATE);
 	vd->vdev_nonrot = B_TRUE;
 	for (int c = 0; c < children; c++) {
 		vdev_t *cvd = vd->vdev_child[c];
 		if (open_func(cvd) == B_FALSE)
 			continue;
 		/* Open synchronously when there is no taskq or zvols are used. */
 		if (tq == NULL || vdev_uses_zvols(vd)) {
 			cvd->vdev_open_error = vdev_open(cvd);
 		} else {
 			VERIFY(taskq_dispatch(tq, vdev_open_child,
 			    cvd, TQ_SLEEP) != TASKQID_INVALID);
 		}
 	}
 	/*
 	 * Opens dispatched to the taskq run asynchronously, so the children
 	 * must not be inspected until taskq_wait() has returned.  Sampling
 	 * cvd->vdev_nonrot right after the dispatch would race with the
 	 * child's open (presumably the open is what updates vdev_nonrot).
 	 */
 	if (tq != NULL)
 		taskq_wait(tq);
 	for (int c = 0; c < children; c++) {
 		vdev_t *cvd = vd->vdev_child[c];
 		if (open_func(cvd) == B_FALSE)
 			continue;
 		vd->vdev_nonrot &= cvd->vdev_nonrot;
 	}
 	if (tq != NULL)
 		taskq_destroy(tq);
 }
 /*
 * Open all child vdevs.  The added line delegates to
 * vdev_open_children_impl() with vdev_default_open_children_func as the
 * child filter.
 */
 void
 vdev_open_children(vdev_t *vd)
 {
-	taskq_t *tq;
+	vdev_open_children_impl(vd, vdev_default_open_children_func);
 	int children = vd->vdev_children;
 	/*
 	 * in order to handle pools on top of zvols, do the opens
 	 * in a single thread so that the same thread holds the
 	 * spa_namespace_lock
 	 */
 	if (vdev_uses_zvols(vd)) {
 retry_sync:
 		for (int c = 0; c < children; c++)
 			vd->vdev_child[c]->vdev_open_error =
 			    vdev_open(vd->vdev_child[c]);
 	} else {
 		tq = taskq_create("vdev_open", children, minclsyspri,
 		    children, children, TASKQ_PREPOPULATE);
 		if (tq == NULL)
 			goto retry_sync;
 		for (int c = 0; c < children; c++)
 			VERIFY(taskq_dispatch(tq, vdev_open_child,
 			    vd->vdev_child[c], TQ_SLEEP) != TASKQID_INVALID);
 		taskq_destroy(tq);
 }
-	vd->vdev_nonrot = B_TRUE;
+/*
-
+ * Conditionally open a subset of child vdevs.
-	for (int c = 0; c < children; c++)
+ */
-		vd->vdev_nonrot &= vd->vdev_child[c]->vdev_nonrot;
+void
 vdev_open_children_subset(vdev_t *vd, vdev_open_children_func_t *open_func)
 {
 	/* Same implementation as above, but with the caller's child filter. */
 	vdev_open_children_impl(vd, open_func);
 }
 /*
@ -1952,6 +2016,16 @@ vdev_open(vdev_t *vd)
 		return (error);
 	}
 	/*
 	 * Track the minimum allocation size.
 	 */
 	if (vd->vdev_top == vd && vd->vdev_ashift != 0 &&
 	    vd->vdev_islog == 0 && vd->vdev_aux == NULL) {
 		uint64_t min_alloc = vdev_get_min_alloc(vd);
 		if (min_alloc < spa->spa_min_alloc)
 			spa->spa_min_alloc = min_alloc;
 	}
 	/*
 	 * If this is a leaf vdev, assess whether a resilver is needed.
 	 * But don't do this if we are doing a reopen for a scrub, since
@ -2278,7 +2352,9 @@ vdev_close(vdev_t *vd)
 	vdev_t *pvd = vd->vdev_parent;
 	spa_t *spa __maybe_unused = vd->vdev_spa;
-	ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
+	ASSERT(vd != NULL);
 	ASSERT(vd->vdev_open_thread == curthread ||
 	    spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
 	/*
 	 * If our parent is reopening, then we are as well, unless we are
@ -2606,10 +2682,26 @@ vdev_dtl_empty(vdev_t *vd, vdev_dtl_type_t t)
 }
 /*
- * Returns B_TRUE if vdev determines offset needs to be resilvered.
+ * Check if the txg falls within the range which must be
 * resilvered.  DVAs outside this range can always be skipped.
 */
 boolean_t
-vdev_dtl_need_resilver(vdev_t *vd, uint64_t offset, size_t psize)
+vdev_default_need_resilver(vdev_t *vd, const dva_t *dva, size_t psize,
    uint64_t phys_birth)
 {
 	/* Set by sequential resilver. */
 	if (phys_birth == TXG_UNKNOWN)
 		return (B_TRUE);
 	/*
 	 * Otherwise the answer depends only on whether phys_birth is covered
 	 * by this vdev's DTL_PARTIAL, as reported by vdev_dtl_contains().
 	 * The dva and psize arguments are not consulted here.
 	 */
 	return (vdev_dtl_contains(vd, DTL_PARTIAL, phys_birth, 1));
 }
 /*
 * Returns B_TRUE if the vdev determines the DVA needs to be resilvered.
 */
 boolean_t
 vdev_dtl_need_resilver(vdev_t *vd, const dva_t *dva, size_t psize,
    uint64_t phys_birth)
 {
 	ASSERT(vd != vd->vdev_spa->spa_root_vdev);
@ -2617,7 +2709,8 @@ vdev_dtl_need_resilver(vdev_t *vd, uint64_t offset, size_t psize)
 	    vd->vdev_ops->vdev_op_leaf)
 		return (B_TRUE);
-	return (vd->vdev_ops->vdev_op_need_resilver(vd, offset, psize));
+	return (vd->vdev_ops->vdev_op_need_resilver(vd, dva, psize,
 	    phys_birth));
 }
 /*
@ -2862,8 +2955,8 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
 			continue;			/* leaf vdevs only */
 		if (t == DTL_PARTIAL)
 			minref = 1;			/* i.e. non-zero */
-		else if (vd->vdev_nparity != 0)
+		else if (vdev_get_nparity(vd) != 0)
-			minref = vd->vdev_nparity + 1;	/* RAID-Z */
+			minref = vdev_get_nparity(vd) + 1; /* RAID-Z, dRAID */
 		else
 			minref = vd->vdev_children;	/* any kind of mirror */
 		space_reftree_create(&reftree);
@ -3727,6 +3820,9 @@ top:
 	if (!vd->vdev_ops->vdev_op_leaf)
 		return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENOTSUP)));
 	if (vd->vdev_ops == &vdev_draid_spare_ops)
 		return (spa_vdev_state_exit(spa, NULL, ENOTSUP));
 	tvd = vd->vdev_top;
 	mg = tvd->vdev_mg;
 	generation = spa->spa_config_generation + 1;
@ -3971,6 +4067,13 @@ vdev_accessible(vdev_t *vd, zio_t *zio)
 static void
 vdev_get_child_stat(vdev_t *cvd, vdev_stat_t *vs, vdev_stat_t *cvs)
 {
 	/*
 	 * Exclude the dRAID spare when aggregating to avoid double counting
 	 * the ops and bytes.  These IOs are counted by the physical leaves.
 	 */
 	if (cvd->vdev_ops == &vdev_draid_spare_ops)
 		return;
 	for (int t = 0; t < VS_ZIO_TYPES; t++) {
 		vs->vs_ops[t] += cvs->vs_ops[t];
 		vs->vs_bytes[t] += cvs->vs_bytes[t];
@ -4063,7 +4166,6 @@ vdev_get_stats_ex_impl(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
 				vdev_get_child_stat(cvd, vs, cvs);
 			if (vsx)
 				vdev_get_child_stat_ex(cvd, vsx, cvsx);
 		}
 	} else {
 		/*
@ -4248,7 +4350,9 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
 			/*
 			 * Repair is the result of a rebuild issued by the
-			 * rebuild thread (vdev_rebuild_thread).
+			 * rebuild thread (vdev_rebuild_thread).  To avoid
 			 * double counting repaired bytes the virtual dRAID
 			 * spare vdev is excluded from the processed bytes.
 			 */
 			if (zio->io_priority == ZIO_PRIORITY_REBUILD) {
 				vdev_t *tvd = vd->vdev_top;
@ -4256,8 +4360,10 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
 				vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
 				uint64_t *rebuilt = &vrp->vrp_bytes_rebuilt;
-				if (vd->vdev_ops->vdev_op_leaf)
+				if (vd->vdev_ops->vdev_op_leaf &&
 				    vd->vdev_ops != &vdev_draid_spare_ops) {
 					atomic_add_64(rebuilt, psize);
 				}
 				vs->vs_rebuild_processed += psize;
 			}
@ -4981,31 +5087,42 @@ vdev_clear_resilver_deferred(vdev_t *vd, dmu_tx_t *tx)
 	    vdev_resilver_needed(vd, NULL, NULL));
 }
 /*
 * Report whether a range covers no bytes, i.e. whether its start and end
 * offsets are equal.
 */
 boolean_t
 vdev_xlate_is_empty(range_seg64_t *rs)
 {
 	if (rs->rs_end != rs->rs_start)
 		return (B_FALSE);
 	return (B_TRUE);
 }
 /*
- * Translate a logical range to the physical range for the specified vdev_t.
+ * Translate a logical range to the first contiguous physical range for the
- * This function is initially called with a leaf vdev and will walk each
+ * specified vdev_t.  This function is initially called with a leaf vdev and
- * parent vdev until it reaches a top-level vdev. Once the top-level is
+ * will walk each parent vdev until it reaches a top-level vdev. Once the
- * reached the physical range is initialized and the recursive function
+ * top-level is reached the physical range is initialized and the recursive
- * begins to unwind. As it unwinds it calls the parent's vdev specific
+ * function begins to unwind. As it unwinds it calls the parent's vdev
- * translation function to do the real conversion.
+ * specific translation function to do the real conversion.
 */
 void
 vdev_xlate(vdev_t *vd, const range_seg64_t *logical_rs,
-    range_seg64_t *physical_rs)
+    range_seg64_t *physical_rs, range_seg64_t *remain_rs)
 {
 	/*
 	 * Walk up the vdev tree
 	 */
 	if (vd != vd->vdev_top) {
-		vdev_xlate(vd->vdev_parent, logical_rs, physical_rs);
+		vdev_xlate(vd->vdev_parent, logical_rs, physical_rs,
 		    remain_rs);
 	} else {
 		/*
-		 * We've reached the top-level vdev, initialize the
+		 * We've reached the top-level vdev, initialize the physical
-		 * physical range to the logical range and start to
+		 * range to the logical range and set an empty remaining
-		 * unwind.
+		 * range then start to unwind.
 		 */
 		physical_rs->rs_start = logical_rs->rs_start;
 		physical_rs->rs_end = logical_rs->rs_end;
 		remain_rs->rs_start = logical_rs->rs_start;
 		remain_rs->rs_end = logical_rs->rs_start;
 		return;
 	}
@ -5015,16 +5132,40 @@ vdev_xlate(vdev_t *vd, const range_seg64_t *logical_rs,
 	/*
 	 * As this recursive function unwinds, translate the logical
-	 * range into its physical components by calling the
+	 * range into its physical and any remaining components by calling
-	 * vdev specific translate function.
+	 * the vdev specific translate function.
 	 */
 	range_seg64_t intermediate = { 0 };
-	pvd->vdev_ops->vdev_op_xlate(vd, physical_rs, &intermediate);
+	pvd->vdev_ops->vdev_op_xlate(vd, physical_rs, &intermediate, remain_rs);
 	physical_rs->rs_start = intermediate.rs_start;
 	physical_rs->rs_end = intermediate.rs_end;
 }
 /*
 * Translate a logical range piece by piece and invoke func(arg, range) for
 * each non-empty physical range produced.  Each vdev_xlate() call yields
 * one physical range plus the logical remainder still to be translated;
 * the walk ends once that remainder is empty.
 */
 void
 vdev_xlate_walk(vdev_t *vd, const range_seg64_t *logical_rs,
    vdev_xlate_func_t *func, void *arg)
 {
 	range_seg64_t phys;
 	range_seg64_t rest;
 	for (range_seg64_t cur = *logical_rs; !vdev_xlate_is_empty(&cur);
 	    cur = rest) {
 		vdev_xlate(vd, &cur, &phys, &rest);
 		/*
 		 * For raidz and dRAID the logical range may not map onto
 		 * this leaf at all, in which case the physical range is
 		 * empty and the callback is skipped.
 		 */
 		if (vdev_xlate_is_empty(&phys))
 			continue;
 		func(arg, &phys);
 	}
 }
 /*
 * Look at the vdev tree and determine whether any devices are currently being
 * replaced.
--- a/module/zfs/vdev_draid.c
+++ b/module/zfs/vdev_draid.c
--- a/module/zfs/vdev_draid_rand.c
+++ b/module/zfs/vdev_draid_rand.c
@ -0,0 +1,40 @@
 /*
 * Xorshift Pseudo Random Number Generator based on work by David Blackman
 * and Sebastiano Vigna (vigna@acm.org).
 *
 *   "Further scramblings of Marsaglia's xorshift generators"
 *   http://vigna.di.unimi.it/ftp/papers/xorshiftplus.pdf
 *   http://prng.di.unimi.it/xoroshiro128plusplus.c
 *
 * To the extent possible under law, the author has dedicated all copyright
 * and related and neighboring rights to this software to the public domain
 * worldwide. This software is distributed without any warranty.
 *
 * See <http://creativecommons.org/publicdomain/zero/1.0/>.
 *
 * This is xoroshiro128++ 1.0, one of our all-purpose, rock-solid,
 * small-state generators. It is extremely (sub-ns) fast and it passes all
 * tests we are aware of, but its state space is large enough only for
 * mild parallelism.
 */
 #include <sys/vdev_draid.h>
 /*
 * Rotate the 64-bit value x left by k bits.  Both shift counts are reduced
 * modulo 64 so that k == 0 (or any multiple of 64) never results in a shift
 * by the full width of the type, which is undefined behavior in C.  For
 * 0 < k < 64 the result is identical to (x << k) | (x >> (64 - k)).
 */
 static inline uint64_t rotl(const uint64_t x, int k)
 {
 	const unsigned int left = (unsigned int)k & 63U;
 	const unsigned int right = (64U - left) & 63U;
 	return ((x << left) | (x >> right));
 }
 /*
 * Advance the xoroshiro128++ generator and return its next 64-bit output.
 *
 * s points to the two 64-bit words of generator state; both words are
 * updated in place.  Note that if both words are zero the output is zero
 * and the state stays zero, so callers need a non-zero seed.
 */
 uint64_t
 vdev_draid_rand(uint64_t *s)
 {
 	const uint64_t s0 = s[0];
 	uint64_t s1 = s[1];
 	/* The output is computed from the state as it was on entry. */
 	const uint64_t result = rotl(s0 + s1, 17) + s0;
 	s1 ^= s0;
 	s[0] = rotl(s0, 49) ^ s1 ^ (s1 << 21); /* a, b */
 	s[1] = rotl(s1, 28); /* c */
 	return (result);
 }
--- a/module/zfs/vdev_indirect.c
+++ b/module/zfs/vdev_indirect.c
@ -1844,9 +1844,13 @@ vdev_indirect_io_done(zio_t *zio)
 }
 vdev_ops_t vdev_indirect_ops = {
 	.vdev_op_init = NULL,
 	.vdev_op_fini = NULL,
 	.vdev_op_open = vdev_indirect_open,
 	.vdev_op_close = vdev_indirect_close,
 	.vdev_op_asize = vdev_default_asize,
 	.vdev_op_min_asize = vdev_default_min_asize,
 	.vdev_op_min_alloc = NULL,
 	.vdev_op_io_start = vdev_indirect_io_start,
 	.vdev_op_io_done = vdev_indirect_io_done,
 	.vdev_op_state_change = NULL,
@ -1855,6 +1859,11 @@ vdev_ops_t vdev_indirect_ops = {
 	.vdev_op_rele = NULL,
 	.vdev_op_remap = vdev_indirect_remap,
 	.vdev_op_xlate = NULL,
 	.vdev_op_rebuild_asize = NULL,
 	.vdev_op_metaslab_init = NULL,
 	.vdev_op_config_generate = NULL,
 	.vdev_op_nparity = NULL,
 	.vdev_op_ndisks = NULL,
 	.vdev_op_type = VDEV_TYPE_INDIRECT,	/* name of this vdev type */
 	.vdev_op_leaf = B_FALSE			/* leaf vdev */
 };
--- a/module/zfs/vdev_initialize.c
+++ b/module/zfs/vdev_initialize.c
@ -121,6 +121,8 @@ vdev_initialize_change_state(vdev_t *vd, vdev_initializing_state_t new_state)
 	if (vd->vdev_initialize_state != VDEV_INITIALIZE_SUSPENDED) {
 		vd->vdev_initialize_action_time = gethrestime_sec();
 	}
 	vdev_initializing_state_t old_state = vd->vdev_initialize_state;
 	vd->vdev_initialize_state = new_state;
 	dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
@ -138,6 +140,8 @@ vdev_initialize_change_state(vdev_t *vd, vdev_initializing_state_t new_state)
 		    "vdev=%s suspended", vd->vdev_path);
 		break;
 	case VDEV_INITIALIZE_CANCELED:
 		if (old_state == VDEV_INITIALIZE_ACTIVE ||
 		    old_state == VDEV_INITIALIZE_SUSPENDED)
 			spa_history_log_internal(spa, "initialize", tx,
 			    "vdev=%s canceled", vd->vdev_path);
 		break;
@ -317,6 +321,32 @@ vdev_initialize_ranges(vdev_t *vd, abd_t *data)
 	return (0);
 }
 /*
 * vdev_xlate_walk() callback which tracks the largest rs_end seen.  arg
 * points to a uint64_t which is raised to physical_rs->rs_end whenever
 * that end offset is greater than the stored value.
 */
 static void
 vdev_initialize_xlate_last_rs_end(void *arg, range_seg64_t *physical_rs)
 {
 	uint64_t *furthest = arg;
 	uint64_t seg_end = physical_rs->rs_end;
 	if (seg_end <= *furthest)
 		return;
 	*furthest = seg_end;
 }
 /*
 * vdev_xlate_walk() callback which accumulates initialize progress.
 *
 * arg is the vdev_t being initialized and physical_rs is one physical
 * segment.  The full length of the segment is always added to
 * vdev_initialize_bytes_est, and the part of the segment which lies below
 * vdev_initialize_last_offset is added to vdev_initialize_bytes_done.
 */
 static void
 vdev_initialize_xlate_progress(void *arg, range_seg64_t *physical_rs)
 {
 	vdev_t *vd = (vdev_t *)arg;
 	uint64_t size = physical_rs->rs_end - physical_rs->rs_start;
 	vd->vdev_initialize_bytes_est += size;
 	/*
 	 * A segment ending exactly at the last offset is complete and is
 	 * counted in full.  This matches vdev_initialize_xlate_range_add(),
 	 * which treats rs_end <= vdev_initialize_last_offset as visited.
 	 */
 	if (vd->vdev_initialize_last_offset >= physical_rs->rs_end) {
 		vd->vdev_initialize_bytes_done += size;
 	} else if (vd->vdev_initialize_last_offset > physical_rs->rs_start) {
 		/* Only the leading part of the segment has been done. */
 		vd->vdev_initialize_bytes_done +=
 		    vd->vdev_initialize_last_offset - physical_rs->rs_start;
 	}
 }
 static void
 vdev_initialize_calculate_progress(vdev_t *vd)
 {
@ -331,28 +361,35 @@ vdev_initialize_calculate_progress(vdev_t *vd)
 		metaslab_t *msp = vd->vdev_top->vdev_ms[i];
 		mutex_enter(&msp->ms_lock);
-		uint64_t ms_free = msp->ms_size -
+		uint64_t ms_free = (msp->ms_size -
-		    metaslab_allocated_space(msp);
+		    metaslab_allocated_space(msp)) /
-
+		    vdev_get_ndisks(vd->vdev_top);
 		if (vd->vdev_top->vdev_ops == &vdev_raidz_ops)
 			ms_free /= vd->vdev_top->vdev_children;
 		/*
 		 * Convert the metaslab range to a physical range
 		 * on our vdev. We use this to determine if we are
 		 * in the middle of this metaslab range.
 		 */
-		range_seg64_t logical_rs, physical_rs;
+		range_seg64_t logical_rs, physical_rs, remain_rs;
 		logical_rs.rs_start = msp->ms_start;
 		logical_rs.rs_end = msp->ms_start + msp->ms_size;
 		vdev_xlate(vd, &logical_rs, &physical_rs);
 		/* Metaslab space after this offset has not been initialized */
 		vdev_xlate(vd, &logical_rs, &physical_rs, &remain_rs);
 		if (vd->vdev_initialize_last_offset <= physical_rs.rs_start) {
 			vd->vdev_initialize_bytes_est += ms_free;
 			mutex_exit(&msp->ms_lock);
 			continue;
-		} else if (vd->vdev_initialize_last_offset >
+		}
-		    physical_rs.rs_end) {
+
 		/* Metaslab space before this offset has been initialized */
 		uint64_t last_rs_end = physical_rs.rs_end;
 		if (!vdev_xlate_is_empty(&remain_rs)) {
 			vdev_xlate_walk(vd, &remain_rs,
 			    vdev_initialize_xlate_last_rs_end, &last_rs_end);
 		}
 		if (vd->vdev_initialize_last_offset > last_rs_end) {
 			vd->vdev_initialize_bytes_done += ms_free;
 			vd->vdev_initialize_bytes_est += ms_free;
 			mutex_exit(&msp->ms_lock);
@ -374,22 +411,9 @@ vdev_initialize_calculate_progress(vdev_t *vd)
 		    &where)) {
 			logical_rs.rs_start = rs_get_start(rs, rt);
 			logical_rs.rs_end = rs_get_end(rs, rt);
 			vdev_xlate(vd, &logical_rs, &physical_rs);
-			uint64_t size = physical_rs.rs_end -
+			vdev_xlate_walk(vd, &logical_rs,
-			    physical_rs.rs_start;
+			    vdev_initialize_xlate_progress, vd);
 			vd->vdev_initialize_bytes_est += size;
 			if (vd->vdev_initialize_last_offset >
 			    physical_rs.rs_end) {
 				vd->vdev_initialize_bytes_done += size;
 			} else if (vd->vdev_initialize_last_offset >
 			    physical_rs.rs_start &&
 			    vd->vdev_initialize_last_offset <
 			    physical_rs.rs_end) {
 				vd->vdev_initialize_bytes_done +=
 				    vd->vdev_initialize_last_offset -
 				    physical_rs.rs_start;
 			}
 		}
 		mutex_exit(&msp->ms_lock);
 	}
@ -419,6 +443,34 @@ vdev_initialize_load(vdev_t *vd)
 	return (err);
 }
 /*
 * vdev_xlate_walk() callback which queues a physical segment for
 * initialization.  Segments ending at or before
 * vdev_initialize_last_offset are ignored.  A segment which straddles that
 * offset has its rs_start moved up to it (the caller's range_seg64_t is
 * modified) before being added to vdev_initialize_tree.
 */
 static void
 vdev_initialize_xlate_range_add(void *arg, range_seg64_t *physical_rs)
 {
 	vdev_t *vd = arg;
 	/* Nothing to add for a segment which was already visited. */
 	if (vd->vdev_initialize_last_offset >= physical_rs->rs_end)
 		return;
 	/* Resume part-way through a segment which was only partly visited. */
 	if (physical_rs->rs_start < vd->vdev_initialize_last_offset) {
 		zfs_dbgmsg("range write: vd %s changed (%llu, %llu) to "
 		    "(%llu, %llu)", vd->vdev_path,
 		    (u_longlong_t)physical_rs->rs_start,
 		    (u_longlong_t)physical_rs->rs_end,
 		    (u_longlong_t)vd->vdev_initialize_last_offset,
 		    (u_longlong_t)physical_rs->rs_end);
 		ASSERT3U(physical_rs->rs_end, >,
 		    vd->vdev_initialize_last_offset);
 		physical_rs->rs_start = vd->vdev_initialize_last_offset;
 	}
 	ASSERT3U(physical_rs->rs_end, >, physical_rs->rs_start);
 	uint64_t seg_start = physical_rs->rs_start;
 	uint64_t seg_len = physical_rs->rs_end - seg_start;
 	range_tree_add(vd->vdev_initialize_tree, seg_start, seg_len);
 }
 /*
 * Convert the logical range into a physical range and add it to our
 * avl tree.
@ -427,47 +479,12 @@ static void
 vdev_initialize_range_add(void *arg, uint64_t start, uint64_t size)
 {
 	vdev_t *vd = arg;
-	range_seg64_t logical_rs, physical_rs;
+	range_seg64_t logical_rs;
 	logical_rs.rs_start = start;
 	logical_rs.rs_end = start + size;
 	ASSERT(vd->vdev_ops->vdev_op_leaf);
-	vdev_xlate(vd, &logical_rs, &physical_rs);
+	vdev_xlate_walk(vd, &logical_rs, vdev_initialize_xlate_range_add, arg);
 	IMPLY(vd->vdev_top == vd,
 	    logical_rs.rs_start == physical_rs.rs_start);
 	IMPLY(vd->vdev_top == vd,
 	    logical_rs.rs_end == physical_rs.rs_end);
 	/* Only add segments that we have not visited yet */
 	if (physical_rs.rs_end <= vd->vdev_initialize_last_offset)
 		return;
 	/* Pick up where we left off mid-range. */
 	if (vd->vdev_initialize_last_offset > physical_rs.rs_start) {
 		zfs_dbgmsg("range write: vd %s changed (%llu, %llu) to "
 		    "(%llu, %llu)", vd->vdev_path,
 		    (u_longlong_t)physical_rs.rs_start,
 		    (u_longlong_t)physical_rs.rs_end,
 		    (u_longlong_t)vd->vdev_initialize_last_offset,
 		    (u_longlong_t)physical_rs.rs_end);
 		ASSERT3U(physical_rs.rs_end, >,
 		    vd->vdev_initialize_last_offset);
 		physical_rs.rs_start = vd->vdev_initialize_last_offset;
 	}
 	ASSERT3U(physical_rs.rs_end, >=, physical_rs.rs_start);
 	/*
 	 * With raidz, it's possible that the logical range does not live on
 	 * this leaf vdev. We only add the physical range to this vdev's if it
 	 * has a length greater than 0.
 	 */
 	if (physical_rs.rs_end > physical_rs.rs_start) {
 		range_tree_add(vd->vdev_initialize_tree, physical_rs.rs_start,
 		    physical_rs.rs_end - physical_rs.rs_start);
 	} else {
 		ASSERT3U(physical_rs.rs_end, ==, physical_rs.rs_start);
 	}
 }
 static void
--- a/module/zfs/vdev_label.c
+++ b/module/zfs/vdev_label.c
@ -142,6 +142,7 @@
 #include <sys/zap.h>
 #include <sys/vdev.h>
 #include <sys/vdev_impl.h>
 #include <sys/vdev_draid.h>
 #include <sys/uberblock_impl.h>
 #include <sys/metaslab.h>
 #include <sys/metaslab_impl.h>
@ -453,31 +454,13 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
 	if (vd->vdev_fru != NULL)
 		fnvlist_add_string(nv, ZPOOL_CONFIG_FRU, vd->vdev_fru);
-	if (vd->vdev_nparity != 0) {
+	if (vd->vdev_ops->vdev_op_config_generate != NULL)
-		ASSERT(strcmp(vd->vdev_ops->vdev_op_type,
+		vd->vdev_ops->vdev_op_config_generate(vd, nv);
 		    VDEV_TYPE_RAIDZ) == 0);
-		/*
+	if (vd->vdev_wholedisk != -1ULL) {
 		 * Make sure someone hasn't managed to sneak a fancy new vdev
 		 * into a crufty old storage pool.
 		 */
 		ASSERT(vd->vdev_nparity == 1 ||
 		    (vd->vdev_nparity <= 2 &&
 		    spa_version(spa) >= SPA_VERSION_RAIDZ2) ||
 		    (vd->vdev_nparity <= 3 &&
 		    spa_version(spa) >= SPA_VERSION_RAIDZ3));
 		/*
 		 * Note that we'll add the nparity tag even on storage pools
 		 * that only support a single parity device -- older software
 		 * will just ignore it.
 		 */
 		fnvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, vd->vdev_nparity);
 	}
 	if (vd->vdev_wholedisk != -1ULL)
 		fnvlist_add_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
 		    vd->vdev_wholedisk);
 	}
 	if (vd->vdev_not_present && !(flags & VDEV_CONFIG_MISSING))
 		fnvlist_add_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, 1);
@ -785,6 +768,14 @@ vdev_label_read_config(vdev_t *vd, uint64_t txg)
 	if (!vdev_readable(vd))
 		return (NULL);
 	/*
 	 * The label for a dRAID distributed spare is not stored on disk.
 	 * Instead it is generated when needed which allows us to bypass
 	 * the pipeline when reading the config from the label.
 	 */
 	if (vd->vdev_ops == &vdev_draid_spare_ops)
 		return (vdev_draid_read_config_spare(vd));
 	vp_abd = abd_alloc_linear(sizeof (vdev_phys_t), B_TRUE);
 	vp = abd_to_buf(vp_abd);
@ -1497,7 +1488,8 @@ vdev_uberblock_load_impl(zio_t *zio, vdev_t *vd, int flags,
 	for (int c = 0; c < vd->vdev_children; c++)
 		vdev_uberblock_load_impl(zio, vd->vdev_child[c], flags, cbp);
-	if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) {
+	if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd) &&
 	    vd->vdev_ops != &vdev_draid_spare_ops) {
 		for (int l = 0; l < VDEV_LABELS; l++) {
 			for (int n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) {
 				vdev_label_read(zio, vd, l,
@ -1586,6 +1578,13 @@ vdev_copy_uberblocks(vdev_t *vd)
 	    SCL_STATE);
 	ASSERT(vd->vdev_ops->vdev_op_leaf);
 	/*
 	 * No uberblocks are stored on distributed spares, they may be
 	 * safely skipped when expanding a leaf vdev.
 	 */
 	if (vd->vdev_ops == &vdev_draid_spare_ops)
 		return;
 	spa_config_enter(vd->vdev_spa, locks, FTAG, RW_READER);
 	ub_abd = abd_alloc_linear(VDEV_UBERBLOCK_SIZE(vd), B_TRUE);
@ -1647,6 +1646,15 @@ vdev_uberblock_sync(zio_t *zio, uint64_t *good_writes,
 	if (!vdev_writeable(vd))
 		return;
 	/*
 	 * There's no need to write uberblocks to a distributed spare, they
 	 * are already stored on all the leaves of the parent dRAID.  For
 	 * this same reason vdev_uberblock_load_impl() skips distributed
 	 * spares when reading uberblocks.
 	 */
 	if (vd->vdev_ops == &vdev_draid_spare_ops)
 		return;
 	/* If the vdev was expanded, need to copy uberblock rings. */
 	if (vd->vdev_state == VDEV_STATE_HEALTHY &&
 	    vd->vdev_copy_uberblocks == B_TRUE) {
@ -1763,6 +1771,14 @@ vdev_label_sync(zio_t *zio, uint64_t *good_writes,
 	if (!vdev_writeable(vd))
 		return;
 	/*
 	 * The top-level config never needs to be written to a distributed
 	 * spare.  When read, vdev_draid_read_config_spare() will generate
 	 * the config for vdev_label_read_config().
 	 */
 	if (vd->vdev_ops == &vdev_draid_spare_ops)
 		return;
 	/*
 	 * Generate a label describing the top-level config to which we belong.
 	 */
--- a/module/zfs/vdev_mirror.c
+++ b/module/zfs/vdev_mirror.c
@ -33,6 +33,7 @@
 #include <sys/dsl_pool.h>
 #include <sys/dsl_scan.h>
 #include <sys/vdev_impl.h>
 #include <sys/vdev_draid.h>
 #include <sys/zio.h>
 #include <sys/abd.h>
 #include <sys/fs/zfs.h>
@ -99,7 +100,6 @@ vdev_mirror_stat_fini(void)
 /*
 * Virtual device vector for mirroring.
 */
 typedef struct mirror_child {
 	vdev_t		*mc_vd;
 	uint64_t	mc_offset;
@ -108,6 +108,7 @@ typedef struct mirror_child {
 	uint8_t		mc_tried;
 	uint8_t		mc_skipped;
 	uint8_t		mc_speculative;
 	uint8_t		mc_rebuilding;
 } mirror_child_t;
 typedef struct mirror_map {
@ -115,6 +116,7 @@ typedef struct mirror_map {
 	int		mm_preferred_cnt;
 	int		mm_children;
 	boolean_t	mm_resilvering;
 	boolean_t	mm_rebuilding;
 	boolean_t	mm_root;
 	mirror_child_t	mm_child[];
 } mirror_map_t;
@ -239,6 +241,21 @@ vdev_mirror_load(mirror_map_t *mm, vdev_t *vd, uint64_t zio_offset)
 	return (load + zfs_vdev_mirror_rotating_seek_inc);
 }
 /*
 * Return B_TRUE if vd is a leaf vdev with a non-zero vdev_rebuild_txg, or
 * if any vdev in the subtree below vd is such a leaf.  Return B_FALSE
 * otherwise.
 */
 static boolean_t
 vdev_mirror_rebuilding(vdev_t *vd)
 {
 	/* A leaf with vdev_rebuild_txg set answers the question directly. */
 	if (vd->vdev_ops->vdev_op_leaf && vd->vdev_rebuild_txg)
 		return (B_TRUE);
 	/* Otherwise recurse into each child and stop at the first match. */
 	for (int i = 0; i < vd->vdev_children; i++) {
 		if (vdev_mirror_rebuilding(vd->vdev_child[i])) {
 			return (B_TRUE);
 		}
 	}
 	return (B_FALSE);
 }
 /*
 * Avoid inlining the function to keep vdev_mirror_io_start(), which
 * is this function's only caller, as small as possible on the stack.
@ -356,6 +373,9 @@ vdev_mirror_map_init(zio_t *zio)
 			mc = &mm->mm_child[c];
 			mc->mc_vd = vd->vdev_child[c];
 			mc->mc_offset = zio->io_offset;
 			if (vdev_mirror_rebuilding(mc->mc_vd))
 				mm->mm_rebuilding = mc->mc_rebuilding = B_TRUE;
 		}
 	}
@ -493,12 +513,37 @@ vdev_mirror_preferred_child_randomize(zio_t *zio)
 	return (mm->mm_preferred[p]);
 }
 /*
 * Decide whether mirror child mc can be read.  When the child's top-level
 * vdev is a dRAID, the answer comes from vdev_draid_readable(), which is
 * also given the child's I/O offset (mc_offset).  Every other child is
 * checked with vdev_readable().
 */
 static boolean_t
 vdev_mirror_child_readable(mirror_child_t *mc)
 {
 	vdev_t *vd = mc->mc_vd;
 	if (vd->vdev_top != NULL && vd->vdev_top->vdev_ops == &vdev_draid_ops)
 		return (vdev_draid_readable(vd, mc->mc_offset));
 	else
 		return (vdev_readable(vd));
 }
 /*
 * Decide whether the given txg/size is recorded as missing for mirror
 * child mc.  When the child's top-level vdev is a dRAID, the check is
 * delegated to vdev_draid_missing() together with the child's I/O offset
 * (mc_offset).  Otherwise the child's DTL_MISSING log is consulted through
 * vdev_dtl_contains().
 */
 static boolean_t
 vdev_mirror_child_missing(mirror_child_t *mc, uint64_t txg, uint64_t size)
 {
 	vdev_t *vd = mc->mc_vd;
 	if (vd->vdev_top != NULL && vd->vdev_top->vdev_ops == &vdev_draid_ops)
 		return (vdev_draid_missing(vd, mc->mc_offset, txg, size));
 	else
 		return (vdev_dtl_contains(vd, DTL_MISSING, txg, size));
 }
 /*
 * Try to find a vdev whose DTL doesn't contain the block we want to read
- * preferring vdevs based on determined load.
+ * preferring vdevs based on determined load. If we can't, try the read on
 * any vdev we haven't already tried.
 *
- * Try to find a child whose DTL doesn't contain the block we want to read.
+ * Distributed spares are an exception to the above load rule. They are
- * If we can't, try the read on any vdev we haven't already tried.
+ * always preferred in order to detect gaps in the distributed spare which
 * are created when another disk in the dRAID fails. In order to restore
 * redundancy those gaps must be read to trigger the required repair IO.
 */
 static int
 vdev_mirror_child_select(zio_t *zio)
@ -518,20 +563,27 @@ vdev_mirror_child_select(zio_t *zio)
 		if (mc->mc_tried || mc->mc_skipped)
 			continue;
-		if (mc->mc_vd == NULL || !vdev_readable(mc->mc_vd)) {
+		if (mc->mc_vd == NULL ||
 		    !vdev_mirror_child_readable(mc)) {
 			mc->mc_error = SET_ERROR(ENXIO);
 			mc->mc_tried = 1;	/* don't even try */
 			mc->mc_skipped = 1;
 			continue;
 		}
-		if (vdev_dtl_contains(mc->mc_vd, DTL_MISSING, txg, 1)) {
+		if (vdev_mirror_child_missing(mc, txg, 1)) {
 			mc->mc_error = SET_ERROR(ESTALE);
 			mc->mc_skipped = 1;
 			mc->mc_speculative = 1;
 			continue;
 		}
 		if (mc->mc_vd->vdev_ops == &vdev_draid_spare_ops) {
 			mm->mm_preferred[0] = c;
 			mm->mm_preferred_cnt = 1;
 			break;
 		}
 		mc->mc_load = vdev_mirror_load(mm, mc->mc_vd, mc->mc_offset);
 		if (mc->mc_load > lowest_load)
 			continue;
@ -625,11 +677,25 @@ vdev_mirror_io_start(zio_t *zio)
 	while (children--) {
 		mc = &mm->mm_child[c];
 		/*
 		 * Advance the child index here, exactly once per iteration,
 		 * so that the early 'continue' below cannot leave it stale.
 		 * The increment that used to follow zio_nowait() is removed.
 		 */
 		c++;
 		/*
 		 * When sequentially resilvering only issue write repair
 		 * IOs to the vdev which is being rebuilt since performance
 		 * is limited by the slowest child.  This is an issue for
 		 * faster replacement devices such as distributed spares.
 		 */
 		if ((zio->io_priority == ZIO_PRIORITY_REBUILD) &&
 		    (zio->io_flags & ZIO_FLAG_IO_REPAIR) &&
 		    !(zio->io_flags & ZIO_FLAG_SCRUB) &&
 		    mm->mm_rebuilding && !mc->mc_rebuilding) {
 			continue;
 		}
 		zio_nowait(zio_vdev_child_io(zio, zio->io_bp,
 		    mc->mc_vd, mc->mc_offset, zio->io_abd, zio->io_size,
 		    zio->io_type, zio->io_priority, 0,
 		    vdev_mirror_child_done, mc));
-		c++;
 	}
 	zio_execute(zio);
@ -744,6 +810,8 @@ vdev_mirror_io_done(zio_t *zio)
 			mc = &mm->mm_child[c];
 			if (mc->mc_error == 0) {
 				vdev_ops_t *ops = mc->mc_vd->vdev_ops;
 				if (mc->mc_tried)
 					continue;
 				/*
@ -752,15 +820,16 @@ vdev_mirror_io_done(zio_t *zio)
 				 * 1. it's a scrub (in which case we have
 				 * tried everything that was healthy)
 				 *  - or -
-				 * 2. it's an indirect vdev (in which case
+				 * 2. it's an indirect or distributed spare
-				 * it could point to any other vdev, which
+				 * vdev (in which case it could point to any
-				 * might have a bad DTL)
+				 * other vdev, which might have a bad DTL)
 				 *  - or -
 				 * 3. the DTL indicates that this data is
 				 * missing from this vdev
 				 */
 				if (!(zio->io_flags & ZIO_FLAG_SCRUB) &&
-				    mc->mc_vd->vdev_ops != &vdev_indirect_ops &&
+				    ops != &vdev_indirect_ops &&
 				    ops != &vdev_draid_spare_ops &&
 				    !vdev_dtl_contains(mc->mc_vd, DTL_PARTIAL,
 				    zio->io_txg, 1))
 					continue;
@ -796,50 +865,90 @@ vdev_mirror_state_change(vdev_t *vd, int faulted, int degraded)
 	}
 }
 /*
 * Return the maximum asize for a rebuild zio in the provided range.
 *
 * max_segment is rounded up to a multiple of (1 << vdev_ashift) and capped
 * at SPA_MAXBLOCKSIZE to give a psize.  That psize is converted with
 * vdev_psize_to_asize() and the smaller of the result and the caller's
 * asize is returned.  The start argument is not used by this
 * implementation.
 */
 static uint64_t
 vdev_mirror_rebuild_asize(vdev_t *vd, uint64_t start, uint64_t asize,
    uint64_t max_segment)
 {
 	uint64_t psize = MIN(P2ROUNDUP(max_segment, 1 << vd->vdev_ashift),
 	    SPA_MAXBLOCKSIZE);
 	return (MIN(asize, vdev_psize_to_asize(vd, psize)));
 }
 vdev_ops_t vdev_mirror_ops = {
 	.vdev_op_init = NULL,
 	.vdev_op_fini = NULL,
 	.vdev_op_open = vdev_mirror_open,
 	.vdev_op_close = vdev_mirror_close,
 	.vdev_op_asize = vdev_default_asize,
 	.vdev_op_min_asize = vdev_default_min_asize,
 	.vdev_op_min_alloc = NULL,
 	.vdev_op_io_start = vdev_mirror_io_start,
 	.vdev_op_io_done = vdev_mirror_io_done,
 	.vdev_op_state_change = vdev_mirror_state_change,
-	.vdev_op_need_resilver = NULL,
+	.vdev_op_need_resilver = vdev_default_need_resilver,
 	.vdev_op_hold = NULL,
 	.vdev_op_rele = NULL,
 	.vdev_op_remap = NULL,
 	.vdev_op_xlate = vdev_default_xlate,
 	.vdev_op_rebuild_asize = vdev_mirror_rebuild_asize,
 	.vdev_op_metaslab_init = NULL,
 	.vdev_op_config_generate = NULL,
 	.vdev_op_nparity = NULL,
 	.vdev_op_ndisks = NULL,
 	.vdev_op_type = VDEV_TYPE_MIRROR,	/* name of this vdev type */
 	.vdev_op_leaf = B_FALSE			/* not a leaf vdev */
 };
 vdev_ops_t vdev_replacing_ops = {
 	.vdev_op_init = NULL,
 	.vdev_op_fini = NULL,
 	.vdev_op_open = vdev_mirror_open,
 	.vdev_op_close = vdev_mirror_close,
 	.vdev_op_asize = vdev_default_asize,
 	.vdev_op_min_asize = vdev_default_min_asize,
 	.vdev_op_min_alloc = NULL,
 	.vdev_op_io_start = vdev_mirror_io_start,
 	.vdev_op_io_done = vdev_mirror_io_done,
 	.vdev_op_state_change = vdev_mirror_state_change,
-	.vdev_op_need_resilver = NULL,
+	.vdev_op_need_resilver = vdev_default_need_resilver,
 	.vdev_op_hold = NULL,
 	.vdev_op_rele = NULL,
 	.vdev_op_remap = NULL,
 	.vdev_op_xlate = vdev_default_xlate,
 	.vdev_op_rebuild_asize = vdev_mirror_rebuild_asize,
 	.vdev_op_metaslab_init = NULL,
 	.vdev_op_config_generate = NULL,
 	.vdev_op_nparity = NULL,
 	.vdev_op_ndisks = NULL,
 	.vdev_op_type = VDEV_TYPE_REPLACING,	/* name of this vdev type */
 	.vdev_op_leaf = B_FALSE			/* not a leaf vdev */
 };
 vdev_ops_t vdev_spare_ops = {
 	.vdev_op_init = NULL,
 	.vdev_op_fini = NULL,
 	.vdev_op_open = vdev_mirror_open,
 	.vdev_op_close = vdev_mirror_close,
 	.vdev_op_asize = vdev_default_asize,
 	.vdev_op_min_asize = vdev_default_min_asize,
 	.vdev_op_min_alloc = NULL,
 	.vdev_op_io_start = vdev_mirror_io_start,
 	.vdev_op_io_done = vdev_mirror_io_done,
 	.vdev_op_state_change = vdev_mirror_state_change,
-	.vdev_op_need_resilver = NULL,
+	.vdev_op_need_resilver = vdev_default_need_resilver,
 	.vdev_op_hold = NULL,
 	.vdev_op_rele = NULL,
 	.vdev_op_remap = NULL,
 	.vdev_op_xlate = vdev_default_xlate,
 	.vdev_op_rebuild_asize = vdev_mirror_rebuild_asize,
 	.vdev_op_metaslab_init = NULL,
 	.vdev_op_config_generate = NULL,
 	.vdev_op_nparity = NULL,
 	.vdev_op_ndisks = NULL,
 	.vdev_op_type = VDEV_TYPE_SPARE,	/* name of this vdev type */
 	.vdev_op_leaf = B_FALSE			/* not a leaf vdev */
 };
--- a/module/zfs/vdev_missing.c
+++ b/module/zfs/vdev_missing.c
@ -81,9 +81,13 @@ vdev_missing_io_done(zio_t *zio)
 }
 vdev_ops_t vdev_missing_ops = {
 	.vdev_op_init = NULL,
 	.vdev_op_fini = NULL,
 	.vdev_op_open = vdev_missing_open,
 	.vdev_op_close = vdev_missing_close,
 	.vdev_op_asize = vdev_default_asize,
 	.vdev_op_min_asize = vdev_default_min_asize,
 	.vdev_op_min_alloc = NULL,
 	.vdev_op_io_start = vdev_missing_io_start,
 	.vdev_op_io_done = vdev_missing_io_done,
 	.vdev_op_state_change = NULL,
@ -92,14 +96,23 @@ vdev_ops_t vdev_missing_ops = {
 	.vdev_op_rele = NULL,
 	.vdev_op_remap = NULL,
 	.vdev_op_xlate = NULL,
 	.vdev_op_rebuild_asize = NULL,
 	.vdev_op_metaslab_init = NULL,
 	.vdev_op_config_generate = NULL,
 	.vdev_op_nparity = NULL,
 	.vdev_op_ndisks = NULL,
 	.vdev_op_type = VDEV_TYPE_MISSING,	/* name of this vdev type */
 	.vdev_op_leaf = B_TRUE			/* leaf vdev */
 };
 vdev_ops_t vdev_hole_ops = {
 	.vdev_op_init = NULL,
 	.vdev_op_fini = NULL,
 	.vdev_op_open = vdev_missing_open,
 	.vdev_op_close = vdev_missing_close,
 	.vdev_op_asize = vdev_default_asize,
 	.vdev_op_min_asize = vdev_default_min_asize,
 	.vdev_op_min_alloc = NULL,
 	.vdev_op_io_start = vdev_missing_io_start,
 	.vdev_op_io_done = vdev_missing_io_done,
 	.vdev_op_state_change = NULL,
@ -108,6 +121,11 @@ vdev_ops_t vdev_hole_ops = {
 	.vdev_op_rele = NULL,
 	.vdev_op_remap = NULL,
 	.vdev_op_xlate = NULL,
 	.vdev_op_rebuild_asize = NULL,
 	.vdev_op_metaslab_init = NULL,
 	.vdev_op_config_generate = NULL,
 	.vdev_op_nparity = NULL,
 	.vdev_op_ndisks = NULL,
 	.vdev_op_type = VDEV_TYPE_HOLE,		/* name of this vdev type */
 	.vdev_op_leaf = B_TRUE			/* leaf vdev */
 };
--- a/module/zfs/vdev_queue.c
+++ b/module/zfs/vdev_queue.c
@ -593,6 +593,13 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
 	if (zio->io_type == ZIO_TYPE_TRIM && !zfs_vdev_aggregate_trim)
 		return (NULL);
 	/*
 	 * I/Os to distributed spares are directly dispatched to the dRAID
 	 * leaf vdevs for aggregation.  See the comment at the end of the
 	 * zio_vdev_io_start() function.
 	 */
 	ASSERT(vq->vq_vdev->vdev_ops != &vdev_draid_spare_ops);
 	first = last = zio;
 	if (zio->io_type == ZIO_TYPE_READ)
--- a/module/zfs/vdev_raidz.c
+++ b/module/zfs/vdev_raidz.c
--- a/module/zfs/vdev_raidz_math.c
+++ b/module/zfs/vdev_raidz_math.c
@ -149,7 +149,7 @@ vdev_raidz_math_get_ops(void)
 * Select parity generation method for raidz_map
 */
 int
-vdev_raidz_math_generate(raidz_map_t *rm)
+vdev_raidz_math_generate(raidz_map_t *rm, raidz_row_t *rr)
 {
 	raidz_gen_f gen_parity = NULL;
@ -174,7 +174,7 @@ vdev_raidz_math_generate(raidz_map_t *rm)
 	if (gen_parity == NULL)
 		return (RAIDZ_ORIGINAL_IMPL);
-	gen_parity(rm);
+	gen_parity(rr);
 	return (0);
 }
@ -241,8 +241,8 @@ reconstruct_fun_pqr_sel(raidz_map_t *rm, const int *parity_valid,
 * @nbaddata     - Number of failed data columns
 */
 int
-vdev_raidz_math_reconstruct(raidz_map_t *rm, const int *parity_valid,
+vdev_raidz_math_reconstruct(raidz_map_t *rm, raidz_row_t *rr,
-    const int *dt, const int nbaddata)
+    const int *parity_valid, const int *dt, const int nbaddata)
 {
 	raidz_rec_f rec_fn = NULL;
@ -265,7 +265,7 @@ vdev_raidz_math_reconstruct(raidz_map_t *rm, const int *parity_valid,
 	if (rec_fn == NULL)
 		return (RAIDZ_ORIGINAL_IMPL);
 	else
-		return (rec_fn(rm, dt));
+		return (rec_fn(rr, dt));
 }
 const char *raidz_gen_name[] = {
--- a/module/zfs/vdev_raidz_math_impl.h
+++ b/module/zfs/vdev_raidz_math_impl.h
@ -26,6 +26,7 @@
 #define	_VDEV_RAIDZ_MATH_IMPL_H
 #include <sys/types.h>
 #include <sys/vdev_raidz_impl.h>
 #define	raidz_inline inline __attribute__((always_inline))
 #ifndef noinline
@ -36,33 +37,33 @@
 * Functions calculate multiplication constants for data reconstruction.
 * Coefficients depend on RAIDZ geometry, indexes of failed child vdevs, and
 * used parity columns for reconstruction.
- * @rm			RAIDZ map
+ * @rr			RAIDZ row
 * @tgtidx		array of missing data indexes
 * @coeff		output array of coefficients. Array must be provided by
 *         		user and must hold minimum MUL_CNT values.
 */
 static noinline void
-raidz_rec_q_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+raidz_rec_q_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
 {
-	const unsigned ncols = raidz_ncols(rm);
+	const unsigned ncols = rr->rr_cols;
 	const unsigned x = tgtidx[TARGET_X];
 	coeff[MUL_Q_X] = gf_exp2(255 - (ncols - x - 1));
 }
 static noinline void
-raidz_rec_r_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+raidz_rec_r_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
 {
-	const unsigned ncols = raidz_ncols(rm);
+	const unsigned ncols = rr->rr_cols;
 	const unsigned x = tgtidx[TARGET_X];
 	coeff[MUL_R_X] = gf_exp4(255 - (ncols - x - 1));
 }
 static noinline void
-raidz_rec_pq_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+raidz_rec_pq_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
 {
-	const unsigned ncols = raidz_ncols(rm);
+	const unsigned ncols = rr->rr_cols;
 	const unsigned x = tgtidx[TARGET_X];
 	const unsigned y = tgtidx[TARGET_Y];
 	gf_t a, b, e;
@ -76,9 +77,9 @@ raidz_rec_pq_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
 }
 static noinline void
-raidz_rec_pr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+raidz_rec_pr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
 {
-	const unsigned ncols = raidz_ncols(rm);
+	const unsigned ncols = rr->rr_cols;
 	const unsigned x = tgtidx[TARGET_X];
 	const unsigned y = tgtidx[TARGET_Y];
@ -93,9 +94,9 @@ raidz_rec_pr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
 }
 static noinline void
-raidz_rec_qr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+raidz_rec_qr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
 {
-	const unsigned ncols = raidz_ncols(rm);
+	const unsigned ncols = rr->rr_cols;
 	const unsigned x = tgtidx[TARGET_X];
 	const unsigned y = tgtidx[TARGET_Y];
@ -114,9 +115,9 @@ raidz_rec_qr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
 }
 static noinline void
-raidz_rec_pqr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+raidz_rec_pqr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
 {
-	const unsigned ncols = raidz_ncols(rm);
+	const unsigned ncols = rr->rr_cols;
 	const unsigned x = tgtidx[TARGET_X];
 	const unsigned y = tgtidx[TARGET_Y];
 	const unsigned z = tgtidx[TARGET_Z];
@ -347,26 +348,26 @@ raidz_mul_abd_cb(void *dc, size_t size, void *private)
 /*
 * Generate P parity (RAIDZ1)
 *
- * @rm	RAIDZ map
+ * @rr	RAIDZ row
 */
 static raidz_inline void
-raidz_generate_p_impl(raidz_map_t * const rm)
+raidz_generate_p_impl(raidz_row_t * const rr)
 {
 	size_t c;
-	const size_t ncols = raidz_ncols(rm);
+	const size_t ncols = rr->rr_cols;
-	const size_t psize = rm->rm_col[CODE_P].rc_size;
+	const size_t psize = rr->rr_col[CODE_P].rc_size;
-	abd_t *pabd = rm->rm_col[CODE_P].rc_abd;
+	abd_t *pabd = rr->rr_col[CODE_P].rc_abd;
 	size_t size;
 	abd_t *dabd;
 	raidz_math_begin();
 	/* start with first data column */
-	raidz_copy(pabd, rm->rm_col[1].rc_abd, psize);
+	raidz_copy(pabd, rr->rr_col[1].rc_abd, psize);
 	for (c = 2; c < ncols; c++) {
-		dabd = rm->rm_col[c].rc_abd;
+		dabd = rr->rr_col[c].rc_abd;
-		size = rm->rm_col[c].rc_size;
+		size = rr->rr_col[c].rc_size;
 		/* add data column */
 		raidz_add(pabd, dabd, size);
@ -414,29 +415,29 @@ raidz_gen_pq_add(void **c, const void *dc, const size_t csize,
 /*
 * Generate PQ parity (RAIDZ2)
 *
- * @rm	RAIDZ map
+ * @rr	RAIDZ row
 */
 static raidz_inline void
-raidz_generate_pq_impl(raidz_map_t * const rm)
+raidz_generate_pq_impl(raidz_row_t * const rr)
 {
 	size_t c;
-	const size_t ncols = raidz_ncols(rm);
+	const size_t ncols = rr->rr_cols;
-	const size_t csize = rm->rm_col[CODE_P].rc_size;
+	const size_t csize = rr->rr_col[CODE_P].rc_size;
 	size_t dsize;
 	abd_t *dabd;
 	abd_t *cabds[] = {
-		rm->rm_col[CODE_P].rc_abd,
+		rr->rr_col[CODE_P].rc_abd,
-		rm->rm_col[CODE_Q].rc_abd
+		rr->rr_col[CODE_Q].rc_abd
 	};
 	raidz_math_begin();
-	raidz_copy(cabds[CODE_P], rm->rm_col[2].rc_abd, csize);
+	raidz_copy(cabds[CODE_P], rr->rr_col[2].rc_abd, csize);
-	raidz_copy(cabds[CODE_Q], rm->rm_col[2].rc_abd, csize);
+	raidz_copy(cabds[CODE_Q], rr->rr_col[2].rc_abd, csize);
 	for (c = 3; c < ncols; c++) {
-		dabd = rm->rm_col[c].rc_abd;
+		dabd = rr->rr_col[c].rc_abd;
-		dsize = rm->rm_col[c].rc_size;
+		dsize = rr->rr_col[c].rc_size;
 		abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 2,
 		    raidz_gen_pq_add);
@ -487,31 +488,31 @@ raidz_gen_pqr_add(void **c, const void *dc, const size_t csize,
 /*
 * Generate PQR parity (RAIDZ3)
 *
- * @rm	RAIDZ map
+ * @rr	RAIDZ row
 */
 static raidz_inline void
-raidz_generate_pqr_impl(raidz_map_t * const rm)
+raidz_generate_pqr_impl(raidz_row_t * const rr)
 {
 	size_t c;
-	const size_t ncols = raidz_ncols(rm);
+	const size_t ncols = rr->rr_cols;
-	const size_t csize = rm->rm_col[CODE_P].rc_size;
+	const size_t csize = rr->rr_col[CODE_P].rc_size;
 	size_t dsize;
 	abd_t *dabd;
 	abd_t *cabds[] = {
-		rm->rm_col[CODE_P].rc_abd,
+		rr->rr_col[CODE_P].rc_abd,
-		rm->rm_col[CODE_Q].rc_abd,
+		rr->rr_col[CODE_Q].rc_abd,
-		rm->rm_col[CODE_R].rc_abd
+		rr->rr_col[CODE_R].rc_abd
 	};
 	raidz_math_begin();
-	raidz_copy(cabds[CODE_P], rm->rm_col[3].rc_abd, csize);
+	raidz_copy(cabds[CODE_P], rr->rr_col[3].rc_abd, csize);
-	raidz_copy(cabds[CODE_Q], rm->rm_col[3].rc_abd, csize);
+	raidz_copy(cabds[CODE_Q], rr->rr_col[3].rc_abd, csize);
-	raidz_copy(cabds[CODE_R], rm->rm_col[3].rc_abd, csize);
+	raidz_copy(cabds[CODE_R], rr->rr_col[3].rc_abd, csize);
 	for (c = 4; c < ncols; c++) {
-		dabd = rm->rm_col[c].rc_abd;
+		dabd = rr->rr_col[c].rc_abd;
-		dsize = rm->rm_col[c].rc_size;
+		dsize = rr->rr_col[c].rc_size;
 		abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 3,
 		    raidz_gen_pqr_add);
@ -579,33 +580,36 @@ raidz_generate_pqr_impl(raidz_map_t * const rm)
 * @syn_method	raidz_add_abd()
 * @rec_method	not applicable
 *
- * @rm		RAIDZ map
+ * @rr		RAIDZ row
 * @tgtidx	array of missing data indexes
 */
 static raidz_inline int
-raidz_reconstruct_p_impl(raidz_map_t *rm, const int *tgtidx)
+raidz_reconstruct_p_impl(raidz_row_t *rr, const int *tgtidx)
 {
 	size_t c;
-	const size_t firstdc = raidz_parity(rm);
+	const size_t firstdc = rr->rr_firstdatacol;
-	const size_t ncols = raidz_ncols(rm);
+	const size_t ncols = rr->rr_cols;
 	const size_t x = tgtidx[TARGET_X];
-	const size_t xsize = rm->rm_col[x].rc_size;
+	const size_t xsize = rr->rr_col[x].rc_size;
-	abd_t *xabd = rm->rm_col[x].rc_abd;
+	abd_t *xabd = rr->rr_col[x].rc_abd;
 	size_t size;
 	abd_t *dabd;
 	if (xabd == NULL)
 		return (1 << CODE_P);
 	raidz_math_begin();
 	/* copy P into target */
-	raidz_copy(xabd, rm->rm_col[CODE_P].rc_abd, xsize);
+	raidz_copy(xabd, rr->rr_col[CODE_P].rc_abd, xsize);
 	/* generate p_syndrome */
 	for (c = firstdc; c < ncols; c++) {
 		if (c == x)
 			continue;
-		dabd = rm->rm_col[c].rc_abd;
+		dabd = rr->rr_col[c].rc_abd;
-		size = MIN(rm->rm_col[c].rc_size, xsize);
+		size = MIN(rr->rr_col[c].rc_size, xsize);
 		raidz_add(xabd, dabd, size);
 	}
@ -653,30 +657,33 @@ raidz_syn_q_abd(void **xc, const void *dc, const size_t xsize,
 * @syn_method	raidz_add_abd()
 * @rec_method	raidz_mul_abd_cb()
 *
- * @rm		RAIDZ map
+ * @rr		RAIDZ row
 * @tgtidx	array of missing data indexes
 */
 static raidz_inline int
-raidz_reconstruct_q_impl(raidz_map_t *rm, const int *tgtidx)
+raidz_reconstruct_q_impl(raidz_row_t *rr, const int *tgtidx)
 {
 	size_t c;
 	size_t dsize;
 	abd_t *dabd;
-	const size_t firstdc = raidz_parity(rm);
+	const size_t firstdc = rr->rr_firstdatacol;
-	const size_t ncols = raidz_ncols(rm);
+	const size_t ncols = rr->rr_cols;
 	const size_t x = tgtidx[TARGET_X];
-	abd_t *xabd = rm->rm_col[x].rc_abd;
+	abd_t *xabd = rr->rr_col[x].rc_abd;
-	const size_t xsize = rm->rm_col[x].rc_size;
+	const size_t xsize = rr->rr_col[x].rc_size;
 	abd_t *tabds[] = { xabd };
 	if (xabd == NULL)
 		return (1 << CODE_Q);
 	unsigned coeff[MUL_CNT];
-	raidz_rec_q_coeff(rm, tgtidx, coeff);
+	raidz_rec_q_coeff(rr, tgtidx, coeff);
 	raidz_math_begin();
 	/* Start with first data column if present */
 	if (firstdc != x) {
-		raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
 	} else {
 		raidz_zero(xabd, xsize);
 	}
@ -687,8 +694,8 @@ raidz_reconstruct_q_impl(raidz_map_t *rm, const int *tgtidx)
 			dabd = NULL;
 			dsize = 0;
 		} else {
-			dabd = rm->rm_col[c].rc_abd;
+			dabd = rr->rr_col[c].rc_abd;
-			dsize = rm->rm_col[c].rc_size;
+			dsize = rr->rr_col[c].rc_size;
 		}
 		abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1,
@ -696,7 +703,7 @@ raidz_reconstruct_q_impl(raidz_map_t *rm, const int *tgtidx)
 	}
 	/* add Q to the syndrome */
-	raidz_add(xabd, rm->rm_col[CODE_Q].rc_abd, xsize);
+	raidz_add(xabd, rr->rr_col[CODE_Q].rc_abd, xsize);
 	/* transform the syndrome */
 	abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void*) coeff);
@ -744,30 +751,33 @@ raidz_syn_r_abd(void **xc, const void *dc, const size_t tsize,
 * @syn_method	raidz_add_abd()
 * @rec_method	raidz_mul_abd_cb()
 *
- * @rm		RAIDZ map
+ * @rr		RAIDZ row
 * @tgtidx	array of missing data indexes
 */
 static raidz_inline int
-raidz_reconstruct_r_impl(raidz_map_t *rm, const int *tgtidx)
+raidz_reconstruct_r_impl(raidz_row_t *rr, const int *tgtidx)
 {
 	size_t c;
 	size_t dsize;
 	abd_t *dabd;
-	const size_t firstdc = raidz_parity(rm);
+	const size_t firstdc = rr->rr_firstdatacol;
-	const size_t ncols = raidz_ncols(rm);
+	const size_t ncols = rr->rr_cols;
 	const size_t x = tgtidx[TARGET_X];
-	const size_t xsize = rm->rm_col[x].rc_size;
+	const size_t xsize = rr->rr_col[x].rc_size;
-	abd_t *xabd = rm->rm_col[x].rc_abd;
+	abd_t *xabd = rr->rr_col[x].rc_abd;
 	abd_t *tabds[] = { xabd };
 	if (xabd == NULL)
 		return (1 << CODE_R);
 	unsigned coeff[MUL_CNT];
-	raidz_rec_r_coeff(rm, tgtidx, coeff);
+	raidz_rec_r_coeff(rr, tgtidx, coeff);
 	raidz_math_begin();
 	/* Start with first data column if present */
 	if (firstdc != x) {
-		raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
 	} else {
 		raidz_zero(xabd, xsize);
 	}
@ -779,8 +789,8 @@ raidz_reconstruct_r_impl(raidz_map_t *rm, const int *tgtidx)
 			dabd = NULL;
 			dsize = 0;
 		} else {
-			dabd = rm->rm_col[c].rc_abd;
+			dabd = rr->rr_col[c].rc_abd;
-			dsize = rm->rm_col[c].rc_size;
+			dsize = rr->rr_col[c].rc_size;
 		}
 		abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1,
@ -788,7 +798,7 @@ raidz_reconstruct_r_impl(raidz_map_t *rm, const int *tgtidx)
 	}
 	/* add R to the syndrome */
-	raidz_add(xabd, rm->rm_col[CODE_R].rc_abd, xsize);
+	raidz_add(xabd, rr->rr_col[CODE_R].rc_abd, xsize);
 	/* transform the syndrome */
 	abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void *)coeff);
@ -881,31 +891,34 @@ raidz_rec_pq_abd(void **tc, const size_t tsize, void **c,
 * @syn_method	raidz_syn_pq_abd()
 * @rec_method	raidz_rec_pq_abd()
 *
- * @rm		RAIDZ map
+ * @rr		RAIDZ row
 * @tgtidx	array of missing data indexes
 */
 static raidz_inline int
-raidz_reconstruct_pq_impl(raidz_map_t *rm, const int *tgtidx)
+raidz_reconstruct_pq_impl(raidz_row_t *rr, const int *tgtidx)
 {
 	size_t c;
 	size_t dsize;
 	abd_t *dabd;
-	const size_t firstdc = raidz_parity(rm);
+	const size_t firstdc = rr->rr_firstdatacol;
-	const size_t ncols = raidz_ncols(rm);
+	const size_t ncols = rr->rr_cols;
 	const size_t x = tgtidx[TARGET_X];
 	const size_t y = tgtidx[TARGET_Y];
-	const size_t xsize = rm->rm_col[x].rc_size;
+	const size_t xsize = rr->rr_col[x].rc_size;
-	const size_t ysize = rm->rm_col[y].rc_size;
+	const size_t ysize = rr->rr_col[y].rc_size;
-	abd_t *xabd = rm->rm_col[x].rc_abd;
+	abd_t *xabd = rr->rr_col[x].rc_abd;
-	abd_t *yabd = rm->rm_col[y].rc_abd;
+	abd_t *yabd = rr->rr_col[y].rc_abd;
 	abd_t *tabds[2] = { xabd, yabd };
 	abd_t *cabds[] = {
-		rm->rm_col[CODE_P].rc_abd,
+		rr->rr_col[CODE_P].rc_abd,
-		rm->rm_col[CODE_Q].rc_abd
+		rr->rr_col[CODE_Q].rc_abd
 	};
 	if (xabd == NULL)
 		return ((1 << CODE_P) | (1 << CODE_Q));
 	unsigned coeff[MUL_CNT];
-	raidz_rec_pq_coeff(rm, tgtidx, coeff);
+	raidz_rec_pq_coeff(rr, tgtidx, coeff);
 	/*
 	 * Check if some of targets is shorter then others
@ -921,8 +934,8 @@ raidz_reconstruct_pq_impl(raidz_map_t *rm, const int *tgtidx)
 	/* Start with first data column if present */
 	if (firstdc != x) {
-		raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
-		raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
+		raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize);
 	} else {
 		raidz_zero(xabd, xsize);
 		raidz_zero(yabd, xsize);
@ -934,8 +947,8 @@ raidz_reconstruct_pq_impl(raidz_map_t *rm, const int *tgtidx)
 			dabd = NULL;
 			dsize = 0;
 		} else {
-			dabd = rm->rm_col[c].rc_abd;
+			dabd = rr->rr_col[c].rc_abd;
-			dsize = rm->rm_col[c].rc_size;
+			dsize = rr->rr_col[c].rc_size;
 		}
 		abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
@ -946,7 +959,7 @@ raidz_reconstruct_pq_impl(raidz_map_t *rm, const int *tgtidx)
 	/* Copy shorter targets back to the original abd buffer */
 	if (ysize < xsize)
-		raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
+		raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize);
 	raidz_math_end();
@ -1038,30 +1051,34 @@ raidz_rec_pr_abd(void **t, const size_t tsize, void **c,
 * @syn_method	raidz_syn_pr_abd()
 * @rec_method	raidz_rec_pr_abd()
 *
- * @rm		RAIDZ map
+ * @rr		RAIDZ row
 * @tgtidx	array of missing data indexes
 */
 static raidz_inline int
-raidz_reconstruct_pr_impl(raidz_map_t *rm, const int *tgtidx)
+raidz_reconstruct_pr_impl(raidz_row_t *rr, const int *tgtidx)
 {
 	size_t c;
 	size_t dsize;
 	abd_t *dabd;
-	const size_t firstdc = raidz_parity(rm);
+	const size_t firstdc = rr->rr_firstdatacol;
-	const size_t ncols = raidz_ncols(rm);
+	const size_t ncols = rr->rr_cols;
 	const size_t x = tgtidx[0];
 	const size_t y = tgtidx[1];
-	const size_t xsize = rm->rm_col[x].rc_size;
+	const size_t xsize = rr->rr_col[x].rc_size;
-	const size_t ysize = rm->rm_col[y].rc_size;
+	const size_t ysize = rr->rr_col[y].rc_size;
-	abd_t *xabd = rm->rm_col[x].rc_abd;
+	abd_t *xabd = rr->rr_col[x].rc_abd;
-	abd_t *yabd = rm->rm_col[y].rc_abd;
+	abd_t *yabd = rr->rr_col[y].rc_abd;
 	abd_t *tabds[2] = { xabd, yabd };
 	abd_t *cabds[] = {
-		rm->rm_col[CODE_P].rc_abd,
+		rr->rr_col[CODE_P].rc_abd,
-		rm->rm_col[CODE_R].rc_abd
+		rr->rr_col[CODE_R].rc_abd
 	};
 	if (xabd == NULL)
 		return ((1 << CODE_P) | (1 << CODE_R));
 	unsigned coeff[MUL_CNT];
-	raidz_rec_pr_coeff(rm, tgtidx, coeff);
+	raidz_rec_pr_coeff(rr, tgtidx, coeff);
 	/*
 	 * Check if some of targets are shorter then others.
@ -1077,8 +1094,8 @@ raidz_reconstruct_pr_impl(raidz_map_t *rm, const int *tgtidx)
 	/* Start with first data column if present */
 	if (firstdc != x) {
-		raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
-		raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
+		raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize);
 	} else {
 		raidz_zero(xabd, xsize);
 		raidz_zero(yabd, xsize);
@ -1090,8 +1107,8 @@ raidz_reconstruct_pr_impl(raidz_map_t *rm, const int *tgtidx)
 			dabd = NULL;
 			dsize = 0;
 		} else {
-			dabd = rm->rm_col[c].rc_abd;
+			dabd = rr->rr_col[c].rc_abd;
-			dsize = rm->rm_col[c].rc_size;
+			dsize = rr->rr_col[c].rc_size;
 		}
 		abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
@ -1104,14 +1121,14 @@ raidz_reconstruct_pr_impl(raidz_map_t *rm, const int *tgtidx)
 	 * Copy shorter targets back to the original abd buffer
 	 */
 	if (ysize < xsize)
-		raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
+		raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize);
 	raidz_math_end();
 	if (ysize < xsize)
 		abd_free(yabd);
-	return ((1 << CODE_P) | (1 << CODE_Q));
+	return ((1 << CODE_P) | (1 << CODE_R));
 }
@ -1201,30 +1218,34 @@ raidz_rec_qr_abd(void **t, const size_t tsize, void **c,
 * @syn_method	raidz_syn_qr_abd()
 * @rec_method	raidz_rec_qr_abd()
 *
- * @rm		RAIDZ map
+ * @rr		RAIDZ row
 * @tgtidx	array of missing data indexes
 */
 static raidz_inline int
-raidz_reconstruct_qr_impl(raidz_map_t *rm, const int *tgtidx)
+raidz_reconstruct_qr_impl(raidz_row_t *rr, const int *tgtidx)
 {
 	size_t c;
 	size_t dsize;
 	abd_t *dabd;
-	const size_t firstdc = raidz_parity(rm);
+	const size_t firstdc = rr->rr_firstdatacol;
-	const size_t ncols = raidz_ncols(rm);
+	const size_t ncols = rr->rr_cols;
 	const size_t x = tgtidx[TARGET_X];
 	const size_t y = tgtidx[TARGET_Y];
-	const size_t xsize = rm->rm_col[x].rc_size;
+	const size_t xsize = rr->rr_col[x].rc_size;
-	const size_t ysize = rm->rm_col[y].rc_size;
+	const size_t ysize = rr->rr_col[y].rc_size;
-	abd_t *xabd = rm->rm_col[x].rc_abd;
+	abd_t *xabd = rr->rr_col[x].rc_abd;
-	abd_t *yabd = rm->rm_col[y].rc_abd;
+	abd_t *yabd = rr->rr_col[y].rc_abd;
 	abd_t *tabds[2] = { xabd, yabd };
 	abd_t *cabds[] = {
-		rm->rm_col[CODE_Q].rc_abd,
+		rr->rr_col[CODE_Q].rc_abd,
-		rm->rm_col[CODE_R].rc_abd
+		rr->rr_col[CODE_R].rc_abd
 	};
 	if (xabd == NULL)
 		return ((1 << CODE_Q) | (1 << CODE_R));
 	unsigned coeff[MUL_CNT];
-	raidz_rec_qr_coeff(rm, tgtidx, coeff);
+	raidz_rec_qr_coeff(rr, tgtidx, coeff);
 	/*
 	 * Check if some of targets is shorter then others
@ -1240,8 +1261,8 @@ raidz_reconstruct_qr_impl(raidz_map_t *rm, const int *tgtidx)
 	/* Start with first data column if present */
 	if (firstdc != x) {
-		raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
-		raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
+		raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize);
 	} else {
 		raidz_zero(xabd, xsize);
 		raidz_zero(yabd, xsize);
@ -1253,8 +1274,8 @@ raidz_reconstruct_qr_impl(raidz_map_t *rm, const int *tgtidx)
 			dabd = NULL;
 			dsize = 0;
 		} else {
-			dabd = rm->rm_col[c].rc_abd;
+			dabd = rr->rr_col[c].rc_abd;
-			dsize = rm->rm_col[c].rc_size;
+			dsize = rr->rr_col[c].rc_size;
 		}
 		abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
@ -1267,7 +1288,7 @@ raidz_reconstruct_qr_impl(raidz_map_t *rm, const int *tgtidx)
 	 * Copy shorter targets back to the original abd buffer
 	 */
 	if (ysize < xsize)
-		raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
+		raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize);
 	raidz_math_end();
@ -1384,34 +1405,38 @@ raidz_rec_pqr_abd(void **t, const size_t tsize, void **c,
 * @syn_method	raidz_syn_pqr_abd()
 * @rec_method	raidz_rec_pqr_abd()
 *
- * @rm		RAIDZ map
+ * @rr		RAIDZ row
 * @tgtidx	array of missing data indexes
 */
 static raidz_inline int
-raidz_reconstruct_pqr_impl(raidz_map_t *rm, const int *tgtidx)
+raidz_reconstruct_pqr_impl(raidz_row_t *rr, const int *tgtidx)
 {
 	size_t c;
 	size_t dsize;
 	abd_t *dabd;
-	const size_t firstdc = raidz_parity(rm);
+	const size_t firstdc = rr->rr_firstdatacol;
-	const size_t ncols = raidz_ncols(rm);
+	const size_t ncols = rr->rr_cols;
 	const size_t x = tgtidx[TARGET_X];
 	const size_t y = tgtidx[TARGET_Y];
 	const size_t z = tgtidx[TARGET_Z];
-	const size_t xsize = rm->rm_col[x].rc_size;
+	const size_t xsize = rr->rr_col[x].rc_size;
-	const size_t ysize = rm->rm_col[y].rc_size;
+	const size_t ysize = rr->rr_col[y].rc_size;
-	const size_t zsize = rm->rm_col[z].rc_size;
+	const size_t zsize = rr->rr_col[z].rc_size;
-	abd_t *xabd = rm->rm_col[x].rc_abd;
+	abd_t *xabd = rr->rr_col[x].rc_abd;
-	abd_t *yabd = rm->rm_col[y].rc_abd;
+	abd_t *yabd = rr->rr_col[y].rc_abd;
-	abd_t *zabd = rm->rm_col[z].rc_abd;
+	abd_t *zabd = rr->rr_col[z].rc_abd;
 	abd_t *tabds[] = { xabd, yabd, zabd };
 	abd_t *cabds[] = {
-		rm->rm_col[CODE_P].rc_abd,
+		rr->rr_col[CODE_P].rc_abd,
-		rm->rm_col[CODE_Q].rc_abd,
+		rr->rr_col[CODE_Q].rc_abd,
-		rm->rm_col[CODE_R].rc_abd
+		rr->rr_col[CODE_R].rc_abd
 	};
 	if (xabd == NULL)
 		return ((1 << CODE_P) | (1 << CODE_Q) | (1 << CODE_R));
 	unsigned coeff[MUL_CNT];
-	raidz_rec_pqr_coeff(rm, tgtidx, coeff);
+	raidz_rec_pqr_coeff(rr, tgtidx, coeff);
 	/*
 	 * Check if some of the targets are shorter than others
@ -1431,9 +1456,9 @@ raidz_reconstruct_pqr_impl(raidz_map_t *rm, const int *tgtidx)
 	/* Start with first data column if present */
 	if (firstdc != x) {
-		raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
-		raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
+		raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize);
-		raidz_copy(zabd, rm->rm_col[firstdc].rc_abd, xsize);
+		raidz_copy(zabd, rr->rr_col[firstdc].rc_abd, xsize);
 	} else {
 		raidz_zero(xabd, xsize);
 		raidz_zero(yabd, xsize);
@ -1446,8 +1471,8 @@ raidz_reconstruct_pqr_impl(raidz_map_t *rm, const int *tgtidx)
 			dabd = NULL;
 			dsize = 0;
 		} else {
-			dabd = rm->rm_col[c].rc_abd;
+			dabd = rr->rr_col[c].rc_abd;
-			dsize = rm->rm_col[c].rc_size;
+			dsize = rr->rr_col[c].rc_size;
 		}
 		abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 3,
@ -1460,9 +1485,9 @@ raidz_reconstruct_pqr_impl(raidz_map_t *rm, const int *tgtidx)
 	 * Copy shorter targets back to the original abd buffer
 	 */
 	if (ysize < xsize)
-		raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
+		raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize);
 	if (zsize < xsize)
-		raidz_copy(rm->rm_col[z].rc_abd, zabd, zsize);
+		raidz_copy(rr->rr_col[z].rc_abd, zabd, zsize);
 	raidz_math_end();
--- a/module/zfs/vdev_rebuild.c
+++ b/module/zfs/vdev_rebuild.c
@ -25,6 +25,7 @@
 */
 #include <sys/vdev_impl.h>
 #include <sys/vdev_draid.h>
 #include <sys/dsl_scan.h>
 #include <sys/spa_impl.h>
 #include <sys/metaslab_impl.h>
@ -63,13 +64,15 @@
 *
 * Limitations:
 *
- *   - Only supported for mirror vdev types.  Due to the variable stripe
+ *   - Sequential reconstruction is not possible on RAIDZ due to its
- *     width used by raidz sequential reconstruction is not possible.
+ *     variable stripe width.  Note dRAID uses a fixed stripe width which
 *     avoids this issue, but comes at the expense of some usable capacity.
 *
- *   - Block checksums are not verified during sequential reconstuction.
+ *   - Block checksums are not verified during sequential reconstruction.
 *     Similar to traditional RAID the parity/mirror data is reconstructed
 *     but cannot be immediately double checked.  For this reason when the
- *     last active resilver completes the pool is automatically scrubbed.
+ *     last active resilver completes the pool is automatically scrubbed
 *     by default.
 *
 *   - Deferred resilvers using sequential reconstruction are not currently
 *     supported.  When adding another vdev to an active top-level resilver
@ -77,8 +80,8 @@
 *
 * Advantages:
 *
- *   - Sequential reconstuction is performed in LBA order which may be faster
+ *   - Sequential reconstruction is performed in LBA order which may be faster
- *     than healing reconstuction particularly when using using HDDs (or
+ *     than healing reconstruction particularly when using HDDs (or
 *     especially with SMR devices).  Only allocated capacity is resilvered.
 *
 *   - Sequential reconstruction is not constrained by ZFS block boundaries.
@ -86,9 +89,9 @@
 *     allowing all of these logical blocks to be repaired with a single IO.
 *
 *   - Unlike a healing resilver or scrub which are pool wide operations,
- *     sequential reconstruction is handled by the top-level mirror vdevs.
+ *     sequential reconstruction is handled by the top-level vdevs.  This
- *     This allows for it to be started or canceled on a top-level vdev
+ *     allows for it to be started or canceled on a top-level vdev without
- *     without impacting any other top-level vdevs in the pool.
+ *     impacting any other top-level vdevs in the pool.
 *
 *   - Data only referenced by a pool checkpoint will be repaired because
 *     that space is reflected in the space maps.  This differs for a
@ -97,18 +100,36 @@
 /*
- * Maximum number of queued rebuild I/Os top-level vdev.  The number of
+ * Size of rebuild reads; defaults to 1MiB per data disk and is capped at
- * concurrent rebuild I/Os issued to the device is controlled by the
+ * SPA_MAXBLOCKSIZE.
 * zfs_vdev_rebuild_min_active and zfs_vdev_rebuild_max_active module
 * options.
 */
 unsigned int zfs_rebuild_queue_limit = 20;
 /*
 * Size of rebuild reads; defaults to 1MiB and is capped at SPA_MAXBLOCKSIZE.
 */
 unsigned long zfs_rebuild_max_segment = 1024 * 1024;
 /*
 * Maximum number of parallelly executed bytes per leaf vdev caused by a
 * sequential resilver.  We attempt to strike a balance here between keeping
 * the vdev queues full of I/Os at all times and not overflowing the queues
 * to cause long latency, which would cause long txg sync times.
 *
 * A large default value can be safely used here because the default target
 * segment size is also large (zfs_rebuild_max_segment=1M).  This helps keep
 * the queue depth short.
 *
 * 32MB was selected as the default value to achieve good performance with
 * a large 90-drive dRAID HDD configuration (draid2:8d:90c:2s). A sequential
 * rebuild was unable to saturate all of the drives using smaller values.
 * With a value of 32MB the sequential resilver write rate was measured at
 * 800MB/s sustained while rebuilding to a distributed spare.
 */
 unsigned long zfs_rebuild_vdev_limit = 32 << 20;
 /*
 * Automatically start a pool scrub when the last active sequential resilver
 * completes in order to verify the checksums of all blocks which have been
 * resilvered. This option is enabled by default and is strongly recommended.
 */
 int zfs_rebuild_scrub_enabled = 1;
 /*
 * For vdev_rebuild_initiate_sync() and vdev_rebuild_reset_sync().
 */
@ -306,7 +327,16 @@ vdev_rebuild_complete_sync(void *arg, dmu_tx_t *tx)
 	vd->vdev_rebuilding = B_FALSE;
 	mutex_exit(&vd->vdev_rebuild_lock);
-	spa_notify_waiters(spa);
+	/*
 	 * While we're in syncing context take the opportunity to
 	 * setup the scrub when there are no more active rebuilds.
 	 */
 	if (!vdev_rebuild_active(spa->spa_root_vdev) &&
 	    zfs_rebuild_scrub_enabled) {
 		pool_scan_func_t func = POOL_SCAN_SCRUB;
 		dsl_scan_setup_sync(&func, tx);
 	}
 	cv_broadcast(&vd->vdev_rebuild_cv);
 }
@ -438,7 +468,7 @@ vdev_rebuild_cb(zio_t *zio)
 	vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
 	vdev_t *vd = vr->vr_top_vdev;
-	mutex_enter(&vd->vdev_rebuild_io_lock);
+	mutex_enter(&vr->vr_io_lock);
 	if (zio->io_error == ENXIO && !vdev_writeable(vd)) {
 		/*
 		 * The I/O failed because the top-level vdev was unavailable.
@ -455,34 +485,30 @@ vdev_rebuild_cb(zio_t *zio)
 	abd_free(zio->io_abd);
-	ASSERT3U(vd->vdev_rebuild_inflight, >, 0);
+	ASSERT3U(vr->vr_bytes_inflight, >, 0);
-	vd->vdev_rebuild_inflight--;
+	vr->vr_bytes_inflight -= zio->io_size;
-	cv_broadcast(&vd->vdev_rebuild_io_cv);
+	cv_broadcast(&vr->vr_io_cv);
-	mutex_exit(&vd->vdev_rebuild_io_lock);
+	mutex_exit(&vr->vr_io_lock);
 	spa_config_exit(vd->vdev_spa, SCL_STATE_ALL, vd);
 }
 /*
- * Rebuild the data in this range by constructing a special dummy block
+ * Initialize a block pointer that can be used to read the given segment
- * pointer for the given range.  It has no relation to any existing blocks
+ * for sequential rebuild.
 * in the pool.  But by disabling checksum verification and issuing a scrub
 * I/O mirrored vdevs will replicate the block using any available mirror
 * leaf vdevs.
 */
 static void
-vdev_rebuild_rebuild_block(vdev_rebuild_t *vr, uint64_t start, uint64_t asize,
+vdev_rebuild_blkptr_init(blkptr_t *bp, vdev_t *vd, uint64_t start,
-    uint64_t txg)
+    uint64_t asize)
 {
-	vdev_t *vd = vr->vr_top_vdev;
+	ASSERT(vd->vdev_ops == &vdev_draid_ops ||
-	spa_t *spa = vd->vdev_spa;
+	    vd->vdev_ops == &vdev_mirror_ops ||
 	uint64_t psize = asize;
 	ASSERT(vd->vdev_ops == &vdev_mirror_ops ||
 	    vd->vdev_ops == &vdev_replacing_ops ||
 	    vd->vdev_ops == &vdev_spare_ops);
-	blkptr_t blk, *bp = &blk;
+	uint64_t psize = vd->vdev_ops == &vdev_draid_ops ?
 	    vdev_draid_asize_to_psize(vd, asize) : asize;
 	BP_ZERO(bp);
 	DVA_SET_VDEV(&bp->blk_dva[0], vd->vdev_id);
@ -499,19 +525,6 @@ vdev_rebuild_rebuild_block(vdev_rebuild_t *vr, uint64_t start, uint64_t asize,
 	BP_SET_LEVEL(bp, 0);
 	BP_SET_DEDUP(bp, 0);
 	BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
 	/*
 	 * We increment the issued bytes by the asize rather than the psize
 	 * so the scanned and issued bytes may be directly compared.  This
 	 * is consistent with the scrub/resilver issued reporting.
 	 */
 	vr->vr_pass_bytes_issued += asize;
 	vr->vr_rebuild_phys.vrp_bytes_issued += asize;
 	zio_nowait(zio_read(spa->spa_txg_zio[txg & TXG_MASK], spa, bp,
 	    abd_alloc(psize, B_FALSE), psize, vdev_rebuild_cb, vr,
 	    ZIO_PRIORITY_REBUILD, ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL |
 	    ZIO_FLAG_RESILVER, NULL));
 }
 /*
@ -525,6 +538,7 @@ vdev_rebuild_range(vdev_rebuild_t *vr, uint64_t start, uint64_t size)
 	uint64_t ms_id __maybe_unused = vr->vr_scan_msp->ms_id;
 	vdev_t *vd = vr->vr_top_vdev;
 	spa_t *spa = vd->vdev_spa;
 	blkptr_t blk;
 	ASSERT3U(ms_id, ==, start >> vd->vdev_ms_shift);
 	ASSERT3U(ms_id, ==, (start + size - 1) >> vd->vdev_ms_shift);
@ -532,14 +546,26 @@ vdev_rebuild_range(vdev_rebuild_t *vr, uint64_t start, uint64_t size)
 	vr->vr_pass_bytes_scanned += size;
 	vr->vr_rebuild_phys.vrp_bytes_scanned += size;
-	mutex_enter(&vd->vdev_rebuild_io_lock);
+	/*
 	 * Rebuild the data in this range by constructing a special block
 	 * pointer.  It has no relation to any existing blocks in the pool.
 	 * However, by disabling checksum verification and issuing a scrub IO
 	 * we can reconstruct and repair any children with missing data.
 	 */
 	vdev_rebuild_blkptr_init(&blk, vd, start, size);
 	uint64_t psize = BP_GET_PSIZE(&blk);
 	if (!vdev_dtl_need_resilver(vd, &blk.blk_dva[0], psize, TXG_UNKNOWN))
 		return (0);
 	mutex_enter(&vr->vr_io_lock);
 	/* Limit in flight rebuild I/Os */
-	while (vd->vdev_rebuild_inflight >= zfs_rebuild_queue_limit)
+	while (vr->vr_bytes_inflight >= vr->vr_bytes_inflight_max)
-		cv_wait(&vd->vdev_rebuild_io_cv, &vd->vdev_rebuild_io_lock);
+		cv_wait(&vr->vr_io_cv, &vr->vr_io_lock);
-	vd->vdev_rebuild_inflight++;
+	vr->vr_bytes_inflight += psize;
-	mutex_exit(&vd->vdev_rebuild_io_lock);
+	mutex_exit(&vr->vr_io_lock);
 	dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
 	VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
@ -558,45 +584,29 @@ vdev_rebuild_range(vdev_rebuild_t *vr, uint64_t start, uint64_t size)
 	/* When exiting write out our progress. */
 	if (vdev_rebuild_should_stop(vd)) {
-		mutex_enter(&vd->vdev_rebuild_io_lock);
+		mutex_enter(&vr->vr_io_lock);
-		vd->vdev_rebuild_inflight--;
+		vr->vr_bytes_inflight -= psize;
-		mutex_exit(&vd->vdev_rebuild_io_lock);
+		mutex_exit(&vr->vr_io_lock);
 		spa_config_exit(vd->vdev_spa, SCL_STATE_ALL, vd);
 		mutex_exit(&vd->vdev_rebuild_lock);
 		dmu_tx_commit(tx);
 		return (SET_ERROR(EINTR));
 	}
 	mutex_exit(&vd->vdev_rebuild_lock);
 	vr->vr_scan_offset[txg & TXG_MASK] = start + size;
 	vdev_rebuild_rebuild_block(vr, start, size, txg);
 	dmu_tx_commit(tx);
 	vr->vr_scan_offset[txg & TXG_MASK] = start + size;
 	vr->vr_pass_bytes_issued += size;
 	vr->vr_rebuild_phys.vrp_bytes_issued += size;
 	zio_nowait(zio_read(spa->spa_txg_zio[txg & TXG_MASK], spa, &blk,
 	    abd_alloc(psize, B_FALSE), psize, vdev_rebuild_cb, vr,
 	    ZIO_PRIORITY_REBUILD, ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL |
 	    ZIO_FLAG_RESILVER, NULL));
 	return (0);
 }
 /*
  * Return the size of the next rebuild chunk for a range of 'size' bytes.
  * The result is the range size capped at the asize corresponding to
  * zfs_rebuild_max_segment, where the segment size is first rounded up to
  * the vdev's ashift and limited to SPA_MAXBLOCKSIZE.  Only mirror,
  * replacing and spare vdevs are accepted.  The 'start' argument is not
  * consulted.
  */
 static uint64_t
 vdev_rebuild_chunk_size(vdev_t *vd, uint64_t start, uint64_t size)
 {
 	ASSERT(vd->vdev_ops == &vdev_mirror_ops ||
 	    vd->vdev_ops == &vdev_replacing_ops ||
 	    vd->vdev_ops == &vdev_spare_ops);
 	/* Largest permitted segment, aligned to the vdev's sector size. */
 	uint64_t seg_limit = MIN(P2ROUNDUP(zfs_rebuild_max_segment,
 	    1 << vd->vdev_ashift), SPA_MAXBLOCKSIZE);
 	/* Express that limit in allocated bytes for this vdev. */
 	uint64_t asize_limit = vdev_psize_to_asize(vd, seg_limit);
 	return (MIN(size, asize_limit));
 }
 /*
 * Issues rebuild I/Os for all ranges in the provided vr->vr_tree range tree.
 */
@ -625,7 +635,14 @@ vdev_rebuild_ranges(vdev_rebuild_t *vr)
 		while (size > 0) {
 			uint64_t chunk_size;
-			chunk_size = vdev_rebuild_chunk_size(vd, start, size);
+			/*
 			 * Split range into legally-sized logical chunks
 			 * given the constraints of the top-level vdev
 			 * being rebuilt (dRAID or mirror).
 			 */
 			ASSERT3P(vd->vdev_ops, !=, NULL);
 			chunk_size = vd->vdev_ops->vdev_op_rebuild_asize(vd,
 			    start, size, zfs_rebuild_max_segment);
 			error = vdev_rebuild_range(vr, start, chunk_size);
 			if (error != 0)
@ -747,10 +764,16 @@ vdev_rebuild_thread(void *arg)
 	vr->vr_top_vdev = vd;
 	vr->vr_scan_msp = NULL;
 	vr->vr_scan_tree = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0);
 	mutex_init(&vr->vr_io_lock, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&vr->vr_io_cv, NULL, CV_DEFAULT, NULL);
 	vr->vr_pass_start_time = gethrtime();
 	vr->vr_pass_bytes_scanned = 0;
 	vr->vr_pass_bytes_issued = 0;
 	vr->vr_bytes_inflight_max = MAX(1ULL << 20,
 	    zfs_rebuild_vdev_limit * vd->vdev_children);
 	uint64_t update_est_time = gethrtime();
 	vdev_rebuild_update_bytes_est(vd, 0);
@ -780,18 +803,29 @@ vdev_rebuild_thread(void *arg)
 		ASSERT0(range_tree_space(vr->vr_scan_tree));
-		/*
+		/* Disable any new allocations to this metaslab */
 		 * Disable any new allocations to this metaslab and wait
 		 * for any writes inflight to complete.  This is needed to
 		 * ensure all allocated ranges are rebuilt.
 		 */
 		metaslab_disable(msp);
 		spa_config_exit(spa, SCL_CONFIG, FTAG);
 		txg_wait_synced(dsl, 0);
 		mutex_enter(&msp->ms_sync_lock);
 		mutex_enter(&msp->ms_lock);
 		/*
 		 * If there are outstanding allocations wait for them to be
 		 * synced.  This is needed to ensure all allocated ranges are
 		 * on disk and therefore will be rebuilt.
 		 */
 		for (int j = 0; j < TXG_SIZE; j++) {
 			if (range_tree_space(msp->ms_allocating[j])) {
 				mutex_exit(&msp->ms_lock);
 				mutex_exit(&msp->ms_sync_lock);
 				txg_wait_synced(dsl, 0);
 				mutex_enter(&msp->ms_sync_lock);
 				mutex_enter(&msp->ms_lock);
 				break;
 			}
 		}
 		/*
 		 * When a metaslab has been allocated from, read its allocated
 		 * ranges from the space map object into the vr_scan_tree.
@ -827,7 +861,7 @@ vdev_rebuild_thread(void *arg)
 		/*
 		 * To provide an accurate estimate re-calculate the estimated
 		 * size every 5 minutes to account for recent allocations and
-		 * frees made space maps which have not yet been rebuilt.
+		 * frees made to space maps which have not yet been rebuilt.
 		 */
 		if (gethrtime() > update_est_time + SEC2NSEC(300)) {
 			update_est_time = gethrtime();
@ -851,11 +885,14 @@ vdev_rebuild_thread(void *arg)
 	spa_config_exit(spa, SCL_CONFIG, FTAG);
 	/* Wait for any remaining rebuild I/O to complete */
-	mutex_enter(&vd->vdev_rebuild_io_lock);
+	mutex_enter(&vr->vr_io_lock);
-	while (vd->vdev_rebuild_inflight > 0)
+	while (vr->vr_bytes_inflight > 0)
-		cv_wait(&vd->vdev_rebuild_io_cv, &vd->vdev_rebuild_io_lock);
+		cv_wait(&vr->vr_io_cv, &vr->vr_io_lock);
-	mutex_exit(&vd->vdev_rebuild_io_lock);
+	mutex_exit(&vr->vr_io_lock);
 	mutex_destroy(&vr->vr_io_lock);
 	cv_destroy(&vr->vr_io_cv);
 	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
@ -1101,4 +1138,10 @@ vdev_rebuild_get_stats(vdev_t *tvd, vdev_rebuild_stat_t *vrs)
 /* BEGIN CSTYLED */
 ZFS_MODULE_PARAM(zfs, zfs_, rebuild_max_segment, ULONG, ZMOD_RW,
 	"Max segment size in bytes of rebuild reads");
 ZFS_MODULE_PARAM(zfs, zfs_, rebuild_vdev_limit, ULONG, ZMOD_RW,
 	"Max bytes in flight per leaf vdev for sequential resilvers");
 ZFS_MODULE_PARAM(zfs, zfs_, rebuild_scrub_enabled, INT, ZMOD_RW,
 	"Automatically scrub after sequential resilver completes");
 /* END CSTYLED */
--- a/module/zfs/vdev_removal.c
+++ b/module/zfs/vdev_removal.c
@ -250,7 +250,7 @@ vdev_remove_initiate_sync(void *arg, dmu_tx_t *tx)
 	spa_vdev_removal_t *svr = NULL;
 	uint64_t txg __maybe_unused = dmu_tx_get_txg(tx);
-	ASSERT3P(vd->vdev_ops, !=, &vdev_raidz_ops);
+	ASSERT0(vdev_get_nparity(vd));
 	svr = spa_vdev_removal_create(vd);
 	ASSERT(vd->vdev_removing);
@ -1120,7 +1120,7 @@ static void
 vdev_remove_enlist_zaps(vdev_t *vd, nvlist_t *zlist)
 {
 	ASSERT3P(zlist, !=, NULL);
-	ASSERT3P(vd->vdev_ops, !=, &vdev_raidz_ops);
+	ASSERT0(vdev_get_nparity(vd));
 	if (vd->vdev_leaf_zap != 0) {
 		char zkey[32];
@ -2041,7 +2041,7 @@ spa_vdev_remove_top_check(vdev_t *vd)
 	/*
 	 * All vdevs in normal class must have the same ashift
-	 * and not be raidz.
+	 * and not be raidz or draid.
 	 */
 	vdev_t *rvd = spa->spa_root_vdev;
 	int num_indirect = 0;
@ -2064,7 +2064,7 @@ spa_vdev_remove_top_check(vdev_t *vd)
 			num_indirect++;
 		if (!vdev_is_concrete(cvd))
 			continue;
-		if (cvd->vdev_ops == &vdev_raidz_ops)
+		if (vdev_get_nparity(cvd) != 0)
 			return (SET_ERROR(EINVAL));
 		/*
 		 * Need the mirror to be mirror of leaf vdevs only
@ -2217,8 +2217,19 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
 		 * in this pool.
 		 */
 		if (vd == NULL || unspare) {
 			char *type;
 			boolean_t draid_spare = B_FALSE;
 			if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type)
 			    == 0 && strcmp(type, VDEV_TYPE_DRAID_SPARE) == 0)
 				draid_spare = B_TRUE;
 			if (vd == NULL && draid_spare) {
 				error = SET_ERROR(ENOTSUP);
 			} else {
 				if (vd == NULL)
-				vd = spa_lookup_by_guid(spa, guid, B_TRUE);
+					vd = spa_lookup_by_guid(spa,
 					    guid, B_TRUE);
 				ev = spa_event_create(spa, vd, NULL,
 				    ESC_ZFS_VDEV_REMOVE_AUX);
@ -2229,6 +2240,7 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
 				    ZPOOL_CONFIG_SPARES, spares, nspares, nv);
 				spa_load_spares(spa);
 				spa->spa_spares.sav_sync = B_TRUE;
 			}
 		} else {
 			error = SET_ERROR(EBUSY);
 		}
--- a/module/zfs/vdev_root.c
+++ b/module/zfs/vdev_root.c
@ -142,9 +142,13 @@ vdev_root_state_change(vdev_t *vd, int faulted, int degraded)
 }
 vdev_ops_t vdev_root_ops = {
 	.vdev_op_init = NULL,
 	.vdev_op_fini = NULL,
 	.vdev_op_open = vdev_root_open,
 	.vdev_op_close = vdev_root_close,
 	.vdev_op_asize = vdev_default_asize,
 	.vdev_op_min_asize = vdev_default_min_asize,
 	.vdev_op_min_alloc = NULL,
 	.vdev_op_io_start = NULL,	/* not applicable to the root */
 	.vdev_op_io_done = NULL,	/* not applicable to the root */
 	.vdev_op_state_change = vdev_root_state_change,
@ -153,6 +157,11 @@ vdev_ops_t vdev_root_ops = {
 	.vdev_op_rele = NULL,
 	.vdev_op_remap = NULL,
 	.vdev_op_xlate = NULL,
 	.vdev_op_rebuild_asize = NULL,
 	.vdev_op_metaslab_init = NULL,
 	.vdev_op_config_generate = NULL,
 	.vdev_op_nparity = NULL,
 	.vdev_op_ndisks = NULL,
 	.vdev_op_type = VDEV_TYPE_ROOT,	/* name of this vdev type */
 	.vdev_op_leaf = B_FALSE		/* not a leaf vdev */
 };
--- a/module/zfs/vdev_trim.c
+++ b/module/zfs/vdev_trim.c
@ -311,7 +311,8 @@ vdev_trim_change_state(vdev_t *vd, vdev_trim_state_t new_state,
 			vd->vdev_trim_secure = secure;
 	}
-	boolean_t resumed = !!(vd->vdev_trim_state == VDEV_TRIM_SUSPENDED);
+	vdev_trim_state_t old_state = vd->vdev_trim_state;
 	boolean_t resumed = (old_state == VDEV_TRIM_SUSPENDED);
 	vd->vdev_trim_state = new_state;
 	dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
@ -332,9 +333,12 @@ vdev_trim_change_state(vdev_t *vd, vdev_trim_state_t new_state,
 		    "vdev=%s suspended", vd->vdev_path);
 		break;
 	case VDEV_TRIM_CANCELED:
 		if (old_state == VDEV_TRIM_ACTIVE ||
 		    old_state == VDEV_TRIM_SUSPENDED) {
 			spa_event_notify(spa, vd, NULL, ESC_ZFS_TRIM_CANCEL);
 			spa_history_log_internal(spa, "trim", tx,
 			    "vdev=%s canceled", vd->vdev_path);
 		}
 		break;
 	case VDEV_TRIM_COMPLETE:
 		spa_event_notify(spa, vd, NULL, ESC_ZFS_TRIM_FINISH);
@ -601,6 +605,32 @@ vdev_trim_ranges(trim_args_t *ta)
 	return (0);
 }
 /*
  * vdev_xlate_walk() callback which tracks the largest physical rs_end
  * seen so far.  'arg' points to the uint64_t holding the running maximum.
  */
 static void
 vdev_trim_xlate_last_rs_end(void *arg, range_seg64_t *physical_rs)
 {
 	uint64_t *max_end = arg;
 	const uint64_t seg_end = physical_rs->rs_end;
 	/* Only ever move the recorded end forward. */
 	if (*max_end < seg_end)
 		*max_end = seg_end;
 }
 /*
  * vdev_xlate_walk() callback which accounts a physical segment toward the
  * TRIM progress counters of the vdev passed in 'arg'.  The full segment
  * length is added to vdev_trim_bytes_est; the part of the segment lying
  * below vdev_trim_last_offset is added to vdev_trim_bytes_done.
  */
 static void
 vdev_trim_xlate_progress(void *arg, range_seg64_t *physical_rs)
 {
 	vdev_t *vd = arg;
 	const uint64_t seg_start = physical_rs->rs_start;
 	const uint64_t seg_end = physical_rs->rs_end;
 	const uint64_t seg_len = seg_end - seg_start;
 	/* Every translated segment counts toward the estimate. */
 	vd->vdev_trim_bytes_est += seg_len;
 	/* Segment lies entirely at or below the last trimmed offset. */
 	if (vd->vdev_trim_last_offset >= seg_end) {
 		vd->vdev_trim_bytes_done += seg_len;
 		return;
 	}
 	/* Last offset falls inside the segment: count the leading part. */
 	if (vd->vdev_trim_last_offset > seg_start) {
 		vd->vdev_trim_bytes_done +=
 		    vd->vdev_trim_last_offset - seg_start;
 	}
 }
 /*
 * Calculates the completion percentage of a manual TRIM.
 */
@ -618,27 +648,35 @@ vdev_trim_calculate_progress(vdev_t *vd)
 		metaslab_t *msp = vd->vdev_top->vdev_ms[i];
 		mutex_enter(&msp->ms_lock);
-		uint64_t ms_free = msp->ms_size -
+		uint64_t ms_free = (msp->ms_size -
-		    metaslab_allocated_space(msp);
+		    metaslab_allocated_space(msp)) /
-
+		    vdev_get_ndisks(vd->vdev_top);
 		if (vd->vdev_top->vdev_ops == &vdev_raidz_ops)
 			ms_free /= vd->vdev_top->vdev_children;
 		/*
 		 * Convert the metaslab range to a physical range
 		 * on our vdev. We use this to determine if we are
 		 * in the middle of this metaslab range.
 		 */
-		range_seg64_t logical_rs, physical_rs;
+		range_seg64_t logical_rs, physical_rs, remain_rs;
 		logical_rs.rs_start = msp->ms_start;
 		logical_rs.rs_end = msp->ms_start + msp->ms_size;
 		vdev_xlate(vd, &logical_rs, &physical_rs);
 		/* Metaslab space after this offset has not been trimmed. */
 		vdev_xlate(vd, &logical_rs, &physical_rs, &remain_rs);
 		if (vd->vdev_trim_last_offset <= physical_rs.rs_start) {
 			vd->vdev_trim_bytes_est += ms_free;
 			mutex_exit(&msp->ms_lock);
 			continue;
-		} else if (vd->vdev_trim_last_offset > physical_rs.rs_end) {
+		}
 		/* Metaslab space before this offset has been trimmed */
 		uint64_t last_rs_end = physical_rs.rs_end;
 		if (!vdev_xlate_is_empty(&remain_rs)) {
 			vdev_xlate_walk(vd, &remain_rs,
 			    vdev_trim_xlate_last_rs_end, &last_rs_end);
 		}
 		if (vd->vdev_trim_last_offset > last_rs_end) {
 			vd->vdev_trim_bytes_done += ms_free;
 			vd->vdev_trim_bytes_est += ms_free;
 			mutex_exit(&msp->ms_lock);
@ -659,21 +697,9 @@ vdev_trim_calculate_progress(vdev_t *vd)
 		    rs != NULL; rs = zfs_btree_next(bt, &idx, &idx)) {
 			logical_rs.rs_start = rs_get_start(rs, rt);
 			logical_rs.rs_end = rs_get_end(rs, rt);
 			vdev_xlate(vd, &logical_rs, &physical_rs);
-			uint64_t size = physical_rs.rs_end -
+			vdev_xlate_walk(vd, &logical_rs,
-			    physical_rs.rs_start;
+			    vdev_trim_xlate_progress, vd);
 			vd->vdev_trim_bytes_est += size;
 			if (vd->vdev_trim_last_offset >= physical_rs.rs_end) {
 				vd->vdev_trim_bytes_done += size;
 			} else if (vd->vdev_trim_last_offset >
 			    physical_rs.rs_start &&
 			    vd->vdev_trim_last_offset <=
 			    physical_rs.rs_end) {
 				vd->vdev_trim_bytes_done +=
 				    vd->vdev_trim_last_offset -
 				    physical_rs.rs_start;
 			}
 		}
 		mutex_exit(&msp->ms_lock);
 	}
@ -741,8 +767,38 @@ vdev_trim_load(vdev_t *vd)
 	return (err);
 }
 static void
 vdev_trim_xlate_range_add(void *arg, range_seg64_t *physical_rs)
 {
 	trim_args_t *ta = arg;
 	vdev_t *vd = ta->trim_vdev;
 	/*
- * Convert the logical range into a physical range and add it to the
+	 * Only a manual trim will be traversing the vdev sequentially.
 	 * For an auto trim all valid ranges should be added.
 	 */
 	if (ta->trim_type == TRIM_TYPE_MANUAL) {
 		/* Only add segments that we have not visited yet */
 		if (physical_rs->rs_end <= vd->vdev_trim_last_offset)
 			return;
 		/* Pick up where we left off mid-range. */
 		if (vd->vdev_trim_last_offset > physical_rs->rs_start) {
 			ASSERT3U(physical_rs->rs_end, >,
 			    vd->vdev_trim_last_offset);
 			physical_rs->rs_start = vd->vdev_trim_last_offset;
 		}
 	}
 	ASSERT3U(physical_rs->rs_end, >, physical_rs->rs_start);
 	range_tree_add(ta->trim_tree, physical_rs->rs_start,
 	    physical_rs->rs_end - physical_rs->rs_start);
 }
 /*
 * Convert the logical range into physical ranges and add them to the
 * range tree passed in the trim_args_t.
 */
 static void
@ -750,7 +806,7 @@ vdev_trim_range_add(void *arg, uint64_t start, uint64_t size)
 {
 	trim_args_t *ta = arg;
 	vdev_t *vd = ta->trim_vdev;
-	range_seg64_t logical_rs, physical_rs;
+	range_seg64_t logical_rs;
 	logical_rs.rs_start = start;
 	logical_rs.rs_end = start + size;
@ -767,44 +823,7 @@ vdev_trim_range_add(void *arg, uint64_t start, uint64_t size)
 	}
 	ASSERT(vd->vdev_ops->vdev_op_leaf);
-	vdev_xlate(vd, &logical_rs, &physical_rs);
+	vdev_xlate_walk(vd, &logical_rs, vdev_trim_xlate_range_add, arg);
 	IMPLY(vd->vdev_top == vd,
 	    logical_rs.rs_start == physical_rs.rs_start);
 	IMPLY(vd->vdev_top == vd,
 	    logical_rs.rs_end == physical_rs.rs_end);
 	/*
 	 * Only a manual trim will be traversing the vdev sequentially.
 	 * For an auto trim all valid ranges should be added.
 	 */
 	if (ta->trim_type == TRIM_TYPE_MANUAL) {
 		/* Only add segments that we have not visited yet */
 		if (physical_rs.rs_end <= vd->vdev_trim_last_offset)
 			return;
 		/* Pick up where we left off mid-range. */
 		if (vd->vdev_trim_last_offset > physical_rs.rs_start) {
 			ASSERT3U(physical_rs.rs_end, >,
 			    vd->vdev_trim_last_offset);
 			physical_rs.rs_start = vd->vdev_trim_last_offset;
 		}
 	}
 	ASSERT3U(physical_rs.rs_end, >=, physical_rs.rs_start);
 	/*
 	 * With raidz, it's possible that the logical range does not live on
 	 * this leaf vdev. We only add the physical range to this vdev's if it
 	 * has a length greater than 0.
 	 */
 	if (physical_rs.rs_end > physical_rs.rs_start) {
 		range_tree_add(ta->trim_tree, physical_rs.rs_start,
 		    physical_rs.rs_end - physical_rs.rs_start);
 	} else {
 		ASSERT3U(physical_rs.rs_end, ==, physical_rs.rs_start);
 	}
 }
 /*
--- a/module/zfs/zfs_fm.c
+++ b/module/zfs/zfs_fm.c
@ -1111,7 +1111,9 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb,
 		bcopy(info, report->zcr_ckinfo, sizeof (*info));
 	}
-	report->zcr_align = 1ULL << vd->vdev_top->vdev_ashift;
+	report->zcr_sector = 1ULL << vd->vdev_top->vdev_ashift;
 	report->zcr_align =
 	    vdev_psize_to_asize(vd->vdev_top, report->zcr_sector);
 	report->zcr_length = length;
 #ifdef _KERNEL
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@ -1702,16 +1702,16 @@ zio_write_compress(zio_t *zio)
 			return (zio);
 		} else {
 			/*
-			 * Round up compressed size up to the ashift
+			 * Round compressed size up to the minimum allocation
-			 * of the smallest-ashift device, and zero the tail.
+			 * size of the smallest-ashift device, and zero the
-			 * This ensures that the compressed size of the BP
+			 * tail. This ensures that the compressed size of the
-			 * (and thus compressratio property) are correct,
+			 * BP (and thus compressratio property) are correct,
 			 * in that we charge for the padding used to fill out
 			 * the last sector.
 			 */
-			ASSERT3U(spa->spa_min_ashift, >=, SPA_MINBLOCKSHIFT);
+			ASSERT3U(spa->spa_min_alloc, >=, SPA_MINBLOCKSHIFT);
-			size_t rounded = (size_t)P2ROUNDUP(psize,
+			size_t rounded = (size_t)roundup(psize,
-			    1ULL << spa->spa_min_ashift);
+			    spa->spa_min_alloc);
 			if (rounded >= lsize) {
 				compress = ZIO_COMPRESS_OFF;
 				zio_buf_free(cbuf, lsize);
@ -3754,19 +3754,37 @@ zio_vdev_io_start(zio_t *zio)
 	 * However, indirect vdevs point off to other vdevs which may have
 	 * DTL's, so we never bypass them.  The child i/os on concrete vdevs
 	 * will be properly bypassed instead.
 	 *
 	 * Leaf DTL_PARTIAL can be empty when a legitimate write comes from
 	 * a dRAID spare vdev. For example, when a dRAID spare is first
 	 * used, its spare blocks need to be written to but the leaf vdevs
 	 * of such blocks can have empty DTL_PARTIAL.
 	 *
 	 * There seems to be no clean way to allow such writes while bypassing
 	 * spurious ones. At this point, just avoid all bypassing for dRAID
 	 * for correctness.
 	 */
 	if ((zio->io_flags & ZIO_FLAG_IO_REPAIR) &&
 	    !(zio->io_flags & ZIO_FLAG_SELF_HEAL) &&
 	    zio->io_txg != 0 &&	/* not a delegated i/o */
 	    vd->vdev_ops != &vdev_indirect_ops &&
 	    vd->vdev_top->vdev_ops != &vdev_draid_ops &&
 	    !vdev_dtl_contains(vd, DTL_PARTIAL, zio->io_txg, 1)) {
 		ASSERT(zio->io_type == ZIO_TYPE_WRITE);
 		zio_vdev_io_bypass(zio);
 		return (zio);
 	}
-	if (vd->vdev_ops->vdev_op_leaf && (zio->io_type == ZIO_TYPE_READ ||
+	/*
-	    zio->io_type == ZIO_TYPE_WRITE || zio->io_type == ZIO_TYPE_TRIM)) {
+	 * Select the next best leaf I/O to process.  Distributed spares are
 	 * excluded since they dispatch the I/O directly to a leaf vdev after
 	 * applying the dRAID mapping.
 	 */
 	if (vd->vdev_ops->vdev_op_leaf &&
 	    vd->vdev_ops != &vdev_draid_spare_ops &&
 	    (zio->io_type == ZIO_TYPE_READ ||
 	    zio->io_type == ZIO_TYPE_WRITE ||
 	    zio->io_type == ZIO_TYPE_TRIM)) {
 		if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio))
 			return (zio);
@ -3803,8 +3821,8 @@ zio_vdev_io_done(zio_t *zio)
 	if (zio->io_delay)
 		zio->io_delay = gethrtime() - zio->io_delay;
-	if (vd != NULL && vd->vdev_ops->vdev_op_leaf) {
+	if (vd != NULL && vd->vdev_ops->vdev_op_leaf &&
-
+	    vd->vdev_ops != &vdev_draid_spare_ops) {
 		vdev_queue_io_done(zio);
 		if (zio->io_type == ZIO_TYPE_WRITE)
@ -4206,7 +4224,7 @@ zio_checksum_verify(zio_t *zio)
 		if (zio->io_prop.zp_checksum == ZIO_CHECKSUM_OFF)
 			return (zio);
-		ASSERT(zio->io_prop.zp_checksum == ZIO_CHECKSUM_LABEL);
+		ASSERT3U(zio->io_prop.zp_checksum, ==, ZIO_CHECKSUM_LABEL);
 	}
 	if ((error = zio_checksum_error(zio, &info)) != 0) {
--- a/module/zfs/zio_inject.c
+++ b/module/zfs/zio_inject.c
@ -265,6 +265,12 @@ zio_handle_fault_injection(zio_t *zio, int error)
 	if (zio->io_type != ZIO_TYPE_READ)
 		return (0);
 	/*
 	 * A rebuild I/O has no checksum to verify.
 	 */
 	if (zio->io_priority == ZIO_PRIORITY_REBUILD && error == ECKSUM)
 		return (0);
 	rw_enter(&inject_lock, RW_READER);
 	for (handler = list_head(&inject_handlers); handler != NULL;
--- a/scripts/Makefile.am
+++ b/scripts/Makefile.am
@ -36,6 +36,7 @@ export ZPOOL_SCRIPT_DIR=$$CMD_DIR/zpool/zpool.d
 export ZPOOL_SCRIPTS_PATH=$$CMD_DIR/zpool/zpool.d
 export CONTRIB_DIR=@abs_top_builddir@/contrib
 export LIB_DIR=@abs_top_builddir@/lib
 export SYSCONF_DIR=@abs_top_builddir@/etc
 export INSTALL_UDEV_DIR=@udevdir@
 export INSTALL_UDEV_RULE_DIR=@udevruledir@
--- a/scripts/zfs-helpers.sh
+++ b/scripts/zfs-helpers.sh
@ -166,6 +166,8 @@ if [ "${INSTALL}" = "yes" ]; then
 	    "$INSTALL_UDEV_RULE_DIR/90-zfs.rules"
 	install "$CMD_DIR/zpool/zpool.d" \
 	    "$INSTALL_SYSCONF_DIR/zfs/zpool.d"
 	install "$SYSCONF_DIR/zfs/draid.d" \
 	    "$INSTALL_SYSCONF_DIR/zfs/draid.d"
 	install "$CONTRIB_DIR/pyzfs/libzfs_core" \
 	    "$INSTALL_PYTHON_DIR/libzfs_core"
 	# Ideally we would install these in the configured ${libdir}, which is
@ -185,6 +187,7 @@ else
 	remove "$INSTALL_UDEV_RULE_DIR/69-vdev.rules"
 	remove "$INSTALL_UDEV_RULE_DIR/90-zfs.rules"
 	remove "$INSTALL_SYSCONF_DIR/zfs/zpool.d"
 	remove "$INSTALL_SYSCONF_DIR/zfs/draid.d"
 	remove "$INSTALL_PYTHON_DIR/libzfs_core"
 	remove "/lib/libzfs_core.so"
 	remove "/lib/libnvpair.so"
--- a/scripts/zloop.sh
+++ b/scripts/zloop.sh
@ -18,6 +18,7 @@
 #
 # Copyright (c) 2015 by Delphix. All rights reserved.
 # Copyright (C) 2016 Lawrence Livermore National Security, LLC.
 # Copyright (c) 2017, Intel Corporation.
 #
 BASE_DIR=$(dirname "$0")
@ -246,27 +247,60 @@ while [[ $timeout -eq 0 ]] || [[ $curtime -le $((starttime + timeout)) ]]; do
 	or_die rm -rf "$workdir"
 	or_die mkdir "$workdir"
-	# switch between common arrangements & fully randomized
+	# switch between three types of configs
-	if [[ $((RANDOM % 2)) -eq 0 ]]; then
+	# 1/3 basic, 1/3 raidz mix, and 1/3 draid mix
-		mirrors=2
+	choice=$((RANDOM % 3))
-		raidz=0
+
-		parity=1
+	# ashift is either 9 or 12
 		vdevs=2
 	else
 		mirrors=$(((RANDOM % 3) * 1))
 		parity=$(((RANDOM % 3) + 1))
 		raidz=$((((RANDOM % 9) + parity + 1) * (RANDOM % 2)))
 		vdevs=$(((RANDOM % 3) + 3))
 	fi
 	align=$(((RANDOM % 2) * 3 + 9))
-	runtime=$((RANDOM % 100))
+
 	# randomly use special classes
 	class="special=random"
 	if [[ $choice -eq 0 ]]; then
 		# basic mirror only
 		parity=1
 		mirrors=2
 		draid_data=0
 		draid_spares=0
 		raid_children=0
 		vdevs=2
 		raid_type="raidz"
 	elif [[ $choice -eq 1 ]]; then
 		# fully randomized mirror/raidz (sans dRAID)
 		parity=$(((RANDOM % 3) + 1))
 		mirrors=$(((RANDOM % 3) * 1))
 		draid_data=0
 		draid_spares=0
 		raid_children=$((((RANDOM % 9) + parity + 1) * (RANDOM % 2)))
 		vdevs=$(((RANDOM % 3) + 3))
 		raid_type="raidz"
 	else
 		# fully randomized dRAID (sans mirror/raidz)
 		parity=$(((RANDOM % 3) + 1))
 		mirrors=0
 		draid_data=$(((RANDOM % 8) + 3))
 		draid_spares=$(((RANDOM % 2) + parity))
 		stripe=$((draid_data + parity))
 		extra=$((draid_spares + (RANDOM % 4)))
 		raid_children=$(((((RANDOM % 4) + 1) * stripe) + extra))
 		vdevs=$((RANDOM % 3))
 		raid_type="draid"
 	fi
 	# run from 30 to 120 seconds
 	runtime=$(((RANDOM % 90) + 30))
 	passtime=$((RANDOM % (runtime / 3 + 1) + 10))
 	zopt="$zopt -K $raid_type"
 	zopt="$zopt -m $mirrors"
-	zopt="$zopt -r $raidz"
+	zopt="$zopt -r $raid_children"
 	zopt="$zopt -D $draid_data"
 	zopt="$zopt -S $draid_spares"
 	zopt="$zopt -R $parity"
 	zopt="$zopt -v $vdevs"
 	zopt="$zopt -a $align"
 	zopt="$zopt -C $class"
 	zopt="$zopt -T $runtime"
 	zopt="$zopt -P $passtime"
 	zopt="$zopt -s $size"
--- a/tests/runfiles/common.run
+++ b/tests/runfiles/common.run
@ -333,6 +333,8 @@ tests = ['zpool_create_001_pos', 'zpool_create_002_pos',
    'zpool_create_020_pos', 'zpool_create_021_pos', 'zpool_create_022_pos',
    'zpool_create_023_neg', 'zpool_create_024_pos',
    'zpool_create_encrypted', 'zpool_create_crypt_combos',
    'zpool_create_draid_001_pos', 'zpool_create_draid_002_pos',
    'zpool_create_draid_003_pos', 'zpool_create_draid_004_pos',
    'zpool_create_features_001_pos', 'zpool_create_features_002_pos',
    'zpool_create_features_003_pos', 'zpool_create_features_004_neg',
    'zpool_create_features_005_pos',
@ -375,7 +377,7 @@ tests = ['zpool_import_001_pos', 'zpool_import_002_pos',
    'zpool_import_006_pos', 'zpool_import_007_pos', 'zpool_import_008_pos',
    'zpool_import_009_neg', 'zpool_import_010_pos', 'zpool_import_011_neg',
    'zpool_import_012_pos', 'zpool_import_013_neg', 'zpool_import_014_pos',
-    'zpool_import_015_pos',
+    'zpool_import_015_pos', 'zpool_import_016_pos', 'zpool_import_017_pos',
    'zpool_import_features_001_pos', 'zpool_import_features_002_neg',
    'zpool_import_features_003_pos', 'zpool_import_missing_001_pos',
    'zpool_import_missing_002_pos', 'zpool_import_missing_003_pos',
@ -710,12 +712,14 @@ tests = ['redacted_compressed', 'redacted_contents', 'redacted_deleted',
 tags = ['functional', 'redacted_send']
 [tests/functional/raidz]
-tests = ['raidz_001_neg', 'raidz_002_pos']
+tests = ['raidz_001_neg', 'raidz_002_pos', 'raidz_003_pos', 'raidz_004_pos']
 tags = ['functional', 'raidz']
 [tests/functional/redundancy]
-tests = ['redundancy_001_pos', 'redundancy_002_pos', 'redundancy_003_pos',
+tests = ['redundancy_draid1', 'redundancy_draid2', 'redundancy_draid3',
-    'redundancy_004_neg']
+    'redundancy_draid_spare1', 'redundancy_draid_spare2',
    'redundancy_draid_spare3', 'redundancy_mirror', 'redundancy_raidz1',
    'redundancy_raidz2', 'redundancy_raidz3', 'redundancy_stripe']
 tags = ['functional', 'redundancy']
 [tests/functional/refquota]
--- a/tests/test-runner/bin/zts-report.py.in
+++ b/tests/test-runner/bin/zts-report.py.in
@ -218,6 +218,7 @@ maybe = {
    'no_space/enospc_002_pos': ['FAIL', enospc_reason],
    'projectquota/setup': ['SKIP', exec_reason],
    'redundancy/redundancy_004_neg': ['FAIL', '7290'],
    'redundancy/redundancy_draid_spare3': ['SKIP', known_reason],
    'reservation/reservation_008_pos': ['FAIL', '7741'],
    'reservation/reservation_018_pos': ['FAIL', '5642'],
    'rsend/rsend_019_pos': ['FAIL', '6086'],
--- a/tests/zfs-tests/cmd/Makefile.am
+++ b/tests/zfs-tests/cmd/Makefile.am
@ -6,6 +6,7 @@ SUBDIRS = \
 	chg_usr_exec \
 	devname2devid \
 	dir_rd_update \
 	draid \
 	file_check \
 	file_trunc \
 	file_write \
--- a/tests/zfs-tests/cmd/draid/.gitignore
+++ b/tests/zfs-tests/cmd/draid/.gitignore
@ -0,0 +1 @@
 /draid
--- a/tests/zfs-tests/cmd/draid/Makefile.am
+++ b/tests/zfs-tests/cmd/draid/Makefile.am
@ -0,0 +1,15 @@
 include $(top_srcdir)/config/Rules.am
 pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/bin
 AM_CFLAGS += $(ZLIB_CFLAGS)
 pkgexec_PROGRAMS = draid
 draid_SOURCES = draid.c
 draid_LDADD = \
 	$(abs_top_builddir)/lib/libzpool/libzpool.la \
 	$(abs_top_builddir)/lib/libnvpair/libnvpair.la
 draid_LDADD += $(ZLIB_LIBS)
--- a/tests/zfs-tests/cmd/draid/draid.c
+++ b/tests/zfs-tests/cmd/draid/draid.c
--- a/tests/zfs-tests/include/commands.cfg
+++ b/tests/zfs-tests/include/commands.cfg
@ -197,6 +197,7 @@ export ZFSTEST_FILES='badsend
    chg_usr_exec
    devname2devid
    dir_rd_update
    draid
    file_check
    file_trunc
    file_write
--- a/tests/zfs-tests/include/libtest.shlib
+++ b/tests/zfs-tests/include/libtest.shlib
@ -2336,7 +2336,7 @@ function check_pool_status # pool token keyword <verbose>
 function is_pool_resilvering #pool <verbose>
 {
 	check_pool_status "$1" "scan" \
-	    "resilver[ ()0-9A-Za-z_-]* in progress since" $2
+	    "resilver[ ()0-9A-Za-z:_-]* in progress since" $2
 	return $?
 }
--- a/tests/zfs-tests/include/tunables.cfg
+++ b/tests/zfs-tests/include/tunables.cfg
@ -60,6 +60,7 @@ MULTIHOST_IMPORT_INTERVALS	multihost.import_intervals	zfs_multihost_import_inter
 MULTIHOST_INTERVAL		multihost.interval		zfs_multihost_interval
 OVERRIDE_ESTIMATE_RECORDSIZE	send.override_estimate_recordsize	zfs_override_estimate_recordsize
 PREFETCH_DISABLE		prefetch.disable		zfs_prefetch_disable
 REBUILD_SCRUB_ENABLED		rebuild_scrub_enabled		zfs_rebuild_scrub_enabled
 REMOVAL_SUSPEND_PROGRESS	removal_suspend_progress	zfs_removal_suspend_progress
 REMOVE_MAX_SEGMENT		remove_max_segment		zfs_remove_max_segment
 RESILVER_MIN_TIME_MS		resilver_min_time_ms		zfs_resilver_min_time_ms
--- a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib
@ -66,7 +66,8 @@ function setup_filesystem #disklist #pool #fs #mntpoint #type #vdev
 	if [[ $vdev != "" && \
 		$vdev != "mirror" && \
-		$vdev != "raidz" ]] ; then
+		$vdev != "raidz" && \
 		$vdev != "draid" ]] ; then
 		log_note "Wrong vdev: (\"$vdev\")"
 		return 1
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_001_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_001_pos.ksh
@ -55,23 +55,26 @@ log_assert "'zpool add <pool> <vdev> ...' can add devices to the pool."
 log_onexit cleanup
-set -A keywords "" "mirror" "raidz" "raidz1" "spare"
+set -A keywords "" "mirror" "raidz" "raidz1" "draid:1s" "draid1:1s" "spare"
 pooldevs="${DISK0} \
 	\"${DISK0} ${DISK1}\" \
 	\"${DISK0} ${DISK1} ${DISK2}\""
 mirrordevs="\"${DISK0} ${DISK1}\""
 raidzdevs="\"${DISK0} ${DISK1}\""
 draiddevs="\"${DISK0} ${DISK1} ${DISK2}\""
 disk0=$TEST_BASE_DIR/disk0
 disk1=$TEST_BASE_DIR/disk1
-truncate -s $MINVDEVSIZE $disk0 $disk1
+disk2=$TEST_BASE_DIR/disk2
 truncate -s $MINVDEVSIZE $disk0 $disk1 $disk2
 typeset -i i=0
 typeset vdev
 eval set -A poolarray $pooldevs
 eval set -A mirrorarray $mirrordevs
 eval set -A raidzarray $raidzdevs
 eval set -A draidarray $draiddevs
 while (( $i < ${#keywords[*]} )); do
@ -107,6 +110,19 @@ while (( $i < ${#keywords[*]} )); do
 			destroy_pool "$TESTPOOL"
 		done
 		;;
        draid:1s|draid1:1s)
 		for vdev in "${draidarray[@]}"; do
 			create_pool "$TESTPOOL" "${keywords[i]}" \
 				"$disk0" "$disk1" "$disk2"
 			log_must poolexists "$TESTPOOL"
 			log_must zpool add "$TESTPOOL" ${keywords[i]} $vdev
 			log_must vdevs_in_pool "$TESTPOOL" "$vdev"
 			log_must vdevs_in_pool "$TESTPOOL" "draid1-0-0"
 			log_must vdevs_in_pool "$TESTPOOL" "draid1-1-0"
 			destroy_pool "$TESTPOOL"
 		done
 		;;
        esac
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/Makefile.am
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/Makefile.am
@ -27,6 +27,10 @@ dist_pkgdata_SCRIPTS = \
 	zpool_create_024_pos.ksh \
 	zpool_create_encrypted.ksh \
 	zpool_create_crypt_combos.ksh \
 	zpool_create_draid_001_pos.ksh \
 	zpool_create_draid_002_pos.ksh \
 	zpool_create_draid_003_pos.ksh \
 	zpool_create_draid_004_pos.ksh \
 	zpool_create_features_001_pos.ksh \
 	zpool_create_features_002_pos.ksh \
 	zpool_create_features_003_pos.ksh \
@ -36,5 +40,6 @@ dist_pkgdata_SCRIPTS = \
 	zpool_create_tempname.ksh
 dist_pkgdata_DATA = \
 	draidcfg.gz \
 	zpool_create.cfg \
 	zpool_create.shlib
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/draidcfg.gz
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/draidcfg.gz
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_001_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_001_pos.ksh
@ -64,14 +64,16 @@ pooldevs="${DISK0} \
 	\"${DISK0} ${DISK1}\" \
 	\"${DISK0} ${DISK1} ${DISK2}\" \
 	\"$disk1 $disk2\""
 raidzdevs="\"${DISK0} ${DISK1} ${DISK2}\""
 mirrordevs="\"${DISK0} ${DISK1}\" \
 	$raidzdevs \
 	\"$disk1 $disk2\""
 raidzdevs="\"${DISK0} ${DISK1} ${DISK2}\""
 draiddevs="\"${DISK0} ${DISK1} ${DISK2}\""
 create_pool_test "$TESTPOOL" "" "$pooldevs"
 create_pool_test "$TESTPOOL" "mirror" "$mirrordevs"
 create_pool_test "$TESTPOOL" "raidz" "$raidzdevs"
 create_pool_test "$TESTPOOL" "raidz1" "$raidzdevs"
 create_pool_test "$TESTPOOL" "draid" "$draiddevs"
 log_pass "'zpool create <pool> <vspec> ...' success."
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh
@ -54,7 +54,7 @@ log_assert "'zpool create [-R root][-m mountpoint] <pool> <vdev> ...' can create
 	"an alternate pool or a new pool mounted at the specified mountpoint."
 log_onexit cleanup
-set -A pooltype "" "mirror" "raidz" "raidz1" "raidz2"
+set -A pooltype "" "mirror" "raidz" "raidz1" "raidz2" "draid" "draid2"
 #
 # cleanup the pools created in previous case if zpool_create_004_pos timedout
@ -67,8 +67,8 @@ done
 rm -rf $TESTDIR
 log_must mkdir -p $TESTDIR
 typeset -i i=1
-while (( i < 4 )); do
+while (( i < 5 )); do
-	log_must mkfile $FILESIZE $TESTDIR/file.$i
+	log_must truncate -s $FILESIZE $TESTDIR/file.$i
 	(( i = i + 1 ))
 done
@ -87,7 +87,7 @@ do
 			log_must zpool destroy -f $TESTPOOL
 		[[ -d $TESTDIR1 ]] && rm -rf $TESTDIR1
 		log_must zpool create $opt $TESTPOOL ${pooltype[i]} \
-			$file.1 $file.2 $file.3
+			$file.1 $file.2 $file.3 $file.4
 		! poolexists $TESTPOOL && \
 			log_fail "Creating pool with $opt fails."
 		mpt=`zfs mount | egrep "^$TESTPOOL[^/]" | awk '{print $2}'`
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_006_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_006_pos.ksh
@ -97,6 +97,20 @@ set -A valid_args \
 	"raidz2 $vdev0 $vdev1 $vdev2 spare $vdev3 raidz2 $vdev4 $vdev5 $vdev6" \
 	"raidz3 $vdev0 $vdev1 $vdev2 $vdev3 \
 		mirror $vdev4 $vdev5 $vdev6 $vdev7" \
 	"draid $vdev0 $vdev1 $vdev2 mirror $vdev3 $vdev4" \
 	"draid $vdev0 $vdev1 $vdev2 raidz1 $vdev3 $vdev4 $vdev5" \
 	"draid $vdev0 $vdev1 $vdev2 draid1 $vdev3 $vdev4 $vdev5" \
 	"draid $vdev0 $vdev1 $vdev2 special mirror $vdev3 $vdev4" \
 	"draid2 $vdev0 $vdev1 $vdev2 $vdev3 mirror $vdev4 $vdev5 $vdev6" \
 	"draid2 $vdev0 $vdev1 $vdev2 $vdev3 raidz2 $vdev4 $vdev5 $vdev6" \
 	"draid2 $vdev0 $vdev1 $vdev2 $vdev3 draid2 $vdev4 $vdev5 $vdev6 $vdev7"\
 	"draid2 $vdev0 $vdev1 $vdev2 $vdev3 \
 		special mirror $vdev4 $vdev5 $vdev6" \
 	"draid2 $vdev0 $vdev1 $vdev2 $vdev3 \
 		special mirror $vdev4 $vdev5 $vdev6 \
 		cache $vdev7 log mirror $vdev8 $vdev9" \
 	"draid $vdev0 $vdev1 $vdev2 draid $vdev4 $vdev5 $vdev6 $vdev7 \
 		special mirror $vdev8 $vdev9" \
 	"spare $vdev0 $vdev1 $vdev2 mirror $vdev3 $vdev4 raidz $vdev5 $vdev6"
 set -A forced_args \
@ -109,11 +123,19 @@ set -A forced_args \
 	"raidz $vdev0 $vdev1 raidz2 $vdev2 $vdev3 $vdev4" \
 	"raidz $vdev0 $vdev1 raidz2 $vdev2 $vdev3 $vdev4 spare $vdev5" \
 	"raidz $vdev0 $vdev1 spare $vdev2 raidz2 $vdev3 $vdev4 $vdev5" \
 	"raidz $vdev0 $vdev1 draid2 $vdev2 $vdev3 $vdev4 $vdev5" \
 	"raidz $vdev0 $vdev1 draid3 $vdev2 $vdev3 $vdev4 $vdev5 $vdev6" \
 	"mirror $vdev0 $vdev1 raidz $vdev2 $vdev3 raidz2 $vdev4 $vdev5 $vdev6" \
 	"mirror $vdev0 $vdev1 raidz $vdev2 $vdev3 \
 		raidz2 $vdev4 $vdev5 $vdev6 spare $vdev7" \
 	"mirror $vdev0 $vdev1 raidz $vdev2 $vdev3 \
 		spare $vdev4 raidz2 $vdev5 $vdev6 $vdev7" \
 	"mirror $vdev0 $vdev1 draid $vdev2 $vdev3 $vdev4 \
 		draid2 $vdev5 $vdev6 $vdev7 $vdev8 spare $vdev9" \
 	"draid $vdev0 $vdev1 $vdev2 $vdev3 \
 		draid2 $vdev4 $vdev5 $vdev6 $vdev7 $vdev8" \
 	"draid $vdev0 $vdev1 $vdev2 draid $vdev4 $vdev5 $vdev6 \
 		special mirror $vdev7 $vdev8 $vdev9" \
 	"spare $vdev0 $vdev1 $vdev2 mirror $vdev3 $vdev4 \
 		raidz2 $vdev5 $vdev6 $vdev7"
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_007_neg.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_007_neg.ksh
@ -54,13 +54,16 @@ set -A args  "" "-?" "-n" "-f" "-nf" "-fn" "-f -n" "--f" "-e" "-s" \
 	"$TESTPOOL c0txd0" "$TESTPOOL c0t0dx" "$TESTPOOL cxtxdx" \
 	"$TESTPOOL mirror" "$TESTPOOL raidz" "$TESTPOOL mirror raidz" \
 	"$TESTPOOL raidz1" "$TESTPOOL mirror raidz1" \
 	"$TESTPOOL draid1" "$TESTPOOL mirror draid1" \
 	"$TESTPOOL mirror c?t?d?" "$TESTPOOL mirror $DISK0 c0t1d?" \
 	"$TESTPOOL RAIDZ $DISK0 $DISK1" \
 	"$TESTPOOL $DISK0 log $DISK1 log $DISK2" \
 	"$TESTPOOL $DISK0 spare $DISK1 spare $DISK2" \
-	"$TESTPOOL RAIDZ1 $DISK0 $DISK1" \
+	"$TESTPOOL RAIDZ1 $DISK0 $DISK1" "$TESTPOOL MIRROR $DISK0" \
-	"$TESTPOOL MIRROR $DISK0" "$TESTPOOL raidz $DISK0" \
+	"$TESTPOOL DRAID $DISK1 $DISK2 $DISK3" "$TESTPOOL raidz $DISK0" \
-	"$TESTPOOL raidz1 $DISK0" \
+	"$TESTPOOL raidz1 $DISK0" "$TESTPOOL draid $DISK0" \
 	"$TESTPOOL draid2 $DISK0 $DISK1" \
 	"$TESTPOOL draid $DISK0 $DISK1 $DISK2 spare s0-draid1-0" \
 	"1tank $DISK0" "1234 $DISK0" "?tank $DISK0" \
 	"tan%k $DISK0" "ta@# $DISK0" "tan+k $DISK0" \
 	"$BYND_MAX_NAME $DISK0"
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_009_neg.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_009_neg.ksh
@ -63,7 +63,7 @@ log_onexit cleanup
 unset NOINUSE_CHECK
 typeset opt
-for opt in "" "mirror" "raidz" "raidz1"; do
+for opt in "" "mirror" "raidz" "draid"; do
 	if [[ $opt == "" ]]; then
 		typeset disks=$DISK0
 	else
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_010_neg.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_010_neg.ksh
@ -63,15 +63,16 @@ log_must zfs create $TESTPOOL/$TESTFS
 log_must zfs set mountpoint=$TESTDIR $TESTPOOL/$TESTFS
 typeset -l devsize=$(($SPA_MINDEVSIZE - 1024 * 1024))
-for files in $TESTDIR/file1 $TESTDIR/file2
+for files in $TESTDIR/file1 $TESTDIR/file2 $TESTDIR/file3
 do
-	log_must mkfile $devsize $files
+	log_must truncate -s $devsize $files
 done
 set -A args \
 	"$TOOSMALL $TESTDIR/file1" "$TESTPOOL1 $TESTDIR/file1 $TESTDIR/file2" \
        "$TOOSMALL mirror $TESTDIR/file1 $TESTDIR/file2" \
-	"$TOOSMALL raidz $TESTDIR/file1 $TESTDIR/file2"
+	"$TOOSMALL raidz $TESTDIR/file1 $TESTDIR/file2" \
 	"$TOOSMALL draid $TESTDIR/file1 $TESTDIR/file2 $TESTDIR/file3"
 typeset -i i=0
 while [[ $i -lt ${#args[*]} ]]; do
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_011_neg.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_011_neg.ksh
@ -54,7 +54,7 @@ function cleanup
 		destroy_pool $pool
 	done
-	rm -rf $disk1 $disk2 $disk3
+	rm -rf $disk1 $disk2 $disk3 $disk4
 	if [[ -n $saved_dump_dev ]]; then
 		log_must dumpadm -u -d $saved_dump_dev
@ -66,12 +66,16 @@ log_onexit cleanup
 disk1=$(create_blockfile $FILESIZE)
 disk2=$(create_blockfile $FILESIZE)
-disk3=$(create_blockfile $FILESIZE1)
+disk3=$(create_blockfile $FILESIZE)
 disk4=$(create_blockfile $FILESIZE1)
 mirror1="$DISK0 $DISK1"
 mirror2="$disk1 $disk2"
 raidz1=$mirror1
 raidz2=$mirror2
-diff_size_dev="$disk2 $disk3"
+draid1="$DISK0 $DISK1 $DISK2"
 draid2="$disk1 $disk2 $disk3"
 diff_size_dev="$disk2 $disk4"
 draid_diff_size_dev="$disk1 $disk2 $disk4"
 vfstab_dev=$(find_vfstab_dev)
 if is_illumos; then
@ -91,13 +95,17 @@ set -A arg \
 	"$TESTPOOL1 mirror mirror $mirror1 mirror $mirror2" \
 	"$TESTPOOL1 raidz raidz $raidz1 raidz $raidz2" \
 	"$TESTPOOL1 raidz1 raidz1 $raidz1 raidz1 $raidz2" \
 	"$TESTPOOL1 draid draid $draid1 draid $draid2" \
 	"$TESTPOOL1 mirror raidz $raidz1 raidz $raidz2" \
 	"$TESTPOOL1 mirror raidz1 $raidz1 raidz1 $raidz2" \
 	"$TESTPOOL1 mirror draid $draid1 draid $draid2" \
 	"$TESTPOOL1 raidz mirror $mirror1 mirror $mirror2" \
 	"$TESTPOOL1 raidz1 mirror $mirror1 mirror $mirror2" \
 	"$TESTPOOL1 draid1 mirror $mirror1 mirror $mirror2" \
 	"$TESTPOOL1 mirror $diff_size_dev" \
 	"$TESTPOOL1 raidz $diff_size_dev" \
 	"$TESTPOOL1 raidz1 $diff_size_dev" \
 	"$TESTPOOL1 draid1 $draid_diff_size_dev" \
 	"$TESTPOOL1 mirror $mirror1 spare $mirror2 spare $diff_size_dev" \
 	"$TESTPOOL1 $vfstab_dev" \
 	"$TESTPOOL1 ${DISK0}s10" \
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_draid_001_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_draid_001_pos.ksh
@ -0,0 +1,75 @@
 #!/bin/ksh -p
 #
 # CDDL HEADER START
 #
 # The contents of this file are subject to the terms of the
 # Common Development and Distribution License (the "License").
 # You may not use this file except in compliance with the License.
 #
 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 # or http://www.opensolaris.org/os/licensing.
 # See the License for the specific language governing permissions
 # and limitations under the License.
 #
 # When distributing Covered Code, include this CDDL HEADER in each
 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 # If applicable, add the following below this CDDL HEADER, with the
 # fields enclosed by brackets "[]" replaced with your own identifying
 # information: Portions Copyright [yyyy] [name of copyright owner]
 #
 # CDDL HEADER END
 #
 #
 # Copyright (c) 2020 Lawrence Livermore National Security, LLC.
 . $STF_SUITE/include/libtest.shlib
 #
 # DESCRIPTION:
 # Create a variety of dRAID pools using the minimal dRAID vdev syntax.
 #
 # STRATEGY:
 # 1) Create the required number of allowed dRAID vdevs.
 # 2) Create a few pools of various sizes using the draid1|draid2|draid3 syntax.
 #
 verify_runnable "global"
 # Exit handler (registered with log_onexit): destroy the test pool if it
 # still exists, then remove the file vdevs and the test directory.
 function cleanup
 {
 	if poolexists $TESTPOOL; then
 		destroy_pool $TESTPOOL
 	fi
 	rm -f $all_vdevs
 	rmdir $TESTDIR
 }
 log_assert "'zpool create <pool> <draid1|2|3> ...' can create a pool."
 log_onexit cleanup
 # Create 84 file vdevs of $MINVDEVSIZE each up front; 84 is the largest
 # child count used below, so every configuration reuses a prefix of them.
 all_vdevs=$(echo $TESTDIR/file.{01..84})
 mkdir $TESTDIR
 log_must truncate -s $MINVDEVSIZE $all_vdevs
 # Verify all configurations up to 24 vdevs.  For each parity level the
 # child count starts at parity + 2 and only the parity is specified, all
 # other dRAID options are left to their defaults.
 for parity in {1..3}; do
 	for children in {$((parity + 2))..24}; do
 		vdevs=$(echo $TESTDIR/file.{01..${children}})
 		log_must zpool create $TESTPOOL draid$parity $vdevs
 		log_must poolexists $TESTPOOL
 		destroy_pool $TESTPOOL
 	done
 done
 # Spot check a few large configurations using the bare "draid" keyword
 # (no explicit parity level).
 children_counts="53 84"
 for children in $children_counts; do
 	vdevs=$(echo $TESTDIR/file.{01..${children}})
 	log_must zpool create $TESTPOOL draid $vdevs
 	log_must poolexists $TESTPOOL
 	destroy_pool $TESTPOOL
 done
 log_pass "'zpool create <pool> <draid1|2|3> <vdevs> ...' success."
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_draid_002_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_draid_002_pos.ksh
@ -0,0 +1,82 @@
 #!/bin/ksh -p
 #
 # CDDL HEADER START
 #
 # The contents of this file are subject to the terms of the
 # Common Development and Distribution License (the "License").
 # You may not use this file except in compliance with the License.
 #
 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 # or http://www.opensolaris.org/os/licensing.
 # See the License for the specific language governing permissions
 # and limitations under the License.
 #
 # When distributing Covered Code, include this CDDL HEADER in each
 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 # If applicable, add the following below this CDDL HEADER, with the
 # fields enclosed by brackets "[]" replaced with your own identifying
 # information: Portions Copyright [yyyy] [name of copyright owner]
 #
 # CDDL HEADER END
 #
 #
 # Copyright (c) 2020 Lawrence Livermore National Security, LLC.
 . $STF_SUITE/include/libtest.shlib
 #
 # DESCRIPTION:
 # Create dRAID pool using the maximum number of vdevs (255).  Then verify
 # that creating a pool with 256 fails as expected.
 #
 # STRATEGY:
 # 1) Verify a pool with fewer than the required vdevs fails.
 # 2) Verify pools with a valid number of vdevs succeed.
 # 3) Verify a pool which exceeds the maximum number of vdevs fails.
 #
 verify_runnable "global"
 # Exit handler (registered with log_onexit): destroy the test pool if it
 # still exists, then remove the file vdevs and the test directory.
 function cleanup
 {
 	if poolexists $TESTPOOL; then
 		destroy_pool $TESTPOOL
 	fi
 	rm -f $all_vdevs
 	rmdir $TESTDIR
 }
 log_assert "'zpool create <pool> draid <vdevs>'"
 log_onexit cleanup
 # Create 256 file vdevs, one more than the maximum dRAID vdev count, so the
 # over-limit case at the end of the test can be exercised.
 all_vdevs=$(echo $TESTDIR/file.{01..256})
 mkdir $TESTDIR
 log_must truncate -s $MINVDEVSIZE $all_vdevs
 # Below minimum dRAID vdev count for the specified parity level: each
 # attempt supplies only as many vdevs as the parity level.
 log_mustnot zpool create $TESTPOOL draid1 $(echo $TESTDIR/file.{01..01})
 log_mustnot zpool create $TESTPOOL draid2 $(echo $TESTDIR/file.{01..02})
 log_mustnot zpool create $TESTPOOL draid3 $(echo $TESTDIR/file.{01..03})
 # Verify pool sizes from 2-10.  Sizes between 10 and 254 are skipped to
 # speed up the test case; randomly generated configurations are exercised
 # by zpool_create_draid_003_pos.ksh.
 for (( i=2; i<=10; i++ )); do
 	log_must zpool create $TESTPOOL draid:${i}c \
 	    $(echo $TESTDIR/file.{01..$i})
 	log_must destroy_pool $TESTPOOL
 done
 # Verify pool sizes from 254-255.
 for (( i=254; i<=255; i++ )); do
 	log_must zpool create $TESTPOOL draid:${i}c \
 	    $(echo $TESTDIR/file.{01..$i})
 	log_must destroy_pool $TESTPOOL
 done
 # 256 vdevs exceeds the maximum dRAID vdev count (255).
 log_mustnot zpool create $TESTPOOL draid $(echo $TESTDIR/file.{01..256})
 log_pass "'zpool create <pool> draid <vdevs>'"
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_draid_003_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_draid_003_pos.ksh
@ -0,0 +1,112 @@
 #!/bin/ksh -p
 #
 # CDDL HEADER START
 #
 # The contents of this file are subject to the terms of the
 # Common Development and Distribution License (the "License").
 # You may not use this file except in compliance with the License.
 #
 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 # or http://www.opensolaris.org/os/licensing.
 # See the License for the specific language governing permissions
 # and limitations under the License.
 #
 # When distributing Covered Code, include this CDDL HEADER in each
 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 # If applicable, add the following below this CDDL HEADER, with the
 # fields enclosed by brackets "[]" replaced with your own identifying
 # information: Portions Copyright [yyyy] [name of copyright owner]
 #
 # CDDL HEADER END
 #
 #
 # Copyright (c) 2020 Lawrence Livermore National Security, LLC.
 . $STF_SUITE/include/libtest.shlib
 #
 # DESCRIPTION:
 # Verify allowed stripe widths (data+parity) and hot spares may be
 # configured at pool creation time.
 #
 # STRATEGY:
 # 1) Test valid stripe/spare combinations given the number of children.
 # 2) Test invalid stripe/spare/children combinations outside the
 #    allowed limits.
 #
 verify_runnable "global"
 # Exit handler (registered with log_onexit): destroy the test pool if it
 # still exists, then remove the file vdevs and the test directory.
 function cleanup
 {
 	if poolexists $TESTPOOL; then
 		destroy_pool $TESTPOOL
 	fi
 	rm -f $draid_vdevs
 	rmdir $TESTDIR
 }
 log_assert "'zpool create <pool> draid:#d:#c:#s <vdevs>'"
 log_onexit cleanup
 mkdir $TESTDIR
 # Generate 10 random valid configurations to test.  The child count is
 # chosen to be at least data + parity + spares so the layout is valid.
 for (( i=0; i<10; i++ )); do
 	parity=$(random_int_between 1 3)
 	spares=$(random_int_between 0 3)
 	data=$(random_int_between 1 16)
 	(( min_children = (data + parity + spares) ))
 	children=$(random_int_between $min_children 32)
 	draid="draid${parity}:${data}d:${children}c:${spares}s"
 	draid_vdevs=$(echo $TESTDIR/file.{01..$children})
 	log_must truncate -s $MINVDEVSIZE $draid_vdevs
 	log_must zpool create $TESTPOOL $draid $draid_vdevs
 	log_must poolexists $TESTPOOL
 	destroy_pool $TESTPOOL
 	rm -f $draid_vdevs
 done
 # Recreate a fixed set of 32 file vdevs for the checks below; $TESTDIR
 # already exists at this point so only the files need to be created.
 children=32
 draid_vdevs=$(echo $TESTDIR/file.{01..$children})
 log_must truncate -s $MINVDEVSIZE $draid_vdevs
 # Out of order and unknown suffixes should fail.
 log_mustnot zpool create $TESTPOOL draid:d8 $draid_vdevs
 log_mustnot zpool create $TESTPOOL draid:s3 $draid_vdevs
 log_mustnot zpool create $TESTPOOL draid:c32 $draid_vdevs
 log_mustnot zpool create $TESTPOOL draid:10x $draid_vdevs
 log_mustnot zpool create $TESTPOOL draid:x10 $draid_vdevs
 # Exceeds maximum data disks (limited by total children): with 32 children
 # and double parity, 30 data disks fit but 31 do not.
 log_must zpool create $TESTPOOL draid2:30d $draid_vdevs
 log_must destroy_pool $TESTPOOL
 log_mustnot zpool create $TESTPOOL draid2:31d $draid_vdevs
 # At least one data disk must be requested.
 log_mustnot zpool create $TESTPOOL draid2:0d $draid_vdevs
 # Check invalid parity levels.
 log_mustnot zpool create $TESTPOOL draid0 $draid_vdevs
 log_mustnot zpool create $TESTPOOL draid4 $draid_vdevs
 # Spares are limited: spares <= children - (parity + data).
 log_must zpool create $TESTPOOL draid2:20d:10s $draid_vdevs
 log_must destroy_pool $TESTPOOL
 log_mustnot zpool create $TESTPOOL draid2:20d:11s $draid_vdevs
 # When given, the children count must match the number of vdevs
 # supplied (32 here).
 log_mustnot zpool create $TESTPOOL draid2:0c $draid_vdevs
 log_mustnot zpool create $TESTPOOL draid2:31c $draid_vdevs
 log_must zpool create $TESTPOOL draid2:32c $draid_vdevs
 destroy_pool $TESTPOOL
 log_pass "'zpool create <pool> draid:#d:#c:#s <vdevs>'"
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_draid_004_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_draid_004_pos.ksh
@ -0,0 +1,43 @@
 #!/bin/ksh -p
 #
 # CDDL HEADER START
 #
 # The contents of this file are subject to the terms of the
 # Common Development and Distribution License (the "License").
 # You may not use this file except in compliance with the License.
 #
 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 # or http://www.opensolaris.org/os/licensing.
 # See the License for the specific language governing permissions
 # and limitations under the License.
 #
 # When distributing Covered Code, include this CDDL HEADER in each
 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 # If applicable, add the following below this CDDL HEADER, with the
 # fields enclosed by brackets "[]" replaced with your own identifying
 # information: Portions Copyright [yyyy] [name of copyright owner]
 #
 # CDDL HEADER END
 #
 #
 # Copyright (c) 2020 Lawrence Livermore National Security, LLC.
 #
 . $STF_SUITE/include/libtest.shlib
 #
 # DESCRIPTION:
 # Verify generated dRAID permutation maps against the authoritative
 # reference file, which contains the full permutations.
 #
 verify_runnable "global"
 log_assert "'draid verify'"
 # Compressed reference file distributed alongside the zpool_create tests.
 DRAIDCFG="$STF_SUITE/tests/functional/cli_root/zpool_create/draidcfg.gz"
 # Run the draid test helper in verify mode against the reference file.
 log_must draid verify $DRAIDCFG
 log_pass "'draid verify'"
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_001_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_001_pos.ksh
@ -72,7 +72,7 @@ log_onexit cleanup
 log_assert "zpool can be autoexpanded after set autoexpand=on on vdev expansion"
-for type in " " mirror raidz raidz2; do
+for type in " " mirror raidz draid; do
 	log_note "Setting up loopback, scsi_debug, and file vdevs"
 	log_must truncate -s $org_size $FILE_LO
 	DEV1=$(losetup -f)
@ -144,6 +144,16 @@ for type in " " mirror raidz raidz2; do
 			if [[ $? -ne 0 ]] ; then
 				log_fail "pool $TESTPOOL1 has not expanded"
 			fi
 		elif [[ $type == "draid" ]]; then
 			# For draid the pool is expected to grow by twice the
 			# per-vdev size increase; look for that delta in the
 			# internal "vdev online" history records.
 			typeset expansion_size=$((2*($exp_size-$org_size)))
 			zpool history -il $TESTPOOL1 | \
 			    grep "pool '$TESTPOOL1' size:" | \
 			    grep "vdev online" | \
 			    grep "(+${expansion_size})" >/dev/null 2>&1
 			if [[ $? -ne 0 ]]; then
 				log_fail "pool $TESTPOOL1 has not expanded"
 			fi
 		else
 			typeset expansion_size=$((3*($exp_size-$org_size)))
 			zpool history -il $TESTPOOL1 | \
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh
@ -63,7 +63,7 @@ log_onexit cleanup
 log_assert "zpool can expand after zpool online -e zvol vdevs on vdev expansion"
-for type in " " mirror raidz raidz2; do
+for type in " " mirror raidz draid:1s; do
 	# Initialize the file devices and the pool
 	for i in 1 2 3; do
 		log_must truncate -s $org_size ${TEMPFILE}.$i
@ -92,6 +92,8 @@ for type in " " mirror raidz raidz2; do
 	if [[ $type == "mirror" ]]; then
 		typeset expected_zpool_expandsize=$(($exp_size-$org_size))
 	elif [[ $type == "draid:1s" ]]; then
 		typeset expected_zpool_expandsize=$((2*($exp_size-$org_size)))
 	else
 		typeset expected_zpool_expandsize=$((3*($exp_size-$org_size)))
 	fi
@ -143,6 +145,17 @@ for type in " " mirror raidz raidz2; do
 			    grep "vdev online" | \
 			    grep "(+${expansion_size})" >/dev/null 2>&1
 			if [[ $? -ne 0 ]]; then
 				log_fail "pool $TESTPOOL1 has not expanded " \
 				    "after zpool online -e"
 			fi
 		elif [[ $type == "draid:1s" ]]; then
 			typeset expansion_size=$((2*($exp_size-$org_size)))
 			zpool history -il $TESTPOOL1 | \
 			    grep "pool '$TESTPOOL1' size:" | \
 			    grep "vdev online" | \
 			    grep "(+${expansion_size})" >/dev/null 2>&1
 			if [[ $? -ne 0 ]] ; then
 				log_fail "pool $TESTPOOL1 has not expanded " \
 				    "after zpool online -e"
@ -160,9 +173,17 @@ for type in " " mirror raidz raidz2; do
 			fi
 		fi
 	else
-		log_fail "pool $TESTPOOL1 did not expand after vdev expansion " \
+		log_fail "pool $TESTPOOL1 did not expand after vdev " \
-		    "and zpool online -e"
+		    "expansion and zpool online -e"
 	fi
 	# For dRAID pools verify the distributed spare was resized after
 	# expansion and it is large enough to be used to replace a pool vdev.
 	if [[ $type == "draid:1s" ]]; then
 		log_must zpool replace -w $TESTPOOL1 $TEMPFILE.3 draid1-0-0
 		verify_pool $TESTPOOL1
 	fi
 	log_must zpool destroy $TESTPOOL1
 done
 log_pass "zpool can expand after zpool online -e"
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_003_neg.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_003_neg.ksh
@ -73,7 +73,7 @@ log_onexit cleanup
 log_assert "zpool can not expand if set autoexpand=off after vdev expansion"
-for type in " " mirror raidz raidz2; do
+for type in " " mirror raidz draid; do
 	log_note "Setting up loopback, scsi_debug, and file vdevs"
 	log_must truncate -s $org_size $FILE_LO
 	DEV1=$(losetup -f)
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh
@ -61,7 +61,7 @@ log_onexit cleanup
 log_assert "After vdev expansion, all 4 labels have the same set of uberblocks."
-for type in " " mirror raidz raidz2; do
+for type in " " mirror raidz draid; do
 	for i in 1 2 3; do
 		log_must truncate -s $org_size ${TEMPFILE}.$i
 	done
--- a/Show More
+++ b/Show More