vdev_mirror: load balancing fixes

vdev_queue: - Track the last position of each vdev, including the io size, in order to detect linear access of the following zio. - Remove duplicate `vq_lastoffset` vdev_mirror: - Correctly calculate the zio offset (signedness issue) - Deprecate `vdev_queue_register_lastoffset()` - Add `VDEV_LABEL_START_SIZE` to zio offset of leaf vdevs Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Gvozden Neskovic <neskovic@gmail.com> Closes #6461
2017-08-04 11:29:56 +02:00 · 2017-08-04 11:29:56 +02:00 · 06acbbc429
parent 6116bbd744
commit 06acbbc429
4 changed files with 24 additions and 37 deletions
--- a/include/sys/vdev.h
+++ b/include/sys/vdev.h
@ -125,8 +125,7 @@ extern zio_t *vdev_queue_io(zio_t *zio);
 extern void vdev_queue_io_done(zio_t *zio);
 extern int vdev_queue_length(vdev_t *vd);
-extern uint64_t vdev_queue_lastoffset(vdev_t *vd);
+extern uint64_t vdev_queue_last_offset(vdev_t *vd);
 extern void vdev_queue_register_lastoffset(vdev_t *vd, zio_t *zio);
 extern void vdev_config_dirty(vdev_t *vd);
 extern void vdev_config_clean(vdev_t *vd);
--- a/include/sys/vdev_impl.h
+++ b/include/sys/vdev_impl.h
@ -127,7 +127,6 @@ struct vdev_queue {
 	hrtime_t	vq_io_delta_ts;
 	zio_t		vq_io_search; /* used as local for stack reduction */
 	kmutex_t	vq_lock;
 	uint64_t	vq_lastoffset;
 };
 /*
--- a/module/zfs/vdev_mirror.c
+++ b/module/zfs/vdev_mirror.c
@ -116,7 +116,8 @@ static const zio_vsd_ops_t vdev_mirror_vsd_ops = {
 static int
 vdev_mirror_load(mirror_map_t *mm, vdev_t *vd, uint64_t zio_offset)
 {
-	uint64_t lastoffset;
+	uint64_t last_offset;
 	int64_t offset_diff;
 	int load;
 	/* All DVAs have equal weight at the root. */
@ -129,13 +130,17 @@ vdev_mirror_load(mirror_map_t *mm, vdev_t *vd, uint64_t zio_offset)
 	 * worse overall when resilvering with compared to without.
 	 */
 	/* Fix zio_offset for leaf vdevs */
 	if (vd->vdev_ops->vdev_op_leaf)
 		zio_offset += VDEV_LABEL_START_SIZE;
 	/* Standard load based on pending queue length. */
 	load = vdev_queue_length(vd);
-	lastoffset = vdev_queue_lastoffset(vd);
+	last_offset = vdev_queue_last_offset(vd);
 	if (vd->vdev_nonrot) {
 		/* Non-rotating media. */
-		if (lastoffset == zio_offset)
+		if (last_offset == zio_offset)
 			return (load + zfs_vdev_mirror_non_rotating_inc);
 		/*
@ -148,16 +153,16 @@ vdev_mirror_load(mirror_map_t *mm, vdev_t *vd, uint64_t zio_offset)
 	}
 	/* Rotating media I/O's which directly follow the last I/O. */
-	if (lastoffset == zio_offset)
+	if (last_offset == zio_offset)
 		return (load + zfs_vdev_mirror_rotating_inc);
 	/*
 	 * Apply half the seek increment to I/O's within seek offset
-	 * of the last I/O queued to this vdev as they should incur less
+	 * of the last I/O issued to this vdev as they should incur less
 	 * of a seek increment.
 	 */
-	if (ABS(lastoffset - zio_offset) <
+	offset_diff = (int64_t)(last_offset - zio_offset);
-	    zfs_vdev_mirror_rotating_seek_offset)
+	if (ABS(offset_diff) < zfs_vdev_mirror_rotating_seek_offset)
 		return (load + (zfs_vdev_mirror_rotating_seek_inc / 2));
 	/* Apply the full seek increment to all other I/O's. */
@ -382,29 +387,20 @@ vdev_mirror_child_select(zio_t *zio)
 		mm->mm_preferred_cnt++;
 	}
-	if (mm->mm_preferred_cnt == 1) {
+	if (mm->mm_preferred_cnt == 1)
 		vdev_queue_register_lastoffset(
 		    mm->mm_child[mm->mm_preferred[0]].mc_vd, zio);
 		return (mm->mm_preferred[0]);
 	}
 	if (mm->mm_preferred_cnt > 1) {
 		int c = vdev_mirror_preferred_child_randomize(zio);
-		vdev_queue_register_lastoffset(mm->mm_child[c].mc_vd, zio);
+	if (mm->mm_preferred_cnt > 1)
-		return (c);
+		return (vdev_mirror_preferred_child_randomize(zio));
 	}
 	/*
 	 * Every device is either missing or has this txg in its DTL.
 	 * Look for any child we haven't already tried before giving up.
 	 */
 	for (c = 0; c < mm->mm_children; c++) {
-		if (!mm->mm_child[c].mc_tried) {
+		if (!mm->mm_child[c].mc_tried)
 			vdev_queue_register_lastoffset(mm->mm_child[c].mc_vd,
 			    zio);
 			return (c);
 		}
 	}
 	/*
--- a/module/zfs/vdev_queue.c
+++ b/module/zfs/vdev_queue.c
@ -393,7 +393,7 @@ vdev_queue_init(vdev_t *vd)
 		    sizeof (zio_t), offsetof(struct zio, io_queue_node));
 	}
-	vq->vq_lastoffset = 0;
+	vq->vq_last_offset = 0;
 }
 void
@ -699,9 +699,8 @@ again:
 	 */
 	tree = vdev_queue_class_tree(vq, p);
 	vq->vq_io_search.io_timestamp = 0;
-	vq->vq_io_search.io_offset = vq->vq_last_offset + 1;
+	vq->vq_io_search.io_offset = vq->vq_last_offset - 1;
-	VERIFY3P(avl_find(tree, &vq->vq_io_search,
+	VERIFY3P(avl_find(tree, &vq->vq_io_search, &idx), ==, NULL);
 	    &idx), ==, NULL);
 	zio = avl_nearest(tree, idx, AVL_AFTER);
 	if (zio == NULL)
 		zio = avl_first(tree);
@ -728,7 +727,7 @@ again:
 	}
 	vdev_queue_pending_add(vq, zio);
-	vq->vq_last_offset = zio->io_offset;
+	vq->vq_last_offset = zio->io_offset + zio->io_size;
 	return (zio);
 }
@ -806,7 +805,7 @@ vdev_queue_io_done(zio_t *zio)
 }
 /*
- * As these three methods are only used for load calculations we're not
+ * As these two methods are only used for load calculations we're not
 * concerned if we get an incorrect value on 32bit platforms due to lack of
 * vq_lock mutex use here, instead we prefer to keep it lock free for
 * performance.
@ -818,15 +817,9 @@ vdev_queue_length(vdev_t *vd)
 }
 uint64_t
-vdev_queue_lastoffset(vdev_t *vd)
+vdev_queue_last_offset(vdev_t *vd)
 {
-	return (vd->vdev_queue.vq_lastoffset);
+	return (vd->vdev_queue.vq_last_offset);
 }
 void
 vdev_queue_register_lastoffset(vdev_t *vd, zio_t *zio)
 {
 	vd->vdev_queue.vq_lastoffset = zio->io_offset + zio->io_size;
 }
 #if defined(_KERNEL) && defined(HAVE_SPL)