From 06acbbc429bfe7197e5fc3a49acfeef5c37b64c8 Mon Sep 17 00:00:00 2001 From: Gvozden Neskovic Date: Fri, 4 Aug 2017 11:29:56 +0200 Subject: [PATCH] vdev_mirror: load balancing fixes vdev_queue: - Track the last position of each vdev, including the io size, in order to detect linear access of the following zio. - Remove duplicate `vq_lastoffset` vdev_mirror: - Correctly calculate the zio offset (signedness issue) - Deprecate `vdev_queue_register_lastoffset()` - Add `VDEV_LABEL_START_SIZE` to zio offset of leaf vdevs Reviewed-by: Brian Behlendorf Signed-off-by: Gvozden Neskovic Closes #6461 --- include/sys/vdev.h | 3 +-- include/sys/vdev_impl.h | 1 - module/zfs/vdev_mirror.c | 36 ++++++++++++++++-------------------- module/zfs/vdev_queue.c | 21 +++++++-------------- 4 files changed, 24 insertions(+), 37 deletions(-) diff --git a/include/sys/vdev.h b/include/sys/vdev.h index 7157ef43f6..473d2691c9 100644 --- a/include/sys/vdev.h +++ b/include/sys/vdev.h @@ -125,8 +125,7 @@ extern zio_t *vdev_queue_io(zio_t *zio); extern void vdev_queue_io_done(zio_t *zio); extern int vdev_queue_length(vdev_t *vd); -extern uint64_t vdev_queue_lastoffset(vdev_t *vd); -extern void vdev_queue_register_lastoffset(vdev_t *vd, zio_t *zio); +extern uint64_t vdev_queue_last_offset(vdev_t *vd); extern void vdev_config_dirty(vdev_t *vd); extern void vdev_config_clean(vdev_t *vd); diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index 7c5e54b08e..4c2e3cd2e0 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -127,7 +127,6 @@ struct vdev_queue { hrtime_t vq_io_delta_ts; zio_t vq_io_search; /* used as local for stack reduction */ kmutex_t vq_lock; - uint64_t vq_lastoffset; }; /* diff --git a/module/zfs/vdev_mirror.c b/module/zfs/vdev_mirror.c index 0439e4b46f..d230b4db40 100644 --- a/module/zfs/vdev_mirror.c +++ b/module/zfs/vdev_mirror.c @@ -116,7 +116,8 @@ static const zio_vsd_ops_t vdev_mirror_vsd_ops = { static int vdev_mirror_load(mirror_map_t *mm, vdev_t *vd, uint64_t zio_offset) { - uint64_t lastoffset; + uint64_t last_offset; + int64_t offset_diff; int load; /* All DVAs have equal weight at the root. */ @@ -129,13 +130,17 @@ vdev_mirror_load(mirror_map_t *mm, vdev_t *vd, uint64_t zio_offset) * worse overall when resilvering with compared to without. */ + /* Fix zio_offset for leaf vdevs */ + if (vd->vdev_ops->vdev_op_leaf) + zio_offset += VDEV_LABEL_START_SIZE; + /* Standard load based on pending queue length. */ load = vdev_queue_length(vd); - lastoffset = vdev_queue_lastoffset(vd); + last_offset = vdev_queue_last_offset(vd); if (vd->vdev_nonrot) { /* Non-rotating media. */ - if (lastoffset == zio_offset) + if (last_offset == zio_offset) return (load + zfs_vdev_mirror_non_rotating_inc); /* @@ -148,16 +153,16 @@ vdev_mirror_load(mirror_map_t *mm, vdev_t *vd, uint64_t zio_offset) } /* Rotating media I/O's which directly follow the last I/O. */ - if (lastoffset == zio_offset) + if (last_offset == zio_offset) return (load + zfs_vdev_mirror_rotating_inc); /* * Apply half the seek increment to I/O's within seek offset - * of the last I/O queued to this vdev as they should incur less + * of the last I/O issued to this vdev as they should incur less * of a seek increment. */ - if (ABS(lastoffset - zio_offset) < - zfs_vdev_mirror_rotating_seek_offset) + offset_diff = (int64_t)(last_offset - zio_offset); + if (ABS(offset_diff) < zfs_vdev_mirror_rotating_seek_offset) return (load + (zfs_vdev_mirror_rotating_seek_inc / 2)); /* Apply the full seek increment to all other I/O's. */ @@ -382,29 +387,20 @@ vdev_mirror_child_select(zio_t *zio) mm->mm_preferred_cnt++; } - if (mm->mm_preferred_cnt == 1) { - vdev_queue_register_lastoffset( - mm->mm_child[mm->mm_preferred[0]].mc_vd, zio); + if (mm->mm_preferred_cnt == 1) return (mm->mm_preferred[0]); - } - if (mm->mm_preferred_cnt > 1) { - int c = vdev_mirror_preferred_child_randomize(zio); - vdev_queue_register_lastoffset(mm->mm_child[c].mc_vd, zio); - return (c); - } + if (mm->mm_preferred_cnt > 1) + return (vdev_mirror_preferred_child_randomize(zio)); /* * Every device is either missing or has this txg in its DTL. * Look for any child we haven't already tried before giving up. */ for (c = 0; c < mm->mm_children; c++) { - if (!mm->mm_child[c].mc_tried) { - vdev_queue_register_lastoffset(mm->mm_child[c].mc_vd, - zio); + if (!mm->mm_child[c].mc_tried) return (c); - } } /* diff --git a/module/zfs/vdev_queue.c b/module/zfs/vdev_queue.c index 6b3e872915..40cba340aa 100644 --- a/module/zfs/vdev_queue.c +++ b/module/zfs/vdev_queue.c @@ -393,7 +393,7 @@ vdev_queue_init(vdev_t *vd) sizeof (zio_t), offsetof(struct zio, io_queue_node)); } - vq->vq_lastoffset = 0; + vq->vq_last_offset = 0; } void @@ -699,9 +699,8 @@ again: */ tree = vdev_queue_class_tree(vq, p); vq->vq_io_search.io_timestamp = 0; - vq->vq_io_search.io_offset = vq->vq_last_offset + 1; - VERIFY3P(avl_find(tree, &vq->vq_io_search, - &idx), ==, NULL); + vq->vq_io_search.io_offset = vq->vq_last_offset - 1; + VERIFY3P(avl_find(tree, &vq->vq_io_search, &idx), ==, NULL); zio = avl_nearest(tree, idx, AVL_AFTER); if (zio == NULL) zio = avl_first(tree); @@ -728,7 +727,7 @@ again: } vdev_queue_pending_add(vq, zio); - vq->vq_last_offset = zio->io_offset; + vq->vq_last_offset = zio->io_offset + zio->io_size; return (zio); } @@ -806,7 +805,7 @@ vdev_queue_io_done(zio_t *zio) } /* - * As these three methods are only used for load calculations we're not + * As these two methods are only used for load calculations we're not * concerned if we get an incorrect value on 32bit platforms due to lack of * vq_lock mutex use here, instead we prefer to keep it lock free for * performance. @@ -818,15 +817,9 @@ vdev_queue_length(vdev_t *vd) } uint64_t -vdev_queue_lastoffset(vdev_t *vd) +vdev_queue_last_offset(vdev_t *vd) { - return (vd->vdev_queue.vq_lastoffset); -} - -void -vdev_queue_register_lastoffset(vdev_t *vd, zio_t *zio) -{ - vd->vdev_queue.vq_lastoffset = zio->io_offset + zio->io_size; + return (vd->vdev_queue.vq_last_offset); } #if defined(_KERNEL) && defined(HAVE_SPL)