vdev_mirror: load balancing fixes

vdev_queue:
- Track the last position of each vdev, including the io size,
  in order to detect linear access of the following zio.
- Remove duplicate `vq_lastoffset`

vdev_mirror:
- Correctly calculate the zio offset (signedness issue)
- Deprecate `vdev_queue_register_lastoffset()`
- Add `VDEV_LABEL_START_SIZE` to zio offset of leaf vdevs

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Gvozden Neskovic <neskovic@gmail.com>
Closes #6461
This commit is contained in:
Gvozden Neskovic 2017-08-04 11:29:56 +02:00 committed by Tony Hutter
parent 6116bbd744
commit 06acbbc429
4 changed files with 24 additions and 37 deletions

View File

@ -125,8 +125,7 @@ extern zio_t *vdev_queue_io(zio_t *zio);
extern void vdev_queue_io_done(zio_t *zio); extern void vdev_queue_io_done(zio_t *zio);
extern int vdev_queue_length(vdev_t *vd); extern int vdev_queue_length(vdev_t *vd);
extern uint64_t vdev_queue_lastoffset(vdev_t *vd); extern uint64_t vdev_queue_last_offset(vdev_t *vd);
extern void vdev_queue_register_lastoffset(vdev_t *vd, zio_t *zio);
extern void vdev_config_dirty(vdev_t *vd); extern void vdev_config_dirty(vdev_t *vd);
extern void vdev_config_clean(vdev_t *vd); extern void vdev_config_clean(vdev_t *vd);

View File

@ -127,7 +127,6 @@ struct vdev_queue {
hrtime_t vq_io_delta_ts; hrtime_t vq_io_delta_ts;
zio_t vq_io_search; /* used as local for stack reduction */ zio_t vq_io_search; /* used as local for stack reduction */
kmutex_t vq_lock; kmutex_t vq_lock;
uint64_t vq_lastoffset;
}; };
/* /*

View File

@ -116,7 +116,8 @@ static const zio_vsd_ops_t vdev_mirror_vsd_ops = {
static int static int
vdev_mirror_load(mirror_map_t *mm, vdev_t *vd, uint64_t zio_offset) vdev_mirror_load(mirror_map_t *mm, vdev_t *vd, uint64_t zio_offset)
{ {
uint64_t lastoffset; uint64_t last_offset;
int64_t offset_diff;
int load; int load;
/* All DVAs have equal weight at the root. */ /* All DVAs have equal weight at the root. */
@ -129,13 +130,17 @@ vdev_mirror_load(mirror_map_t *mm, vdev_t *vd, uint64_t zio_offset)
* worse overall when resilvering with compared to without. * worse overall when resilvering with compared to without.
*/ */
/* Fix zio_offset for leaf vdevs */
if (vd->vdev_ops->vdev_op_leaf)
zio_offset += VDEV_LABEL_START_SIZE;
/* Standard load based on pending queue length. */ /* Standard load based on pending queue length. */
load = vdev_queue_length(vd); load = vdev_queue_length(vd);
lastoffset = vdev_queue_lastoffset(vd); last_offset = vdev_queue_last_offset(vd);
if (vd->vdev_nonrot) { if (vd->vdev_nonrot) {
/* Non-rotating media. */ /* Non-rotating media. */
if (lastoffset == zio_offset) if (last_offset == zio_offset)
return (load + zfs_vdev_mirror_non_rotating_inc); return (load + zfs_vdev_mirror_non_rotating_inc);
/* /*
@ -148,16 +153,16 @@ vdev_mirror_load(mirror_map_t *mm, vdev_t *vd, uint64_t zio_offset)
} }
/* Rotating media I/O's which directly follow the last I/O. */ /* Rotating media I/O's which directly follow the last I/O. */
if (lastoffset == zio_offset) if (last_offset == zio_offset)
return (load + zfs_vdev_mirror_rotating_inc); return (load + zfs_vdev_mirror_rotating_inc);
/* /*
* Apply half the seek increment to I/O's within seek offset * Apply half the seek increment to I/O's within seek offset
* of the last I/O queued to this vdev as they should incur less * of the last I/O issued to this vdev as they should incur less
* of a seek increment. * of a seek increment.
*/ */
if (ABS(lastoffset - zio_offset) < offset_diff = (int64_t)(last_offset - zio_offset);
zfs_vdev_mirror_rotating_seek_offset) if (ABS(offset_diff) < zfs_vdev_mirror_rotating_seek_offset)
return (load + (zfs_vdev_mirror_rotating_seek_inc / 2)); return (load + (zfs_vdev_mirror_rotating_seek_inc / 2));
/* Apply the full seek increment to all other I/O's. */ /* Apply the full seek increment to all other I/O's. */
@ -382,30 +387,21 @@ vdev_mirror_child_select(zio_t *zio)
mm->mm_preferred_cnt++; mm->mm_preferred_cnt++;
} }
if (mm->mm_preferred_cnt == 1) { if (mm->mm_preferred_cnt == 1)
vdev_queue_register_lastoffset(
mm->mm_child[mm->mm_preferred[0]].mc_vd, zio);
return (mm->mm_preferred[0]); return (mm->mm_preferred[0]);
}
if (mm->mm_preferred_cnt > 1) {
int c = vdev_mirror_preferred_child_randomize(zio);
vdev_queue_register_lastoffset(mm->mm_child[c].mc_vd, zio); if (mm->mm_preferred_cnt > 1)
return (c); return (vdev_mirror_preferred_child_randomize(zio));
}
/* /*
* Every device is either missing or has this txg in its DTL. * Every device is either missing or has this txg in its DTL.
* Look for any child we haven't already tried before giving up. * Look for any child we haven't already tried before giving up.
*/ */
for (c = 0; c < mm->mm_children; c++) { for (c = 0; c < mm->mm_children; c++) {
if (!mm->mm_child[c].mc_tried) { if (!mm->mm_child[c].mc_tried)
vdev_queue_register_lastoffset(mm->mm_child[c].mc_vd,
zio);
return (c); return (c);
} }
}
/* /*
* Every child failed. There's no place left to look. * Every child failed. There's no place left to look.

View File

@ -393,7 +393,7 @@ vdev_queue_init(vdev_t *vd)
sizeof (zio_t), offsetof(struct zio, io_queue_node)); sizeof (zio_t), offsetof(struct zio, io_queue_node));
} }
vq->vq_lastoffset = 0; vq->vq_last_offset = 0;
} }
void void
@ -699,9 +699,8 @@ again:
*/ */
tree = vdev_queue_class_tree(vq, p); tree = vdev_queue_class_tree(vq, p);
vq->vq_io_search.io_timestamp = 0; vq->vq_io_search.io_timestamp = 0;
vq->vq_io_search.io_offset = vq->vq_last_offset + 1; vq->vq_io_search.io_offset = vq->vq_last_offset - 1;
VERIFY3P(avl_find(tree, &vq->vq_io_search, VERIFY3P(avl_find(tree, &vq->vq_io_search, &idx), ==, NULL);
&idx), ==, NULL);
zio = avl_nearest(tree, idx, AVL_AFTER); zio = avl_nearest(tree, idx, AVL_AFTER);
if (zio == NULL) if (zio == NULL)
zio = avl_first(tree); zio = avl_first(tree);
@ -728,7 +727,7 @@ again:
} }
vdev_queue_pending_add(vq, zio); vdev_queue_pending_add(vq, zio);
vq->vq_last_offset = zio->io_offset; vq->vq_last_offset = zio->io_offset + zio->io_size;
return (zio); return (zio);
} }
@ -806,7 +805,7 @@ vdev_queue_io_done(zio_t *zio)
} }
/* /*
* As these three methods are only used for load calculations we're not * As these two methods are only used for load calculations we're not
* concerned if we get an incorrect value on 32bit platforms due to lack of * concerned if we get an incorrect value on 32bit platforms due to lack of
* vq_lock mutex use here, instead we prefer to keep it lock free for * vq_lock mutex use here, instead we prefer to keep it lock free for
* performance. * performance.
@ -818,15 +817,9 @@ vdev_queue_length(vdev_t *vd)
} }
uint64_t uint64_t
vdev_queue_lastoffset(vdev_t *vd) vdev_queue_last_offset(vdev_t *vd)
{ {
return (vd->vdev_queue.vq_lastoffset); return (vd->vdev_queue.vq_last_offset);
}
void
vdev_queue_register_lastoffset(vdev_t *vd, zio_t *zio)
{
vd->vdev_queue.vq_lastoffset = zio->io_offset + zio->io_size;
} }
#if defined(_KERNEL) && defined(HAVE_SPL) #if defined(_KERNEL) && defined(HAVE_SPL)