From fb40095f5f0853946f8150481ca22602d1334dfe Mon Sep 17 00:00:00 2001 From: Richard Yao Date: Sat, 29 Aug 2015 12:01:07 -0400 Subject: [PATCH] Disable LBA weighting on files and SSDs The LBA weighting makes sense on rotational media where the outer tracks have twice the bandwidth of the inner tracks. However, it is detrimental on nonrotational media such as solid state disks, where the only effect is to ensure that metaslabs enter the best-fit allocation behavior sooner, which is detrimental to performance. It also makes no sense on files where the underlying filesystem can arrange things however it wants. Signed-off-by: Richard Yao Signed-off-by: Brian Behlendorf Closes #3712 --- include/sys/vdev_impl.h | 1 + module/zfs/metaslab.c | 2 +- module/zfs/vdev.c | 10 +++++++++- module/zfs/vdev_disk.c | 3 +++ module/zfs/vdev_file.c | 3 +++ 5 files changed, 17 insertions(+), 2 deletions(-) diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index 6b27e75ae5..1371a3f039 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -151,6 +151,7 @@ struct vdev { vdev_stat_t vdev_stat; /* virtual device statistics */ boolean_t vdev_expanding; /* expand the vdev? */ boolean_t vdev_reopening; /* reopen in progress? */ + boolean_t vdev_nonrot; /* true if solid state */ int vdev_open_error; /* error on last open */ kthread_t *vdev_open_thread; /* thread opening children */ uint64_t vdev_crtxg; /* txg when top-level was added */ diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index b328cbb0a1..59bcefd346 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -1518,7 +1518,7 @@ metaslab_weight(metaslab_t *msp) * In effect, this means that we'll select the metaslab with the most * free bandwidth rather than simply the one with the most free space. */ - if (metaslab_lba_weighting_enabled) { + if (!vd->vdev_nonrot && metaslab_lba_weighting_enabled) { weight = 2 * weight - (msp->ms_id * weight) / vd->vdev_ms_count; ASSERT(weight >= space && weight <= 2 * space); } diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 8e50ababc1..7aff5455b1 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -1108,6 +1108,7 @@ vdev_open_child(void *arg) vd->vdev_open_thread = curthread; vd->vdev_open_error = vdev_open(vd); vd->vdev_open_thread = NULL; + vd->vdev_parent->vdev_nonrot &= vd->vdev_nonrot; } static boolean_t @@ -1134,15 +1135,19 @@ vdev_open_children(vdev_t *vd) int children = vd->vdev_children; int c; + vd->vdev_nonrot = B_TRUE; + /* * in order to handle pools on top of zvols, do the opens * in a single thread so that the same thread holds the * spa_namespace_lock */ if (vdev_uses_zvols(vd)) { - for (c = 0; c < children; c++) + for (c = 0; c < children; c++) { vd->vdev_child[c]->vdev_open_error = vdev_open(vd->vdev_child[c]); + vd->vdev_nonrot &= vd->vdev_child[c]->vdev_nonrot; + } return; } tq = taskq_create("vdev_open", children, minclsyspri, @@ -1153,6 +1158,9 @@ vdev_open_children(vdev_t *vd) TQ_SLEEP) != 0); taskq_destroy(tq); + + for (c = 0; c < children; c++) + vd->vdev_nonrot &= vd->vdev_child[c]->vdev_nonrot; } /* diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index eb77c269c2..380ede35b5 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -301,6 +301,9 @@ skip_open: /* Clear the nowritecache bit, causes vdev_reopen() to try again. */ v->vdev_nowritecache = B_FALSE; + /* Inform the ZIO pipeline that we are non-rotational */ + v->vdev_nonrot = blk_queue_nonrot(bdev_get_queue(vd->vd_bdev)); + /* Physical volume size in bytes */ *psize = bdev_capacity(vd->vd_bdev); diff --git a/module/zfs/vdev_file.c b/module/zfs/vdev_file.c index e61240fdcc..a29ea7bf95 100644 --- a/module/zfs/vdev_file.c +++ b/module/zfs/vdev_file.c @@ -57,6 +57,9 @@ vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, vattr_t vattr; int error; + /* Rotational optimizations only make sense on block devices */ + vd->vdev_nonrot = B_TRUE; + /* * We must have a pathname, and it must be absolute. */