vdev_disk: rework bio max segment calculation

A single "page" in an ABD does not necessarily correspond to one segment
in a bio, because of how ZFS does ABD allocations and how it breaks them
up with adding them to a bio. Because of this, simply dividing the ABD
size by the page size can only ever give a minimum number of segments
required, rather than the correct number.

Until we can fix that, we'll just make each bio as large as they can be
for as many segments as the device queue will permit without needing to
split the the bio. This is a little wasteful if we don't intend to put
that many segments in the bio, but its not a lot of memory and its only
lost until the bio is completed.

This also adds a tuneable, vdev_disk_max_segs, to allow setting this
value to be set by the operator. This is very useful for debugging.

Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
(cherry picked from commit a3a438d1bedb0626417cd73ba10b1479a06bef7f)
This commit is contained in:
Rob Norris 2023-07-18 11:11:29 +10:00 committed by Geoff Amey
parent f615207ee6
commit 98bcc390e8
2 changed files with 21 additions and 7 deletions

View File

@ -72,6 +72,7 @@ enum scope_prefix_types {
zfs_txg, zfs_txg,
zfs_vdev, zfs_vdev,
zfs_vdev_cache, zfs_vdev_cache,
zfs_vdev_disk,
zfs_vdev_file, zfs_vdev_file,
zfs_vdev_mirror, zfs_vdev_mirror,
zfs_vnops, zfs_vnops,

View File

@ -47,6 +47,14 @@ typedef struct vdev_disk {
} vdev_disk_t; } vdev_disk_t;
int zio_suppress_zero_writes = B_TRUE; int zio_suppress_zero_writes = B_TRUE;
/*
* Maximum number of segments to add to a bio. If this is higher than the
* maximum allowed by the device queue or the kernel itself, it will be
* clamped. Setting it to zero will cause the kernel's ideal size to be used.
*/
unsigned long vdev_disk_max_segs = 0;
/* /*
* Unique identifier for the exclusive vdev holder. * Unique identifier for the exclusive vdev holder.
*/ */
@ -591,15 +599,17 @@ vdev_bio_alloc(struct block_device *bdev, gfp_t gfp_mask,
} }
static inline unsigned int static inline unsigned int
vdev_bio_max_segs(zio_t *zio, int bio_size, uint64_t abd_offset) vdev_bio_max_segs(struct block_device *bdev) {
{ const unsigned long tune_max_segs =
unsigned long nr_segs = abd_nr_pages_off(zio->io_abd, vdev_disk_max_segs > 0 ? vdev_disk_max_segs : ULONG_MAX;
bio_size, abd_offset); const unsigned long dev_max_segs =
queue_max_segments(bdev_get_queue(bdev));
const unsigned long max_segs = MIN(tune_max_segs, dev_max_segs);
#ifdef HAVE_BIO_MAX_SEGS #ifdef HAVE_BIO_MAX_SEGS
return (bio_max_segs(nr_segs)); return (bio_max_segs(max_segs));
#else #else
return (MIN(nr_segs, BIO_MAX_PAGES)); return (MIN(max_segs, BIO_MAX_PAGES));
#endif #endif
} }
@ -664,7 +674,7 @@ retry:
goto retry; goto retry;
} }
nr_vecs = vdev_bio_max_segs(zio, bio_size, abd_offset); nr_vecs = vdev_bio_max_segs(bdev);
dr->dr_bio[i] = vdev_bio_alloc(bdev, GFP_NOIO, nr_vecs); dr->dr_bio[i] = vdev_bio_alloc(bdev, GFP_NOIO, nr_vecs);
if (unlikely(dr->dr_bio[i] == NULL)) { if (unlikely(dr->dr_bio[i] == NULL)) {
vdev_disk_dio_free(dr); vdev_disk_dio_free(dr);
@ -1030,3 +1040,6 @@ param_set_max_auto_ashift(const char *buf, zfs_kernel_param_t *kp)
ZFS_MODULE_PARAM(zfs_zio, zio_, suppress_zero_writes, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_zio, zio_, suppress_zero_writes, INT, ZMOD_RW,
"Do not send zero byte writes to hardware"); "Do not send zero byte writes to hardware");
ZFS_MODULE_PARAM(zfs_vdev_disk, vdev_disk_, max_segs, ULONG, ZMOD_RW,
"Maximum number of data segments to add to an IO request");