Use 'noop' IO Scheduler
Initial testing has shown the the right IO scheduler to use under Linux is noop. This strikes the ideal balance by allowing the zfs elevator to do all request ordering and prioritization. While allowing the Linux elevator to do the maximum front/back merging allowed by the physical device. This yields the largest possible requests for the device with the lowest total overhead. While 'noop' should be right for your system you can choose a different IO scheduler with the 'zfs_vdev_scheduler' option. You may set this value to any of the standard Linux schedulers: noop, cfq, deadline, anticipatory. In addition, if you choose 'none' zfs will not attempt to change the IO scheduler for the block device.
This commit is contained in:
parent
4db77a74a6
commit
6839eed23e
|
@ -81,6 +81,9 @@ extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **);
|
|||
# define vdev_bdev_block_size(bdev) bdev_hardsect_size(bdev)
|
||||
#endif
|
||||
|
||||
/* Default Linux IO Scheduler */
|
||||
#define VDEV_SCHEDULER "noop"
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -33,6 +33,8 @@
|
|||
#include <sys/zio.h>
|
||||
#include <sys/sunldi.h>
|
||||
|
||||
char *zfs_vdev_scheduler = VDEV_SCHEDULER;
|
||||
|
||||
/*
|
||||
* Virtual device vector for disks.
|
||||
*/
|
||||
|
@ -102,6 +104,43 @@ vdev_disk_error(zio_t *zio)
|
|||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Use the Linux 'noop' elevator for zfs managed block devices. This
|
||||
* strikes the ideal balance by allowing the zfs elevator to do all
|
||||
* request ordering and prioritization. While allowing the Linux
|
||||
* elevator to do the maximum front/back merging allowed by the
|
||||
* physical device. This yields the largest possible requests for
|
||||
* the device with the lowest total overhead.
|
||||
*
|
||||
* Unfortunately we cannot directly call the elevator_switch() function
|
||||
* because it is not exported from the block layer. This means we have
|
||||
* to use the sysfs interface and a user space upcall. Pools will be
|
||||
* automatically imported on module load so we must do this at device
|
||||
* open time from the kernel.
|
||||
*/
|
||||
static int
|
||||
vdev_elevator_switch(vdev_t *v, char *elevator, char *device)
|
||||
{
|
||||
char sh_path[] = "/bin/sh";
|
||||
char sh_cmd[128];
|
||||
char *argv[] = { sh_path, "-c", sh_cmd };
|
||||
char *envp[] = { NULL };
|
||||
int error;
|
||||
|
||||
if (!strncmp(elevator, "none", 4) && (strlen(elevator) == 4))
|
||||
return (0);
|
||||
|
||||
sprintf(sh_cmd, "%s \"%s\" >/sys/block/%s/queue/scheduler",
|
||||
"/bin/echo", elevator, device);
|
||||
|
||||
error = call_usermodehelper(sh_path, argv, envp, 1);
|
||||
if (error)
|
||||
printk("ZFS: Unable to set \"%s\" scheduler for %s (%s): %d\n",
|
||||
elevator, v->vdev_path, device, error);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *ashift)
|
||||
{
|
||||
|
@ -167,6 +206,10 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *ashift)
|
|||
/* Based on the minimum sector size set the block size */
|
||||
*ashift = highbit(MAX(block_size, SPA_MINBLOCKSIZE)) - 1;
|
||||
|
||||
/* Try to set the io scheduler elevator algorithm */
|
||||
(void) vdev_elevator_switch(v, zfs_vdev_scheduler,
|
||||
bdev->bd_disk->disk_name);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -702,3 +745,6 @@ vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config)
|
|||
|
||||
return 0;
|
||||
}
|
||||
|
||||
module_param(zfs_vdev_scheduler, charp, 0644);
|
||||
MODULE_PARM_DESC(zfs_vdev_scheduler, "IO Scheduler (noop)");
|
||||
|
|
Loading…
Reference in New Issue