linux: zvol: avoid heap allocation for zvol_request_sync=1

spl_kmem_alloc() showed up in some flamegraphs of a single-threaded
4k sync write workload at 85k IOPS on an
Intel(R) Xeon(R) Silver 4215 CPU @ 2.50GHz.
Certainly not a huge win, but I believe the change is clean and
easy to maintain down the road.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Signed-off-by: Christian Schwarz <me@cschwarz.com>
Closes #11666
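
In short, the change keeps the per-bio zv_request_t on the submitting thread's
stack and only heap-allocates a zv_request_task_t wrapper (the request plus its
taskq_ent_t) when the request is handed off to the zvol taskq. Below is a
minimal user-space sketch of that pattern, assuming malloc()/free() and a
direct function call as stand-ins for kmem_alloc()/kmem_free() and
taskq_dispatch_ent(); the names request_t, task_t, process() and
process_task() are illustrative only and not part of the ZFS code.

/*
 * Minimal user-space sketch of the allocation pattern (not ZFS code).
 */
#include <stdio.h>
#include <stdlib.h>

typedef struct request {	/* like zv_request_t: small and copyable */
	int	id;
} request_t;

typedef struct task {		/* like zv_request_task_t */
	request_t	req;	/* request embedded by value */
	/* the real struct also embeds the taskq_ent_t work entry */
} task_t;

static void
process(request_t *req)		/* like zvol_write()/zvol_read() */
{
	printf("processing request %d\n", req->id);
}

static void
process_task(void *arg)		/* like zvol_write_task() etc. */
{
	task_t *task = arg;

	process(&task->req);
	free(task);		/* the worker, not the submitter, frees it */
}

int
main(void)
{
	int request_sync = 1;	/* stands in for the zvol_request_sync tunable */

	/* The request itself lives on the submitter's stack ... */
	request_t req = { .id = 42 };

	if (request_sync) {
		/* ... so the synchronous path allocates nothing. */
		process(&req);
	} else {
		/*
		 * Only the asynchronous hand-off pays for a heap allocation,
		 * because the request must outlive this stack frame.
		 */
		task_t *task = malloc(sizeof (*task));

		if (task == NULL)
			return (1);
		task->req = req;	/* copy the stack request into the task */
		process_task(task);	/* a real taskq would run this later */
	}
	return (0);
}

With zvol_request_sync=1 only the first branch is taken, which is exactly the
allocation this commit removes from the synchronous path.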
Christian Schwarz, 2021-03-03 17:15:28 +01:00 (committed by GitHub)
parent 3242b5358e
commit e439ee83c1
1 changed file with 64 additions and 29 deletions

@@ -56,12 +56,32 @@ struct zvol_state_os {
 taskq_t *zvol_taskq;
 static struct ida zvol_ida;
 
-typedef struct zv_request {
+typedef struct zv_request_stack {
 	zvol_state_t	*zv;
 	struct bio	*bio;
-	taskq_ent_t	ent;
 } zv_request_t;
 
+typedef struct zv_request_task {
+	zv_request_t	zvr;
+	taskq_ent_t	ent;
+} zv_request_task_t;
+
+static zv_request_task_t *
+zv_request_task_create(zv_request_t zvr)
+{
+	zv_request_task_t *task;
+	task = kmem_alloc(sizeof (zv_request_task_t), KM_SLEEP);
+	taskq_init_ent(&task->ent);
+	task->zvr = zvr;
+	return (task);
+}
+
+static void
+zv_request_task_free(zv_request_task_t *task)
+{
+	kmem_free(task, sizeof (*task));
+}
+
 /*
  * Given a path, return TRUE if path is a ZVOL.
  */
@@ -80,9 +100,8 @@ zvol_is_zvol_impl(const char *path)
 }
 
 static void
-zvol_write(void *arg)
+zvol_write(zv_request_t *zvr)
 {
-	zv_request_t *zvr = arg;
 	struct bio *bio = zvr->bio;
 	int error = 0;
 	zfs_uio_t uio;
@@ -102,7 +121,6 @@ zvol_write(void *arg)
 	if (uio.uio_resid == 0) {
 		rw_exit(&zv->zv_suspend_lock);
 		BIO_END_IO(bio, 0);
-		kmem_free(zvr, sizeof (zv_request_t));
 		return;
 	}
 
@@ -162,13 +180,19 @@ zvol_write(void *arg)
 	blk_generic_end_io_acct(q, disk, WRITE, bio, start_time);
 
 	BIO_END_IO(bio, -error);
-	kmem_free(zvr, sizeof (zv_request_t));
 }
 
 static void
-zvol_discard(void *arg)
+zvol_write_task(void *arg)
+{
+	zv_request_task_t *task = arg;
+	zvol_write(&task->zvr);
+	zv_request_task_free(task);
+}
+
+static void
+zvol_discard(zv_request_t *zvr)
 {
-	zv_request_t *zvr = arg;
 	struct bio *bio = zvr->bio;
 	zvol_state_t *zv = zvr->zv;
 	uint64_t start = BIO_BI_SECTOR(bio) << 9;
@@ -238,13 +262,19 @@ unlock:
 	blk_generic_end_io_acct(q, disk, WRITE, bio, start_time);
 
 	BIO_END_IO(bio, -error);
-	kmem_free(zvr, sizeof (zv_request_t));
 }
 
 static void
-zvol_read(void *arg)
+zvol_discard_task(void *arg)
+{
+	zv_request_task_t *task = arg;
+	zvol_discard(&task->zvr);
+	zv_request_task_free(task);
+}
+
+static void
+zvol_read(zv_request_t *zvr)
 {
-	zv_request_t *zvr = arg;
 	struct bio *bio = zvr->bio;
 	int error = 0;
 	zfs_uio_t uio;
@@ -295,7 +325,14 @@ zvol_read(void *arg)
 	blk_generic_end_io_acct(q, disk, READ, bio, start_time);
 
 	BIO_END_IO(bio, -error);
-	kmem_free(zvr, sizeof (zv_request_t));
+}
+
+static void
+zvol_read_task(void *arg)
+{
+	zv_request_task_t *task = arg;
+	zvol_read(&task->zvr);
+	zv_request_task_free(task);
 }
 
 #ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
@@ -318,7 +355,6 @@ zvol_request(struct request_queue *q, struct bio *bio)
 	uint64_t offset = BIO_BI_SECTOR(bio) << 9;
 	uint64_t size = BIO_BI_SIZE(bio);
 	int rw = bio_data_dir(bio);
-	zv_request_t *zvr;
 
 	if (bio_has_data(bio) && offset + size > zv->zv_volsize) {
 		printk(KERN_INFO
@@ -331,6 +367,12 @@ zvol_request(struct request_queue *q, struct bio *bio)
 		goto out;
 	}
 
+	zv_request_t zvr = {
+		.zv = zv,
+		.bio = bio,
+	};
+	zv_request_task_t *task;
+
 	if (rw == WRITE) {
 		if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
 			BIO_END_IO(bio, -SET_ERROR(EROFS));
@@ -361,11 +403,6 @@ zvol_request(struct request_queue *q, struct bio *bio)
 			rw_downgrade(&zv->zv_suspend_lock);
 		}
 
-		zvr = kmem_alloc(sizeof (zv_request_t), KM_SLEEP);
-		zvr->zv = zv;
-		zvr->bio = bio;
-		taskq_init_ent(&zvr->ent);
-
 		/*
 		 * We don't want this thread to be blocked waiting for i/o to
 		 * complete, so we instead wait from a taskq callback. The
@@ -398,17 +435,19 @@ zvol_request(struct request_queue *q, struct bio *bio)
 		 */
 		if (bio_is_discard(bio) || bio_is_secure_erase(bio)) {
 			if (zvol_request_sync) {
-				zvol_discard(zvr);
+				zvol_discard(&zvr);
 			} else {
+				task = zv_request_task_create(zvr);
 				taskq_dispatch_ent(zvol_taskq,
-				    zvol_discard, zvr, 0, &zvr->ent);
+				    zvol_discard_task, task, 0, &task->ent);
 			}
 		} else {
 			if (zvol_request_sync) {
-				zvol_write(zvr);
+				zvol_write(&zvr);
 			} else {
+				task = zv_request_task_create(zvr);
 				taskq_dispatch_ent(zvol_taskq,
-				    zvol_write, zvr, 0, &zvr->ent);
+				    zvol_write_task, task, 0, &task->ent);
 			}
 		}
 	} else {
@@ -422,19 +461,15 @@ zvol_request(struct request_queue *q, struct bio *bio)
 			goto out;
 		}
 
-		zvr = kmem_alloc(sizeof (zv_request_t), KM_SLEEP);
-		zvr->zv = zv;
-		zvr->bio = bio;
-		taskq_init_ent(&zvr->ent);
-
 		rw_enter(&zv->zv_suspend_lock, RW_READER);
 
 		/* See comment in WRITE case above. */
 		if (zvol_request_sync) {
-			zvol_read(zvr);
+			zvol_read(&zvr);
 		} else {
+			task = zv_request_task_create(zvr);
 			taskq_dispatch_ent(zvol_taskq,
-			    zvol_read, zvr, 0, &zvr->ent);
+			    zvol_read_task, task, 0, &task->ent);
 		}
 	}