ABD page support to vdev_disk.c

Signed-off-by: Isaac Huang <he.huang@intel.com>
This commit is contained in:
Isaac Huang 2016-08-31 00:26:43 -06:00 committed by Brian Behlendorf
parent a6255b7fce
commit b0be93e81a
4 changed files with 90 additions and 57 deletions

View File

@ -32,6 +32,7 @@
#include <sys/refcount.h>
#ifdef _KERNEL
#include <linux/mm.h>
#include <linux/bio.h>
#include <sys/uio.h>
#endif
@ -112,6 +113,12 @@ int abd_cmp(abd_t *, abd_t *);
int abd_cmp_buf_off(abd_t *, const void *, size_t, size_t);
void abd_zero_off(abd_t *, size_t, size_t);
#if defined(_KERNEL) && defined(HAVE_SPL)
unsigned int abd_scatter_bio_map_off(struct bio *, abd_t *, unsigned int,
size_t);
unsigned long abd_nr_pages_off(abd_t *, unsigned int, size_t);
#endif
/*
* Wrappers for calls with offsets of 0
*/

View File

@ -586,7 +586,6 @@ extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot,
size_t buflen);
extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
nvlist_t *zplprops);
extern int spa_import_rootpool(char *devpath, char *devid);
extern int spa_import(char *pool, nvlist_t *config, nvlist_t *props,
uint64_t flags);
extern nvlist_t *spa_tryimport(nvlist_t *tryconfig);

View File

@ -999,8 +999,66 @@ abd_cmp(abd_t *dabd, abd_t *sabd)
abd_cmp_cb, NULL));
}
#if defined(_KERNEL) && defined(HAVE_SPL)
/*
 * ABD counterpart of bio_nr_pages(): number of pages spanned by the
 * @size bytes that begin @off bytes into @abd.
 *
 * The starting position is the buffer address plus @off for a linear
 * ABD, and the scatter abd_offset plus @off otherwise.
 */
unsigned long
abd_nr_pages_off(abd_t *abd, unsigned int size, size_t off)
{
	unsigned long start, first_page, end_page;

	if (!abd_is_linear(abd))
		start = abd->abd_u.abd_scatter.abd_offset + off;
	else
		start = (unsigned long)abd_to_buf(abd) + off;

	/* Page holding the first byte, and the page just past the last. */
	first_page = start >> PAGE_SHIFT;
	end_page = (start + size + PAGESIZE - 1) >> PAGE_SHIFT;

	return (end_page - first_page);
}
/*
 * bio_map for a scatter ABD.
 *
 * Adds pages of @abd to @bio, starting @off bytes into the ABD, until
 * one of the following happens:
 *   - @io_size bytes have been added,
 *   - bio->bi_max_vecs pieces have been added, or
 *   - bio_add_page() does not accept the full piece.
 *
 * Each piece is limited to what is left of the current PAGESIZE chunk.
 * Returns the number of bytes of @io_size that were not added.
 */
unsigned int
abd_scatter_bio_map_off(struct bio *bio, abd_t *abd,
    unsigned int io_size, size_t off)
{
	struct abd_iter iter;
	int vec;

	ASSERT(!abd_is_linear(abd));
	ASSERT3U(io_size, <=, abd->abd_size - off);

	/* Position the iterator at the first byte to be mapped. */
	abd_iter_init(&iter, abd);
	abd_iter_advance(&iter, off);

	for (vec = 0; vec < bio->bi_max_vecs && io_size != 0; vec++) {
		size_t chunk_off = abd_iter_scatter_chunk_offset(&iter);
		size_t nbytes = MIN(io_size, PAGESIZE - chunk_off);
		size_t chunk_idx;
		struct page *page;

		ASSERT(nbytes > 0);

		chunk_idx = abd_iter_scatter_chunk_index(&iter);
		page = abd->abd_u.abd_scatter.abd_chunks[chunk_idx];

		/* Stop as soon as the bio refuses the whole piece. */
		if (bio_add_page(bio, page, nbytes, chunk_off) != nbytes)
			break;

		io_size -= nbytes;
		abd_iter_advance(&iter, nbytes);
	}

	return (io_size);
}
/* Tunable Parameters */
module_param(zfs_abd_scatter_enabled, int, 0644);
MODULE_PARM_DESC(zfs_abd_scatter_enabled,

View File

@ -43,7 +43,6 @@ static void *zfs_vdev_holder = VDEV_HOLDER;
*/
typedef struct dio_request {
zio_t *dr_zio; /* Parent ZIO */
void *dr_loanbuf; /* borrowed abd buffer */
atomic_t dr_ref; /* References */
int dr_error; /* Bio error */
int dr_bio_count; /* Count of bio's */
@ -404,7 +403,6 @@ vdev_disk_dio_put(dio_request_t *dr)
*/
if (rc == 0) {
zio_t *zio = dr->dr_zio;
void *loanbuf = dr->dr_loanbuf;
int error = dr->dr_error;
vdev_disk_dio_free(dr);
@ -414,14 +412,6 @@ vdev_disk_dio_put(dio_request_t *dr)
ASSERT3S(zio->io_error, >=, 0);
if (zio->io_error)
vdev_disk_error(zio);
/* ABD placeholder */
if (loanbuf != NULL) {
if (zio->io_type == ZIO_TYPE_READ) {
abd_copy_from_buf(zio->io_abd, loanbuf,
zio->io_size);
}
zio_buf_free(loanbuf, zio->io_size);
}
zio_delay_interrupt(zio);
}
@ -446,17 +436,10 @@ BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, error)
#endif
}
/* Drop reference aquired by __vdev_disk_physio */
/* Drop reference acquired by __vdev_disk_physio */
rc = vdev_disk_dio_put(dr);
}
/*
 * Number of pages spanned by the @bio_size bytes starting at address
 * @bio_ptr.
 */
static inline unsigned long
bio_nr_pages(void *bio_ptr, unsigned int bio_size)
{
	unsigned long addr = (unsigned long)bio_ptr;
	unsigned long first_page = addr >> PAGE_SHIFT;
	unsigned long end_page = (addr + bio_size + PAGE_SIZE - 1) >> PAGE_SHIFT;

	return (end_page - first_page);
}
static unsigned int
bio_map(struct bio *bio, void *bio_ptr, unsigned int bio_size)
{
@ -496,6 +479,15 @@ bio_map(struct bio *bio, void *bio_ptr, unsigned int bio_size)
return (bio_size);
}
/*
 * Map @size bytes of @abd, starting @off bytes into it, onto @bio.
 *
 * A linear ABD is handed to bio_map() using its buffer address plus
 * @off; any other ABD goes through abd_scatter_bio_map_off().  The
 * value returned by the chosen mapper (the size still left to map) is
 * passed back unchanged.
 */
static unsigned int
bio_map_abd_off(struct bio *bio, abd_t *abd, unsigned int size, size_t off)
{
	char *buf;

	if (!abd_is_linear(abd))
		return (abd_scatter_bio_map_off(bio, abd, size, off));

	buf = (char *)abd_to_buf(abd);
	return (bio_map(bio, buf + off, size));
}
#ifndef bio_set_op_attrs
#define bio_set_op_attrs(bio, rw, flags) \
do { (bio)->bi_rw |= (rw)|(flags); } while (0)
@ -528,11 +520,11 @@ vdev_submit_bio(struct bio *bio)
}
static int
__vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr,
size_t kbuf_size, uint64_t kbuf_offset, int rw, int flags)
__vdev_disk_physio(struct block_device *bdev, zio_t *zio,
size_t io_size, uint64_t io_offset, int rw, int flags)
{
dio_request_t *dr;
caddr_t bio_ptr;
uint64_t abd_offset;
uint64_t bio_offset;
int bio_size, bio_count = 16;
int i = 0, error = 0;
@ -540,7 +532,8 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr,
struct blk_plug plug;
#endif
ASSERT3U(kbuf_offset + kbuf_size, <=, bdev->bd_inode->i_size);
ASSERT(zio != NULL);
ASSERT3U(io_offset + io_size, <=, bdev->bd_inode->i_size);
retry:
dr = vdev_disk_dio_alloc(bio_count);
@ -559,32 +552,10 @@ retry:
* their volume block size to match the maximum request size and
* the common case will be one bio per vdev IO request.
*/
if (zio != NULL) {
abd_t *abd = zio->io_abd;
/*
* ABD placeholder
* We can't use abd_borrow_buf routines here since our
* completion context is interrupt and abd refcounts
* take a mutex (in debug mode).
*/
if (abd_is_linear(abd)) {
bio_ptr = abd_to_buf(abd);
dr->dr_loanbuf = NULL;
} else {
bio_ptr = zio_buf_alloc(zio->io_size);
dr->dr_loanbuf = bio_ptr;
if (zio->io_type != ZIO_TYPE_READ)
abd_copy_to_buf(bio_ptr, abd, zio->io_size);
}
} else {
bio_ptr = kbuf_ptr;
dr->dr_loanbuf = NULL;
}
bio_offset = kbuf_offset;
bio_size = kbuf_size;
abd_offset = 0;
bio_offset = io_offset;
bio_size = io_size;
for (i = 0; i <= dr->dr_bio_count; i++) {
/* Finished constructing bio's for given buffer */
@ -597,8 +568,6 @@ retry:
* are needed we allocate a larger dio and warn the user.
*/
if (dr->dr_bio_count == i) {
if (dr->dr_loanbuf)
zio_buf_free(dr->dr_loanbuf, zio->io_size);
vdev_disk_dio_free(dr);
bio_count *= 2;
goto retry;
@ -606,10 +575,9 @@ retry:
/* bio_alloc() with __GFP_WAIT never returns NULL */
dr->dr_bio[i] = bio_alloc(GFP_NOIO,
MIN(bio_nr_pages(bio_ptr, bio_size), BIO_MAX_PAGES));
MIN(abd_nr_pages_off(zio->io_abd, bio_size, abd_offset),
BIO_MAX_PAGES));
if (unlikely(dr->dr_bio[i] == NULL)) {
if (dr->dr_loanbuf)
zio_buf_free(dr->dr_loanbuf, zio->io_size);
vdev_disk_dio_free(dr);
return (ENOMEM);
}
@ -624,10 +592,11 @@ retry:
bio_set_op_attrs(dr->dr_bio[i], rw, flags);
/* Remaining size is returned to become the new size */
bio_size = bio_map(dr->dr_bio[i], bio_ptr, bio_size);
bio_size = bio_map_abd_off(dr->dr_bio[i], zio->io_abd,
bio_size, abd_offset);
/* Advance in buffer and construct another bio if needed */
bio_ptr += BIO_BI_SIZE(dr->dr_bio[i]);
abd_offset += BIO_BI_SIZE(dr->dr_bio[i]);
bio_offset += BIO_BI_SIZE(dr->dr_bio[i]);
}
@ -769,7 +738,7 @@ vdev_disk_io_start(zio_t *zio)
}
zio->io_target_timestamp = zio_handle_io_delay(zio);
error = __vdev_disk_physio(vd->vd_bdev, zio, NULL,
error = __vdev_disk_physio(vd->vd_bdev, zio,
zio->io_size, zio->io_offset, rw, flags);
if (error) {
zio->io_error = error;