MMAP Optimization
Enable the zfs_getpage, zfs_fillpage, zfs_putpage, and zfs_putapage functions. The functions have been modified to make them Linux friendly. ZFS uses these functions to read/write mmapped pages. Using them from readpage/writepage results in cleaner code. The patch also adds the readpages and writepages interface functions to read/write a list of pages in one function call. This change implements the first mmap optimization described in https://github.com/behlendorf/zfs/issues/225 Signed-off-by: Prasad Joshi <pjoshi@stec-inc.com> Signed-off-by: Brian Behlendorf <behlendorf@llnl.gov> Issue #255
This commit is contained in:
parent
2a005961a4
commit
dde471ef5a
|
@ -71,6 +71,9 @@ extern int zfs_getsecattr(struct inode *ip, vsecattr_t *vsecp, int flag,
|
||||||
cred_t *cr);
|
cred_t *cr);
|
||||||
extern int zfs_setsecattr(struct inode *ip, vsecattr_t *vsecp, int flag,
|
extern int zfs_setsecattr(struct inode *ip, vsecattr_t *vsecp, int flag,
|
||||||
cred_t *cr);
|
cred_t *cr);
|
||||||
|
extern int zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages);
|
||||||
|
extern int zfs_putpage(struct page *page, struct writeback_control *wbc,
|
||||||
|
void *data);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,6 +29,7 @@
|
||||||
#include <linux/vfs_compat.h>
|
#include <linux/vfs_compat.h>
|
||||||
#include <linux/xattr_compat.h>
|
#include <linux/xattr_compat.h>
|
||||||
#include <linux/exportfs.h>
|
#include <linux/exportfs.h>
|
||||||
|
#include <linux/writeback.h>
|
||||||
|
|
||||||
/* zpl_inode.c */
|
/* zpl_inode.c */
|
||||||
extern const struct inode_operations zpl_inode_operations;
|
extern const struct inode_operations zpl_inode_operations;
|
||||||
|
|
|
@ -3725,54 +3725,32 @@ zfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp,
|
||||||
pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR);
|
pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR);
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
#endif /* HAVE_MMAP */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Push a page out to disk, klustering if possible.
|
* Push a page out to disk
|
||||||
*
|
*
|
||||||
* IN: vp - file to push page to.
|
* IN: vp - file to push page to.
|
||||||
* pp - page to push.
|
* pp - page to push.
|
||||||
* flags - additional flags.
|
* off - start of range pushed.
|
||||||
* cr - credentials of caller.
|
* len - len of range pushed.
|
||||||
*
|
*
|
||||||
* OUT: offp - start of range pushed.
|
|
||||||
* lenp - len of range pushed.
|
|
||||||
*
|
*
|
||||||
* RETURN: 0 if success
|
* RETURN: 0 if success
|
||||||
* error code if failure
|
* error code if failure
|
||||||
*
|
*
|
||||||
* NOTE: callers must have locked the page to be pushed. On
|
* NOTE: callers must have locked the page to be pushed.
|
||||||
* exit, the page (and all other pages in the kluster) must be
|
|
||||||
* unlocked.
|
|
||||||
*/
|
*/
|
||||||
/* ARGSUSED */
|
/* ARGSUSED */
|
||||||
static int
|
static int
|
||||||
zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp,
|
zfs_putapage(struct inode *ip, struct page *pp, u_offset_t off, size_t len)
|
||||||
size_t *lenp, int flags, cred_t *cr)
|
|
||||||
{
|
{
|
||||||
znode_t *zp = VTOZ(vp);
|
znode_t *zp = ITOZ(ip);
|
||||||
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
|
zfs_sb_t *zsb = ITOZSB(ip);
|
||||||
dmu_tx_t *tx;
|
dmu_tx_t *tx;
|
||||||
u_offset_t off, koff;
|
caddr_t va;
|
||||||
size_t len, klen;
|
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
off = pp->p_offset;
|
|
||||||
len = PAGESIZE;
|
|
||||||
/*
|
|
||||||
* If our blocksize is bigger than the page size, try to kluster
|
|
||||||
* multiple pages so that we write a full block (thus avoiding
|
|
||||||
* a read-modify-write).
|
|
||||||
*/
|
|
||||||
if (off < zp->z_size && zp->z_blksz > PAGESIZE) {
|
|
||||||
klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE);
|
|
||||||
koff = ISP2(klen) ? P2ALIGN(off, (u_offset_t)klen) : 0;
|
|
||||||
ASSERT(koff <= zp->z_size);
|
|
||||||
if (koff + klen > zp->z_size)
|
|
||||||
klen = P2ROUNDUP(zp->z_size - koff, (uint64_t)PAGESIZE);
|
|
||||||
pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags);
|
|
||||||
}
|
|
||||||
ASSERT3U(btop(len), ==, btopr(len));
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Can't push pages past end-of-file.
|
* Can't push pages past end-of-file.
|
||||||
*/
|
*/
|
||||||
|
@ -3780,24 +3758,16 @@ zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp,
|
||||||
/* ignore all pages */
|
/* ignore all pages */
|
||||||
err = 0;
|
err = 0;
|
||||||
goto out;
|
goto out;
|
||||||
} else if (off + len > zp->z_size) {
|
} else if (off + len > zp->z_size)
|
||||||
int npages = btopr(zp->z_size - off);
|
|
||||||
page_t *trunc;
|
|
||||||
|
|
||||||
page_list_break(&pp, &trunc, npages);
|
|
||||||
/* ignore pages past end of file */
|
|
||||||
if (trunc)
|
|
||||||
pvn_write_done(trunc, flags);
|
|
||||||
len = zp->z_size - off;
|
len = zp->z_size - off;
|
||||||
}
|
|
||||||
|
|
||||||
if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) ||
|
if (zfs_owner_overquota(zsb, zp, B_FALSE) ||
|
||||||
zfs_owner_overquota(zfsvfs, zp, B_TRUE)) {
|
zfs_owner_overquota(zsb, zp, B_TRUE)) {
|
||||||
err = EDQUOT;
|
err = EDQUOT;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
top:
|
top:
|
||||||
tx = dmu_tx_create(zfsvfs->z_os);
|
tx = dmu_tx_create(zsb->z_os);
|
||||||
dmu_tx_hold_write(tx, zp->z_id, off, len);
|
dmu_tx_hold_write(tx, zp->z_id, off, len);
|
||||||
|
|
||||||
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
|
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
|
||||||
|
@ -3813,52 +3783,38 @@ top:
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (zp->z_blksz <= PAGESIZE) {
|
va = kmap(pp);
|
||||||
caddr_t va = zfs_map_page(pp, S_READ);
|
|
||||||
ASSERT3U(len, <=, PAGESIZE);
|
ASSERT3U(len, <=, PAGESIZE);
|
||||||
dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx);
|
dmu_write(zsb->z_os, zp->z_id, off, len, va, tx);
|
||||||
zfs_unmap_page(pp, va);
|
kunmap(pp);
|
||||||
} else {
|
|
||||||
err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (err == 0) {
|
if (err == 0) {
|
||||||
uint64_t mtime[2], ctime[2];
|
uint64_t mtime[2], ctime[2];
|
||||||
sa_bulk_attr_t bulk[3];
|
sa_bulk_attr_t bulk[3];
|
||||||
int count = 0;
|
int count = 0;
|
||||||
|
|
||||||
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
|
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL,
|
||||||
&mtime, 16);
|
&mtime, 16);
|
||||||
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
|
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL,
|
||||||
&ctime, 16);
|
&ctime, 16);
|
||||||
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
|
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
|
||||||
&zp->z_pflags, 8);
|
&zp->z_pflags, 8);
|
||||||
zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
|
zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
|
||||||
B_TRUE);
|
B_TRUE);
|
||||||
zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0);
|
zfs_log_write(zsb->z_log, tx, TX_WRITE, zp, off, len, 0);
|
||||||
}
|
}
|
||||||
dmu_tx_commit(tx);
|
dmu_tx_commit(tx);
|
||||||
|
|
||||||
out:
|
out:
|
||||||
pvn_write_done(pp, (err ? B_ERROR : 0) | flags);
|
|
||||||
if (offp)
|
|
||||||
*offp = off;
|
|
||||||
if (lenp)
|
|
||||||
*lenp = len;
|
|
||||||
|
|
||||||
return (err);
|
return (err);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copy the portion of the file indicated from pages into the file.
|
* Copy the portion of the file indicated from page into the file.
|
||||||
* The pages are stored in a page list attached to the files vnode.
|
|
||||||
*
|
*
|
||||||
* IN: vp - vnode of file to push page data to.
|
* IN: ip - inode of file to push page data to.
|
||||||
* off - position in file to put data.
|
* wbc - Unused parameter
|
||||||
* len - amount of data to write.
|
* data - pointer to address_space
|
||||||
* flags - flags to control the operation.
|
|
||||||
* cr - credentials of caller.
|
|
||||||
* ct - caller context.
|
|
||||||
*
|
*
|
||||||
* RETURN: 0 if success
|
* RETURN: 0 if success
|
||||||
* error code if failure
|
* error code if failure
|
||||||
|
@ -3867,87 +3823,45 @@ out:
|
||||||
* vp - ctime|mtime updated
|
* vp - ctime|mtime updated
|
||||||
*/
|
*/
|
||||||
/*ARGSUSED*/
|
/*ARGSUSED*/
|
||||||
static int
|
int
|
||||||
zfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr)
|
zfs_putpage(struct page *page, struct writeback_control *wbc, void *data)
|
||||||
{
|
{
|
||||||
znode_t *zp = VTOZ(vp);
|
struct address_space *mapping = data;
|
||||||
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
|
struct inode *ip = mapping->host;
|
||||||
page_t *pp;
|
znode_t *zp = ITOZ(ip);
|
||||||
size_t io_len;
|
zfs_sb_t *zsb = ITOZSB(ip);
|
||||||
u_offset_t io_off;
|
|
||||||
uint_t blksz;
|
|
||||||
rl_t *rl;
|
rl_t *rl;
|
||||||
int error = 0;
|
u_offset_t io_off;
|
||||||
|
size_t io_len;
|
||||||
|
size_t len;
|
||||||
|
int error;
|
||||||
|
|
||||||
ZFS_ENTER(zfsvfs);
|
io_off = page_offset(page);
|
||||||
|
io_len = PAGESIZE;
|
||||||
|
|
||||||
|
ZFS_ENTER(zsb);
|
||||||
ZFS_VERIFY_ZP(zp);
|
ZFS_VERIFY_ZP(zp);
|
||||||
|
|
||||||
/*
|
|
||||||
* Align this request to the file block size in case we kluster.
|
|
||||||
* XXX - this can result in pretty aggresive locking, which can
|
|
||||||
* impact simultanious read/write access. One option might be
|
|
||||||
* to break up long requests (len == 0) into block-by-block
|
|
||||||
* operations to get narrower locking.
|
|
||||||
*/
|
|
||||||
blksz = zp->z_blksz;
|
|
||||||
if (ISP2(blksz))
|
|
||||||
io_off = P2ALIGN_TYPED(off, blksz, u_offset_t);
|
|
||||||
else
|
|
||||||
io_off = 0;
|
|
||||||
if (len > 0 && ISP2(blksz))
|
|
||||||
io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t);
|
|
||||||
else
|
|
||||||
io_len = 0;
|
|
||||||
|
|
||||||
if (io_len == 0) {
|
|
||||||
/*
|
|
||||||
* Search the entire vp list for pages >= io_off.
|
|
||||||
*/
|
|
||||||
rl = zfs_range_lock(zp, io_off, UINT64_MAX, RL_WRITER);
|
|
||||||
error = pvn_vplist_dirty(vp, io_off, zfs_putapage, flags, cr);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER);
|
rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER);
|
||||||
|
|
||||||
if (off > zp->z_size) {
|
if (io_off > zp->z_size) {
|
||||||
/* past end of file */
|
/* past end of file */
|
||||||
zfs_range_unlock(rl);
|
zfs_range_unlock(rl);
|
||||||
ZFS_EXIT(zfsvfs);
|
ZFS_EXIT(zsb);
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
len = MIN(io_len, P2ROUNDUP(zp->z_size, PAGESIZE) - io_off);
|
len = MIN(io_len, P2ROUNDUP(zp->z_size, PAGESIZE) - io_off);
|
||||||
|
|
||||||
for (off = io_off; io_off < off + len; io_off += io_len) {
|
error = zfs_putapage(ip, page, io_off, len);
|
||||||
if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
|
|
||||||
pp = page_lookup(vp, io_off,
|
|
||||||
(flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED);
|
|
||||||
} else {
|
|
||||||
pp = page_lookup_nowait(vp, io_off,
|
|
||||||
(flags & B_FREE) ? SE_EXCL : SE_SHARED);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pp != NULL && pvn_getdirty(pp, flags)) {
|
|
||||||
int err;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Found a dirty page to push
|
|
||||||
*/
|
|
||||||
err = zfs_putapage(vp, pp, &io_off, &io_len, flags, cr);
|
|
||||||
if (err)
|
|
||||||
error = err;
|
|
||||||
} else {
|
|
||||||
io_len = PAGESIZE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
out:
|
|
||||||
zfs_range_unlock(rl);
|
zfs_range_unlock(rl);
|
||||||
if ((flags & B_ASYNC) == 0 || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
|
|
||||||
zil_commit(zfsvfs->z_log, zp->z_id);
|
if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS)
|
||||||
ZFS_EXIT(zfsvfs);
|
zil_commit(zsb->z_log, zp->z_id);
|
||||||
|
ZFS_EXIT(zsb);
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
#endif /* HAVE_MMAP */
|
EXPORT_SYMBOL(zfs_putpage);
|
||||||
|
|
||||||
/*ARGSUSED*/
|
/*ARGSUSED*/
|
||||||
void
|
void
|
||||||
|
@ -4039,102 +3953,62 @@ zfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset,
|
||||||
ZFS_EXIT(zfsvfs);
|
ZFS_EXIT(zfsvfs);
|
||||||
return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
|
return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
|
||||||
}
|
}
|
||||||
|
#endif /* HAVE_MMAP */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we can't find a page in the cache, we will create a new page
|
* Fill pages with data from the disk.
|
||||||
* and fill it with file data. For efficiency, we may try to fill
|
|
||||||
* multiple pages at once (klustering) to fill up the supplied page
|
|
||||||
* list. Note that the pages to be filled are held with an exclusive
|
|
||||||
* lock to prevent access by other threads while they are being filled.
|
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg,
|
zfs_fillpage(struct inode *ip, struct page *pl[], int nr_pages)
|
||||||
caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw)
|
|
||||||
{
|
{
|
||||||
znode_t *zp = VTOZ(vp);
|
znode_t *zp = ITOZ(ip);
|
||||||
page_t *pp, *cur_pp;
|
zfs_sb_t *zsb = ITOZSB(ip);
|
||||||
objset_t *os = zp->z_zfsvfs->z_os;
|
objset_t *os;
|
||||||
|
struct page *cur_pp;
|
||||||
u_offset_t io_off, total;
|
u_offset_t io_off, total;
|
||||||
size_t io_len;
|
size_t io_len;
|
||||||
|
loff_t i_size;
|
||||||
|
unsigned page_idx;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) {
|
os = zsb->z_os;
|
||||||
/*
|
io_len = nr_pages << PAGE_CACHE_SHIFT;
|
||||||
* We only have a single page, don't bother klustering
|
i_size = i_size_read(ip);
|
||||||
*/
|
io_off = page_offset(pl[0]);
|
||||||
io_off = off;
|
|
||||||
io_len = PAGESIZE;
|
if (io_off + io_len > i_size)
|
||||||
pp = page_create_va(vp, io_off, io_len,
|
io_len = i_size - io_off;
|
||||||
PG_EXCL | PG_WAIT, seg, addr);
|
|
||||||
} else {
|
|
||||||
/*
|
|
||||||
* Try to find enough pages to fill the page list
|
|
||||||
*/
|
|
||||||
pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
|
|
||||||
&io_len, off, plsz, 0);
|
|
||||||
}
|
|
||||||
if (pp == NULL) {
|
|
||||||
/*
|
|
||||||
* The page already exists, nothing to do here.
|
|
||||||
*/
|
|
||||||
*pl = NULL;
|
|
||||||
return (0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Fill the pages in the kluster.
|
* Iterate over list of pages and read each page individually.
|
||||||
*/
|
*/
|
||||||
cur_pp = pp;
|
page_idx = 0;
|
||||||
|
cur_pp = pl[0];
|
||||||
for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) {
|
for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) {
|
||||||
caddr_t va;
|
caddr_t va;
|
||||||
|
|
||||||
ASSERT3U(io_off, ==, cur_pp->p_offset);
|
va = kmap(cur_pp);
|
||||||
va = zfs_map_page(cur_pp, S_WRITE);
|
|
||||||
err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va,
|
err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va,
|
||||||
DMU_READ_PREFETCH);
|
DMU_READ_PREFETCH);
|
||||||
zfs_unmap_page(cur_pp, va);
|
kunmap(cur_pp);
|
||||||
if (err) {
|
if (err) {
|
||||||
/* On error, toss the entire kluster */
|
|
||||||
pvn_read_done(pp, B_ERROR);
|
|
||||||
/* convert checksum errors into IO errors */
|
/* convert checksum errors into IO errors */
|
||||||
if (err == ECKSUM)
|
if (err == ECKSUM)
|
||||||
err = EIO;
|
err = EIO;
|
||||||
return (err);
|
return (err);
|
||||||
}
|
}
|
||||||
cur_pp = cur_pp->p_next;
|
cur_pp = pl[++page_idx];
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Fill in the page list array from the kluster starting
|
|
||||||
* from the desired offset `off'.
|
|
||||||
* NOTE: the page list will always be null terminated.
|
|
||||||
*/
|
|
||||||
pvn_plist_init(pp, pl, plsz, off, io_len, rw);
|
|
||||||
ASSERT(pl == NULL || (*pl)->p_offset == off);
|
|
||||||
|
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return pointers to the pages for the file region [off, off + len]
|
* Uses zfs_fillpage to read data from the file and fill the pages.
|
||||||
* in the pl array. If plsz is greater than len, this function may
|
|
||||||
* also return page pointers from after the specified region
|
|
||||||
* (i.e. the region [off, off + plsz]). These additional pages are
|
|
||||||
* only returned if they are already in the cache, or were created as
|
|
||||||
* part of a klustered read.
|
|
||||||
*
|
*
|
||||||
* IN: vp - vnode of file to get data from.
|
* IN: ip - inode of file to get data from.
|
||||||
* off - position in file to get data from.
|
* pl - list of pages to read
|
||||||
* len - amount of data to retrieve.
|
* nr_pages - number of pages to read
|
||||||
* plsz - length of provided page list.
|
|
||||||
* seg - segment to obtain pages for.
|
|
||||||
* addr - virtual address of fault.
|
|
||||||
* rw - mode of created pages.
|
|
||||||
* cr - credentials of caller.
|
|
||||||
* ct - caller context.
|
|
||||||
*
|
|
||||||
* OUT: protp - protection mode of created pages.
|
|
||||||
* pl - list of pages created.
|
|
||||||
*
|
*
|
||||||
* RETURN: 0 if success
|
* RETURN: 0 if success
|
||||||
* error code if failure
|
* error code if failure
|
||||||
|
@ -4143,80 +4017,30 @@ zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg,
|
||||||
* vp - atime updated
|
* vp - atime updated
|
||||||
*/
|
*/
|
||||||
/* ARGSUSED */
|
/* ARGSUSED */
|
||||||
static int
|
int
|
||||||
zfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
|
zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages)
|
||||||
page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
|
|
||||||
enum seg_rw rw, cred_t *cr)
|
|
||||||
{
|
{
|
||||||
znode_t *zp = VTOZ(vp);
|
znode_t *zp = ITOZ(ip);
|
||||||
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
|
zfs_sb_t *zsb = ITOZSB(ip);
|
||||||
page_t **pl0 = pl;
|
int err;
|
||||||
int err = 0;
|
|
||||||
|
|
||||||
/* we do our own caching, faultahead is unnecessary */
|
|
||||||
if (pl == NULL)
|
if (pl == NULL)
|
||||||
return (0);
|
return (0);
|
||||||
else if (len > plsz)
|
|
||||||
len = plsz;
|
|
||||||
else
|
|
||||||
len = P2ROUNDUP(len, PAGESIZE);
|
|
||||||
ASSERT(plsz >= len);
|
|
||||||
|
|
||||||
ZFS_ENTER(zfsvfs);
|
ZFS_ENTER(zsb);
|
||||||
ZFS_VERIFY_ZP(zp);
|
ZFS_VERIFY_ZP(zp);
|
||||||
|
|
||||||
if (protp)
|
err = zfs_fillpage(ip, pl, nr_pages);
|
||||||
*protp = PROT_ALL;
|
|
||||||
|
|
||||||
/*
|
if (!err)
|
||||||
* Loop through the requested range [off, off + len) looking
|
ZFS_ACCESSTIME_STAMP(zsb, zp);
|
||||||
* for pages. If we don't find a page, we will need to create
|
|
||||||
* a new page and fill it with data from the file.
|
|
||||||
*/
|
|
||||||
while (len > 0) {
|
|
||||||
if (*pl = page_lookup(vp, off, SE_SHARED))
|
|
||||||
*(pl+1) = NULL;
|
|
||||||
else if (err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw))
|
|
||||||
goto out;
|
|
||||||
while (*pl) {
|
|
||||||
ASSERT3U((*pl)->p_offset, ==, off);
|
|
||||||
off += PAGESIZE;
|
|
||||||
addr += PAGESIZE;
|
|
||||||
if (len > 0) {
|
|
||||||
ASSERT3U(len, >=, PAGESIZE);
|
|
||||||
len -= PAGESIZE;
|
|
||||||
}
|
|
||||||
ASSERT3U(plsz, >=, PAGESIZE);
|
|
||||||
plsz -= PAGESIZE;
|
|
||||||
pl++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
ZFS_EXIT(zsb);
|
||||||
* Fill out the page array with any pages already in the cache.
|
|
||||||
*/
|
|
||||||
while (plsz > 0 &&
|
|
||||||
(*pl++ = page_lookup_nowait(vp, off, SE_SHARED))) {
|
|
||||||
off += PAGESIZE;
|
|
||||||
plsz -= PAGESIZE;
|
|
||||||
}
|
|
||||||
out:
|
|
||||||
if (err) {
|
|
||||||
/*
|
|
||||||
* Release any pages we have previously locked.
|
|
||||||
*/
|
|
||||||
while (pl > pl0)
|
|
||||||
page_unlock(*--pl);
|
|
||||||
} else {
|
|
||||||
ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
|
|
||||||
}
|
|
||||||
|
|
||||||
*pl = NULL;
|
|
||||||
|
|
||||||
ZFS_EXIT(zfsvfs);
|
|
||||||
return (err);
|
return (err);
|
||||||
}
|
}
|
||||||
|
EXPORT_SYMBOL(zfs_getpage);
|
||||||
|
|
||||||
|
#ifdef HAVE_MMAP
|
||||||
/*
|
/*
|
||||||
* Request a memory map for a section of a file. This code interacts
|
* Request a memory map for a section of a file. This code interacts
|
||||||
* with common code and the VM system as follows:
|
* with common code and the VM system as follows:
|
||||||
|
|
|
@ -254,6 +254,60 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct page **
|
||||||
|
pages_vector_from_list(struct list_head *pages, unsigned nr_pages)
|
||||||
|
{
|
||||||
|
struct page **pl;
|
||||||
|
struct page *t;
|
||||||
|
unsigned page_idx;
|
||||||
|
|
||||||
|
pl = kmalloc(sizeof(*pl) * nr_pages, GFP_NOFS);
|
||||||
|
if (!pl)
|
||||||
|
return ERR_PTR(-ENOMEM);
|
||||||
|
|
||||||
|
page_idx = 0;
|
||||||
|
list_for_each_entry_reverse(t, pages, lru) {
|
||||||
|
pl[page_idx] = t;
|
||||||
|
page_idx++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return pl;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
zpl_readpages(struct file *file, struct address_space *mapping,
|
||||||
|
struct list_head *pages, unsigned nr_pages)
|
||||||
|
{
|
||||||
|
struct inode *ip;
|
||||||
|
struct page **pl;
|
||||||
|
struct page *p, *n;
|
||||||
|
int error;
|
||||||
|
|
||||||
|
ip = mapping->host;
|
||||||
|
|
||||||
|
pl = pages_vector_from_list(pages, nr_pages);
|
||||||
|
if (IS_ERR(pl))
|
||||||
|
return PTR_ERR(pl);
|
||||||
|
|
||||||
|
error = -zfs_getpage(ip, pl, nr_pages);
|
||||||
|
if (error)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
list_for_each_entry_safe_reverse(p, n, pages, lru) {
|
||||||
|
|
||||||
|
list_del(&p->lru);
|
||||||
|
|
||||||
|
flush_dcache_page(p);
|
||||||
|
SetPageUptodate(p);
|
||||||
|
unlock_page(p);
|
||||||
|
page_cache_release(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
error:
|
||||||
|
kfree(pl);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Populate a page with data for the Linux page cache. This function is
|
* Populate a page with data for the Linux page cache. This function is
|
||||||
* only used to support mmap(2). There will be an identical copy of the
|
* only used to support mmap(2). There will be an identical copy of the
|
||||||
|
@ -267,33 +321,14 @@ static int
|
||||||
zpl_readpage(struct file *filp, struct page *pp)
|
zpl_readpage(struct file *filp, struct page *pp)
|
||||||
{
|
{
|
||||||
struct inode *ip;
|
struct inode *ip;
|
||||||
loff_t off, i_size;
|
struct page *pl[1];
|
||||||
size_t len, wrote;
|
|
||||||
cred_t *cr = CRED();
|
|
||||||
void *pb;
|
|
||||||
int error = 0;
|
int error = 0;
|
||||||
|
|
||||||
ASSERT(PageLocked(pp));
|
ASSERT(PageLocked(pp));
|
||||||
ip = pp->mapping->host;
|
ip = pp->mapping->host;
|
||||||
off = page_offset(pp);
|
pl[0] = pp;
|
||||||
i_size = i_size_read(ip);
|
|
||||||
ASSERT3S(off, <, i_size);
|
|
||||||
|
|
||||||
crhold(cr);
|
error = -zfs_getpage(ip, pl, 1);
|
||||||
len = MIN(PAGE_CACHE_SIZE, i_size - off);
|
|
||||||
|
|
||||||
pb = kmap(pp);
|
|
||||||
|
|
||||||
/* O_DIRECT is passed to bypass the page cache and avoid deadlock. */
|
|
||||||
wrote = zpl_read_common(ip, pb, len, off, UIO_SYSSPACE, O_DIRECT, cr);
|
|
||||||
if (wrote != len)
|
|
||||||
error = -EIO;
|
|
||||||
|
|
||||||
if (!error && (len < PAGE_CACHE_SIZE))
|
|
||||||
memset(pb + len, 0, PAGE_CACHE_SIZE - len);
|
|
||||||
|
|
||||||
kunmap(pp);
|
|
||||||
crfree(cr);
|
|
||||||
|
|
||||||
if (error) {
|
if (error) {
|
||||||
SetPageError(pp);
|
SetPageError(pp);
|
||||||
|
@ -305,47 +340,15 @@ zpl_readpage(struct file *filp, struct page *pp)
|
||||||
}
|
}
|
||||||
|
|
||||||
unlock_page(pp);
|
unlock_page(pp);
|
||||||
|
return error;
|
||||||
return (error);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
int
|
||||||
* Write out dirty pages to the ARC, this function is only required to
|
zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
|
||||||
* support mmap(2). Mapped pages may be dirtied by memory operations
|
|
||||||
* which never call .write(). These dirty pages are kept in sync with
|
|
||||||
* the ARC buffers via this hook.
|
|
||||||
*
|
|
||||||
* Currently this function relies on zpl_write_common() and the O_DIRECT
|
|
||||||
* flag to push out the page. This works but the more correct way is
|
|
||||||
* to update zfs_putapage() to be Linux friendly and use that interface.
|
|
||||||
*/
|
|
||||||
static int
|
|
||||||
zpl_writepage(struct page *pp, struct writeback_control *wbc)
|
|
||||||
{
|
{
|
||||||
struct inode *ip;
|
int error;
|
||||||
loff_t off, i_size;
|
|
||||||
size_t len, read;
|
|
||||||
cred_t *cr = CRED();
|
|
||||||
void *pb;
|
|
||||||
int error = 0;
|
|
||||||
|
|
||||||
ASSERT(PageLocked(pp));
|
error = -zfs_putpage(pp, wbc, data);
|
||||||
ip = pp->mapping->host;
|
|
||||||
off = page_offset(pp);
|
|
||||||
i_size = i_size_read(ip);
|
|
||||||
|
|
||||||
crhold(cr);
|
|
||||||
len = MIN(PAGE_CACHE_SIZE, i_size - off);
|
|
||||||
|
|
||||||
pb = kmap(pp);
|
|
||||||
|
|
||||||
/* O_DIRECT is passed to bypass the page cache and avoid deadlock. */
|
|
||||||
read = zpl_write_common(ip, pb, len, off, UIO_SYSSPACE, O_DIRECT, cr);
|
|
||||||
if (read != len)
|
|
||||||
error = -EIO;
|
|
||||||
|
|
||||||
kunmap(pp);
|
|
||||||
crfree(cr);
|
|
||||||
|
|
||||||
if (error) {
|
if (error) {
|
||||||
SetPageError(pp);
|
SetPageError(pp);
|
||||||
|
@ -353,16 +356,36 @@ zpl_writepage(struct page *pp, struct writeback_control *wbc)
|
||||||
} else {
|
} else {
|
||||||
ClearPageError(pp);
|
ClearPageError(pp);
|
||||||
SetPageUptodate(pp);
|
SetPageUptodate(pp);
|
||||||
|
flush_dcache_page(pp);
|
||||||
}
|
}
|
||||||
|
|
||||||
unlock_page(pp);
|
unlock_page(pp);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
return (error);
|
static int
|
||||||
|
zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
|
||||||
|
{
|
||||||
|
return write_cache_pages(mapping, wbc, zpl_putpage, mapping);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Write out dirty pages to the ARC, this function is only required to
|
||||||
|
* support mmap(2). Mapped pages may be dirtied by memory operations
|
||||||
|
* which never call .write(). These dirty pages are kept in sync with
|
||||||
|
* the ARC buffers via this hook.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
zpl_writepage(struct page *pp, struct writeback_control *wbc)
|
||||||
|
{
|
||||||
|
return zpl_putpage(pp, wbc, pp->mapping);
|
||||||
}
|
}
|
||||||
|
|
||||||
const struct address_space_operations zpl_address_space_operations = {
|
const struct address_space_operations zpl_address_space_operations = {
|
||||||
|
.readpages = zpl_readpages,
|
||||||
.readpage = zpl_readpage,
|
.readpage = zpl_readpage,
|
||||||
.writepage = zpl_writepage,
|
.writepage = zpl_writepage,
|
||||||
|
.writepages = zpl_writepages,
|
||||||
};
|
};
|
||||||
|
|
||||||
const struct file_operations zpl_file_operations = {
|
const struct file_operations zpl_file_operations = {
|
||||||
|
|
Loading…
Reference in New Issue