317 lines
7.3 KiB
C
317 lines
7.3 KiB
C
/*
|
|
* CDDL HEADER START
|
|
*
|
|
* The contents of this file are subject to the terms of the
|
|
* Common Development and Distribution License (the "License").
|
|
* You may not use this file except in compliance with the License.
|
|
*
|
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
* or https://opensource.org/licenses/CDDL-1.0.
|
|
* See the License for the specific language governing permissions
|
|
* and limitations under the License.
|
|
*
|
|
* When distributing Covered Code, include this CDDL HEADER in each
|
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
* If applicable, add the following below this CDDL HEADER, with the
|
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
*
|
|
* CDDL HEADER END
|
|
*/
|
|
/*
|
|
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
|
* Use is subject to license terms.
|
|
*/
|
|
|
|
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
|
|
/* All Rights Reserved */
|
|
|
|
/*
|
|
* University Copyright- Copyright (c) 1982, 1986, 1988
|
|
* The Regents of the University of California
|
|
* All Rights Reserved
|
|
*
|
|
* University Acknowledgment- Portions of this document are derived from
|
|
* software developed by the University of California, Berkeley, and its
|
|
* contributors.
|
|
*/
|
|
|
|
/*
|
|
* $FreeBSD$
|
|
*/
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/uio_impl.h>
|
|
#include <sys/vnode.h>
|
|
#include <sys/zfs_znode.h>
|
|
#include <sys/byteorder.h>
|
|
#include <sys/lock.h>
|
|
#include <sys/vm.h>
|
|
#include <vm/vm_map.h>
|
|
|
|
static void
|
|
zfs_freeuio(struct uio *uio)
|
|
{
|
|
#if __FreeBSD_version > 1500013
|
|
freeuio(uio);
|
|
#else
|
|
free(uio, M_IOV);
|
|
#endif
|
|
}
|
|
|
|
int
|
|
zfs_uiomove(void *cp, size_t n, zfs_uio_rw_t dir, zfs_uio_t *uio)
|
|
{
|
|
ASSERT3U(zfs_uio_rw(uio), ==, dir);
|
|
return (uiomove(cp, (int)n, GET_UIO_STRUCT(uio)));
|
|
}
|
|
|
|
/*
|
|
* same as zfs_uiomove() but doesn't modify uio structure.
|
|
* return in cbytes how many bytes were copied.
|
|
*/
|
|
int
|
|
zfs_uiocopy(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, size_t *cbytes)
|
|
{
|
|
struct iovec small_iovec[1];
|
|
struct uio small_uio_clone;
|
|
struct uio *uio_clone;
|
|
int error;
|
|
|
|
ASSERT3U(zfs_uio_rw(uio), ==, rw);
|
|
if (zfs_uio_iovcnt(uio) == 1) {
|
|
small_uio_clone = *(GET_UIO_STRUCT(uio));
|
|
small_iovec[0] = *(GET_UIO_STRUCT(uio)->uio_iov);
|
|
small_uio_clone.uio_iov = small_iovec;
|
|
uio_clone = &small_uio_clone;
|
|
} else {
|
|
uio_clone = cloneuio(GET_UIO_STRUCT(uio));
|
|
}
|
|
|
|
error = vn_io_fault_uiomove(p, n, uio_clone);
|
|
*cbytes = zfs_uio_resid(uio) - uio_clone->uio_resid;
|
|
if (uio_clone != &small_uio_clone)
|
|
zfs_freeuio(uio_clone);
|
|
return (error);
|
|
}
|
|
|
|
/*
|
|
* Drop the next n chars out of *uiop.
|
|
*/
|
|
void
|
|
zfs_uioskip(zfs_uio_t *uio, size_t n)
|
|
{
|
|
zfs_uio_seg_t segflg;
|
|
|
|
/* For the full compatibility with illumos. */
|
|
if (n > zfs_uio_resid(uio))
|
|
return;
|
|
|
|
segflg = zfs_uio_segflg(uio);
|
|
zfs_uio_segflg(uio) = UIO_NOCOPY;
|
|
zfs_uiomove(NULL, n, zfs_uio_rw(uio), uio);
|
|
zfs_uio_segflg(uio) = segflg;
|
|
}
|
|
|
|
int
|
|
zfs_uio_fault_move(void *p, size_t n, zfs_uio_rw_t dir, zfs_uio_t *uio)
|
|
{
|
|
ASSERT3U(zfs_uio_rw(uio), ==, dir);
|
|
return (vn_io_fault_uiomove(p, n, GET_UIO_STRUCT(uio)));
|
|
}
|
|
|
|
/*
|
|
* Check if the uio is page-aligned in memory.
|
|
*/
|
|
boolean_t
|
|
zfs_uio_page_aligned(zfs_uio_t *uio)
|
|
{
|
|
const struct iovec *iov = GET_UIO_STRUCT(uio)->uio_iov;
|
|
|
|
for (int i = zfs_uio_iovcnt(uio); i > 0; iov++, i--) {
|
|
uintptr_t addr = (uintptr_t)iov->iov_base;
|
|
size_t size = iov->iov_len;
|
|
if ((addr & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
|
|
return (B_FALSE);
|
|
}
|
|
}
|
|
|
|
return (B_TRUE);
|
|
}
|
|
|
|
static void
|
|
zfs_uio_set_pages_to_stable(zfs_uio_t *uio)
|
|
{
|
|
ASSERT3P(uio->uio_dio.pages, !=, NULL);
|
|
ASSERT3S(uio->uio_dio.npages, >, 0);
|
|
|
|
for (int i = 0; i < uio->uio_dio.npages; i++) {
|
|
vm_page_t page = uio->uio_dio.pages[i];
|
|
ASSERT3P(page, !=, NULL);
|
|
|
|
MPASS(page == PHYS_TO_VM_PAGE(VM_PAGE_TO_PHYS(page)));
|
|
vm_page_busy_acquire(page, VM_ALLOC_SBUSY);
|
|
pmap_remove_write(page);
|
|
}
|
|
}
|
|
|
|
static void
|
|
zfs_uio_release_stable_pages(zfs_uio_t *uio)
|
|
{
|
|
ASSERT3P(uio->uio_dio.pages, !=, NULL);
|
|
for (int i = 0; i < uio->uio_dio.npages; i++) {
|
|
vm_page_t page = uio->uio_dio.pages[i];
|
|
|
|
ASSERT3P(page, !=, NULL);
|
|
vm_page_sunbusy(page);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* If the operation is marked as read, then we are stating the pages will be
|
|
* written to and must be given write access.
|
|
*/
|
|
static int
|
|
zfs_uio_hold_pages(unsigned long start, size_t len, int nr_pages,
|
|
zfs_uio_rw_t rw, vm_page_t *pages)
|
|
{
|
|
vm_map_t map;
|
|
vm_prot_t prot;
|
|
int count;
|
|
|
|
map = &curthread->td_proc->p_vmspace->vm_map;
|
|
ASSERT3S(len, >, 0);
|
|
|
|
prot = rw == UIO_READ ? (VM_PROT_READ | VM_PROT_WRITE) : VM_PROT_READ;
|
|
count = vm_fault_quick_hold_pages(map, start, len, prot, pages,
|
|
nr_pages);
|
|
|
|
return (count);
|
|
}
|
|
|
|
void
|
|
zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw)
|
|
{
|
|
ASSERT(uio->uio_extflg & UIO_DIRECT);
|
|
ASSERT3P(uio->uio_dio.pages, !=, NULL);
|
|
ASSERT(zfs_uio_rw(uio) == rw);
|
|
|
|
if (rw == UIO_WRITE)
|
|
zfs_uio_release_stable_pages(uio);
|
|
|
|
vm_page_unhold_pages(&uio->uio_dio.pages[0],
|
|
uio->uio_dio.npages);
|
|
|
|
kmem_free(uio->uio_dio.pages,
|
|
uio->uio_dio.npages * sizeof (vm_page_t));
|
|
}
|
|
|
|
static int
|
|
zfs_uio_get_user_pages(unsigned long start, int nr_pages,
|
|
size_t len, zfs_uio_rw_t rw, vm_page_t *pages)
|
|
{
|
|
int count;
|
|
|
|
count = zfs_uio_hold_pages(start, len, nr_pages, rw, pages);
|
|
|
|
if (count != nr_pages) {
|
|
if (count > 0)
|
|
vm_page_unhold_pages(pages, count);
|
|
return (count);
|
|
}
|
|
|
|
ASSERT3S(count, ==, nr_pages);
|
|
|
|
return (count);
|
|
}
|
|
|
|
static int
|
|
zfs_uio_iov_step(struct iovec v, zfs_uio_t *uio, int *numpages)
|
|
{
|
|
unsigned long addr = (unsigned long)(v.iov_base);
|
|
size_t len = v.iov_len;
|
|
int n = DIV_ROUND_UP(len, PAGE_SIZE);
|
|
|
|
int res = zfs_uio_get_user_pages(
|
|
P2ALIGN_TYPED(addr, PAGE_SIZE, unsigned long), n, len,
|
|
zfs_uio_rw(uio), &uio->uio_dio.pages[uio->uio_dio.npages]);
|
|
|
|
if (res != n)
|
|
return (SET_ERROR(EFAULT));
|
|
|
|
ASSERT3U(len, ==, res * PAGE_SIZE);
|
|
*numpages = res;
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
zfs_uio_get_dio_pages_impl(zfs_uio_t *uio)
|
|
{
|
|
const struct iovec *iovp = GET_UIO_STRUCT(uio)->uio_iov;
|
|
size_t len = zfs_uio_resid(uio);
|
|
|
|
for (int i = 0; i < zfs_uio_iovcnt(uio); i++) {
|
|
struct iovec iov;
|
|
int numpages = 0;
|
|
|
|
if (iovp->iov_len == 0) {
|
|
iovp++;
|
|
continue;
|
|
}
|
|
iov.iov_len = MIN(len, iovp->iov_len);
|
|
iov.iov_base = iovp->iov_base;
|
|
int error = zfs_uio_iov_step(iov, uio, &numpages);
|
|
|
|
if (error)
|
|
return (error);
|
|
|
|
uio->uio_dio.npages += numpages;
|
|
len -= iov.iov_len;
|
|
iovp++;
|
|
}
|
|
|
|
ASSERT0(len);
|
|
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* This function holds user pages into the kernel. In the event that the user
|
|
* pages are not successfully held an error value is returned.
|
|
*
|
|
* On success, 0 is returned.
|
|
*/
|
|
int
|
|
zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw)
|
|
{
|
|
int error = 0;
|
|
int npages = DIV_ROUND_UP(zfs_uio_resid(uio), PAGE_SIZE);
|
|
size_t size = npages * sizeof (vm_page_t);
|
|
|
|
ASSERT(zfs_uio_rw(uio) == rw);
|
|
|
|
uio->uio_dio.pages = kmem_alloc(size, KM_SLEEP);
|
|
|
|
error = zfs_uio_get_dio_pages_impl(uio);
|
|
|
|
if (error) {
|
|
kmem_free(uio->uio_dio.pages, size);
|
|
return (error);
|
|
}
|
|
|
|
ASSERT3S(uio->uio_dio.npages, >, 0);
|
|
|
|
/*
|
|
* Since we will be writing the user pages we must make sure that
|
|
* they are stable. That way the contents of the pages can not change
|
|
* while we are doing: compression, checksumming, encryption, parity
|
|
* calculations or deduplication.
|
|
*/
|
|
if (zfs_uio_rw(uio) == UIO_WRITE)
|
|
zfs_uio_set_pages_to_stable(uio);
|
|
|
|
uio->uio_extflg |= UIO_DIRECT;
|
|
|
|
return (0);
|
|
}
|