Remove UIO_ZEROCOPY functions and structures

The original xuio zero-copy functionality has always been unused
on Linux and FreeBSD.  Remove this disabled code to avoid confusion
and improve readability.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Matt Macy <mmacy@FreeBSD.org>
Closes #11124
Authored by Matthew Macy on 2020-10-30 10:00:33 -07:00; committed by Tony Hutter
commit 6f59f6402d (parent 3fcf17e69d)
9 changed files with 0 additions and 454 deletions


@@ -58,7 +58,6 @@ SECTION_PATHS = {'arc': 'arcstats',
                  'dmu': 'dmu_tx',
                  'l2arc': 'arcstats',  # L2ARC stuff lives in arcstats
                  'vdev': 'vdev_cache_stats',
-                 'xuio': 'xuio_stats',
                  'zfetch': 'zfetchstats',
                  'zil': 'zil'}


@@ -43,27 +43,6 @@ typedef struct uio uio_t;
 typedef struct iovec iovec_t;
 typedef enum uio_seg uio_seg_t;
-typedef enum xuio_type {
-        UIOTYPE_ASYNCIO,
-        UIOTYPE_ZEROCOPY
-} xuio_type_t;
-typedef struct xuio {
-        uio_t xu_uio;
-        /* Extended uio fields */
-        enum xuio_type xu_type; /* What kind of uio structure? */
-        union {
-                struct {
-                        int xu_zc_rw;
-                        void *xu_zc_priv;
-                } xu_zc;
-        } xu_ext;
-} xuio_t;
-#define XUIO_XUZC_PRIV(xuio) xuio->xu_ext.xu_zc.xu_zc_priv
-#define XUIO_XUZC_RW(xuio) xuio->xu_ext.xu_zc.xu_zc_rw
 static __inline int
 zfs_uiomove(void *cp, size_t n, enum uio_rw dir, uio_t *uio)
 {


@@ -68,49 +68,6 @@ typedef struct uio {
         size_t uio_skip;
 } uio_t;
-typedef struct aio_req {
-        uio_t *aio_uio;
-        void *aio_private;
-} aio_req_t;
-typedef enum xuio_type {
-        UIOTYPE_ASYNCIO,
-        UIOTYPE_ZEROCOPY,
-} xuio_type_t;
-#define UIOA_IOV_MAX 16
-typedef struct uioa_page_s {
-        int uioa_pfncnt;
-        void **uioa_ppp;
-        caddr_t uioa_base;
-        size_t uioa_len;
-} uioa_page_t;
-typedef struct xuio {
-        uio_t xu_uio;
-        enum xuio_type xu_type;
-        union {
-                struct {
-                        uint32_t xu_a_state;
-                        ssize_t xu_a_mbytes;
-                        uioa_page_t *xu_a_lcur;
-                        void **xu_a_lppp;
-                        void *xu_a_hwst[4];
-                        uioa_page_t xu_a_locked[UIOA_IOV_MAX];
-                } xu_aio;
-                struct {
-                        int xu_zc_rw;
-                        void *xu_zc_priv;
-                } xu_zc;
-        } xu_ext;
-} xuio_t;
-#define XUIO_XUZC_PRIV(xuio) xuio->xu_ext.xu_zc.xu_zc_priv
-#define XUIO_XUZC_RW(xuio) xuio->xu_ext.xu_zc.xu_zc_rw
 #define uio_segflg(uio) (uio)->uio_segflg
 #define uio_offset(uio) (uio)->uio_loffset
 #define uio_resid(uio) (uio)->uio_resid


@@ -864,18 +864,6 @@ int dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset,
 int dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset,
     struct arc_buf *buf, dmu_tx_t *tx);
 #define dmu_assign_arcbuf dmu_assign_arcbuf_by_dbuf
-#ifdef HAVE_UIO_ZEROCOPY
-int dmu_xuio_init(struct xuio *uio, int niov);
-void dmu_xuio_fini(struct xuio *uio);
-int dmu_xuio_add(struct xuio *uio, struct arc_buf *abuf, offset_t off,
-    size_t n);
-int dmu_xuio_cnt(struct xuio *uio);
-struct arc_buf *dmu_xuio_arcbuf(struct xuio *uio, int i);
-void dmu_xuio_clear(struct xuio *uio, int i);
-#endif /* HAVE_UIO_ZEROCOPY */
-void xuio_stat_wbuf_copied(void);
-void xuio_stat_wbuf_nocopy(void);
 extern int zfs_prefetch_disable;
 extern int zfs_max_recordsize;

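The prototypes removed above were essentially the whole consumer-facing surface of the zero-copy path: dmu_xuio_init() sized an iovec array, dmu_xuio_add() pointed each entry at a loaned arc_buf, and dmu_xuio_arcbuf()/dmu_xuio_fini() handed the buffers back. As a rough userspace sketch of that bookkeeping (plain malloc'd buffers stand in for arc_bufs, and the xuio_demo names are invented for illustration only):

#include <stdio.h>
#include <stdlib.h>
#include <sys/uio.h>

/* Stand-in for the removed dmu_xuio_t: a cursor plus parallel iovec/buffer arrays. */
struct xuio_demo {
	int next;
	int cnt;
	struct iovec *iovp;
	char **bufs;
};

/* Mirrors dmu_xuio_add(): iov[i] spans 'n' bytes starting 'off' into the loaned buffer. */
static int
xuio_demo_add(struct xuio_demo *x, char *buf, size_t off, size_t n)
{
	int i = x->next++;

	if (i >= x->cnt)
		return (-1);
	x->iovp[i].iov_base = buf + off;
	x->iovp[i].iov_len = n;
	x->bufs[i] = buf;
	return (0);
}

int
main(void)
{
	struct xuio_demo x = { .next = 0, .cnt = 2 };
	char *a = malloc(4096), *b = malloc(4096);

	x.iovp = calloc(x.cnt, sizeof (struct iovec));
	x.bufs = calloc(x.cnt, sizeof (char *));

	/* A "preamble" entry covering the tail of the first buffer, then a full one. */
	xuio_demo_add(&x, a, 1024, 3072);
	xuio_demo_add(&x, b, 0, 4096);

	for (int i = 0; i < x.cnt; i++)
		printf("iov[%d]: len=%zu\n", i, x.iovp[i].iov_len);

	free(x.iovp); free(x.bufs); free(a); free(b);
	return (0);
}

The kernel version differed mainly in allocating the arrays with kmem_zalloc() and tracking the loans in the xuio_stats counters removed further down in this commit.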

@@ -237,13 +237,6 @@ extern "C" {
 struct objset;
 struct dmu_pool;
-typedef struct dmu_xuio {
-        int next;
-        int cnt;
-        struct arc_buf **bufs;
-        iovec_t *iovp;
-} dmu_xuio_t;
 typedef struct dmu_sendstatus {
         list_node_t dss_link;
         int dss_outfd;


@@ -75,45 +75,6 @@ typedef struct uio {
         ssize_t uio_resid; /* residual count */
 } uio_t;
-typedef enum xuio_type {
-        UIOTYPE_ASYNCIO,
-        UIOTYPE_ZEROCOPY,
-} xuio_type_t;
-#define UIOA_IOV_MAX 16
-typedef struct uioa_page_s { /* locked uio_iov state */
-        int uioa_pfncnt; /* count of pfn_t(s) in *uioa_ppp */
-        void **uioa_ppp; /* page_t or pfn_t array */
-        caddr_t uioa_base; /* address base */
-        size_t uioa_len; /* span length */
-} uioa_page_t;
-typedef struct xuio {
-        uio_t xu_uio; /* embedded UIO structure */
-        /* Extended uio fields */
-        enum xuio_type xu_type; /* uio type */
-        union {
-                struct {
-                        uint32_t xu_a_state; /* state of async i/o */
-                        ssize_t xu_a_mbytes; /* bytes moved */
-                        uioa_page_t *xu_a_lcur; /* uioa_locked[] pointer */
-                        void **xu_a_lppp; /* lcur->uioa_pppp[] pointer */
-                        void *xu_a_hwst[4]; /* opaque hardware state */
-                        uioa_page_t xu_a_locked[UIOA_IOV_MAX];
-                } xu_aio;
-                struct {
-                        int xu_zc_rw; /* read or write buffer */
-                        void *xu_zc_priv; /* fs specific */
-                } xu_zc;
-        } xu_ext;
-} xuio_t;
-#define XUIO_XUZC_PRIV(xuio) xuio->xu_ext.xu_zc.xu_zc_priv
-#define XUIO_XUZC_RW(xuio) xuio->xu_ext.xu_zc.xu_zc_rw
 #define uio_segflg(uio) (uio)->uio_segflg
 #define uio_offset(uio) (uio)->uio_loffset
 #define uio_resid(uio) (uio)->uio_resid


@@ -3978,164 +3978,6 @@ zfs_fid(struct inode *ip, fid_t *fidp)
         return (0);
 }
-#ifdef HAVE_UIO_ZEROCOPY
-/*
- * The smallest read we may consider to loan out an arcbuf.
- * This must be a power of 2.
- */
-int zcr_blksz_min = (1 << 10); /* 1K */
-/*
- * If set to less than the file block size, allow loaning out of an
- * arcbuf for a partial block read. This must be a power of 2.
- */
-int zcr_blksz_max = (1 << 17); /* 128K */
-/*ARGSUSED*/
-static int
-zfs_reqzcbuf(struct inode *ip, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr)
-{
-        znode_t *zp = ITOZ(ip);
-        zfsvfs_t *zfsvfs = ITOZSB(ip);
-        int max_blksz = zfsvfs->z_max_blksz;
-        uio_t *uio = &xuio->xu_uio;
-        ssize_t size = uio->uio_resid;
-        offset_t offset = uio->uio_loffset;
-        int blksz;
-        int fullblk, i;
-        arc_buf_t *abuf;
-        ssize_t maxsize;
-        int preamble, postamble;
-        if (xuio->xu_type != UIOTYPE_ZEROCOPY)
-                return (SET_ERROR(EINVAL));
-        ZFS_ENTER(zfsvfs);
-        ZFS_VERIFY_ZP(zp);
-        switch (ioflag) {
-        case UIO_WRITE:
-                /*
-                 * Loan out an arc_buf for write if write size is bigger than
-                 * max_blksz, and the file's block size is also max_blksz.
-                 */
-                blksz = max_blksz;
-                if (size < blksz || zp->z_blksz != blksz) {
-                        ZFS_EXIT(zfsvfs);
-                        return (SET_ERROR(EINVAL));
-                }
-                /*
-                 * Caller requests buffers for write before knowing where the
-                 * write offset might be (e.g. NFS TCP write).
-                 */
-                if (offset == -1) {
-                        preamble = 0;
-                } else {
-                        preamble = P2PHASE(offset, blksz);
-                        if (preamble) {
-                                preamble = blksz - preamble;
-                                size -= preamble;
-                        }
-                }
-                postamble = P2PHASE(size, blksz);
-                size -= postamble;
-                fullblk = size / blksz;
-                (void) dmu_xuio_init(xuio,
-                    (preamble != 0) + fullblk + (postamble != 0));
-                /*
-                 * Have to fix iov base/len for partial buffers. They
-                 * currently represent full arc_buf's.
-                 */
-                if (preamble) {
-                        /* data begins in the middle of the arc_buf */
-                        abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
-                            blksz);
-                        ASSERT(abuf);
-                        (void) dmu_xuio_add(xuio, abuf,
-                            blksz - preamble, preamble);
-                }
-                for (i = 0; i < fullblk; i++) {
-                        abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
-                            blksz);
-                        ASSERT(abuf);
-                        (void) dmu_xuio_add(xuio, abuf, 0, blksz);
-                }
-                if (postamble) {
-                        /* data ends in the middle of the arc_buf */
-                        abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
-                            blksz);
-                        ASSERT(abuf);
-                        (void) dmu_xuio_add(xuio, abuf, 0, postamble);
-                }
-                break;
-        case UIO_READ:
-                /*
-                 * Loan out an arc_buf for read if the read size is larger than
-                 * the current file block size. Block alignment is not
-                 * considered. Partial arc_buf will be loaned out for read.
-                 */
-                blksz = zp->z_blksz;
-                if (blksz < zcr_blksz_min)
-                        blksz = zcr_blksz_min;
-                if (blksz > zcr_blksz_max)
-                        blksz = zcr_blksz_max;
-                /* avoid potential complexity of dealing with it */
-                if (blksz > max_blksz) {
-                        ZFS_EXIT(zfsvfs);
-                        return (SET_ERROR(EINVAL));
-                }
-                maxsize = zp->z_size - uio->uio_loffset;
-                if (size > maxsize)
-                        size = maxsize;
-                if (size < blksz) {
-                        ZFS_EXIT(zfsvfs);
-                        return (SET_ERROR(EINVAL));
-                }
-                break;
-        default:
-                ZFS_EXIT(zfsvfs);
-                return (SET_ERROR(EINVAL));
-        }
-        uio->uio_extflg = UIO_XUIO;
-        XUIO_XUZC_RW(xuio) = ioflag;
-        ZFS_EXIT(zfsvfs);
-        return (0);
-}
-/*ARGSUSED*/
-static int
-zfs_retzcbuf(struct inode *ip, xuio_t *xuio, cred_t *cr)
-{
-        int i;
-        arc_buf_t *abuf;
-        int ioflag = XUIO_XUZC_RW(xuio);
-        ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY);
-        i = dmu_xuio_cnt(xuio);
-        while (i-- > 0) {
-                abuf = dmu_xuio_arcbuf(xuio, i);
-                /*
-                 * if abuf == NULL, it must be a write buffer
-                 * that has been returned in zfs_write().
-                 */
-                if (abuf)
-                        dmu_return_arcbuf(abuf);
-                ASSERT(abuf || ioflag == UIO_WRITE);
-        }
-        dmu_xuio_fini(xuio);
-        return (0);
-}
-#endif /* HAVE_UIO_ZEROCOPY */
 #if defined(_KERNEL)
 EXPORT_SYMBOL(zfs_open);
 EXPORT_SYMBOL(zfs_close);

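For reference, the write-side sizing logic that the removed zfs_reqzcbuf() applied before loaning buffers reduces to a little block-alignment arithmetic. The following standalone sketch reproduces that math in userspace; P2PHASE is defined locally to match its ZFS meaning, and the 128K block size and the sample offset/length are assumed example values, not part of the commit:

#include <stdio.h>

/* P2PHASE(x, align): offset of x within an align-sized block (align is a power of 2). */
#define P2PHASE(x, align)	((x) & ((align) - 1))

int
main(void)
{
	long long offset = 100 * 1024;	/* write offset requested by the caller */
	long long size = 300 * 1024;	/* write length */
	int blksz = 128 * 1024;		/* file block size (max_blksz) */

	/* Bytes needed to reach the next block boundary, as in the removed UIO_WRITE case. */
	int preamble = P2PHASE(offset, blksz);
	if (preamble != 0) {
		preamble = blksz - preamble;
		size -= preamble;
	}

	/* Bytes left over past the last full block. */
	int postamble = P2PHASE(size, blksz);
	size -= postamble;

	long long fullblk = size / blksz;
	long long nbufs = (preamble != 0) + fullblk + (postamble != 0);

	printf("preamble=%d fullblk=%lld postamble=%d -> %lld loaned buffers\n",
	    preamble, fullblk, postamble, nbufs);
	return (0);
}

With these sample values the result is one preamble buffer, two full blocks, and one postamble buffer: the four arc_bufs that dmu_xuio_init() would have been asked to track.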

@@ -2617,11 +2617,9 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
                 (void) dbuf_dirty(db, tx);
                 bcopy(buf->b_data, db->db.db_data, db->db.db_size);
                 arc_buf_destroy(buf, db);
-                xuio_stat_wbuf_copied();
                 return;
         }
-        xuio_stat_wbuf_nocopy();
         if (db->db_state == DB_CACHED) {
                 dbuf_dirty_record_t *dr = list_head(&db->db_dirty_records);


@@ -1169,165 +1169,12 @@ dmu_redact(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
         dmu_buf_rele_array(dbp, numbufs, FTAG);
 }
-/*
- * DMU support for xuio
- */
-kstat_t *xuio_ksp = NULL;
-typedef struct xuio_stats {
-        /* loaned yet not returned arc_buf */
-        kstat_named_t xuiostat_onloan_rbuf;
-        kstat_named_t xuiostat_onloan_wbuf;
-        /* whether a copy is made when loaning out a read buffer */
-        kstat_named_t xuiostat_rbuf_copied;
-        kstat_named_t xuiostat_rbuf_nocopy;
-        /* whether a copy is made when assigning a write buffer */
-        kstat_named_t xuiostat_wbuf_copied;
-        kstat_named_t xuiostat_wbuf_nocopy;
-} xuio_stats_t;
-static xuio_stats_t xuio_stats = {
-        { "onloan_read_buf", KSTAT_DATA_UINT64 },
-        { "onloan_write_buf", KSTAT_DATA_UINT64 },
-        { "read_buf_copied", KSTAT_DATA_UINT64 },
-        { "read_buf_nocopy", KSTAT_DATA_UINT64 },
-        { "write_buf_copied", KSTAT_DATA_UINT64 },
-        { "write_buf_nocopy", KSTAT_DATA_UINT64 }
-};
-#define XUIOSTAT_INCR(stat, val) \
-        atomic_add_64(&xuio_stats.stat.value.ui64, (val))
-#define XUIOSTAT_BUMP(stat) XUIOSTAT_INCR(stat, 1)
-#ifdef HAVE_UIO_ZEROCOPY
-int
-dmu_xuio_init(xuio_t *xuio, int nblk)
-{
-        dmu_xuio_t *priv;
-        uio_t *uio = &xuio->xu_uio;
-        uio->uio_iovcnt = nblk;
-        uio->uio_iov = kmem_zalloc(nblk * sizeof (iovec_t), KM_SLEEP);
-        priv = kmem_zalloc(sizeof (dmu_xuio_t), KM_SLEEP);
-        priv->cnt = nblk;
-        priv->bufs = kmem_zalloc(nblk * sizeof (arc_buf_t *), KM_SLEEP);
-        priv->iovp = (iovec_t *)uio->uio_iov;
-        XUIO_XUZC_PRIV(xuio) = priv;
-        if (XUIO_XUZC_RW(xuio) == UIO_READ)
-                XUIOSTAT_INCR(xuiostat_onloan_rbuf, nblk);
-        else
-                XUIOSTAT_INCR(xuiostat_onloan_wbuf, nblk);
-        return (0);
-}
-void
-dmu_xuio_fini(xuio_t *xuio)
-{
-        dmu_xuio_t *priv = XUIO_XUZC_PRIV(xuio);
-        int nblk = priv->cnt;
-        kmem_free(priv->iovp, nblk * sizeof (iovec_t));
-        kmem_free(priv->bufs, nblk * sizeof (arc_buf_t *));
-        kmem_free(priv, sizeof (dmu_xuio_t));
-        if (XUIO_XUZC_RW(xuio) == UIO_READ)
-                XUIOSTAT_INCR(xuiostat_onloan_rbuf, -nblk);
-        else
-                XUIOSTAT_INCR(xuiostat_onloan_wbuf, -nblk);
-}
-/*
- * Initialize iov[priv->next] and priv->bufs[priv->next] with { off, n, abuf }
- * and increase priv->next by 1.
- */
-int
-dmu_xuio_add(xuio_t *xuio, arc_buf_t *abuf, offset_t off, size_t n)
-{
-        struct iovec *iov;
-        uio_t *uio = &xuio->xu_uio;
-        dmu_xuio_t *priv = XUIO_XUZC_PRIV(xuio);
-        int i = priv->next++;
-        ASSERT(i < priv->cnt);
-        ASSERT(off + n <= arc_buf_lsize(abuf));
-        iov = (iovec_t *)uio->uio_iov + i;
-        iov->iov_base = (char *)abuf->b_data + off;
-        iov->iov_len = n;
-        priv->bufs[i] = abuf;
-        return (0);
-}
-int
-dmu_xuio_cnt(xuio_t *xuio)
-{
-        dmu_xuio_t *priv = XUIO_XUZC_PRIV(xuio);
-        return (priv->cnt);
-}
-arc_buf_t *
-dmu_xuio_arcbuf(xuio_t *xuio, int i)
-{
-        dmu_xuio_t *priv = XUIO_XUZC_PRIV(xuio);
-        ASSERT(i < priv->cnt);
-        return (priv->bufs[i]);
-}
-void
-dmu_xuio_clear(xuio_t *xuio, int i)
-{
-        dmu_xuio_t *priv = XUIO_XUZC_PRIV(xuio);
-        ASSERT(i < priv->cnt);
-        priv->bufs[i] = NULL;
-}
-#endif /* HAVE_UIO_ZEROCOPY */
-static void
-xuio_stat_init(void)
-{
-        xuio_ksp = kstat_create("zfs", 0, "xuio_stats", "misc",
-            KSTAT_TYPE_NAMED, sizeof (xuio_stats) / sizeof (kstat_named_t),
-            KSTAT_FLAG_VIRTUAL);
-        if (xuio_ksp != NULL) {
-                xuio_ksp->ks_data = &xuio_stats;
-                kstat_install(xuio_ksp);
-        }
-}
-static void
-xuio_stat_fini(void)
-{
-        if (xuio_ksp != NULL) {
-                kstat_delete(xuio_ksp);
-                xuio_ksp = NULL;
-        }
-}
-void
-xuio_stat_wbuf_copied(void)
-{
-        XUIOSTAT_BUMP(xuiostat_wbuf_copied);
-}
-void
-xuio_stat_wbuf_nocopy(void)
-{
-        XUIOSTAT_BUMP(xuiostat_wbuf_nocopy);
-}
 #ifdef _KERNEL
 int
 dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size)
 {
         dmu_buf_t **dbp;
         int numbufs, i, err;
-#ifdef HAVE_UIO_ZEROCOPY
-        xuio_t *xuio = NULL;
-#endif
         /*
          * NB: we could do this block-at-a-time, but it's nice
@@ -1348,21 +1195,6 @@ dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size)
                 bufoff = uio_offset(uio) - db->db_offset;
                 tocpy = MIN(db->db_size - bufoff, size);
-#ifdef HAVE_UIO_ZEROCOPY
-                if (xuio) {
-                        dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)db;
-                        arc_buf_t *dbuf_abuf = dbi->db_buf;
-                        arc_buf_t *abuf = dbuf_loan_arcbuf(dbi);
-                        err = dmu_xuio_add(xuio, abuf, bufoff, tocpy);
-                        if (!err)
-                                uio_advance(uio, tocpy);
-                        if (abuf == dbuf_abuf)
-                                XUIOSTAT_BUMP(xuiostat_rbuf_nocopy);
-                        else
-                                XUIOSTAT_BUMP(xuiostat_rbuf_copied);
-                } else
-#endif
 #ifdef __FreeBSD__
                         err = vn_io_fault_uiomove((char *)db->db_data + bufoff,
                             tocpy, uio);
@@ -1602,7 +1434,6 @@ dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf,
                 dbuf_rele(db, FTAG);
                 dmu_write(os, object, offset, blksz, buf->b_data, tx);
                 dmu_return_arcbuf(buf);
-                XUIOSTAT_BUMP(xuiostat_wbuf_copied);
         }
         return (0);
@@ -2414,7 +2245,6 @@ dmu_init(void)
         abd_init();
         zfs_dbgmsg_init();
         sa_cache_init();
-        xuio_stat_init();
         dmu_objset_init();
         dnode_init();
         zfetch_init();
@@ -2434,7 +2264,6 @@ dmu_fini(void)
         dbuf_fini();
         dnode_fini();
         dmu_objset_fini();
-        xuio_stat_fini();
         sa_cache_fini();
         zfs_dbgmsg_fini();
         abd_fini();
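
Because xuio_stat_init() is gone, the kernel no longer creates the xuio_stats kstat, which is why the 'xuio' entry was dropped from arc_summary's SECTION_PATHS at the top of this commit. On a module that predates this change, the counters could still be read like any other named kstat. A minimal sketch, assuming the usual Linux path /proc/spl/kstat/zfs/xuio_stats and the standard two header lines followed by name/type/data columns:

#include <stdio.h>

/*
 * Dump the name and value columns of a named kstat.  Only useful on a
 * ZFS module that predates this commit; newer modules no longer create
 * the xuio_stats node at all.
 */
int
main(void)
{
	const char *path = "/proc/spl/kstat/zfs/xuio_stats";
	char name[64];
	unsigned int type;
	unsigned long long value;
	FILE *fp = fopen(path, "r");

	if (fp == NULL) {
		perror(path);
		return (1);
	}
	/* Skip the two header lines kstat files start with. */
	fscanf(fp, "%*[^\n]\n%*[^\n]\n");
	while (fscanf(fp, "%63s %u %llu", name, &type, &value) == 3)
		printf("%-20s %llu\n", name, value);
	fclose(fp);
	return (0);
}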