2017-10-04 16:33:43 +00:00
|
|
|
/*
|
2010-05-17 22:18:00 +00:00
|
|
|
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
|
|
|
|
* Copyright (C) 2007 The Regents of the University of California.
|
2015-07-30 14:19:01 +00:00
|
|
|
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
|
2010-05-17 22:18:00 +00:00
|
|
|
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
|
|
|
|
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
|
|
|
|
* UCRL-CODE-235197
|
|
|
|
*
|
|
|
|
* This file is part of the SPL, Solaris Porting Layer.
|
|
|
|
*
|
|
|
|
* The SPL is free software; you can redistribute it and/or modify it
|
|
|
|
* under the terms of the GNU General Public License as published by the
|
|
|
|
* Free Software Foundation; either version 2 of the License, or (at your
|
|
|
|
* option) any later version.
|
|
|
|
*
|
|
|
|
* The SPL is distributed in the hope that it will be useful, but WITHOUT
|
|
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
|
|
* for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License along
|
|
|
|
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
|
2017-10-04 16:33:43 +00:00
|
|
|
*/
|
2010-05-17 22:18:00 +00:00
|
|
|
|
2008-03-10 17:05:34 +00:00
|
|
|
#ifndef _SPL_UIO_H
|
2018-02-07 19:49:38 +00:00
|
|
|
#define _SPL_UIO_H
|
2008-03-10 17:05:34 +00:00
|
|
|
|
2019-11-27 19:11:03 +00:00
|
|
|
#include <sys/debug.h>
|
2008-03-13 19:49:09 +00:00
|
|
|
#include <linux/uio.h>
|
2015-07-30 14:19:01 +00:00
|
|
|
#include <linux/blkdev.h>
|
2019-11-27 19:11:03 +00:00
|
|
|
#include <linux/blkdev_compat.h>
|
|
|
|
#include <linux/mm.h>
|
|
|
|
#include <linux/bio.h>
|
2008-03-13 19:49:09 +00:00
|
|
|
#include <asm/uaccess.h>
|
|
|
|
#include <sys/types.h>
|
|
|
|
|
2022-01-14 10:07:33 +00:00
|
|
|
#if defined(HAVE_VFS_IOV_ITER) && defined(HAVE_FAULT_IN_IOV_ITER_READABLE)
|
|
|
|
#define iov_iter_fault_in_readable(a, b) fault_in_iov_iter_readable(a, b)
|
|
|
|
#endif
|
|
|
|
|
2010-06-11 22:02:24 +00:00
|
|
|
/*
 * Alias the native Linux struct iovec to the iovec_t name used by the
 * platform-independent ZFS code.
 */
typedef struct iovec iovec_t;
|
|
|
|
|
2021-01-21 05:27:30 +00:00
|
|
|
/*
 * Direction of a uio data transfer.
 */
typedef enum zfs_uio_rw {
	UIO_READ = 0,	/* transfer data out to the caller's buffers */
	UIO_WRITE = 1,	/* consume data from the caller's buffers */
} zfs_uio_rw_t;
|
2008-03-10 17:05:34 +00:00
|
|
|
|
2021-01-21 05:27:30 +00:00
|
|
|
/*
 * Kind of memory backing the uio, i.e. how the union inside zfs_uio_t
 * must be interpreted.
 */
typedef enum zfs_uio_seg {
	UIO_USERSPACE = 0,	/* uio_iov points at user-space iovecs */
	UIO_SYSSPACE = 1,	/* uio_iov points at kernel-space iovecs */
	UIO_BVEC = 2,		/* uio_bvec points at block-layer bio_vecs */
#if defined(HAVE_VFS_IOV_ITER)
	UIO_ITER = 3,		/* uio_iter points at a kernel iov_iter */
#endif
} zfs_uio_seg_t;
|
2008-03-13 19:49:09 +00:00
|
|
|
|
2021-01-21 05:27:30 +00:00
|
|
|
/*
 * Linux implementation of the ZFS uio: describes an in-progress data
 * transfer as a scatter/gather list (iovec, bio_vec, or iov_iter —
 * selected by uio_segflg) plus offset/residual bookkeeping.
 */
typedef struct zfs_uio {
	union {
		const struct iovec	*uio_iov;	/* UIO_USERSPACE / UIO_SYSSPACE */
		const struct bio_vec	*uio_bvec;	/* UIO_BVEC */
#if defined(HAVE_VFS_IOV_ITER)
		struct iov_iter		*uio_iter;	/* UIO_ITER */
#endif
	};
	int		uio_iovcnt;	/* number of remaining segments */
	offset_t	uio_loffset;	/* current logical file offset */
	zfs_uio_seg_t	uio_segflg;	/* which union member is active */
	/*
	 * When B_TRUE, data movement should fail rather than fault pages
	 * in.  Added to break the mm_sem vs. txg-assign deadlock between
	 * zfs_write() and a concurrent mmap page fault (see the change
	 * history embedded above this field).
	 */
	boolean_t	uio_fault_disable;
	uint16_t	uio_fmode;	/* file mode flags */
	uint16_t	uio_extflg;	/* extended flags */
	ssize_t		uio_resid;	/* bytes remaining to transfer */
	size_t		uio_skip;	/* bytes to skip in the first segment */
	struct request	*rq;		/* blk-mq request; NULL for bio-based I/O */
} zfs_uio_t;
|
|
|
|
|
2022-06-09 14:10:38 +00:00
|
|
|
|
2021-01-21 05:27:30 +00:00
|
|
|
/*
 * Field accessors used by the platform-independent ZFS code; they mirror
 * the names the common code expects so it need not know the Linux layout.
 */
#define	zfs_uio_segflg(u)		(u)->uio_segflg
#define	zfs_uio_offset(u)		(u)->uio_loffset
#define	zfs_uio_resid(u)		(u)->uio_resid
#define	zfs_uio_iovcnt(u)		(u)->uio_iovcnt
#define	zfs_uio_iovlen(u, idx)		(u)->uio_iov[(idx)].iov_len
#define	zfs_uio_iovbase(u, idx)		(u)->uio_iov[(idx)].iov_base
#define	zfs_uio_fault_disable(u, set)	(u)->uio_fault_disable = set
/*
 * NOTE(review): always reports success — presumably RLIMIT_FSIZE is
 * enforced by the Linux VFS before ZFS is entered; confirm with callers.
 */
#define	zfs_uio_rlimit_fsize(z, u)	(0)
#define	zfs_uio_fault_move(p, n, rw, u)	zfs_uiomove((p), (n), (rw), (u))
|
2008-03-13 19:49:09 +00:00
|
|
|
|
2021-02-21 04:16:50 +00:00
|
|
|
/*
 * Pre-fault user pages backing the uio before data movement begins, so a
 * later copy does not take a page fault while a transaction is assigned
 * (the mm_sem deadlock described in this file's change history).
 */
extern int zfs_uio_prefaultpages(ssize_t, zfs_uio_t *);
|
|
|
|
|
2021-01-21 05:27:30 +00:00
|
|
|
/*
 * Reposition the uio's logical file offset.  The residual count and
 * segment state are left untouched.
 */
static inline void
zfs_uio_setoffset(zfs_uio_t *uio, offset_t off)
{
	uio->uio_loffset = off;
}
|
2020-06-14 17:09:55 +00:00
|
|
|
|
|
|
|
static inline void
|
dmu: Allow buffer fills to fail
When ZFS overwrites a whole block, it does not bother to read the
old content from disk. It is a good optimization, but if the buffer
fill fails due to page fault or something else, the buffer ends up
corrupted, neither keeping old content, nor getting the new one.
On FreeBSD this is additionally complicated by page faults being
blocked by VFS layer, always returning EFAULT on attempt to write
from mmap()'ed but not yet cached address range. Normally it is
not a big problem, since after original failure VFS will retry the
write after reading the required data. The problem becomes worse
in specific case when somebody tries to write into a file its own
mmap()'ed content from the same location. In that situation the
only copy of the data is getting corrupted on the page fault and
the following retries only fixate the status quo. Block cloning
makes this issue easier to reproduce, since it does not read the
old data, unlike traditional file copy, that may work by chance.
This patch provides the fill status to dmu_buf_fill_done(), that
in case of error can destroy the corrupted buffer as if no write
happened. One more complication in case of block cloning is that
if error is possible during fill, dmu_buf_will_fill() must read
the data via fall-back to dmu_buf_will_dirty(). It is required
to allow in case of error restoring the buffer to a state after
the cloning, not not before it, that would happen if we just call
dbuf_undirty().
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Rob Norris <robn@despairlabs.com>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Sponsored by: iXsystems, Inc.
Closes #15665
2023-12-15 17:51:41 +00:00
|
|
|
zfs_uio_advance(zfs_uio_t *uio, ssize_t size)
|
2020-06-14 17:09:55 +00:00
|
|
|
{
|
|
|
|
uio->uio_resid -= size;
|
|
|
|
uio->uio_loffset += size;
|
|
|
|
}
|
|
|
|
|
2020-12-18 16:48:26 +00:00
|
|
|
static inline void
|
2021-01-21 05:27:30 +00:00
|
|
|
zfs_uio_iovec_init(zfs_uio_t *uio, const struct iovec *iov,
|
|
|
|
unsigned long nr_segs, offset_t offset, zfs_uio_seg_t seg, ssize_t resid,
|
|
|
|
size_t skip)
|
2020-12-18 16:48:26 +00:00
|
|
|
{
|
|
|
|
ASSERT(seg == UIO_USERSPACE || seg == UIO_SYSSPACE);
|
|
|
|
|
|
|
|
uio->uio_iov = iov;
|
|
|
|
uio->uio_iovcnt = nr_segs;
|
|
|
|
uio->uio_loffset = offset;
|
|
|
|
uio->uio_segflg = seg;
|
|
|
|
uio->uio_fault_disable = B_FALSE;
|
|
|
|
uio->uio_fmode = 0;
|
|
|
|
uio->uio_extflg = 0;
|
|
|
|
uio->uio_resid = resid;
|
|
|
|
uio->uio_skip = skip;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
2022-06-09 14:10:38 +00:00
|
|
|
zfs_uio_bvec_init(zfs_uio_t *uio, struct bio *bio, struct request *rq)
|
2020-12-18 16:48:26 +00:00
|
|
|
{
|
2022-06-09 14:10:38 +00:00
|
|
|
/* Either bio or rq will be set, but not both */
|
|
|
|
ASSERT3P(uio, !=, bio);
|
|
|
|
|
|
|
|
if (bio) {
|
|
|
|
uio->uio_iovcnt = bio->bi_vcnt - BIO_BI_IDX(bio);
|
|
|
|
uio->uio_bvec = &bio->bi_io_vec[BIO_BI_IDX(bio)];
|
|
|
|
} else {
|
|
|
|
uio->uio_bvec = NULL;
|
|
|
|
uio->uio_iovcnt = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
uio->uio_loffset = io_offset(bio, rq);
|
2020-12-18 16:48:26 +00:00
|
|
|
uio->uio_segflg = UIO_BVEC;
|
|
|
|
uio->uio_fault_disable = B_FALSE;
|
|
|
|
uio->uio_fmode = 0;
|
|
|
|
uio->uio_extflg = 0;
|
2022-06-09 14:10:38 +00:00
|
|
|
uio->uio_resid = io_size(bio, rq);
|
|
|
|
if (bio) {
|
|
|
|
uio->uio_skip = BIO_BI_SKIP(bio);
|
|
|
|
} else {
|
|
|
|
uio->uio_skip = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
uio->rq = rq;
|
2020-12-18 16:48:26 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#if defined(HAVE_VFS_IOV_ITER)
/*
 * Initialize a uio backed by a kernel iov_iter (UIO_ITER segment kind).
 * offset is the starting logical file offset, resid the total byte
 * count, and skip the bytes already consumed in the first segment.
 */
static inline void
zfs_uio_iov_iter_init(zfs_uio_t *uio, struct iov_iter *iter, offset_t offset,
    ssize_t resid, size_t skip)
{
	uio->uio_iter = iter;
	uio->uio_iovcnt = iter->nr_segs;
	uio->uio_loffset = offset;
	uio->uio_segflg = UIO_ITER;
	uio->uio_fault_disable = B_FALSE;
	uio->uio_fmode = 0;
	uio->uio_extflg = 0;
	uio->uio_resid = resid;
	uio->uio_skip = skip;
	/*
	 * Fix: clear uio->rq (only zfs_uio_bvec_init() sets it) so code
	 * that checks for the blk-mq path never reads an indeterminate
	 * field from a stack-allocated uio.
	 */
	uio->rq = NULL;
}
#endif
|
|
|
|
|
2023-07-23 05:34:29 +00:00
|
|
|
/*
 * Newer kernels hide the iov_iter's iovec array behind the iter_iov()
 * accessor; older kernels expose it directly as the ->iov member.
 */
#if defined(HAVE_ITER_IOV)
#define	zfs_uio_iter_iov(iter)		iter_iov((iter))
#else
#define	zfs_uio_iter_iov(iter)		(iter)->iov
#endif

/*
 * Likewise, the iterator kind is read via iov_iter_type() where
 * available, and via the raw ->type member on older kernels.
 */
#if defined(HAVE_IOV_ITER_TYPE)
#define	zfs_uio_iov_iter_type(iter)	iov_iter_type((iter))
#else
#define	zfs_uio_iov_iter_type(iter)	(iter)->type
#endif
|
|
|
|
|
2008-03-10 17:05:34 +00:00
|
|
|
#endif /* _SPL_UIO_H */
|