Updating based on PR Feedback (1)
Updating code based on PR code comments. I adjusted the following parts of code based on comments: 1. Revert dbuf_undirty() to original logic and got rid of unnecessary code change. 2. Cleanup in abd_impl.h 3. Cleanup in abd.h 4. Got rid of duplicate declaration of dmu_buf_hold_noread() in dmu.h. 5. Cleaned up comment for db_mtx in dmu_impl.h 6. Updated zfsprops man page to state correct ZFS version 7. Updated to correct cast in zfs_uio_page_aligned() calls to use uintptr_t. 8. Cleaned up comment in FreeBSD uio code. 9. Removed unnecessary format changes in comments in Linux abd code. 10. Updated ZFS VFS hook for direct_IO to use PANIC(). 11. Updated comment above dbuf_undirty to use double space again. 12. Converted module parameter zfs_vdev_direct_write_verify_pct to be OS independent, and in doing so removed the unnecessary check for bounds. 13. Updated casting in zfs_dio_page_aligned to uintptr_t and added kernel guard. 14. Updated zfs_dio_size_aligned() to use modulo math because dn->dn_datablksz is not required to be a power of 2. 15. Removed abd scatter stats update calls from all ABD_FLAG_FROM_PAGES. 16. Updated check in abd_alloc_from_pages() for the linear page. This way a single page that is even 4K can be represented as an ABD_FLAG_LINEAR_PAGE. 17. Fixing types for UIO code. In FreeBSD the vm code expects and returns int's for values. In Linux the interfaces return a long value in get_user_pages_unlocked() and the rest of the IOV interfaces return int's. Stuck with the worst case and used long for npages in Linux. Updated the uio npage struct to correspond to the correct types so that type checking is consistent in the UIO code. 18. Updated comments about what zfs_uio_get_pages_alloc() is doing. 19. Updated error handling in zfs_uio_get_dio_pages_alloc() for Linux. Signed-off-by: Brian Atkinson <batkinson@lanl.gov>
This commit is contained in:
parent
7c8b7fe0f3
commit
506bc54006
|
@ -100,9 +100,6 @@
|
||||||
#define spa_taskq_write_param_set_args(var) \
|
#define spa_taskq_write_param_set_args(var) \
|
||||||
CTLTYPE_STRING, NULL, 0, spa_taskq_write_param, "A"
|
CTLTYPE_STRING, NULL, 0, spa_taskq_write_param, "A"
|
||||||
|
|
||||||
#define param_set_direct_write_verify_pct_args(var) \
|
|
||||||
CTLTYPE_UINT, NULL, 0, param_set_direct_write_verify_pct, "IU"
|
|
||||||
|
|
||||||
#define fletcher_4_param_set_args(var) \
|
#define fletcher_4_param_set_args(var) \
|
||||||
CTLTYPE_STRING, NULL, 0, fletcher_4_param, "A"
|
CTLTYPE_STRING, NULL, 0, fletcher_4_param, "A"
|
||||||
|
|
||||||
|
|
|
@ -42,7 +42,9 @@ struct abd_scatter {
|
||||||
|
|
||||||
struct abd_linear {
|
struct abd_linear {
|
||||||
void *abd_buf;
|
void *abd_buf;
|
||||||
|
#if defined(_KERNEL)
|
||||||
struct sf_buf *sf; /* for LINEAR_PAGE FreeBSD */
|
struct sf_buf *sf; /* for LINEAR_PAGE FreeBSD */
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
__attribute__((malloc))
|
__attribute__((malloc))
|
||||||
|
|
|
@ -65,7 +65,7 @@ typedef enum zfs_uio_seg {
|
||||||
*/
|
*/
|
||||||
typedef struct {
|
typedef struct {
|
||||||
struct page **pages; /* Mapped pages */
|
struct page **pages; /* Mapped pages */
|
||||||
int npages; /* Number of mapped pages */
|
long npages; /* Number of mapped pages */
|
||||||
} zfs_uio_dio_t;
|
} zfs_uio_dio_t;
|
||||||
|
|
||||||
typedef struct zfs_uio {
|
typedef struct zfs_uio {
|
||||||
|
|
|
@ -73,9 +73,10 @@ struct abd_iter {
|
||||||
size_t iter_pos;
|
size_t iter_pos;
|
||||||
size_t iter_offset; /* offset in current sg/abd_buf, */
|
size_t iter_offset; /* offset in current sg/abd_buf, */
|
||||||
/* abd_offset included */
|
/* abd_offset included */
|
||||||
struct scatterlist *iter_sg; /* current sg */
|
|
||||||
#if defined(__FreeBSD__) && defined(_KERNEL)
|
#if defined(__FreeBSD__) && defined(_KERNEL)
|
||||||
struct sf_buf *sf; /* used to map in vm_page_t FreeBSD */
|
struct sf_buf *sf; /* used to map in vm_page_t FreeBSD */
|
||||||
|
#else
|
||||||
|
struct scatterlist *iter_sg; /* current sg */
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -576,8 +576,6 @@ int dmu_spill_hold_existing(dmu_buf_t *bonus, const void *tag, dmu_buf_t **dbp);
|
||||||
*
|
*
|
||||||
* The object number must be a valid, allocated object number.
|
* The object number must be a valid, allocated object number.
|
||||||
*/
|
*/
|
||||||
int dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
|
|
||||||
const void *tag, dmu_buf_t **dbp);
|
|
||||||
int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
|
int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
|
||||||
const void *tag, dmu_buf_t **, int flags);
|
const void *tag, dmu_buf_t **, int flags);
|
||||||
int dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
|
int dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
|
||||||
|
|
|
@ -138,7 +138,7 @@ extern "C" {
|
||||||
* db_data_pending
|
* db_data_pending
|
||||||
* db_dirtied
|
* db_dirtied
|
||||||
* db_link
|
* db_link
|
||||||
* dbuf_dirty_records
|
* db_dirty_records
|
||||||
* db_dirtycnt
|
* db_dirtycnt
|
||||||
* db_d.*
|
* db_d.*
|
||||||
* db.*
|
* db.*
|
||||||
|
|
|
@ -49,12 +49,14 @@ extern void zfs_uio_free_dio_pages(zfs_uio_t *, zfs_uio_rw_t);
|
||||||
extern int zfs_uio_get_dio_pages_alloc(zfs_uio_t *, zfs_uio_rw_t);
|
extern int zfs_uio_get_dio_pages_alloc(zfs_uio_t *, zfs_uio_rw_t);
|
||||||
extern boolean_t zfs_uio_page_aligned(zfs_uio_t *);
|
extern boolean_t zfs_uio_page_aligned(zfs_uio_t *);
|
||||||
|
|
||||||
|
#ifdef _KERNEL
|
||||||
static inline boolean_t
|
static inline boolean_t
|
||||||
zfs_dio_page_aligned(void *buf)
|
zfs_dio_page_aligned(void *buf)
|
||||||
{
|
{
|
||||||
return ((((unsigned long)(buf) & (PAGESIZE - 1)) == 0) ?
|
return ((((uintptr_t)(buf) & (PAGESIZE - 1)) == 0) ?
|
||||||
B_TRUE : B_FALSE);
|
B_TRUE : B_FALSE);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static inline boolean_t
|
static inline boolean_t
|
||||||
zfs_dio_offset_aligned(uint64_t offset, uint64_t blksz)
|
zfs_dio_offset_aligned(uint64_t offset, uint64_t blksz)
|
||||||
|
@ -65,7 +67,7 @@ zfs_dio_offset_aligned(uint64_t offset, uint64_t blksz)
|
||||||
static inline boolean_t
|
static inline boolean_t
|
||||||
zfs_dio_size_aligned(uint64_t size, uint64_t blksz)
|
zfs_dio_size_aligned(uint64_t size, uint64_t blksz)
|
||||||
{
|
{
|
||||||
return (IS_P2ALIGNED(size, blksz));
|
return ((size % blksz) == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline boolean_t
|
static inline boolean_t
|
||||||
|
|
|
@ -658,7 +658,6 @@ int param_set_max_auto_ashift(ZFS_MODULE_PARAM_ARGS);
|
||||||
* VDEV checksum verification precentage for Direct I/O writes
|
* VDEV checksum verification precentage for Direct I/O writes
|
||||||
*/
|
*/
|
||||||
extern uint_t zfs_vdev_direct_write_verify_pct;
|
extern uint_t zfs_vdev_direct_write_verify_pct;
|
||||||
int param_set_direct_write_verify_pct(ZFS_MODULE_PARAM_ARGS);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
|
@ -98,7 +98,7 @@ zfs_dio_offset_aligned(uint64_t offset, uint64_t blksz)
|
||||||
static inline boolean_t
|
static inline boolean_t
|
||||||
zfs_dio_size_aligned(uint64_t size, uint64_t blksz)
|
zfs_dio_size_aligned(uint64_t size, uint64_t blksz)
|
||||||
{
|
{
|
||||||
return (IS_P2ALIGNED(size, blksz));
|
return ((size % blksz) == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline boolean_t
|
static inline boolean_t
|
||||||
|
|
|
@ -1054,7 +1054,7 @@ causes every properly aligned read or write to be treated as a direct request.
|
||||||
.Sy disabled
|
.Sy disabled
|
||||||
causes the O_DIRECT flag to be silently ignored and all direct requests will
|
causes the O_DIRECT flag to be silently ignored and all direct requests will
|
||||||
be handled by the ARC.
|
be handled by the ARC.
|
||||||
This is the default behavior for OpenZFS 2.1 and prior releases.
|
This is the default behavior for OpenZFS 2.2 and prior releases.
|
||||||
.Pp
|
.Pp
|
||||||
Bypassing the ARC requires that a direct request be correctly aligned.
|
Bypassing the ARC requires that a direct request be correctly aligned.
|
||||||
For write requests the starting offset and size of the request must be
|
For write requests the starting offset and size of the request must be
|
||||||
|
|
|
@ -446,7 +446,6 @@ ZFS_OBJS_OS := \
|
||||||
vdev_disk.o \
|
vdev_disk.o \
|
||||||
vdev_file.o \
|
vdev_file.o \
|
||||||
vdev_label_os.o \
|
vdev_label_os.o \
|
||||||
vdev_os.o \
|
|
||||||
zfs_acl.o \
|
zfs_acl.o \
|
||||||
zfs_ctldir.o \
|
zfs_ctldir.o \
|
||||||
zfs_debug.o \
|
zfs_debug.o \
|
||||||
|
|
|
@ -129,7 +129,7 @@ zfs_uio_page_aligned(zfs_uio_t *uio)
|
||||||
const struct iovec *iov = GET_UIO_STRUCT(uio)->uio_iov;
|
const struct iovec *iov = GET_UIO_STRUCT(uio)->uio_iov;
|
||||||
|
|
||||||
for (int i = zfs_uio_iovcnt(uio); i > 0; iov++, i--) {
|
for (int i = zfs_uio_iovcnt(uio); i > 0; iov++, i--) {
|
||||||
unsigned long addr = (unsigned long)iov->iov_base;
|
uintptr_t addr = (uintptr_t)iov->iov_base;
|
||||||
size_t size = iov->iov_len;
|
size_t size = iov->iov_len;
|
||||||
if ((addr & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
|
if ((addr & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
|
||||||
return (B_FALSE);
|
return (B_FALSE);
|
||||||
|
@ -143,7 +143,7 @@ static void
|
||||||
zfs_uio_set_pages_to_stable(zfs_uio_t *uio)
|
zfs_uio_set_pages_to_stable(zfs_uio_t *uio)
|
||||||
{
|
{
|
||||||
ASSERT3P(uio->uio_dio.pages, !=, NULL);
|
ASSERT3P(uio->uio_dio.pages, !=, NULL);
|
||||||
ASSERT3U(uio->uio_dio.npages, >, 0);
|
ASSERT3S(uio->uio_dio.npages, >, 0);
|
||||||
|
|
||||||
for (int i = 0; i < uio->uio_dio.npages; i++) {
|
for (int i = 0; i < uio->uio_dio.npages; i++) {
|
||||||
vm_page_t page = uio->uio_dio.pages[i];
|
vm_page_t page = uio->uio_dio.pages[i];
|
||||||
|
@ -172,7 +172,7 @@ zfs_uio_release_stable_pages(zfs_uio_t *uio)
|
||||||
* written to and must be given write access.
|
* written to and must be given write access.
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
zfs_uio_hold_pages(unsigned long start, size_t len, unsigned long nr_pages,
|
zfs_uio_hold_pages(unsigned long start, size_t len, int nr_pages,
|
||||||
zfs_uio_rw_t rw, vm_page_t *pages)
|
zfs_uio_rw_t rw, vm_page_t *pages)
|
||||||
{
|
{
|
||||||
vm_map_t map;
|
vm_map_t map;
|
||||||
|
@ -206,8 +206,8 @@ zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw)
|
||||||
uio->uio_dio.npages * sizeof (vm_page_t));
|
uio->uio_dio.npages * sizeof (vm_page_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
static long
|
static int
|
||||||
zfs_uio_get_user_pages(unsigned long start, unsigned long nr_pages,
|
zfs_uio_get_user_pages(unsigned long start, int nr_pages,
|
||||||
size_t len, zfs_uio_rw_t rw, vm_page_t *pages)
|
size_t len, zfs_uio_rw_t rw, vm_page_t *pages)
|
||||||
{
|
{
|
||||||
int count;
|
int count;
|
||||||
|
@ -220,12 +220,12 @@ zfs_uio_get_user_pages(unsigned long start, unsigned long nr_pages,
|
||||||
return (count);
|
return (count);
|
||||||
}
|
}
|
||||||
|
|
||||||
ASSERT3U(count, ==, nr_pages);
|
ASSERT3S(count, ==, nr_pages);
|
||||||
|
|
||||||
return (count);
|
return (count);
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t
|
static int
|
||||||
zfs_uio_iov_step(struct iovec v, zfs_uio_t *uio, int *numpages)
|
zfs_uio_iov_step(struct iovec v, zfs_uio_t *uio, int *numpages)
|
||||||
{
|
{
|
||||||
unsigned long addr = (unsigned long)(v.iov_base);
|
unsigned long addr = (unsigned long)(v.iov_base);
|
||||||
|
@ -235,14 +235,13 @@ zfs_uio_iov_step(struct iovec v, zfs_uio_t *uio, int *numpages)
|
||||||
int res = zfs_uio_get_user_pages(
|
int res = zfs_uio_get_user_pages(
|
||||||
P2ALIGN_TYPED(addr, PAGE_SIZE, unsigned long), n, len,
|
P2ALIGN_TYPED(addr, PAGE_SIZE, unsigned long), n, len,
|
||||||
zfs_uio_rw(uio), &uio->uio_dio.pages[uio->uio_dio.npages]);
|
zfs_uio_rw(uio), &uio->uio_dio.pages[uio->uio_dio.npages]);
|
||||||
if (res != n) {
|
|
||||||
*numpages = -1;
|
|
||||||
return (SET_ERROR(EFAULT));
|
|
||||||
}
|
|
||||||
|
|
||||||
ASSERT3S(len, ==, res * PAGE_SIZE);
|
if (res != n)
|
||||||
|
return (SET_ERROR(EFAULT));
|
||||||
|
|
||||||
|
ASSERT3U(len, ==, res * PAGE_SIZE);
|
||||||
*numpages = res;
|
*numpages = res;
|
||||||
return (len);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
@ -264,12 +263,11 @@ zfs_uio_get_dio_pages_impl(zfs_uio_t *uio)
|
||||||
}
|
}
|
||||||
iov.iov_len = MIN(maxsize, iovp->iov_len);
|
iov.iov_len = MIN(maxsize, iovp->iov_len);
|
||||||
iov.iov_base = iovp->iov_base;
|
iov.iov_base = iovp->iov_base;
|
||||||
size_t left = zfs_uio_iov_step(iov, uio, &numpages);
|
int error = zfs_uio_iov_step(iov, uio, &numpages);
|
||||||
|
|
||||||
if (numpages == -1)
|
if (error)
|
||||||
return (left);
|
return (error);
|
||||||
|
|
||||||
ASSERT3U(left, ==, iov.iov_len);
|
|
||||||
uio->uio_dio.npages += numpages;
|
uio->uio_dio.npages += numpages;
|
||||||
maxsize -= iov.iov_len;
|
maxsize -= iov.iov_len;
|
||||||
wanted -= left;
|
wanted -= left;
|
||||||
|
@ -282,8 +280,8 @@ zfs_uio_get_dio_pages_impl(zfs_uio_t *uio)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This function maps user pages into the kernel. In the event that the user
|
* This function holds user pages into the kernel. In the event that the user
|
||||||
* pages were not mapped successfully an error value is reutrned.
|
* pages are not successfully held an error value is returned.
|
||||||
*
|
*
|
||||||
* On success, 0 is returned.
|
* On success, 0 is returned.
|
||||||
*/
|
*/
|
||||||
|
@ -291,7 +289,7 @@ int
|
||||||
zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw)
|
zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw)
|
||||||
{
|
{
|
||||||
int error = 0;
|
int error = 0;
|
||||||
size_t npages = DIV_ROUND_UP(zfs_uio_resid(uio), PAGE_SIZE);
|
int npages = DIV_ROUND_UP(zfs_uio_resid(uio), PAGE_SIZE);
|
||||||
size_t size = npages * sizeof (vm_page_t);
|
size_t size = npages * sizeof (vm_page_t);
|
||||||
|
|
||||||
ASSERT(zfs_uio_rw(uio) == rw);
|
ASSERT(zfs_uio_rw(uio) == rw);
|
||||||
|
@ -305,6 +303,8 @@ zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw)
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ASSERT3S(uio->uio_dio.npages, >, 0);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Since we will be writing the user pages we must make sure that
|
* Since we will be writing the user pages we must make sure that
|
||||||
* they are stable. That way the contents of the pages can not change
|
* they are stable. That way the contents of the pages can not change
|
||||||
|
|
|
@ -138,15 +138,9 @@ abd_update_scatter_stats(abd_t *abd, abd_stats_op_t op)
|
||||||
{
|
{
|
||||||
uint_t n;
|
uint_t n;
|
||||||
|
|
||||||
if (abd_is_from_pages(abd))
|
|
||||||
n = abd_chunkcnt_for_bytes(abd->abd_size);
|
|
||||||
else
|
|
||||||
n = abd_scatter_chunkcnt(abd);
|
n = abd_scatter_chunkcnt(abd);
|
||||||
ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
|
ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
|
||||||
int waste = (n << PAGE_SHIFT) - abd->abd_size;
|
int waste = (n << PAGE_SHIFT) - abd->abd_size;
|
||||||
ASSERT3U(n, >, 0);
|
|
||||||
ASSERT3S(waste, >=, 0);
|
|
||||||
IMPLY(abd_is_linear_page(abd), waste < PAGE_SIZE);
|
|
||||||
if (op == ABDSTAT_INCR) {
|
if (op == ABDSTAT_INCR) {
|
||||||
ABDSTAT_BUMP(abdstat_scatter_cnt);
|
ABDSTAT_BUMP(abdstat_scatter_cnt);
|
||||||
ABDSTAT_INCR(abdstat_scatter_data_size, abd->abd_size);
|
ABDSTAT_INCR(abdstat_scatter_data_size, abd->abd_size);
|
||||||
|
@ -458,14 +452,13 @@ abd_alloc_from_pages(vm_page_t *pages, unsigned long offset, uint64_t size)
|
||||||
abd->abd_flags |= ABD_FLAG_OWNER | ABD_FLAG_FROM_PAGES;
|
abd->abd_flags |= ABD_FLAG_OWNER | ABD_FLAG_FROM_PAGES;
|
||||||
abd->abd_size = size;
|
abd->abd_size = size;
|
||||||
|
|
||||||
if (size < PAGE_SIZE) {
|
if ((offset + size) <= PAGE_SIZE) {
|
||||||
/*
|
/*
|
||||||
* We do not have a full page so we will just use a linear ABD.
|
* There is only a single page worth of data, so we will just
|
||||||
* We have to make sure to take into account the offset though.
|
* use a linear ABD. We have to make sure to take into account
|
||||||
* In all other cases our offset will be 0 as we are always
|
* the offset though. In all other cases our offset will be 0
|
||||||
* PAGE_SIZE aligned.
|
* as we are always PAGE_SIZE aligned.
|
||||||
*/
|
*/
|
||||||
ASSERT3U(offset + size, <=, PAGE_SIZE);
|
|
||||||
abd->abd_flags |= ABD_FLAG_LINEAR | ABD_FLAG_LINEAR_PAGE;
|
abd->abd_flags |= ABD_FLAG_LINEAR | ABD_FLAG_LINEAR_PAGE;
|
||||||
ABD_LINEAR_BUF(abd) = (char *)zfs_map_page(pages[0],
|
ABD_LINEAR_BUF(abd) = (char *)zfs_map_page(pages[0],
|
||||||
&abd->abd_u.abd_linear.sf) + offset;
|
&abd->abd_u.abd_linear.sf) + offset;
|
||||||
|
@ -480,8 +473,6 @@ abd_alloc_from_pages(vm_page_t *pages, unsigned long offset, uint64_t size)
|
||||||
ABD_SCATTER(abd).abd_chunks[i] = pages[i];
|
ABD_SCATTER(abd).abd_chunks[i] = pages[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
abd_update_scatter_stats(abd, ABDSTAT_INCR);
|
|
||||||
|
|
||||||
return (abd);
|
return (abd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -831,35 +831,6 @@ SYSCTL_PROC(_vfs_zfs, OID_AUTO, max_auto_ashift,
|
||||||
" new top-level vdevs. (LEGACY)");
|
" new top-level vdevs. (LEGACY)");
|
||||||
/* END CSTYLED */
|
/* END CSTYLED */
|
||||||
|
|
||||||
int
|
|
||||||
param_set_direct_write_verify_pct(SYSCTL_HANDLER_ARGS)
|
|
||||||
{
|
|
||||||
int val;
|
|
||||||
int err;
|
|
||||||
|
|
||||||
val = zfs_vdev_direct_write_verify_pct;
|
|
||||||
err = sysctl_handle_int(oidp, &val, 0, req);
|
|
||||||
if (err != 0 || req->newptr == NULL)
|
|
||||||
return (SET_ERROR(err));
|
|
||||||
|
|
||||||
if (val > 100 || val < 0)
|
|
||||||
return (SET_ERROR(EINVAL));
|
|
||||||
|
|
||||||
zfs_vdev_direct_write_verify_pct = val;
|
|
||||||
|
|
||||||
return (0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* BEGIN CSTYLED */
|
|
||||||
SYSCTL_PROC(_vfs_zfs, OID_AUTO, direct_write_verify_pct,
|
|
||||||
CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
|
|
||||||
&zfs_vdev_direct_write_verify_pct,
|
|
||||||
sizeof (zfs_vdev_direct_write_verify_pct),
|
|
||||||
param_set_direct_write_verify_pct, "IU",
|
|
||||||
"Percentage of Direct I/O writes per top-level VDEV for checksum"
|
|
||||||
" verification to be performed");
|
|
||||||
/* END CSTYLED */
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Since the DTL space map of a vdev is not expected to have a lot of
|
* Since the DTL space map of a vdev is not expected to have a lot of
|
||||||
* entries, we default its block size to 4K.
|
* entries, we default its block size to 4K.
|
||||||
|
|
|
@ -708,8 +708,6 @@ abd_free_linear_page(abd_t *abd)
|
||||||
ABD_SCATTER(abd).abd_offset = 0;
|
ABD_SCATTER(abd).abd_offset = 0;
|
||||||
ABD_SCATTER(abd).abd_sgl = sg;
|
ABD_SCATTER(abd).abd_sgl = sg;
|
||||||
abd_free_chunks(abd);
|
abd_free_chunks(abd);
|
||||||
|
|
||||||
abd_update_scatter_stats(abd, ABDSTAT_DECR);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -742,7 +740,7 @@ abd_alloc_from_pages(struct page **pages, unsigned long offset, uint64_t size)
|
||||||
schedule_timeout_interruptible(1);
|
schedule_timeout_interruptible(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (size < PAGE_SIZE) {
|
if ((offset + size) <= PAGE_SIZE) {
|
||||||
/*
|
/*
|
||||||
* Since there is only one entry, this ABD can be represented
|
* Since there is only one entry, this ABD can be represented
|
||||||
* as a linear buffer. All single-page (4K) ABD's constructed
|
* as a linear buffer. All single-page (4K) ABD's constructed
|
||||||
|
@ -754,7 +752,6 @@ abd_alloc_from_pages(struct page **pages, unsigned long offset, uint64_t size)
|
||||||
* the mapping needs to bet set up on all CPUs. Using kmap()
|
* the mapping needs to bet set up on all CPUs. Using kmap()
|
||||||
* also enables the user of highmem pages when required.
|
* also enables the user of highmem pages when required.
|
||||||
*/
|
*/
|
||||||
ASSERT3U(offset + size, <=, PAGE_SIZE);
|
|
||||||
abd->abd_flags |= ABD_FLAG_LINEAR | ABD_FLAG_LINEAR_PAGE;
|
abd->abd_flags |= ABD_FLAG_LINEAR | ABD_FLAG_LINEAR_PAGE;
|
||||||
abd->abd_u.abd_linear.abd_sgl = table.sgl;
|
abd->abd_u.abd_linear.abd_sgl = table.sgl;
|
||||||
zfs_kmap(sg_page(table.sgl));
|
zfs_kmap(sg_page(table.sgl));
|
||||||
|
@ -770,8 +767,6 @@ abd_alloc_from_pages(struct page **pages, unsigned long offset, uint64_t size)
|
||||||
ASSERT0(ABD_SCATTER(abd).abd_offset);
|
ASSERT0(ABD_SCATTER(abd).abd_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
abd_update_scatter_stats(abd, ABDSTAT_INCR);
|
|
||||||
|
|
||||||
return (abd);
|
return (abd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,49 +0,0 @@
|
||||||
/*
|
|
||||||
* CDDL HEADER START
|
|
||||||
*
|
|
||||||
* The contents of this file are subject to the terms of the
|
|
||||||
* Common Development and Distribution License (the "License").
|
|
||||||
* You may not use this file except in compliance with the License.
|
|
||||||
*
|
|
||||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
||||||
* or https://opensource.org/licenses/CDDL-1.0.
|
|
||||||
* See the License for the specific language governing permissions
|
|
||||||
* and limitations under the License.
|
|
||||||
*
|
|
||||||
* When distributing Covered Code, include this CDDL HEADER in each
|
|
||||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
||||||
* If applicable, add the following below this CDDL HEADER, with the
|
|
||||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
||||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
||||||
*
|
|
||||||
* CDDL HEADER END
|
|
||||||
*/
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2022 by Triad National Security, LLC.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <sys/vdev_impl.h>
|
|
||||||
|
|
||||||
#ifdef _KERNEL
|
|
||||||
|
|
||||||
int
|
|
||||||
param_set_direct_write_verify_pct(const char *buf, zfs_kernel_param_t *kp)
|
|
||||||
{
|
|
||||||
uint_t val;
|
|
||||||
int error;
|
|
||||||
|
|
||||||
error = kstrtouint(buf, 0, &val);
|
|
||||||
if (error < 0)
|
|
||||||
return (SET_ERROR(error));
|
|
||||||
|
|
||||||
if (val > 100)
|
|
||||||
return (SET_ERROR(-EINVAL));
|
|
||||||
|
|
||||||
error = param_set_uint(buf, kp);
|
|
||||||
if (error < 0)
|
|
||||||
return (SET_ERROR(error));
|
|
||||||
|
|
||||||
return (0);
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* _KERNEL */
|
|
|
@ -469,8 +469,7 @@ zfs_uio_page_aligned(zfs_uio_t *uio)
|
||||||
size_t skip = uio->uio_skip;
|
size_t skip = uio->uio_skip;
|
||||||
|
|
||||||
for (int i = uio->uio_iovcnt; i > 0; iov++, i--) {
|
for (int i = uio->uio_iovcnt; i > 0; iov++, i--) {
|
||||||
unsigned long addr =
|
uintptr_t addr = (uintptr_t)(iov->iov_base + skip);
|
||||||
(unsigned long)(iov->iov_base + skip);
|
|
||||||
size_t size = iov->iov_len - skip;
|
size_t size = iov->iov_len - skip;
|
||||||
if ((addr & (PAGE_SIZE - 1)) ||
|
if ((addr & (PAGE_SIZE - 1)) ||
|
||||||
(size & (PAGE_SIZE - 1))) {
|
(size & (PAGE_SIZE - 1))) {
|
||||||
|
@ -535,7 +534,7 @@ zfs_uio_dio_check_for_zero_page(zfs_uio_t *uio)
|
||||||
{
|
{
|
||||||
ASSERT3P(uio->uio_dio.pages, !=, NULL);
|
ASSERT3P(uio->uio_dio.pages, !=, NULL);
|
||||||
|
|
||||||
for (int i = 0; i < uio->uio_dio.npages; i++) {
|
for (long i = 0; i < uio->uio_dio.npages; i++) {
|
||||||
struct page *p = uio->uio_dio.pages[i];
|
struct page *p = uio->uio_dio.pages[i];
|
||||||
lock_page(p);
|
lock_page(p);
|
||||||
|
|
||||||
|
@ -568,7 +567,7 @@ zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw)
|
||||||
ASSERT(uio->uio_extflg & UIO_DIRECT);
|
ASSERT(uio->uio_extflg & UIO_DIRECT);
|
||||||
ASSERT3P(uio->uio_dio.pages, !=, NULL);
|
ASSERT3P(uio->uio_dio.pages, !=, NULL);
|
||||||
|
|
||||||
for (int i = 0; i < uio->uio_dio.npages; i++) {
|
for (long i = 0; i < uio->uio_dio.npages; i++) {
|
||||||
struct page *p = uio->uio_dio.pages[i];
|
struct page *p = uio->uio_dio.pages[i];
|
||||||
|
|
||||||
if (IS_ZFS_MARKED_PAGE(p)) {
|
if (IS_ZFS_MARKED_PAGE(p)) {
|
||||||
|
@ -588,27 +587,26 @@ zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw)
|
||||||
* zfs_uio_iov_step() is just a modified version of the STEP function of Linux's
|
* zfs_uio_iov_step() is just a modified version of the STEP function of Linux's
|
||||||
* iov_iter_get_pages().
|
* iov_iter_get_pages().
|
||||||
*/
|
*/
|
||||||
static size_t
|
static int
|
||||||
zfs_uio_iov_step(struct iovec v, zfs_uio_rw_t rw, zfs_uio_t *uio, int *numpages)
|
zfs_uio_iov_step(struct iovec v, zfs_uio_rw_t rw, zfs_uio_t *uio,
|
||||||
|
long *numpages)
|
||||||
{
|
{
|
||||||
unsigned long addr = (unsigned long)(v.iov_base);
|
unsigned long addr = (unsigned long)(v.iov_base);
|
||||||
size_t len = v.iov_len;
|
size_t len = v.iov_len;
|
||||||
int n = DIV_ROUND_UP(len, PAGE_SIZE);
|
unsigned long n = DIV_ROUND_UP(len, PAGE_SIZE);
|
||||||
|
|
||||||
int res = zfs_get_user_pages(
|
long res = zfs_get_user_pages(
|
||||||
P2ALIGN_TYPED(addr, PAGE_SIZE, unsigned long), n, rw == UIO_READ,
|
P2ALIGN_TYPED(addr, PAGE_SIZE, unsigned long), n, rw == UIO_READ,
|
||||||
&uio->uio_dio.pages[uio->uio_dio.npages]);
|
&uio->uio_dio.pages[uio->uio_dio.npages]);
|
||||||
if (res < 0) {
|
if (res < 0) {
|
||||||
*numpages = -1;
|
|
||||||
return (-res);
|
return (-res);
|
||||||
} else if (len != (res * PAGE_SIZE)) {
|
} else if (len != (res * PAGE_SIZE)) {
|
||||||
*numpages = -1;
|
return (EFAULT);
|
||||||
return (len);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ASSERT3S(len, ==, res * PAGE_SIZE);
|
ASSERT3S(len, ==, res * PAGE_SIZE);
|
||||||
*numpages = res;
|
*numpages = res;
|
||||||
return (len);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
@ -623,7 +621,7 @@ zfs_uio_get_dio_pages_iov(zfs_uio_t *uio, zfs_uio_rw_t rw)
|
||||||
|
|
||||||
for (int i = 0; i < uio->uio_iovcnt; i++) {
|
for (int i = 0; i < uio->uio_iovcnt; i++) {
|
||||||
struct iovec iov;
|
struct iovec iov;
|
||||||
int numpages = 0;
|
long numpages = 0;
|
||||||
|
|
||||||
if (iovp->iov_len == 0) {
|
if (iovp->iov_len == 0) {
|
||||||
iovp++;
|
iovp++;
|
||||||
|
@ -632,13 +630,11 @@ zfs_uio_get_dio_pages_iov(zfs_uio_t *uio, zfs_uio_rw_t rw)
|
||||||
}
|
}
|
||||||
iov.iov_len = MIN(maxsize, iovp->iov_len - skip);
|
iov.iov_len = MIN(maxsize, iovp->iov_len - skip);
|
||||||
iov.iov_base = iovp->iov_base + skip;
|
iov.iov_base = iovp->iov_base + skip;
|
||||||
ssize_t left = zfs_uio_iov_step(iov, rw, uio, &numpages);
|
int error = zfs_uio_iov_step(iov, rw, uio, &numpages);
|
||||||
|
|
||||||
if (numpages == -1) {
|
if (error)
|
||||||
return (left);
|
return (SET_ERROR(error));
|
||||||
}
|
|
||||||
|
|
||||||
ASSERT3U(left, ==, iov.iov_len);
|
|
||||||
uio->uio_dio.npages += numpages;
|
uio->uio_dio.npages += numpages;
|
||||||
maxsize -= iov.iov_len;
|
maxsize -= iov.iov_len;
|
||||||
wanted -= left;
|
wanted -= left;
|
||||||
|
@ -656,9 +652,9 @@ zfs_uio_get_dio_pages_iov_iter(zfs_uio_t *uio, zfs_uio_rw_t rw)
|
||||||
{
|
{
|
||||||
size_t skip = uio->uio_skip;
|
size_t skip = uio->uio_skip;
|
||||||
size_t wanted = uio->uio_resid - uio->uio_skip;
|
size_t wanted = uio->uio_resid - uio->uio_skip;
|
||||||
size_t rollback = 0;
|
ssize_t rollback = 0;
|
||||||
size_t cnt;
|
ssize_t cnt;
|
||||||
size_t maxpages = DIV_ROUND_UP(wanted, PAGE_SIZE);
|
unsigned maxpages = DIV_ROUND_UP(wanted, PAGE_SIZE);
|
||||||
|
|
||||||
while (wanted) {
|
while (wanted) {
|
||||||
#if defined(HAVE_IOV_ITER_GET_PAGES2)
|
#if defined(HAVE_IOV_ITER_GET_PAGES2)
|
||||||
|
@ -694,8 +690,8 @@ zfs_uio_get_dio_pages_iov_iter(zfs_uio_t *uio, zfs_uio_rw_t rw)
|
||||||
#endif /* HAVE_VFS_IOV_ITER */
|
#endif /* HAVE_VFS_IOV_ITER */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This function maps user pages into the kernel. In the event that the user
|
* This function pins user pages. In the event that the user pages were not
|
||||||
* pages were not mapped successfully an error value is returned.
|
* successfully pinned an error value is returned.
|
||||||
*
|
*
|
||||||
* On success, 0 is returned.
|
* On success, 0 is returned.
|
||||||
*/
|
*/
|
||||||
|
@ -703,26 +699,30 @@ int
|
||||||
zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw)
|
zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw)
|
||||||
{
|
{
|
||||||
int error = 0;
|
int error = 0;
|
||||||
size_t npages = DIV_ROUND_UP(uio->uio_resid, PAGE_SIZE);
|
long npages = DIV_ROUND_UP(uio->uio_resid, PAGE_SIZE);
|
||||||
size_t size = npages * sizeof (struct page *);
|
size_t size = npages * sizeof (struct page *);
|
||||||
|
|
||||||
if (uio->uio_segflg == UIO_USERSPACE) {
|
if (uio->uio_segflg == UIO_USERSPACE) {
|
||||||
uio->uio_dio.pages = vmem_alloc(size, KM_SLEEP);
|
uio->uio_dio.pages = vmem_alloc(size, KM_SLEEP);
|
||||||
error = zfs_uio_get_dio_pages_iov(uio, rw);
|
error = zfs_uio_get_dio_pages_iov(uio, rw);
|
||||||
ASSERT3S(uio->uio_dio.npages, ==, npages);
|
|
||||||
#if defined(HAVE_VFS_IOV_ITER)
|
#if defined(HAVE_VFS_IOV_ITER)
|
||||||
} else if (uio->uio_segflg == UIO_ITER) {
|
} else if (uio->uio_segflg == UIO_ITER) {
|
||||||
uio->uio_dio.pages = vmem_alloc(size, KM_SLEEP);
|
uio->uio_dio.pages = vmem_alloc(size, KM_SLEEP);
|
||||||
error = zfs_uio_get_dio_pages_iov_iter(uio, rw);
|
error = zfs_uio_get_dio_pages_iov_iter(uio, rw);
|
||||||
ASSERT3S(uio->uio_dio.npages, ==, npages);
|
|
||||||
#endif
|
#endif
|
||||||
} else {
|
} else {
|
||||||
return (SET_ERROR(EOPNOTSUPP));
|
return (SET_ERROR(EOPNOTSUPP));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ASSERT3S(uio->uio_dio.npages, >=, 0);
|
||||||
|
|
||||||
if (error) {
|
if (error) {
|
||||||
|
for (long i = 0; i < uio->uio_dio.npages; i++)
|
||||||
|
put_page(uio->uio_dio.pages[i]);
|
||||||
vmem_free(uio->uio_dio.pages, size);
|
vmem_free(uio->uio_dio.pages, size);
|
||||||
return (error);
|
return (error);
|
||||||
|
} else {
|
||||||
|
ASSERT3S(uio->uio_dio.npages, ==, npages);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rw == UIO_WRITE) {
|
if (rw == UIO_WRITE) {
|
||||||
|
|
|
@ -801,7 +801,7 @@ zpl_direct_IO_impl(void)
|
||||||
* should call the direct_IO address_space_operations function. We set
|
* should call the direct_IO address_space_operations function. We set
|
||||||
* this code path to be fatal if it is executed.
|
* this code path to be fatal if it is executed.
|
||||||
*/
|
*/
|
||||||
VERIFY(0);
|
PANIC(0);
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2574,12 +2574,8 @@ dbuf_undirty_bonus(dbuf_dirty_record_t *dr)
|
||||||
boolean_t
|
boolean_t
|
||||||
dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
||||||
{
|
{
|
||||||
uint64_t txg;
|
uint64_t txg = tx->tx_txg;
|
||||||
boolean_t brtwrite;
|
boolean_t brtwrite;
|
||||||
dbuf_dirty_record_t *dr;
|
|
||||||
|
|
||||||
txg = tx->tx_txg;
|
|
||||||
dr = dbuf_find_dirty_eq(db, txg);
|
|
||||||
|
|
||||||
ASSERT(txg != 0);
|
ASSERT(txg != 0);
|
||||||
|
|
||||||
|
@ -2599,6 +2595,7 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
||||||
/*
|
/*
|
||||||
* If this buffer is not dirty, we're done.
|
* If this buffer is not dirty, we're done.
|
||||||
*/
|
*/
|
||||||
|
dbuf_dirty_record_t *dr = dbuf_find_dirty_eq(db, txg);
|
||||||
if (dr == NULL)
|
if (dr == NULL)
|
||||||
return (B_FALSE);
|
return (B_FALSE);
|
||||||
ASSERT(dr->dr_dbuf == db);
|
ASSERT(dr->dr_dbuf == db);
|
||||||
|
|
|
@ -6527,6 +6527,10 @@ ZFS_MODULE_PARAM(zfs, zfs_, dio_write_verify_events_per_second, UINT, ZMOD_RW,
|
||||||
"Rate Direct I/O write verify events to this many per second");
|
"Rate Direct I/O write verify events to this many per second");
|
||||||
|
|
||||||
/* BEGIN CSTYLED */
|
/* BEGIN CSTYLED */
|
||||||
|
ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, direct_write_verify_pct, UINT, ZMOD_RW,
|
||||||
|
"Percentage of Direct I/O writes per top-level VDEV for checksum "
|
||||||
|
"verification to be performed");
|
||||||
|
|
||||||
ZFS_MODULE_PARAM(zfs, zfs_, checksum_events_per_second, UINT, ZMOD_RW,
|
ZFS_MODULE_PARAM(zfs, zfs_, checksum_events_per_second, UINT, ZMOD_RW,
|
||||||
"Rate limit checksum events to this many checksum errors per second "
|
"Rate limit checksum events to this many checksum errors per second "
|
||||||
"(do not set below ZED threshold).");
|
"(do not set below ZED threshold).");
|
||||||
|
@ -6553,9 +6557,4 @@ ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, max_auto_ashift,
|
||||||
param_set_max_auto_ashift, param_get_uint, ZMOD_RW,
|
param_set_max_auto_ashift, param_get_uint, ZMOD_RW,
|
||||||
"Maximum ashift used when optimizing for logical -> physical sector "
|
"Maximum ashift used when optimizing for logical -> physical sector "
|
||||||
"size on new top-level vdevs");
|
"size on new top-level vdevs");
|
||||||
|
|
||||||
ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, direct_write_verify_pct,
|
|
||||||
param_set_direct_write_verify_pct, param_get_uint, ZMOD_RW,
|
|
||||||
"Percentage of Direct I/O writes per top-level VDEV for checksum "
|
|
||||||
"verification to be performed");
|
|
||||||
/* END CSTYLED */
|
/* END CSTYLED */
|
||||||
|
|
Loading…
Reference in New Issue