Updating based on PR Feedback(2)
Updating the code based on PR code comments. I adjusted the following parts of the code based on the comments:
1. Updated zfs_check_direct_enabled() so it now just returns an error. This removed the need for the added enum and cleaned up the code.
2. Moved acquiring the rangelock from zfs_fillpage() out to zfs_getpage(). This cleans up the code and gets rid of the need to pass a boolean into zfs_fillpage() to conditionally grab the rangelock.
3. Cleaned up the code in both zfs_uio_get_dio_pages() and zfs_uio_get_dio_pages_iov(). There was no need to have both wanted and maxsize, as they were the same thing. Also, since the previous commit cleaned up the call to zfs_uio_iov_step(), the code is much cleaner overall.
4. Removed the dbuf_get_dirty_direct() function.
5. Unified dbuf_read() to account for both block clones and Direct I/O writes. This removes redundant code from dbuf_read_impl() for grabbing the BP.
6. Removed the zfs_map_page() and zfs_unmap_page() declarations from the Linux headers, as they were never called.
Signed-off-by: Brian Atkinson <batkinson@lanl.gov>
This commit is contained in:
parent 506bc54006
commit b1ee363675
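Note (illustrative, not part of the commit): a minimal sketch of how a caller is expected to use the reworked zfs_check_direct_enabled() interface after this change. The helper now returns 0 or an errno and reports the Direct I/O decision through a boolean_t out parameter, which is what removed the zfs_direct_enabled_t enum and the old int *error out parameter. The example_read()/example_read_direct() wrappers below are hypothetical stand-ins for the platform read entry points touched in the diff; only zfs_check_direct_enabled() and zfs_read() come from the tree.

/*
 * Hypothetical caller, assuming the new zfs_check_direct_enabled()
 * signature shown in the diff below:
 *   int zfs_check_direct_enabled(znode_t *, int, boolean_t *);
 */
static int
example_read(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
{
	boolean_t is_direct;
	int error;

	/* Errors (e.g. from zfs_enter()) are now returned directly. */
	error = zfs_check_direct_enabled(zp, ioflag, &is_direct);
	if (error)
		return (error);

	if (is_direct) {
		/* Try the Direct I/O path first (platform specific). */
		error = example_read_direct(zp, uio, ioflag, cr);
		if (error != EAGAIN)
			return (error);
		/* EAGAIN: fall back to the buffered path below. */
	}

	return (zfs_read(zp, uio, ioflag, cr));
}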
@@ -184,12 +184,6 @@ extern int zfs_inode_alloc(struct super_block *, struct inode **ip);
 extern void zfs_inode_destroy(struct inode *);
 extern void zfs_mark_inode_dirty(struct inode *);
 extern boolean_t zfs_relatime_need_update(const struct inode *);
-
-#if defined(HAVE_UIO_RW)
-extern caddr_t zfs_map_page(page_t *, enum seg_rw);
-extern void zfs_unmap_page(page_t *, caddr_t);
-#endif /* HAVE_UIO_RW */
-
 extern zil_replay_func_t *const zfs_replay_vector[TX_MAX_TYPE];
 
 #ifdef __cplusplus
@@ -393,7 +393,7 @@ dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
 dbuf_dirty_record_t *dbuf_dirty_lightweight(dnode_t *dn, uint64_t blkid,
     dmu_tx_t *tx);
 boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
-blkptr_t *dmu_buf_get_bp_from_dbuf(dmu_buf_impl_t *db);
+int dmu_buf_get_bp_from_dbuf(dmu_buf_impl_t *db, blkptr_t **bp);
 int dmu_buf_untransform_direct(dmu_buf_impl_t *db, spa_t *spa);
 arc_buf_t *dbuf_loan_arcbuf(dmu_buf_impl_t *db);
 void dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data,
@@ -467,18 +467,6 @@ dbuf_find_dirty_eq(dmu_buf_impl_t *db, uint64_t txg)
 	return (NULL);
 }
 
-/*
- * All Direct I/O writes happen in open context so the first dirty record will
- * always be associated with the write. After a Direct I/O write completes the
- * dirty records dr_overriden state will bet DR_OVERRIDDEN and the dr_data will
- * get set to NULL.
- */
-static inline dbuf_dirty_record_t *
-dbuf_get_dirty_direct(dmu_buf_impl_t *db)
-{
-	return (list_head(&db->db_dirty_records));
-}
-
 static inline boolean_t
 dbuf_dirty_is_direct_write(dmu_buf_impl_t *db, dbuf_dirty_record_t *dr)
 {
@@ -29,12 +29,6 @@
 
 extern int zfs_bclone_enabled;
 
-typedef enum zfs_direct_enabled {
-	ZFS_DIRECT_IO_ERR,
-	ZFS_DIRECT_IO_DISABLED,
-	ZFS_DIRECT_IO_ENABLED
-} zfs_direct_enabled_t;
-
 extern int zfs_fsync(znode_t *, int, cred_t *);
 extern int zfs_read(znode_t *, zfs_uio_t *, int, cred_t *);
 extern int zfs_write(znode_t *, zfs_uio_t *, int, cred_t *);
@@ -52,7 +46,7 @@ extern int mappedread(znode_t *, int, zfs_uio_t *);
 extern int mappedread_sf(znode_t *, int, zfs_uio_t *);
 extern void update_pages(znode_t *, int64_t, int, objset_t *);
 
-extern zfs_direct_enabled_t zfs_check_direct_enabled(znode_t *, int, int *);
+extern int zfs_check_direct_enabled(znode_t *, int, boolean_t *);
 extern int zfs_setup_direct(znode_t *, zfs_uio_t *, zfs_uio_rw_t, int *);
 
 /*
@@ -248,10 +248,7 @@ static int
 zfs_uio_get_dio_pages_impl(zfs_uio_t *uio)
 {
 	const struct iovec *iovp = GET_UIO_STRUCT(uio)->uio_iov;
-	size_t wanted;
-	size_t maxsize = zfs_uio_resid(uio);
-
-	wanted = maxsize;
+	size_t len = zfs_uio_resid(uio);
 
 	for (int i = 0; i < zfs_uio_iovcnt(uio); i++) {
 		struct iovec iov;
@@ -261,7 +258,7 @@ zfs_uio_get_dio_pages_impl(zfs_uio_t *uio)
 			iovp++;
 			continue;
 		}
-		iov.iov_len = MIN(maxsize, iovp->iov_len);
+		iov.iov_len = MIN(len, iovp->iov_len);
 		iov.iov_base = iovp->iov_base;
 		int error = zfs_uio_iov_step(iov, uio, &numpages);
 
@@ -269,12 +266,11 @@ zfs_uio_get_dio_pages_impl(zfs_uio_t *uio)
 			return (error);
 
 		uio->uio_dio.npages += numpages;
-		maxsize -= iov.iov_len;
-		wanted -= left;
+		len -= iov.iov_len;
 		iovp++;
 	}
 
-	ASSERT0(wanted);
+	ASSERT0(len);
 
 	return (0);
 }
@@ -4313,15 +4313,15 @@ zfs_freebsd_read(struct vop_read_args *ap)
 	int error = 0;
 	znode_t *zp = VTOZ(ap->a_vp);
 	int ioflag = ioflags(ap->a_ioflag);
+	boolean_t is_direct;
 
 	zfs_uio_init(&uio, ap->a_uio);
 
-	zfs_direct_enabled_t direct =
-	    zfs_check_direct_enabled(zp, ioflag, &error);
+	error = zfs_check_direct_enabled(zp, ioflag, &is_direct);
 
-	if (direct == ZFS_DIRECT_IO_ERR) {
+	if (error) {
 		return (error);
-	} else if (direct == ZFS_DIRECT_IO_ENABLED) {
+	} else if (is_direct) {
 		error =
 		    zfs_freebsd_read_direct(zp, &uio, UIO_READ, ioflag,
 		    ap->a_cred);
@@ -4362,9 +4362,6 @@ zfs_freebsd_read(struct vop_read_args *ap)
 	}
 
 
-	ASSERT(direct == ZFS_DIRECT_IO_DISABLED ||
-	    (direct == ZFS_DIRECT_IO_ENABLED && error == EAGAIN));
-
 	error = zfs_read(zp, &uio, ioflag, ap->a_cred);
 
 	return (error);
@@ -4409,15 +4406,15 @@ zfs_freebsd_write(struct vop_write_args *ap)
 	int error = 0;
 	znode_t *zp = VTOZ(ap->a_vp);
 	int ioflag = ioflags(ap->a_ioflag);
+	boolean_t is_direct;
 
 	zfs_uio_init(&uio, ap->a_uio);
 
-	zfs_direct_enabled_t direct =
-	    zfs_check_direct_enabled(zp, ioflag, &error);
+	error = zfs_check_direct_enabled(zp, ioflag, &is_direct);
 
-	if (direct == ZFS_DIRECT_IO_ERR) {
+	if (error) {
 		return (error);
-	} else if (direct == ZFS_DIRECT_IO_ENABLED) {
+	} else if (is_direct) {
 		error =
 		    zfs_freebsd_write_direct(zp, &uio, UIO_WRITE, ioflag,
 		    ap->a_cred);
@@ -4433,9 +4430,6 @@ zfs_freebsd_write(struct vop_write_args *ap)
 
 	}
 
-	ASSERT(direct == ZFS_DIRECT_IO_DISABLED ||
-	    (direct == ZFS_DIRECT_IO_ENABLED && error == EAGAIN));
-
 	error = zfs_write(zp, &uio, ioflag, ap->a_cred);
 
 	return (error);
@@ -614,10 +614,9 @@ zfs_uio_get_dio_pages_iov(zfs_uio_t *uio, zfs_uio_rw_t rw)
 {
 	const struct iovec *iovp = uio->uio_iov;
 	size_t skip = uio->uio_skip;
-	size_t wanted, maxsize;
+	size_t len = uio->uio_resid - skip;
 
 	ASSERT(uio->uio_segflg != UIO_SYSSPACE);
-	wanted = maxsize = uio->uio_resid - skip;
 
 	for (int i = 0; i < uio->uio_iovcnt; i++) {
 		struct iovec iov;
@@ -628,7 +627,7 @@ zfs_uio_get_dio_pages_iov(zfs_uio_t *uio, zfs_uio_rw_t rw)
 			skip = 0;
 			continue;
 		}
-		iov.iov_len = MIN(maxsize, iovp->iov_len - skip);
+		iov.iov_len = MIN(len, iovp->iov_len - skip);
 		iov.iov_base = iovp->iov_base + skip;
 		int error = zfs_uio_iov_step(iov, rw, uio, &numpages);
 
@@ -636,13 +635,13 @@ zfs_uio_get_dio_pages_iov(zfs_uio_t *uio, zfs_uio_rw_t rw)
 			return (SET_ERROR(error));
 
 		uio->uio_dio.npages += numpages;
-		maxsize -= iov.iov_len;
-		wanted -= left;
+		len -= iov.iov_len;
 		skip = 0;
 		iovp++;
 	}
 
-	ASSERT0(wanted);
+	ASSERT0(len);
 
 	return (0);
 }
 
@@ -228,8 +228,7 @@ zfs_close(struct inode *ip, int flag, cred_t *cr)
 
 #if defined(_KERNEL)
 
-static int zfs_fillpage(struct inode *ip, struct page *pp,
-    boolean_t rangelock_held);
+static int zfs_fillpage(struct inode *ip, struct page *pp);
 
 /*
  * When a file is memory mapped, we must keep the IO data synchronized
@@ -304,7 +303,7 @@ mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
 			 * In this case we must try and fill the page.
 			 */
 			if (unlikely(!PageUptodate(pp))) {
-				error = zfs_fillpage(ip, pp, B_TRUE);
+				error = zfs_fillpage(ip, pp);
 				if (error) {
 					unlock_page(pp);
 					put_page(pp);
@@ -4009,66 +4008,19 @@ zfs_inactive(struct inode *ip)
  * Fill pages with data from the disk.
  */
 static int
-zfs_fillpage(struct inode *ip, struct page *pp, boolean_t rangelock_held)
+zfs_fillpage(struct inode *ip, struct page *pp)
 {
 	znode_t *zp = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
 	loff_t i_size = i_size_read(ip);
 	u_offset_t io_off = page_offset(pp);
 	size_t io_len = PAGE_SIZE;
-	zfs_locked_range_t *lr = NULL;
 
 	ASSERT3U(io_off, <, i_size);
 
 	if (io_off + io_len > i_size)
 		io_len = i_size - io_off;
 
-	/*
-	 * It is important to hold the rangelock here because it is possible
-	 * a Direct I/O write might be taking place at the same time that a
-	 * page is being faulted in through filemap_fault(). With a Direct I/O
-	 * write, db->db_data will be set to NULL either in:
-	 * 1. dmu_write_direct() -> dmu_buf_will_not_fill() ->
-	 *    dmu_buf_will_fill() -> dbuf_noread() -> dbuf_clear_data()
-	 * 2. dmu_write_direct_done()
-	 * If the rangelock is not held, then there is a race between faulting
-	 * in a page and writing out a Direct I/O write. Without the rangelock
-	 * a NULL pointer dereference can occur in dmu_read_impl() for
-	 * db->db_data during the mempcy operation.
-	 *
-	 * Another important note here is we have to check to make sure the
-	 * rangelock is not already held from mappedread() -> zfs_fillpage().
-	 * filemap_fault() will first add the page to the inode address_space
-	 * mapping and then will drop the page lock. This leaves open a window
-	 * for mappedread() to begin. In this case he page lock and rangelock,
-	 * are both held and it might have to call here if the page is not
-	 * up to date. In this case the rangelock can not be held twice or a
-	 * deadlock can happen. So the rangelock only needs to be aquired if
-	 * zfs_fillpage() is being called by zfs_getpage().
-	 *
-	 * Finally it is also important to drop the page lock before grabbing
-	 * the rangelock to avoid another deadlock between here and
-	 * zfs_write() -> update_pages(). update_pages() holds both the
-	 * rangelock and the page lock.
-	 */
-	if (rangelock_held == B_FALSE) {
-		/*
-		 * First try grabbing the rangelock. If that can not be done
-		 * the page lock must be dropped before grabbing the rangelock
-		 * to avoid a deadlock with update_pages(). See comment above.
-		 */
-		lr = zfs_rangelock_tryenter(&zp->z_rangelock, io_off, io_len,
-		    RL_READER);
-		if (lr == NULL) {
-			get_page(pp);
-			unlock_page(pp);
-			lr = zfs_rangelock_enter(&zp->z_rangelock, io_off,
-			    io_len, RL_READER);
-			lock_page(pp);
-			put_page(pp);
-		}
-	}
-
 	void *va = kmap(pp);
 	int error = dmu_read(zfsvfs->z_os, zp->z_id, io_off,
 	    io_len, va, DMU_READ_PREFETCH);
@@ -4088,10 +4040,6 @@ zfs_fillpage(struct inode *ip, struct page *pp, boolean_t rangelock_held)
 		SetPageUptodate(pp);
 	}
 
-
-	if (rangelock_held == B_FALSE)
-		zfs_rangelock_exit(lr);
-
 	return (error);
 }
 
@@ -4112,11 +4060,49 @@ zfs_getpage(struct inode *ip, struct page *pp)
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
 	znode_t *zp = ITOZ(ip);
 	int error;
+	loff_t i_size = i_size_read(ip);
+	u_offset_t io_off = page_offset(pp);
+	size_t io_len = PAGE_SIZE;
 
 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
 		return (error);
 
-	error = zfs_fillpage(ip, pp, B_FALSE);
+	ASSERT3U(io_off, <, i_size);
+
+	if (io_off + io_len > i_size)
+		io_len = i_size - io_off;
+
+	/*
+	 * It is important to hold the rangelock here because it is possible
+	 * a Direct I/O write or block clone might be taking place at the same
+	 * time that a page is being faulted in through filemap_fault(). With
+	 * Direct I/O writes and block cloning db->db_data will be set to NULL
+	 * with dbuf_clear_data() in dmu_buif_will_clone_or_dio(). If the
+	 * rangelock is not held, then there is a race between faulting in a
+	 * page and writing out a Direct I/O write or block cloning. Without
+	 * the rangelock a NULL pointer dereference can occur in
+	 * dmu_read_impl() for db->db_data during the mempcy operation when
+	 * zfs_fillpage() calls dmu_read().
+	 */
+	zfs_locked_range_t *lr = zfs_rangelock_tryenter(&zp->z_rangelock,
+	    io_off, io_len, RL_READER);
+	if (lr == NULL) {
+		/*
+		 * It is important to drop the page lock before grabbing the
+		 * rangelock to avoid another deadlock between here and
+		 * zfs_write() -> update_pages(). update_pages() holds both the
+		 * rangelock and the page lock.
+		 */
+		get_page(pp);
+		unlock_page(pp);
+		lr = zfs_rangelock_enter(&zp->z_rangelock, io_off,
+		    io_len, RL_READER);
+		lock_page(pp);
+		put_page(pp);
+	}
+	error = zfs_fillpage(ip, pp);
+	zfs_rangelock_exit(lr);
 
 	if (error == 0)
 		dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, PAGE_SIZE);
 
@@ -387,14 +387,13 @@ zpl_iter_read(struct kiocb *kiocb, struct iov_iter *to)
 	struct inode *ip = kiocb->ki_filp->f_mapping->host;
 	struct file *filp = kiocb->ki_filp;
 	int flags = filp->f_flags | zfs_io_flags(kiocb);
-	int error = 0;
+	boolean_t is_direct;
 
-	zfs_direct_enabled_t direct =
-	    zfs_check_direct_enabled(ITOZ(ip), flags, &error);
+	int error = zfs_check_direct_enabled(ITOZ(ip), flags, &is_direct);
 
-	if (direct == ZFS_DIRECT_IO_ERR) {
+	if (error) {
 		return (-error);
-	} else if (direct == ZFS_DIRECT_IO_ENABLED) {
+	} else if (is_direct) {
 		ssize_t read = zpl_iter_read_direct(kiocb, to);
 
 		if (read >= 0 || read != -EAGAIN)
@@ -510,7 +509,7 @@ zpl_iter_write(struct kiocb *kiocb, struct iov_iter *from)
 	struct file *filp = kiocb->ki_filp;
 	int flags = filp->f_flags | zfs_io_flags(kiocb);
 	size_t count = 0;
-	int error = 0;
+	boolean_t is_direct;
 
 	ssize_t ret = zpl_generic_write_checks(kiocb, from, &count);
 	if (ret)
@@ -518,12 +517,11 @@ zpl_iter_write(struct kiocb *kiocb, struct iov_iter *from)
 
 	loff_t offset = kiocb->ki_pos;
 
-	zfs_direct_enabled_t direct =
-	    zfs_check_direct_enabled(ITOZ(ip), flags, &error);
+	ret = zfs_check_direct_enabled(ITOZ(ip), flags, &is_direct);
 
-	if (direct == ZFS_DIRECT_IO_ERR) {
-		return (-error);
-	} else if (direct == ZFS_DIRECT_IO_ENABLED) {
+	if (ret) {
+		return (-ret);
+	} else if (is_direct) {
 		ssize_t wrote = zpl_iter_write_direct(kiocb, from);
 
 		if (wrote >= 0 || wrote != -EAGAIN) {
@@ -638,18 +636,17 @@ zpl_aio_read(struct kiocb *kiocb, const struct iovec *iov,
 	int flags = filp->f_flags | zfs_io_flags(kiocb);
 	size_t count;
 	ssize_t ret;
-	int error = 0;
+	boolean_t is_direct;
 
 	ret = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
 	if (ret)
 		return (ret);
 
-	zfs_direct_enabled_t direct =
-	    zfs_check_direct_enabled(ITOZ(ip), flags, &error);
+	ret = zfs_check_direct_enabled(ITOZ(ip), flags, &is_direct);
 
-	if (direct == ZFS_DIRECT_IO_ERR) {
-		return (-error);
-	} else if (direct == ZFS_DIRECT_IO_ENABLED) {
+	if (ret) {
+		return (-ret);
+	} else if (is_direct) {
 		ssize_t read = zpl_aio_read_direct(kiocb, iov, nr_segs, pos);
 
 		if (read >= 0 || read != -EAGAIN)
@@ -754,7 +751,7 @@ zpl_aio_write(struct kiocb *kiocb, const struct iovec *iov,
 	size_t ocount;
 	size_t count;
 	ssize_t ret;
-	int error = 0;
+	boolean_t is_direct;
 
 	ret = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
 	if (ret)
@@ -768,12 +765,11 @@ zpl_aio_write(struct kiocb *kiocb, const struct iovec *iov,
 
 	kiocb->ki_pos = pos;
 
-	zfs_direct_enabled_t direct =
-	    zfs_check_direct_enabled(ITOZ(ip), flags, &error);
+	ret = zfs_check_direct_enabled(ITOZ(ip), flags, &is_direct);
 
-	if (direct == ZFS_DIRECT_IO_ERR) {
-		return (-error);
-	} else if (direct == ZFS_DIRECT_IO_ENABLED) {
+	if (ret) {
+		return (-ret);
+	} else if (is_direct) {
 		ssize_t wrote = zpl_aio_write_direct(kiocb, iov, nr_segs, pos);
 
 		if (wrote >= 0 || wrote != -EAGAIN) {
@@ -1253,17 +1253,16 @@ dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf)
 {
 	ASSERT(MUTEX_HELD(&db->db_mtx));
 	ASSERT(buf != NULL);
-	dbuf_dirty_record_t *dr_dio = NULL;
 
 	db->db_buf = buf;
-	dr_dio = dbuf_get_dirty_direct(db);
 
 	/*
 	 * If there is a Direct I/O, set its data too. Then its state
 	 * will be the same as if we did a ZIL dmu_sync().
 	 */
-	if (dbuf_dirty_is_direct_write(db, dr_dio)) {
-		dr_dio->dt.dl.dr_data = db->db_buf;
+	dbuf_dirty_record_t *dr = list_head(&db->db_dirty_records);
+	if (dbuf_dirty_is_direct_write(db, dr)) {
+		dr->dt.dl.dr_data = db->db_buf;
 	}
 
 	ASSERT(buf->b_data != NULL);
@@ -1594,7 +1593,6 @@ dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, uint32_t flags,
 	zbookmark_phys_t zb;
 	uint32_t aflags = ARC_FLAG_NOWAIT;
 	int err, zio_flags;
-	blkptr_t *bpp = bp;
 
 	ASSERT(!zfs_refcount_is_zero(&db->db_holds));
 	ASSERT(MUTEX_HELD(&db->db_mtx));
@@ -1608,37 +1606,18 @@ dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, uint32_t flags,
 		goto early_unlock;
 	}
 
-	/*
-	 * If we have a pending block clone, we don't want to read the
-	 * underlying block, but the content of the block being cloned,
-	 * pointed by the dirty record, so we have the most recent data.
-	 * If there is no dirty record, then we hit a race in a sync
-	 * process when the dirty record is already removed, while the
-	 * dbuf is not yet destroyed. Such case is equivalent to uncached.
-	 */
-	if (db->db_state == DB_NOFILL) {
-		dbuf_dirty_record_t *dr = list_head(&db->db_dirty_records);
-		if (dr != NULL) {
-			if (!dr->dt.dl.dr_brtwrite) {
-				err = EIO;
-				goto early_unlock;
-			}
-			bpp = &dr->dt.dl.dr_overridden_by;
-		}
-	}
-
-	err = dbuf_read_hole(db, dn, bpp);
+	err = dbuf_read_hole(db, dn, bp);
 	if (err == 0)
 		goto early_unlock;
 
-	ASSERT(bpp != NULL);
+	ASSERT(bp != NULL);
 
 	/*
 	 * Any attempt to read a redacted block should result in an error. This
 	 * will never happen under normal conditions, but can be useful for
 	 * debugging purposes.
 	 */
-	if (BP_IS_REDACTED(bpp)) {
+	if (BP_IS_REDACTED(bp)) {
 		ASSERT(dsl_dataset_feature_is_active(
 		    db->db_objset->os_dsl_dataset,
 		    SPA_FEATURE_REDACTED_DATASETS));
@@ -1653,9 +1632,9 @@ dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, uint32_t flags,
 	 * All bps of an encrypted os should have the encryption bit set.
 	 * If this is not true it indicates tampering and we report an error.
 	 */
-	if (db->db_objset->os_encrypted && !BP_USES_CRYPT(bpp)) {
+	if (db->db_objset->os_encrypted && !BP_USES_CRYPT(bp)) {
 		spa_log_error(db->db_objset->os_spa, &zb,
-		    BP_GET_LOGICAL_BIRTH(bpp));
+		    BP_GET_LOGICAL_BIRTH(bp));
 		err = SET_ERROR(EIO);
 		goto early_unlock;
 	}
@@ -1666,7 +1645,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, uint32_t flags,
 
 	if (!DBUF_IS_CACHEABLE(db))
 		aflags |= ARC_FLAG_UNCACHED;
-	else if (dbuf_is_l2cacheable(db, bpp))
+	else if (dbuf_is_l2cacheable(db, bp))
 		aflags |= ARC_FLAG_L2CACHE;
 
 	dbuf_add_ref(db, NULL);
@@ -1674,7 +1653,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, uint32_t flags,
 	zio_flags = (flags & DB_RF_CANFAIL) ?
 	    ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED;
 
-	if ((flags & DB_RF_NO_DECRYPT) && BP_IS_PROTECTED(bpp))
+	if ((flags & DB_RF_NO_DECRYPT) && BP_IS_PROTECTED(bp))
 		zio_flags |= ZIO_FLAG_RAW;
 
 	/*
@@ -1684,7 +1663,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, uint32_t flags,
 	 * an l1 cache hit) we don't acquire the db_mtx while holding the
 	 * parent's rwlock, which would be a lock ordering violation.
 	 */
-	blkptr_t copy = *bpp;
+	blkptr_t copy = *bp;
 	dmu_buf_unlock_parent(db, dblt, tag);
 	return (arc_read(zio, db->db_objset->os_spa, &copy,
 	    dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, zio_flags,
@@ -1856,24 +1835,33 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *pio, uint32_t flags)
 		}
 		mutex_exit(&db->db_mtx);
 	} else {
-		blkptr_t *bp = NULL;
 		ASSERT(db->db_state == DB_UNCACHED ||
 		    db->db_state == DB_NOFILL);
 		db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_READER, FTAG);
+		blkptr_t *bp;
 
 		/*
-		 * If a Direct I/O write has occurred we will use the updated
-		 * block pointer.
+		 * If a block clone or Direct I/O write has occurred we will
+		 * get the dirty records overridden BP so we get the most
+		 * recent data..
 		 */
-		bp = dmu_buf_get_bp_from_dbuf(db);
+		err = dmu_buf_get_bp_from_dbuf(db, &bp);
 
-		if (pio == NULL && (db->db_state == DB_NOFILL ||
-		    (bp != NULL && !BP_IS_HOLE(bp)))) {
-			spa_t *spa = dn->dn_objset->os_spa;
-			pio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);
-			need_wait = B_TRUE;
-		}
-		err = dbuf_read_impl(db, dn, pio, flags, dblt, bp, FTAG);
+		if (!err) {
+			if (pio == NULL && (db->db_state == DB_NOFILL ||
+			    (bp != NULL && !BP_IS_HOLE(bp)))) {
+				spa_t *spa = dn->dn_objset->os_spa;
+				pio =
+				    zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);
+				need_wait = B_TRUE;
+			}
+			err =
+			    dbuf_read_impl(db, dn, pio, flags, dblt, bp, FTAG);
+		} else {
+			mutex_exit(&db->db_mtx);
+			dmu_buf_unlock_parent(db, dblt, FTAG);
+		}
 		/* dbuf_read_impl drops db_mtx and parent's rwlock. */
 		miss = (db->db_state != DB_CACHED);
 	}
@@ -2756,31 +2744,39 @@ dmu_buf_is_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
 
 /*
  * Normally the db_blkptr points to the most recent on-disk content for the
- * dbuf (and anything newer will be cached in the dbuf). However, a recent
- * Direct I/O write could leave newer content on disk and the dbuf uncached.
- * In this case we must return the (as yet unsynced) pointer to the lastest
- * on-disk content.
+ * dbuf (and anything newer will be cached in the dbuf). However, a pending
+ * block clone or not yet synced Direct I/O write will have a dirty record BP
+ * pointing to the most recent data.
  */
-blkptr_t *
-dmu_buf_get_bp_from_dbuf(dmu_buf_impl_t *db)
+int
+dmu_buf_get_bp_from_dbuf(dmu_buf_impl_t *db, blkptr_t **bp)
 {
 	ASSERT(MUTEX_HELD(&db->db_mtx));
+	int error = 0;
 
-	if (db->db_level != 0)
-		return (db->db_blkptr);
+	if (db->db_level != 0) {
+		*bp = db->db_blkptr;
+		return (0);
+	}
 
-	blkptr_t *bp = db->db_blkptr;
-
-	dbuf_dirty_record_t *dr_dio = dbuf_get_dirty_direct(db);
-	if (dr_dio && dr_dio->dt.dl.dr_override_state == DR_OVERRIDDEN &&
-	    dr_dio->dt.dl.dr_data == NULL) {
-		ASSERT(db->db_state == DB_UNCACHED ||
-		    db->db_state == DB_NOFILL);
-		/* We have a Direct I/O write or cloned block, use it's BP */
-		bp = &dr_dio->dt.dl.dr_overridden_by;
+	*bp = db->db_blkptr;
+	dbuf_dirty_record_t *dr = list_head(&db->db_dirty_records);
+	if (dr) {
+		if (db->db_state == DB_NOFILL) {
+			/* Block clone */
+			if (!dr->dt.dl.dr_brtwrite)
+				error = EIO;
+			else
+				*bp = &dr->dt.dl.dr_overridden_by;
+		} else if (dr->dt.dl.dr_override_state == DR_OVERRIDDEN &&
+		    dr->dt.dl.dr_data == NULL) {
+			ASSERT(db->db_state == DB_UNCACHED);
+			/* Direct I/O write */
+			*bp = &dr->dt.dl.dr_overridden_by;
+		}
 	}
 
-	return (bp);
+	return (error);
 }
 
 /*
@@ -152,7 +152,7 @@ dmu_write_direct(zio_t *pio, dmu_buf_impl_t *db, abd_t *data, dmu_tx_t *tx)
 	ASSERT3U(txg, >, spa_last_synced_txg(os->os_spa));
 	ASSERT3U(txg, >, spa_syncing_txg(os->os_spa));
 
-	dr_head = dbuf_get_dirty_direct(db);
+	dr_head = list_head(&db->db_dirty_records);
 	ASSERT3U(dr_head->dr_txg, ==, txg);
 	dr_head->dr_accounted = db->db.db_size;
 
@@ -260,6 +260,7 @@ dmu_read_abd(dnode_t *dn, uint64_t offset, uint64_t size,
 		dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp[i];
 		abd_t *mbuf;
 		zbookmark_phys_t zb;
+		blkptr_t *bp;
 
 		mutex_enter(&db->db_mtx);
 
@@ -273,7 +274,11 @@ dmu_read_abd(dnode_t *dn, uint64_t offset, uint64_t size,
 		while (db->db_state == DB_READ)
 			cv_wait(&db->db_changed, &db->db_mtx);
 
-		blkptr_t *bp = dmu_buf_get_bp_from_dbuf(db);
+		err = dmu_buf_get_bp_from_dbuf(db, &bp);
+		if (err) {
+			mutex_exit(&db->db_mtx);
+			goto error;
+		}
 
 		/*
 		 * There is no need to read if this is a hole or the data is
@@ -310,13 +315,13 @@ dmu_read_abd(dnode_t *dn, uint64_t offset, uint64_t size,
 		/*
 		 * The dbuf mutex (db_mtx) must be held when creating the ZIO
 		 * for the read. The BP returned from
-		 * dmu_buf_get_bp_from_dbuf() could be from a previous Direct
-		 * I/O write that is in the dbuf's dirty record. When
-		 * zio_read() is called, zio_create() will make a copy of the
-		 * BP. However, if zio_read() is called without the mutex
-		 * being held then the dirty record from the dbuf could be
-		 * freed in dbuf_write_done() resulting in garbage being set
-		 * for the zio BP.
+		 * dmu_buf_get_bp_from_dbuf() could be from a pending block
+		 * clone or a yet to be synced Direct I/O write that is in the
+		 * dbuf's dirty record. When zio_read() is called, zio_create()
+		 * will make a copy of the BP. However, if zio_read() is called
+		 * without the mutex being held then the dirty record from the
+		 * dbuf could be freed in dbuf_write_done() resulting in garbage
+		 * being set for the zio BP.
 		 */
 		zio_t *cio = zio_read(rio, spa, bp, mbuf, db->db.db_size,
 		    dmu_read_abd_done, NULL, ZIO_PRIORITY_SYNC_READ,
@@ -330,6 +335,11 @@ dmu_read_abd(dnode_t *dn, uint64_t offset, uint64_t size,
 	dmu_buf_rele_array(dbp, numbufs, FTAG);
 
 	return (zio_wait(rio));
+
+error:
+	dmu_buf_rele_array(dbp, numbufs, FTAG);
+	(void) zio_wait(rio);
+	return (err);
 }
 
 #ifdef _KERNEL
@@ -202,25 +202,26 @@ zfs_access(znode_t *zp, int mode, int flag, cred_t *cr)
 	return (error);
 }
 
-zfs_direct_enabled_t
-zfs_check_direct_enabled(znode_t *zp, int ioflags, int *error)
-{
-	zfs_direct_enabled_t is_direct = ZFS_DIRECT_IO_DISABLED;
+int
+zfs_check_direct_enabled(znode_t *zp, int ioflags, boolean_t *is_direct)
+{;
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	*is_direct = B_FALSE;
+	int error;
 
-	if ((*error = zfs_enter(zfsvfs, FTAG)) != 0)
-		return (ZFS_DIRECT_IO_ERR);
+	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	if (ioflags & O_DIRECT &&
 	    zfsvfs->z_os->os_direct != ZFS_DIRECT_DISABLED) {
-		is_direct = ZFS_DIRECT_IO_ENABLED;
+		*is_direct = B_TRUE;
 	} else if (zfsvfs->z_os->os_direct == ZFS_DIRECT_ALWAYS) {
-		is_direct = ZFS_DIRECT_IO_ENABLED;
+		*is_direct = B_TRUE;
 	}
 
 	zfs_exit(zfsvfs, FTAG);
 
-	return (is_direct);
+	return (0);
 }
 
 /*