Mark IO pipeline with PF_FSTRANS
In order to avoid deadlocking in the IO pipeline, it is critical that pageout be avoided during direct memory reclaim. This ensures that the pipeline threads can always make forward progress and never end up blocking on a DMU transaction. For this very reason, Linux now provides the PF_FSTRANS flag, which may be set in the process context.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
parent
d958324f97
commit
92119cc259
|
@ -733,6 +733,11 @@ void ksiddomain_rele(ksiddomain_t *);
|
||||||
(void) nanosleep(&ts, NULL); \
|
(void) nanosleep(&ts, NULL); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#endif /* _KERNEL */
|
typedef int fstrans_cookie_t;
|
||||||
|
|
||||||
|
extern fstrans_cookie_t spl_fstrans_mark(void);
|
||||||
|
extern void spl_fstrans_unmark(fstrans_cookie_t);
|
||||||
|
extern int spl_fstrans_check(void);
|
||||||
|
|
||||||
|
#endif /* _KERNEL */
|
||||||
#endif /* _SYS_ZFS_CONTEXT_H */
|
#endif /* _SYS_ZFS_CONTEXT_H */
|
||||||
|
|
|
@ -1275,3 +1275,20 @@ zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
|
||||||
{
|
{
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fstrans_cookie_t
|
||||||
|
spl_fstrans_mark(void)
|
||||||
|
{
|
||||||
|
return ((fstrans_cookie_t) 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
spl_fstrans_unmark(fstrans_cookie_t cookie)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
spl_fstrans_check(void)
|
||||||
|
{
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
|
@ -483,15 +483,7 @@ txg_sync_thread(dsl_pool_t *dp)
|
||||||
vdev_stat_t *vs1, *vs2;
|
vdev_stat_t *vs1, *vs2;
|
||||||
clock_t start, delta;
|
clock_t start, delta;
|
||||||
|
|
||||||
#ifdef _KERNEL
|
(void) spl_fstrans_mark();
|
||||||
/*
|
|
||||||
* Annotate this process with a flag that indicates that it is
|
|
||||||
* unsafe to use KM_SLEEP during memory allocations due to the
|
|
||||||
* potential for a deadlock. KM_PUSHPAGE should be used instead.
|
|
||||||
*/
|
|
||||||
current->flags |= PF_NOFS;
|
|
||||||
#endif /* _KERNEL */
|
|
||||||
|
|
||||||
txg_thread_enter(tx, &cpr);
|
txg_thread_enter(tx, &cpr);
|
||||||
|
|
||||||
vs1 = kmem_alloc(sizeof (vdev_stat_t), KM_PUSHPAGE);
|
vs1 = kmem_alloc(sizeof (vdev_stat_t), KM_PUSHPAGE);
|
||||||
|
|
|
@ -161,6 +161,17 @@ vdev_file_io_strategy(void *arg)
|
||||||
zio_interrupt(zio);
|
zio_interrupt(zio);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
vdev_file_io_fsync(void *arg)
|
||||||
|
{
|
||||||
|
zio_t *zio = (zio_t *)arg;
|
||||||
|
vdev_file_t *vf = zio->io_vd->vdev_tsd;
|
||||||
|
|
||||||
|
zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC, kcred, NULL);
|
||||||
|
|
||||||
|
zio_interrupt(zio);
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
vdev_file_io_start(zio_t *zio)
|
vdev_file_io_start(zio_t *zio)
|
||||||
{
|
{
|
||||||
|
@ -180,6 +191,19 @@ vdev_file_io_start(zio_t *zio)
|
||||||
if (zfs_nocacheflush)
|
if (zfs_nocacheflush)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We cannot safely call vfs_fsync() when PF_FSTRANS
|
||||||
|
* is set in the current context. Filesystems like
|
||||||
|
* XFS include sanity checks to verify it is not
|
||||||
|
* already set, see xfs_vm_writepage(). Therefore
|
||||||
|
* the sync must be dispatched to a different context.
|
||||||
|
*/
|
||||||
|
if (spl_fstrans_check()) {
|
||||||
|
VERIFY3U(taskq_dispatch(vdev_file_taskq,
|
||||||
|
vdev_file_io_fsync, zio, TQ_SLEEP), !=, 0);
|
||||||
|
return (ZIO_PIPELINE_STOP);
|
||||||
|
}
|
||||||
|
|
||||||
zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC,
|
zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC,
|
||||||
kcred, NULL);
|
kcred, NULL);
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -1361,7 +1361,11 @@ static zio_pipe_stage_t *zio_pipeline[];
|
||||||
void
|
void
|
||||||
zio_execute(zio_t *zio)
|
zio_execute(zio_t *zio)
|
||||||
{
|
{
|
||||||
|
fstrans_cookie_t cookie;
|
||||||
|
|
||||||
|
cookie = spl_fstrans_mark();
|
||||||
__zio_execute(zio);
|
__zio_execute(zio);
|
||||||
|
spl_fstrans_unmark(cookie);
|
||||||
}
|
}
|
||||||
|
|
||||||
__attribute__((always_inline))
|
__attribute__((always_inline))
|
||||||
|
|
|
@ -481,19 +481,14 @@ int
|
||||||
zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
|
zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
|
||||||
{
|
{
|
||||||
struct address_space *mapping = data;
|
struct address_space *mapping = data;
|
||||||
|
fstrans_cookie_t cookie;
|
||||||
|
|
||||||
ASSERT(PageLocked(pp));
|
ASSERT(PageLocked(pp));
|
||||||
ASSERT(!PageWriteback(pp));
|
ASSERT(!PageWriteback(pp));
|
||||||
ASSERT(!(current->flags & PF_NOFS));
|
|
||||||
|
|
||||||
/*
|
cookie = spl_fstrans_mark();
|
||||||
* Annotate this call path with a flag that indicates that it is
|
|
||||||
* unsafe to use KM_SLEEP during memory allocations due to the
|
|
||||||
* potential for a deadlock. KM_PUSHPAGE should be used instead.
|
|
||||||
*/
|
|
||||||
current->flags |= PF_NOFS;
|
|
||||||
(void) zfs_putpage(mapping->host, pp, wbc);
|
(void) zfs_putpage(mapping->host, pp, wbc);
|
||||||
current->flags &= ~PF_NOFS;
|
spl_fstrans_unmark(cookie);
|
||||||
|
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
|
@ -577,20 +577,13 @@ zvol_write(void *arg)
|
||||||
struct request *req = (struct request *)arg;
|
struct request *req = (struct request *)arg;
|
||||||
struct request_queue *q = req->q;
|
struct request_queue *q = req->q;
|
||||||
zvol_state_t *zv = q->queuedata;
|
zvol_state_t *zv = q->queuedata;
|
||||||
|
fstrans_cookie_t cookie = spl_fstrans_mark();
|
||||||
uint64_t offset = blk_rq_pos(req) << 9;
|
uint64_t offset = blk_rq_pos(req) << 9;
|
||||||
uint64_t size = blk_rq_bytes(req);
|
uint64_t size = blk_rq_bytes(req);
|
||||||
int error = 0;
|
int error = 0;
|
||||||
dmu_tx_t *tx;
|
dmu_tx_t *tx;
|
||||||
rl_t *rl;
|
rl_t *rl;
|
||||||
|
|
||||||
/*
|
|
||||||
* Annotate this call path with a flag that indicates that it is
|
|
||||||
* unsafe to use KM_SLEEP during memory allocations due to the
|
|
||||||
* potential for a deadlock. KM_PUSHPAGE should be used instead.
|
|
||||||
*/
|
|
||||||
ASSERT(!(current->flags & PF_NOFS));
|
|
||||||
current->flags |= PF_NOFS;
|
|
||||||
|
|
||||||
if (req->cmd_flags & VDEV_REQ_FLUSH)
|
if (req->cmd_flags & VDEV_REQ_FLUSH)
|
||||||
zil_commit(zv->zv_zilog, ZVOL_OBJ);
|
zil_commit(zv->zv_zilog, ZVOL_OBJ);
|
||||||
|
|
||||||
|
@ -598,7 +591,7 @@ zvol_write(void *arg)
|
||||||
* Some requests are just for flush and nothing else.
|
* Some requests are just for flush and nothing else.
|
||||||
*/
|
*/
|
||||||
if (size == 0) {
|
if (size == 0) {
|
||||||
blk_end_request(req, 0, size);
|
error = 0;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -612,7 +605,6 @@ zvol_write(void *arg)
|
||||||
if (error) {
|
if (error) {
|
||||||
dmu_tx_abort(tx);
|
dmu_tx_abort(tx);
|
||||||
zfs_range_unlock(rl);
|
zfs_range_unlock(rl);
|
||||||
blk_end_request(req, -error, size);
|
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -628,9 +620,9 @@ zvol_write(void *arg)
|
||||||
zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)
|
zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)
|
||||||
zil_commit(zv->zv_zilog, ZVOL_OBJ);
|
zil_commit(zv->zv_zilog, ZVOL_OBJ);
|
||||||
|
|
||||||
blk_end_request(req, -error, size);
|
|
||||||
out:
|
out:
|
||||||
current->flags &= ~PF_NOFS;
|
blk_end_request(req, -error, size);
|
||||||
|
spl_fstrans_unmark(cookie);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_BLK_QUEUE_DISCARD
|
#ifdef HAVE_BLK_QUEUE_DISCARD
|
||||||
|
@ -640,21 +632,14 @@ zvol_discard(void *arg)
|
||||||
struct request *req = (struct request *)arg;
|
struct request *req = (struct request *)arg;
|
||||||
struct request_queue *q = req->q;
|
struct request_queue *q = req->q;
|
||||||
zvol_state_t *zv = q->queuedata;
|
zvol_state_t *zv = q->queuedata;
|
||||||
|
fstrans_cookie_t cookie = spl_fstrans_mark();
|
||||||
uint64_t start = blk_rq_pos(req) << 9;
|
uint64_t start = blk_rq_pos(req) << 9;
|
||||||
uint64_t end = start + blk_rq_bytes(req);
|
uint64_t end = start + blk_rq_bytes(req);
|
||||||
int error;
|
int error;
|
||||||
rl_t *rl;
|
rl_t *rl;
|
||||||
|
|
||||||
/*
|
|
||||||
* Annotate this call path with a flag that indicates that it is
|
|
||||||
* unsafe to use KM_SLEEP during memory allocations due to the
|
|
||||||
* potential for a deadlock. KM_PUSHPAGE should be used instead.
|
|
||||||
*/
|
|
||||||
ASSERT(!(current->flags & PF_NOFS));
|
|
||||||
current->flags |= PF_NOFS;
|
|
||||||
|
|
||||||
if (end > zv->zv_volsize) {
|
if (end > zv->zv_volsize) {
|
||||||
blk_end_request(req, -EIO, blk_rq_bytes(req));
|
error = EIO;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -668,7 +653,7 @@ zvol_discard(void *arg)
|
||||||
end = P2ALIGN(end, zv->zv_volblocksize);
|
end = P2ALIGN(end, zv->zv_volblocksize);
|
||||||
|
|
||||||
if (start >= end) {
|
if (start >= end) {
|
||||||
blk_end_request(req, 0, blk_rq_bytes(req));
|
error = 0;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -681,10 +666,9 @@ zvol_discard(void *arg)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
zfs_range_unlock(rl);
|
zfs_range_unlock(rl);
|
||||||
|
|
||||||
blk_end_request(req, -error, blk_rq_bytes(req));
|
|
||||||
out:
|
out:
|
||||||
current->flags &= ~PF_NOFS;
|
blk_end_request(req, -error, blk_rq_bytes(req));
|
||||||
|
spl_fstrans_unmark(cookie);
|
||||||
}
|
}
|
||||||
#endif /* HAVE_BLK_QUEUE_DISCARD */
|
#endif /* HAVE_BLK_QUEUE_DISCARD */
|
||||||
|
|
||||||
|
@ -700,14 +684,15 @@ zvol_read(void *arg)
|
||||||
struct request *req = (struct request *)arg;
|
struct request *req = (struct request *)arg;
|
||||||
struct request_queue *q = req->q;
|
struct request_queue *q = req->q;
|
||||||
zvol_state_t *zv = q->queuedata;
|
zvol_state_t *zv = q->queuedata;
|
||||||
|
fstrans_cookie_t cookie = spl_fstrans_mark();
|
||||||
uint64_t offset = blk_rq_pos(req) << 9;
|
uint64_t offset = blk_rq_pos(req) << 9;
|
||||||
uint64_t size = blk_rq_bytes(req);
|
uint64_t size = blk_rq_bytes(req);
|
||||||
int error;
|
int error;
|
||||||
rl_t *rl;
|
rl_t *rl;
|
||||||
|
|
||||||
if (size == 0) {
|
if (size == 0) {
|
||||||
blk_end_request(req, 0, size);
|
error = 0;
|
||||||
return;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER);
|
rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER);
|
||||||
|
@ -720,7 +705,9 @@ zvol_read(void *arg)
|
||||||
if (error == ECKSUM)
|
if (error == ECKSUM)
|
||||||
error = SET_ERROR(EIO);
|
error = SET_ERROR(EIO);
|
||||||
|
|
||||||
|
out:
|
||||||
blk_end_request(req, -error, size);
|
blk_end_request(req, -error, size);
|
||||||
|
spl_fstrans_unmark(cookie);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
Loading…
Reference in New Issue