2010-12-22 20:13:57 +00:00
|
|
|
/*
|
|
|
|
* CDDL HEADER START
|
|
|
|
*
|
|
|
|
* The contents of this file are subject to the terms of the
|
|
|
|
* Common Development and Distribution License (the "License").
|
|
|
|
* You may not use this file except in compliance with the License.
|
|
|
|
*
|
|
|
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
|
|
* or http://www.opensolaris.org/os/licensing.
|
|
|
|
* See the License for the specific language governing permissions
|
|
|
|
* and limitations under the License.
|
|
|
|
*
|
|
|
|
* When distributing Covered Code, include this CDDL HEADER in each
|
|
|
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
|
|
* If applicable, add the following below this CDDL HEADER, with the
|
|
|
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
|
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
|
|
*
|
|
|
|
* CDDL HEADER END
|
|
|
|
*/
|
|
|
|
/*
|
|
|
|
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
|
|
|
* Use is subject to license terms.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
|
|
|
|
/* All Rights Reserved */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* University Copyright- Copyright (c) 1982, 1986, 1988
|
|
|
|
* The Regents of the University of California
|
|
|
|
* All Rights Reserved
|
|
|
|
*
|
|
|
|
* University Acknowledgment- Portions of this document are derived from
|
|
|
|
* software developed by the University of California, Berkeley, and its
|
|
|
|
* contributors.
|
|
|
|
*/
|
2015-07-30 14:24:36 +00:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
|
|
|
|
*/
|
2010-12-22 20:13:57 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* The uio support from OpenSolaris has been added as a short term
|
|
|
|
* work around. The hope is to adopt native Linux type and drop the
|
|
|
|
* use of uio's entirely. Under Linux they only add overhead and
|
|
|
|
* when possible we want to use native APIs for the ZPL layer.
|
|
|
|
*/
|
|
|
|
#ifdef _KERNEL
|
|
|
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/uio_impl.h>
|
2018-02-16 01:53:18 +00:00
|
|
|
#include <sys/sysmacros.h>
|
|
|
|
#include <sys/strings.h>
|
2015-07-30 14:24:36 +00:00
|
|
|
#include <linux/kmap_compat.h>
|
deadlock between mm_sem and tx assign in zfs_write() and page fault
The bug time sequence:
1. thread #1, `zfs_write` assign a txg "n".
2. In a same process, thread #2, mmap page fault (which means the
`mm_sem` is hold) occurred, `zfs_dirty_inode` open a txg failed,
and wait previous txg "n" completed.
3. thread #1 call `uiomove` to write, however page fault is occurred
in `uiomove`, which means it need `mm_sem`, but `mm_sem` is hold by
thread #2, so it stuck and can't complete, then txg "n" will
not complete.
So thread #1 and thread #2 are deadlocked.
Reviewed-by: Chunwei Chen <tuxoko@gmail.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Signed-off-by: Grady Wong <grady.w@xtaotech.com>
Closes #7939
2018-10-16 18:11:24 +00:00
|
|
|
#include <linux/uaccess.h>
|
2010-12-22 20:13:57 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Move "n" bytes at byte address "p"; "rw" indicates the direction
|
|
|
|
* of the move, and the I/O parameters are provided in "uio", which is
|
|
|
|
* update to reflect the data which was moved. Returns 0 on success or
|
|
|
|
* a non-zero errno on failure.
|
|
|
|
*/
|
2015-07-30 14:24:36 +00:00
|
|
|
static int
|
|
|
|
uiomove_iov(void *p, size_t n, enum uio_rw rw, struct uio *uio)
|
2010-12-22 20:13:57 +00:00
|
|
|
{
|
2015-07-30 14:24:36 +00:00
|
|
|
const struct iovec *iov = uio->uio_iov;
|
|
|
|
size_t skip = uio->uio_skip;
|
2010-12-22 20:13:57 +00:00
|
|
|
ulong_t cnt;
|
|
|
|
|
|
|
|
while (n && uio->uio_resid) {
|
2015-07-30 14:24:36 +00:00
|
|
|
cnt = MIN(iov->iov_len - skip, n);
|
2010-12-22 20:13:57 +00:00
|
|
|
switch (uio->uio_segflg) {
|
|
|
|
case UIO_USERSPACE:
|
|
|
|
case UIO_USERISPACE:
|
2013-11-01 19:26:11 +00:00
|
|
|
/*
|
|
|
|
* p = kernel data pointer
|
|
|
|
* iov->iov_base = user data pointer
|
|
|
|
*/
|
2010-12-22 20:13:57 +00:00
|
|
|
if (rw == UIO_READ) {
|
2015-07-30 14:24:36 +00:00
|
|
|
if (copy_to_user(iov->iov_base+skip, p, cnt))
|
2013-11-01 19:26:11 +00:00
|
|
|
return (EFAULT);
|
2010-12-22 20:13:57 +00:00
|
|
|
} else {
|
deadlock between mm_sem and tx assign in zfs_write() and page fault
The bug time sequence:
1. thread #1, `zfs_write` assign a txg "n".
2. In a same process, thread #2, mmap page fault (which means the
`mm_sem` is hold) occurred, `zfs_dirty_inode` open a txg failed,
and wait previous txg "n" completed.
3. thread #1 call `uiomove` to write, however page fault is occurred
in `uiomove`, which means it need `mm_sem`, but `mm_sem` is hold by
thread #2, so it stuck and can't complete, then txg "n" will
not complete.
So thread #1 and thread #2 are deadlocked.
Reviewed-by: Chunwei Chen <tuxoko@gmail.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Signed-off-by: Grady Wong <grady.w@xtaotech.com>
Closes #7939
2018-10-16 18:11:24 +00:00
|
|
|
if (uio->uio_fault_disable) {
|
|
|
|
if (!access_ok(VERIFY_READ,
|
|
|
|
(iov->iov_base + skip), cnt)) {
|
|
|
|
return (EFAULT);
|
|
|
|
}
|
|
|
|
|
|
|
|
pagefault_disable();
|
|
|
|
if (__copy_from_user_inatomic(p,
|
|
|
|
(iov->iov_base + skip), cnt)) {
|
|
|
|
pagefault_enable();
|
|
|
|
return (EFAULT);
|
|
|
|
}
|
|
|
|
pagefault_enable();
|
|
|
|
} else {
|
|
|
|
if (copy_from_user(p,
|
|
|
|
(iov->iov_base + skip), cnt))
|
|
|
|
return (EFAULT);
|
|
|
|
}
|
2010-12-22 20:13:57 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case UIO_SYSSPACE:
|
|
|
|
if (rw == UIO_READ)
|
2015-07-30 14:24:36 +00:00
|
|
|
bcopy(p, iov->iov_base + skip, cnt);
|
2010-12-22 20:13:57 +00:00
|
|
|
else
|
2015-07-30 14:24:36 +00:00
|
|
|
bcopy(iov->iov_base + skip, p, cnt);
|
2010-12-22 20:13:57 +00:00
|
|
|
break;
|
2015-07-30 14:24:36 +00:00
|
|
|
default:
|
|
|
|
ASSERT(0);
|
|
|
|
}
|
|
|
|
skip += cnt;
|
|
|
|
if (skip == iov->iov_len) {
|
|
|
|
skip = 0;
|
|
|
|
uio->uio_iov = (++iov);
|
|
|
|
uio->uio_iovcnt--;
|
2010-12-22 20:13:57 +00:00
|
|
|
}
|
2015-07-30 14:24:36 +00:00
|
|
|
uio->uio_skip = skip;
|
2010-12-22 20:13:57 +00:00
|
|
|
uio->uio_resid -= cnt;
|
|
|
|
uio->uio_loffset += cnt;
|
|
|
|
p = (caddr_t)p + cnt;
|
|
|
|
n -= cnt;
|
|
|
|
}
|
|
|
|
return (0);
|
|
|
|
}
|
2015-07-30 14:24:36 +00:00
|
|
|
|
|
|
|
static int
|
|
|
|
uiomove_bvec(void *p, size_t n, enum uio_rw rw, struct uio *uio)
|
|
|
|
{
|
|
|
|
const struct bio_vec *bv = uio->uio_bvec;
|
|
|
|
size_t skip = uio->uio_skip;
|
|
|
|
ulong_t cnt;
|
|
|
|
|
|
|
|
while (n && uio->uio_resid) {
|
|
|
|
void *paddr;
|
|
|
|
cnt = MIN(bv->bv_len - skip, n);
|
|
|
|
|
|
|
|
paddr = zfs_kmap_atomic(bv->bv_page, KM_USER1);
|
|
|
|
if (rw == UIO_READ)
|
|
|
|
bcopy(p, paddr + bv->bv_offset + skip, cnt);
|
|
|
|
else
|
|
|
|
bcopy(paddr + bv->bv_offset + skip, p, cnt);
|
|
|
|
zfs_kunmap_atomic(paddr, KM_USER1);
|
|
|
|
|
|
|
|
skip += cnt;
|
|
|
|
if (skip == bv->bv_len) {
|
|
|
|
skip = 0;
|
|
|
|
uio->uio_bvec = (++bv);
|
|
|
|
uio->uio_iovcnt--;
|
|
|
|
}
|
|
|
|
uio->uio_skip = skip;
|
|
|
|
uio->uio_resid -= cnt;
|
|
|
|
uio->uio_loffset += cnt;
|
|
|
|
p = (caddr_t)p + cnt;
|
|
|
|
n -= cnt;
|
|
|
|
}
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio)
|
|
|
|
{
|
|
|
|
if (uio->uio_segflg != UIO_BVEC)
|
|
|
|
return (uiomove_iov(p, n, rw, uio));
|
|
|
|
else
|
|
|
|
return (uiomove_bvec(p, n, rw, uio));
|
|
|
|
}
|
2010-12-22 20:13:57 +00:00
|
|
|
EXPORT_SYMBOL(uiomove);
|
|
|
|
|
2013-11-01 19:26:11 +00:00
|
|
|
/*
 * Fetch one byte from the user address "uptr" into "*vptr" using
 * get_user(); evaluates to get_user()'s result.  Both arguments are
 * fully parenthesized so that pointer expressions (e.g. "buf + 1") are
 * dereferenced as a whole.
 */
#define	fuword8(uptr, vptr)	get_user((*(vptr)), (uptr))
|
2010-12-22 20:13:57 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Fault in the pages of the first n bytes specified by the uio structure.
|
|
|
|
* 1 byte in each page is touched and the uio struct is unmodified. Any
|
|
|
|
* error will terminate the process as this is only a best attempt to get
|
|
|
|
* the pages resident.
|
|
|
|
*/
|
deadlock between mm_sem and tx assign in zfs_write() and page fault
The bug time sequence:
1. thread #1, `zfs_write` assign a txg "n".
2. In a same process, thread #2, mmap page fault (which means the
`mm_sem` is hold) occurred, `zfs_dirty_inode` open a txg failed,
and wait previous txg "n" completed.
3. thread #1 call `uiomove` to write, however page fault is occurred
in `uiomove`, which means it need `mm_sem`, but `mm_sem` is hold by
thread #2, so it stuck and can't complete, then txg "n" will
not complete.
So thread #1 and thread #2 are deadlocked.
Reviewed-by: Chunwei Chen <tuxoko@gmail.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Signed-off-by: Grady Wong <grady.w@xtaotech.com>
Closes #7939
2018-10-16 18:11:24 +00:00
|
|
|
int
|
2010-12-22 20:13:57 +00:00
|
|
|
uio_prefaultpages(ssize_t n, struct uio *uio)
|
|
|
|
{
|
2015-07-30 14:24:36 +00:00
|
|
|
const struct iovec *iov;
|
2010-12-22 20:13:57 +00:00
|
|
|
ulong_t cnt, incr;
|
|
|
|
caddr_t p;
|
|
|
|
uint8_t tmp;
|
|
|
|
int iovcnt;
|
2015-12-08 20:26:18 +00:00
|
|
|
size_t skip;
|
2015-07-30 14:24:36 +00:00
|
|
|
|
|
|
|
/* no need to fault in kernel pages */
|
|
|
|
switch (uio->uio_segflg) {
|
|
|
|
case UIO_SYSSPACE:
|
|
|
|
case UIO_BVEC:
|
deadlock between mm_sem and tx assign in zfs_write() and page fault
The bug time sequence:
1. thread #1, `zfs_write` assign a txg "n".
2. In a same process, thread #2, mmap page fault (which means the
`mm_sem` is hold) occurred, `zfs_dirty_inode` open a txg failed,
and wait previous txg "n" completed.
3. thread #1 call `uiomove` to write, however page fault is occurred
in `uiomove`, which means it need `mm_sem`, but `mm_sem` is hold by
thread #2, so it stuck and can't complete, then txg "n" will
not complete.
So thread #1 and thread #2 are deadlocked.
Reviewed-by: Chunwei Chen <tuxoko@gmail.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Signed-off-by: Grady Wong <grady.w@xtaotech.com>
Closes #7939
2018-10-16 18:11:24 +00:00
|
|
|
return (0);
|
2015-07-30 14:24:36 +00:00
|
|
|
case UIO_USERSPACE:
|
|
|
|
case UIO_USERISPACE:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
ASSERT(0);
|
|
|
|
}
|
2010-12-22 20:13:57 +00:00
|
|
|
|
|
|
|
iov = uio->uio_iov;
|
|
|
|
iovcnt = uio->uio_iovcnt;
|
2015-12-08 20:26:18 +00:00
|
|
|
skip = uio->uio_skip;
|
2010-12-22 20:13:57 +00:00
|
|
|
|
2015-12-08 20:26:18 +00:00
|
|
|
for (; n > 0 && iovcnt > 0; iov++, iovcnt--, skip = 0) {
|
2015-07-30 14:24:36 +00:00
|
|
|
cnt = MIN(iov->iov_len - skip, n);
|
2015-12-08 20:26:18 +00:00
|
|
|
/* empty iov */
|
|
|
|
if (cnt == 0)
|
|
|
|
continue;
|
2010-12-22 20:13:57 +00:00
|
|
|
n -= cnt;
|
|
|
|
/*
|
|
|
|
* touch each page in this segment.
|
|
|
|
*/
|
2015-07-30 14:24:36 +00:00
|
|
|
p = iov->iov_base + skip;
|
2010-12-22 20:13:57 +00:00
|
|
|
while (cnt) {
|
2016-12-12 18:46:26 +00:00
|
|
|
if (fuword8((uint8_t *)p, &tmp))
|
deadlock between mm_sem and tx assign in zfs_write() and page fault
The bug time sequence:
1. thread #1, `zfs_write` assign a txg "n".
2. In a same process, thread #2, mmap page fault (which means the
`mm_sem` is hold) occurred, `zfs_dirty_inode` open a txg failed,
and wait previous txg "n" completed.
3. thread #1 call `uiomove` to write, however page fault is occurred
in `uiomove`, which means it need `mm_sem`, but `mm_sem` is hold by
thread #2, so it stuck and can't complete, then txg "n" will
not complete.
So thread #1 and thread #2 are deadlocked.
Reviewed-by: Chunwei Chen <tuxoko@gmail.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Signed-off-by: Grady Wong <grady.w@xtaotech.com>
Closes #7939
2018-10-16 18:11:24 +00:00
|
|
|
return (EFAULT);
|
2010-12-22 20:13:57 +00:00
|
|
|
incr = MIN(cnt, PAGESIZE);
|
|
|
|
p += incr;
|
|
|
|
cnt -= incr;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* touch the last byte in case it straddles a page.
|
|
|
|
*/
|
|
|
|
p--;
|
2016-12-12 18:46:26 +00:00
|
|
|
if (fuword8((uint8_t *)p, &tmp))
|
deadlock between mm_sem and tx assign in zfs_write() and page fault
The bug time sequence:
1. thread #1, `zfs_write` assign a txg "n".
2. In a same process, thread #2, mmap page fault (which means the
`mm_sem` is hold) occurred, `zfs_dirty_inode` open a txg failed,
and wait previous txg "n" completed.
3. thread #1 call `uiomove` to write, however page fault is occurred
in `uiomove`, which means it need `mm_sem`, but `mm_sem` is hold by
thread #2, so it stuck and can't complete, then txg "n" will
not complete.
So thread #1 and thread #2 are deadlocked.
Reviewed-by: Chunwei Chen <tuxoko@gmail.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Signed-off-by: Grady Wong <grady.w@xtaotech.com>
Closes #7939
2018-10-16 18:11:24 +00:00
|
|
|
return (EFAULT);
|
2010-12-22 20:13:57 +00:00
|
|
|
}
|
deadlock between mm_sem and tx assign in zfs_write() and page fault
The bug time sequence:
1. thread #1, `zfs_write` assign a txg "n".
2. In a same process, thread #2, mmap page fault (which means the
`mm_sem` is hold) occurred, `zfs_dirty_inode` open a txg failed,
and wait previous txg "n" completed.
3. thread #1 call `uiomove` to write, however page fault is occurred
in `uiomove`, which means it need `mm_sem`, but `mm_sem` is hold by
thread #2, so it stuck and can't complete, then txg "n" will
not complete.
So thread #1 and thread #2 are deadlocked.
Reviewed-by: Chunwei Chen <tuxoko@gmail.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Signed-off-by: Grady Wong <grady.w@xtaotech.com>
Closes #7939
2018-10-16 18:11:24 +00:00
|
|
|
|
|
|
|
return (0);
|
2010-12-22 20:13:57 +00:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(uio_prefaultpages);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* same as uiomove() but doesn't modify uio structure.
|
|
|
|
* return in cbytes how many bytes were copied.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
uiocopy(void *p, size_t n, enum uio_rw rw, struct uio *uio, size_t *cbytes)
|
|
|
|
{
|
2015-07-30 14:24:36 +00:00
|
|
|
struct uio uio_copy;
|
|
|
|
int ret;
|
2010-12-22 20:13:57 +00:00
|
|
|
|
2015-07-30 14:24:36 +00:00
|
|
|
bcopy(uio, &uio_copy, sizeof (struct uio));
|
|
|
|
ret = uiomove(p, n, rw, &uio_copy);
|
|
|
|
*cbytes = uio->uio_resid - uio_copy.uio_resid;
|
|
|
|
return (ret);
|
2010-12-22 20:13:57 +00:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(uiocopy);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Drop the next n chars out of *uiop.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
uioskip(uio_t *uiop, size_t n)
|
|
|
|
{
|
|
|
|
if (n > uiop->uio_resid)
|
|
|
|
return;
|
|
|
|
|
2015-07-30 14:24:36 +00:00
|
|
|
uiop->uio_skip += n;
|
|
|
|
if (uiop->uio_segflg != UIO_BVEC) {
|
2015-09-29 07:02:31 +00:00
|
|
|
while (uiop->uio_iovcnt &&
|
|
|
|
uiop->uio_skip >= uiop->uio_iov->iov_len) {
|
2015-07-30 14:24:36 +00:00
|
|
|
uiop->uio_skip -= uiop->uio_iov->iov_len;
|
2010-12-22 20:13:57 +00:00
|
|
|
uiop->uio_iov++;
|
|
|
|
uiop->uio_iovcnt--;
|
|
|
|
}
|
2015-07-30 14:24:36 +00:00
|
|
|
} else {
|
2015-09-29 07:02:31 +00:00
|
|
|
while (uiop->uio_iovcnt &&
|
|
|
|
uiop->uio_skip >= uiop->uio_bvec->bv_len) {
|
2015-07-30 14:24:36 +00:00
|
|
|
uiop->uio_skip -= uiop->uio_bvec->bv_len;
|
|
|
|
uiop->uio_bvec++;
|
|
|
|
uiop->uio_iovcnt--;
|
|
|
|
}
|
2010-12-22 20:13:57 +00:00
|
|
|
}
|
2015-07-30 14:24:36 +00:00
|
|
|
uiop->uio_loffset += n;
|
|
|
|
uiop->uio_resid -= n;
|
2010-12-22 20:13:57 +00:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(uioskip);
|
|
|
|
#endif /* _KERNEL */
|