Implement a proper rw_tryupgrade
Current rw_tryupgrade does rw_exit and then rw_tryenter(RW_RWITER), and then does rw_enter(RW_READER) if it fails. This violate the assumption that rw_tryupgrade should be atomic and could cause extra contention or even lock inversion. This patch we implement a proper rw_tryupgrade. For rwsem-spinlock, we take the spinlock to check rwsem->count and rwsem->wait_list. For normal rwsem, we use cmpxchg on rwsem->count to change the value from single reader to single writer. Signed-off-by: Chunwei Chen <david.chen@osnexus.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Tim Chase <tim@chase2k.com> Closes zfsonlinux/zfs#4692 Closes #554
This commit is contained in:
parent
c60a51b640
commit
f58040c0fc
|
@ -39,6 +39,7 @@ AC_DEFUN([SPL_AC_CONFIG_KERNEL], [
|
|||
SPL_AC_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE
|
||||
SPL_AC_SHRINK_CONTROL_STRUCT
|
||||
SPL_AC_RWSEM_SPINLOCK_IS_RAW
|
||||
SPL_AC_RWSEM_ACTIVITY
|
||||
SPL_AC_SCHED_RT_HEADER
|
||||
SPL_AC_2ARGS_VFS_GETATTR
|
||||
SPL_AC_USLEEP_RANGE
|
||||
|
@ -1316,6 +1317,30 @@ AC_DEFUN([SPL_AC_RWSEM_SPINLOCK_IS_RAW], [
|
|||
EXTRA_KCFLAGS="$tmp_flags"
|
||||
])
|
||||
|
||||
dnl #
|
||||
dnl # 3.16 API Change
|
||||
dnl #
|
||||
dnl # rwsem-spinlock "->activity" changed to "->count"
|
||||
dnl #
|
||||
AC_DEFUN([SPL_AC_RWSEM_ACTIVITY], [
|
||||
AC_MSG_CHECKING([whether struct rw_semaphore has member activity])
|
||||
tmp_flags="$EXTRA_KCFLAGS"
|
||||
EXTRA_KCFLAGS="-Werror"
|
||||
SPL_LINUX_TRY_COMPILE([
|
||||
#include <linux/rwsem.h>
|
||||
],[
|
||||
struct rw_semaphore dummy_semaphore __attribute__ ((unused));
|
||||
dummy_semaphore.activity = 0;
|
||||
],[
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_RWSEM_ACTIVITY, 1,
|
||||
[struct rw_semaphore has member activity])
|
||||
],[
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
EXTRA_KCFLAGS="$tmp_flags"
|
||||
])
|
||||
|
||||
dnl #
|
||||
dnl # 3.9 API change,
|
||||
dnl # Moved things from linux/sched.h to linux/sched/rt.h
|
||||
|
|
|
@ -27,6 +27,23 @@
|
|||
|
||||
#include <linux/rwsem.h>
|
||||
|
||||
#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
|
||||
#define SPL_RWSEM_SINGLE_READER_VALUE (1)
|
||||
#define SPL_RWSEM_SINGLE_WRITER_VALUE (-1)
|
||||
#else
|
||||
#define SPL_RWSEM_SINGLE_READER_VALUE (RWSEM_ACTIVE_READ_BIAS)
|
||||
#define SPL_RWSEM_SINGLE_WRITER_VALUE (RWSEM_ACTIVE_WRITE_BIAS)
|
||||
#endif
|
||||
|
||||
/* Linux 3.16 change activity to count for rwsem-spinlock */
|
||||
#ifdef HAVE_RWSEM_ACTIVITY
|
||||
#define RWSEM_COUNT(sem) sem->activity
|
||||
#else
|
||||
#define RWSEM_COUNT(sem) sem->count
|
||||
#endif
|
||||
|
||||
int rwsem_tryupgrade(struct rw_semaphore *rwsem);
|
||||
|
||||
#if defined(RWSEM_SPINLOCK_IS_RAW)
|
||||
#define spl_rwsem_lock_irqsave(lk, fl) raw_spin_lock_irqsave(lk, fl)
|
||||
#define spl_rwsem_unlock_irqrestore(lk, fl) raw_spin_unlock_irqrestore(lk, fl)
|
||||
|
|
|
@ -223,13 +223,10 @@ RW_LOCK_HELD(krwlock_t *rwp)
|
|||
if (RW_WRITE_HELD(rwp)) { \
|
||||
_rc_ = 1; \
|
||||
} else { \
|
||||
rw_exit(rwp); \
|
||||
if (rw_tryenter(rwp, RW_WRITER)) { \
|
||||
_rc_ = 1; \
|
||||
} else { \
|
||||
rw_enter(rwp, RW_READER); \
|
||||
_rc_ = 0; \
|
||||
} \
|
||||
spl_rw_lockdep_off_maybe(rwp); \
|
||||
if ((_rc_ = rwsem_tryupgrade(SEM(rwp)))) \
|
||||
spl_rw_set_owner(rwp); \
|
||||
spl_rw_lockdep_on_maybe(rwp); \
|
||||
} \
|
||||
_rc_; \
|
||||
})
|
||||
|
|
|
@ -32,5 +32,46 @@
|
|||
|
||||
#define DEBUG_SUBSYSTEM S_RWLOCK
|
||||
|
||||
#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
|
||||
static int
|
||||
__rwsem_tryupgrade(struct rw_semaphore *rwsem)
|
||||
{
|
||||
int ret = 0;
|
||||
unsigned long flags;
|
||||
spl_rwsem_lock_irqsave(&rwsem->wait_lock, flags);
|
||||
if (RWSEM_COUNT(rwsem) == SPL_RWSEM_SINGLE_READER_VALUE &&
|
||||
list_empty(&rwsem->wait_list)) {
|
||||
ret = 1;
|
||||
RWSEM_COUNT(rwsem) = SPL_RWSEM_SINGLE_WRITER_VALUE;
|
||||
}
|
||||
spl_rwsem_unlock_irqrestore(&rwsem->wait_lock, flags);
|
||||
return (ret);
|
||||
}
|
||||
#else
|
||||
static int
|
||||
__rwsem_tryupgrade(struct rw_semaphore *rwsem)
|
||||
{
|
||||
typeof (rwsem->count) val;
|
||||
val = cmpxchg(&rwsem->count, SPL_RWSEM_SINGLE_READER_VALUE,
|
||||
SPL_RWSEM_SINGLE_WRITER_VALUE);
|
||||
return (val == SPL_RWSEM_SINGLE_READER_VALUE);
|
||||
}
|
||||
#endif
|
||||
|
||||
int
|
||||
rwsem_tryupgrade(struct rw_semaphore *rwsem)
|
||||
{
|
||||
if (__rwsem_tryupgrade(rwsem)) {
|
||||
rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
|
||||
rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
|
||||
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
|
||||
rwsem->owner = current;
|
||||
#endif
|
||||
return (1);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
EXPORT_SYMBOL(rwsem_tryupgrade);
|
||||
|
||||
int spl_rw_init(void) { return 0; }
|
||||
void spl_rw_fini(void) { }
|
||||
|
|
|
@ -55,8 +55,12 @@
|
|||
#define SPLAT_RWLOCK_TEST5_DESC "Write downgrade"
|
||||
|
||||
#define SPLAT_RWLOCK_TEST6_ID 0x0706
|
||||
#define SPLAT_RWLOCK_TEST6_NAME "rw_tryupgrade"
|
||||
#define SPLAT_RWLOCK_TEST6_DESC "Read upgrade"
|
||||
#define SPLAT_RWLOCK_TEST6_NAME "rw_tryupgrade-1"
|
||||
#define SPLAT_RWLOCK_TEST6_DESC "rwsem->count value"
|
||||
|
||||
#define SPLAT_RWLOCK_TEST7_ID 0x0707
|
||||
#define SPLAT_RWLOCK_TEST7_NAME "rw_tryupgrade-2"
|
||||
#define SPLAT_RWLOCK_TEST7_DESC "Read upgrade"
|
||||
|
||||
#define SPLAT_RWLOCK_TEST_MAGIC 0x115599DDUL
|
||||
#define SPLAT_RWLOCK_TEST_NAME "rwlock_test"
|
||||
|
@ -580,8 +584,55 @@ splat_rwlock_test6(struct file *file, void *arg)
|
|||
splat_init_rw_priv(rwp, file);
|
||||
|
||||
rw_enter(&rwp->rw_rwlock, RW_READER);
|
||||
if (!RW_READ_HELD(&rwp->rw_rwlock)) {
|
||||
if (RWSEM_COUNT(SEM(&rwp->rw_rwlock)) !=
|
||||
SPL_RWSEM_SINGLE_READER_VALUE) {
|
||||
splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME,
|
||||
"We assumed single reader rwsem->count "
|
||||
"should be %ld, but is %ld\n",
|
||||
SPL_RWSEM_SINGLE_READER_VALUE,
|
||||
RWSEM_COUNT(SEM(&rwp->rw_rwlock)));
|
||||
rc = -ENOLCK;
|
||||
goto out;
|
||||
}
|
||||
rw_exit(&rwp->rw_rwlock);
|
||||
|
||||
rw_enter(&rwp->rw_rwlock, RW_WRITER);
|
||||
if (RWSEM_COUNT(SEM(&rwp->rw_rwlock)) !=
|
||||
SPL_RWSEM_SINGLE_WRITER_VALUE) {
|
||||
splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME,
|
||||
"We assumed single writer rwsem->count "
|
||||
"should be %ld, but is %ld\n",
|
||||
SPL_RWSEM_SINGLE_WRITER_VALUE,
|
||||
RWSEM_COUNT(SEM(&rwp->rw_rwlock)));
|
||||
rc = -ENOLCK;
|
||||
goto out;
|
||||
}
|
||||
rc = 0;
|
||||
splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, "%s",
|
||||
"rwsem->count same as we assumed\n");
|
||||
out:
|
||||
rw_exit(&rwp->rw_rwlock);
|
||||
rw_destroy(&rwp->rw_rwlock);
|
||||
kfree(rwp);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int
|
||||
splat_rwlock_test7(struct file *file, void *arg)
|
||||
{
|
||||
rw_priv_t *rwp;
|
||||
int rc;
|
||||
|
||||
rwp = (rw_priv_t *)kmalloc(sizeof(*rwp), GFP_KERNEL);
|
||||
if (rwp == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
splat_init_rw_priv(rwp, file);
|
||||
|
||||
rw_enter(&rwp->rw_rwlock, RW_READER);
|
||||
if (!RW_READ_HELD(&rwp->rw_rwlock)) {
|
||||
splat_vprint(file, SPLAT_RWLOCK_TEST7_NAME,
|
||||
"rwlock should be read lock: %d\n",
|
||||
RW_READ_HELD(&rwp->rw_rwlock));
|
||||
rc = -ENOLCK;
|
||||
|
@ -591,7 +642,7 @@ splat_rwlock_test6(struct file *file, void *arg)
|
|||
/* With one reader upgrade should never fail. */
|
||||
rc = rw_tryupgrade(&rwp->rw_rwlock);
|
||||
if (!rc) {
|
||||
splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME,
|
||||
splat_vprint(file, SPLAT_RWLOCK_TEST7_NAME,
|
||||
"rwlock failed upgrade from reader: %d\n",
|
||||
RW_READ_HELD(&rwp->rw_rwlock));
|
||||
rc = -ENOLCK;
|
||||
|
@ -599,7 +650,7 @@ splat_rwlock_test6(struct file *file, void *arg)
|
|||
}
|
||||
|
||||
if (RW_READ_HELD(&rwp->rw_rwlock) || !RW_WRITE_HELD(&rwp->rw_rwlock)) {
|
||||
splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, "rwlock should "
|
||||
splat_vprint(file, SPLAT_RWLOCK_TEST7_NAME, "rwlock should "
|
||||
"have 0 (not %d) reader and 1 (not %d) writer\n",
|
||||
RW_READ_HELD(&rwp->rw_rwlock),
|
||||
RW_WRITE_HELD(&rwp->rw_rwlock));
|
||||
|
@ -607,7 +658,7 @@ splat_rwlock_test6(struct file *file, void *arg)
|
|||
}
|
||||
|
||||
rc = 0;
|
||||
splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, "%s",
|
||||
splat_vprint(file, SPLAT_RWLOCK_TEST7_NAME, "%s",
|
||||
"rwlock properly upgraded\n");
|
||||
out:
|
||||
rw_exit(&rwp->rw_rwlock);
|
||||
|
@ -646,6 +697,8 @@ splat_rwlock_init(void)
|
|||
SPLAT_RWLOCK_TEST5_ID, splat_rwlock_test5);
|
||||
SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST6_NAME, SPLAT_RWLOCK_TEST6_DESC,
|
||||
SPLAT_RWLOCK_TEST6_ID, splat_rwlock_test6);
|
||||
SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST7_NAME, SPLAT_RWLOCK_TEST7_DESC,
|
||||
SPLAT_RWLOCK_TEST7_ID, splat_rwlock_test7);
|
||||
|
||||
return sub;
|
||||
}
|
||||
|
@ -654,6 +707,7 @@ void
|
|||
splat_rwlock_fini(splat_subsystem_t *sub)
|
||||
{
|
||||
ASSERT(sub);
|
||||
SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST7_ID);
|
||||
SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST6_ID);
|
||||
SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST5_ID);
|
||||
SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST4_ID);
|
||||
|
|
Loading…
Reference in New Issue