mutex: force serialization on mutex_exit() to fix races
It is known that mutexes in Linux are not safe when using them to synchronize the freeing of object in which the mutex is embedded: http://lwn.net/Articles/575477/ The known places in ZFS which are suspected to suffer from the race condition are zio->io_lock and dbuf->db_mtx. * zio uses zio->io_lock and zio->io_cv to synchronize freeing between zio_wait() and zio_done(). * dbuf uses dbuf->db_mtx to protect reference counting. This patch fixes this kind of race by forcing serialization on mutex_exit() with a spin lock, making the mutex safe by sacrificing a bit of performance and memory overhead. This issue most commonly manifests itself as a deadlock in the zio pipeline caused by a process spinning on the damaged mutex. Similar deadlocks have been reported for the dbuf->db_mtx mutex. And it can also cause a NULL dereference or bad paging request under the right circumstances. This issue any many like it are linked off the zfsonlinux/zfs#2523 issue. Specifically this fix resolves at least the following outstanding issues: zfsonlinux/zfs#401 zfsonlinux/zfs#2523 zfsonlinux/zfs#2679 zfsonlinux/zfs#2684 zfsonlinux/zfs#2704 zfsonlinux/zfs#2708 zfsonlinux/zfs#2517 zfsonlinux/zfs#2827 zfsonlinux/zfs#2850 zfsonlinux/zfs#2891 zfsonlinux/zfs#2897 zfsonlinux/zfs#2247 zfsonlinux/zfs#2939 Signed-off-by: Chunwei Chen <tuxoko@gmail.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Richard Yao <ryao@gentoo.org> Closes #421
This commit is contained in:
parent
52479ecf58
commit
a3c1eb7772
|
@ -44,6 +44,7 @@ typedef enum {
|
||||||
*/
|
*/
|
||||||
typedef struct {
|
typedef struct {
|
||||||
struct mutex m;
|
struct mutex m;
|
||||||
|
spinlock_t m_lock; /* used for serializing mutex_exit */
|
||||||
} kmutex_t;
|
} kmutex_t;
|
||||||
|
|
||||||
static inline kthread_t *
|
static inline kthread_t *
|
||||||
|
@ -70,6 +71,7 @@ mutex_owner(kmutex_t *mp)
|
||||||
ASSERT(type == MUTEX_DEFAULT); \
|
ASSERT(type == MUTEX_DEFAULT); \
|
||||||
\
|
\
|
||||||
__mutex_init(&(mp)->m, #mp, &__key); \
|
__mutex_init(&(mp)->m, #mp, &__key); \
|
||||||
|
spin_lock_init(&(mp)->m_lock); \
|
||||||
})
|
})
|
||||||
|
|
||||||
#undef mutex_destroy
|
#undef mutex_destroy
|
||||||
|
@ -84,12 +86,37 @@ mutex_owner(kmutex_t *mp)
|
||||||
ASSERT3P(mutex_owner(mp), !=, current); \
|
ASSERT3P(mutex_owner(mp), !=, current); \
|
||||||
mutex_lock(&(mp)->m); \
|
mutex_lock(&(mp)->m); \
|
||||||
})
|
})
|
||||||
#define mutex_exit(mp) mutex_unlock(&(mp)->m)
|
/*
|
||||||
|
* The reason for the spinlock:
|
||||||
|
*
|
||||||
|
* The Linux mutex is designed with a fast-path/slow-path design such that it
|
||||||
|
* does not guarantee serialization upon itself, allowing a race where latter
|
||||||
|
* acquirers finish mutex_unlock before former ones.
|
||||||
|
*
|
||||||
|
* The race renders it unsafe to be used for serializing the freeing of an
|
||||||
|
* object in which the mutex is embedded, where the latter acquirer could go
|
||||||
|
* on to free the object while the former one is still doing mutex_unlock and
|
||||||
|
* causing memory corruption.
|
||||||
|
*
|
||||||
|
* However, there are many places in ZFS where the mutex is used for
|
||||||
|
* serializing object freeing, and the code is shared among other OSes without
|
||||||
|
* this issue. Thus, we need the spinlock to force the serialization on
|
||||||
|
* mutex_exit().
|
||||||
|
*
|
||||||
|
* See http://lwn.net/Articles/575477/ for the information about the race.
|
||||||
|
*/
|
||||||
|
#define mutex_exit(mp) \
|
||||||
|
({ \
|
||||||
|
spin_lock(&(mp)->m_lock); \
|
||||||
|
mutex_unlock(&(mp)->m); \
|
||||||
|
spin_unlock(&(mp)->m_lock); \
|
||||||
|
})
|
||||||
|
|
||||||
#else /* HAVE_MUTEX_OWNER */
|
#else /* HAVE_MUTEX_OWNER */
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
struct mutex m_mutex;
|
struct mutex m_mutex;
|
||||||
|
spinlock_t m_lock;
|
||||||
kthread_t *m_owner;
|
kthread_t *m_owner;
|
||||||
} kmutex_t;
|
} kmutex_t;
|
||||||
|
|
||||||
|
@ -125,6 +152,7 @@ spl_mutex_clear_owner(kmutex_t *mp)
|
||||||
ASSERT(type == MUTEX_DEFAULT); \
|
ASSERT(type == MUTEX_DEFAULT); \
|
||||||
\
|
\
|
||||||
__mutex_init(MUTEX(mp), #mp, &__key); \
|
__mutex_init(MUTEX(mp), #mp, &__key); \
|
||||||
|
spin_lock_init(&(mp)->m_lock); \
|
||||||
spl_mutex_clear_owner(mp); \
|
spl_mutex_clear_owner(mp); \
|
||||||
})
|
})
|
||||||
|
|
||||||
|
@ -153,8 +181,10 @@ spl_mutex_clear_owner(kmutex_t *mp)
|
||||||
|
|
||||||
#define mutex_exit(mp) \
|
#define mutex_exit(mp) \
|
||||||
({ \
|
({ \
|
||||||
|
spin_lock(&(mp)->m_lock); \
|
||||||
spl_mutex_clear_owner(mp); \
|
spl_mutex_clear_owner(mp); \
|
||||||
mutex_unlock(MUTEX(mp)); \
|
mutex_unlock(MUTEX(mp)); \
|
||||||
|
spin_unlock(&(mp)->m_lock); \
|
||||||
})
|
})
|
||||||
|
|
||||||
#endif /* HAVE_MUTEX_OWNER */
|
#endif /* HAVE_MUTEX_OWNER */
|
||||||
|
|
Loading…
Reference in New Issue