mutex: force serialization on mutex_exit() to fix races

It is known that mutexes in Linux are not safe when using them to
synchronize the freeing of object in which the mutex is embedded:

http://lwn.net/Articles/575477/

The known places in ZFS which are suspected to suffer from the race
condition are zio->io_lock and dbuf->db_mtx.

* zio uses zio->io_lock and zio->io_cv to synchronize freeing
  between zio_wait() and zio_done().
* dbuf uses dbuf->db_mtx to protect reference counting.

This patch fixes this kind of race by forcing serialization on
mutex_exit() with a spin lock, making the mutex safe by sacrificing
a bit of performance and memory overhead.

This issue most commonly manifests itself as a deadlock in the zio
pipeline caused by a process spinning on the damaged mutex.  Similar
deadlocks have been reported for the dbuf->db_mtx mutex.  And it can
also cause a NULL dereference or bad paging request under the right
circumstances.

This issue any many like it are linked off the zfsonlinux/zfs#2523
issue.  Specifically this fix resolves at least the following
outstanding issues:

zfsonlinux/zfs#401
zfsonlinux/zfs#2523
zfsonlinux/zfs#2679
zfsonlinux/zfs#2684
zfsonlinux/zfs#2704
zfsonlinux/zfs#2708
zfsonlinux/zfs#2517
zfsonlinux/zfs#2827
zfsonlinux/zfs#2850
zfsonlinux/zfs#2891
zfsonlinux/zfs#2897
zfsonlinux/zfs#2247
zfsonlinux/zfs#2939

Signed-off-by: Chunwei Chen <tuxoko@gmail.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Richard Yao <ryao@gentoo.org>
Closes #421
This commit is contained in:
Chunwei Chen 2014-12-19 11:31:59 +08:00 committed by Brian Behlendorf
parent 52479ecf58
commit a3c1eb7772
1 changed files with 31 additions and 1 deletions

View File

@ -44,6 +44,7 @@ typedef enum {
*/ */
typedef struct { typedef struct {
struct mutex m; struct mutex m;
spinlock_t m_lock; /* used for serializing mutex_exit */
} kmutex_t; } kmutex_t;
static inline kthread_t * static inline kthread_t *
@ -70,6 +71,7 @@ mutex_owner(kmutex_t *mp)
ASSERT(type == MUTEX_DEFAULT); \ ASSERT(type == MUTEX_DEFAULT); \
\ \
__mutex_init(&(mp)->m, #mp, &__key); \ __mutex_init(&(mp)->m, #mp, &__key); \
spin_lock_init(&(mp)->m_lock); \
}) })
#undef mutex_destroy #undef mutex_destroy
@ -84,12 +86,37 @@ mutex_owner(kmutex_t *mp)
ASSERT3P(mutex_owner(mp), !=, current); \ ASSERT3P(mutex_owner(mp), !=, current); \
mutex_lock(&(mp)->m); \ mutex_lock(&(mp)->m); \
}) })
#define mutex_exit(mp) mutex_unlock(&(mp)->m) /*
* The reason for the spinlock:
*
* The Linux mutex is designed with a fast-path/slow-path design such that it
* does not guarantee serialization upon itself, allowing a race where latter
* acquirers finish mutex_unlock before former ones.
*
* The race renders it unsafe to be used for serializing the freeing of an
* object in which the mutex is embedded, where the latter acquirer could go
* on to free the object while the former one is still doing mutex_unlock and
* causing memory corruption.
*
* However, there are many places in ZFS where the mutex is used for
* serializing object freeing, and the code is shared among other OSes without
* this issue. Thus, we need the spinlock to force the serialization on
* mutex_exit().
*
* See http://lwn.net/Articles/575477/ for the information about the race.
*/
#define mutex_exit(mp) \
({ \
spin_lock(&(mp)->m_lock); \
mutex_unlock(&(mp)->m); \
spin_unlock(&(mp)->m_lock); \
})
#else /* HAVE_MUTEX_OWNER */ #else /* HAVE_MUTEX_OWNER */
typedef struct { typedef struct {
struct mutex m_mutex; struct mutex m_mutex;
spinlock_t m_lock;
kthread_t *m_owner; kthread_t *m_owner;
} kmutex_t; } kmutex_t;
@ -125,6 +152,7 @@ spl_mutex_clear_owner(kmutex_t *mp)
ASSERT(type == MUTEX_DEFAULT); \ ASSERT(type == MUTEX_DEFAULT); \
\ \
__mutex_init(MUTEX(mp), #mp, &__key); \ __mutex_init(MUTEX(mp), #mp, &__key); \
spin_lock_init(&(mp)->m_lock); \
spl_mutex_clear_owner(mp); \ spl_mutex_clear_owner(mp); \
}) })
@ -153,8 +181,10 @@ spl_mutex_clear_owner(kmutex_t *mp)
#define mutex_exit(mp) \ #define mutex_exit(mp) \
({ \ ({ \
spin_lock(&(mp)->m_lock); \
spl_mutex_clear_owner(mp); \ spl_mutex_clear_owner(mp); \
mutex_unlock(MUTEX(mp)); \ mutex_unlock(MUTEX(mp)); \
spin_unlock(&(mp)->m_lock); \
}) })
#endif /* HAVE_MUTEX_OWNER */ #endif /* HAVE_MUTEX_OWNER */