Fix lock order inversion with zvol_open()
zfsonlinux issue #3681 - lock order inversion between zvol_open() and dsl_pool_sync()...zvol_rename_minors() Remove trylock of spa_namespace_lock as it is no longer needed when zvol minor operations are performed in a separate context with no prior locking state; the spa_namespace_lock is no longer held when bdev->bd_mutex or zfs_state_lock might be taken in the code paths originating from the zvol minor operation callbacks. Signed-off-by: Boris Protopopov <boris.protopopov@actifio.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #3681
This commit is contained in:
parent
9d02f9557f
commit
d0337e80ca
|
@ -901,56 +901,27 @@ zvol_first_open(zvol_state_t *zv)
|
|||
{
|
||||
objset_t *os;
|
||||
uint64_t volsize;
|
||||
int locked = 0;
|
||||
int error;
|
||||
uint64_t ro;
|
||||
|
||||
/*
|
||||
* In all other cases the spa_namespace_lock is taken before the
|
||||
* bdev->bd_mutex lock. But in this case the Linux __blkdev_get()
|
||||
* function calls fops->open() with the bdev->bd_mutex lock held.
|
||||
*
|
||||
* To avoid a potential lock inversion deadlock we preemptively
|
||||
* try to take the spa_namespace_lock(). Normally it will not
|
||||
* be contended and this is safe because spa_open_common() handles
|
||||
* the case where the caller already holds the spa_namespace_lock.
|
||||
*
|
||||
* When it is contended we risk a lock inversion if we were to
|
||||
* block waiting for the lock. Luckily, the __blkdev_get()
|
||||
* function allows us to return -ERESTARTSYS which will result in
|
||||
* bdev->bd_mutex being dropped, reacquired, and fops->open() being
|
||||
* called again. This process can be repeated safely until both
|
||||
* locks are acquired.
|
||||
*/
|
||||
if (!mutex_owned(&spa_namespace_lock)) {
|
||||
locked = mutex_tryenter(&spa_namespace_lock);
|
||||
if (!locked)
|
||||
return (-SET_ERROR(ERESTARTSYS));
|
||||
}
|
||||
|
||||
error = dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL);
|
||||
if (error)
|
||||
goto out_mutex;
|
||||
|
||||
/* lie and say we're read-only */
|
||||
error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, zvol_tag, &os);
|
||||
if (error)
|
||||
goto out_mutex;
|
||||
|
||||
error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
|
||||
if (error) {
|
||||
dmu_objset_disown(os, zvol_tag);
|
||||
zv->zv_objset = NULL;
|
||||
goto out_mutex;
|
||||
}
|
||||
return (SET_ERROR(-error));
|
||||
|
||||
zv->zv_objset = os;
|
||||
|
||||
error = dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL);
|
||||
if (error)
|
||||
goto out_owned;
|
||||
|
||||
error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
|
||||
if (error)
|
||||
goto out_owned;
|
||||
|
||||
error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf);
|
||||
if (error) {
|
||||
dmu_objset_disown(os, zvol_tag);
|
||||
zv->zv_objset = NULL;
|
||||
goto out_mutex;
|
||||
}
|
||||
if (error)
|
||||
goto out_owned;
|
||||
|
||||
set_capacity(zv->zv_disk, volsize >> 9);
|
||||
zv->zv_volsize = volsize;
|
||||
|
@ -965,9 +936,11 @@ zvol_first_open(zvol_state_t *zv)
|
|||
zv->zv_flags &= ~ZVOL_RDONLY;
|
||||
}
|
||||
|
||||
out_mutex:
|
||||
if (locked)
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
out_owned:
|
||||
if (error) {
|
||||
dmu_objset_disown(os, zvol_tag);
|
||||
zv->zv_objset = NULL;
|
||||
}
|
||||
|
||||
return (SET_ERROR(-error));
|
||||
}
|
||||
|
@ -1470,6 +1443,8 @@ zvol_create_snap_minor_cb(const char *dsname, void *arg)
|
|||
{
|
||||
const char *name = (const char *)arg;
|
||||
|
||||
ASSERT0(MUTEX_HELD(&spa_namespace_lock));
|
||||
|
||||
/* skip the designated dataset */
|
||||
if (name && strcmp(dsname, name) == 0)
|
||||
return (0);
|
||||
|
@ -1494,6 +1469,8 @@ zvol_create_minors_cb(const char *dsname, void *arg)
|
|||
uint64_t snapdev;
|
||||
int error;
|
||||
|
||||
ASSERT0(MUTEX_HELD(&spa_namespace_lock));
|
||||
|
||||
error = dsl_prop_get_integer(dsname, "snapdev", &snapdev, NULL);
|
||||
if (error)
|
||||
return (0);
|
||||
|
|
Loading…
Reference in New Issue