Fix zvol_open() lock inversion
When restructuring the zvol_open() logic for the Linux 5.13 kernel a lock inversion was accidentally introduced. In the updated code the spa_namespace_lock is now taken before the zv_suspend_lock allowing the following scenario to occur: down_read <=== waiting for zv_suspend_lock zvol_open <=== holds spa_namespace_lock __blkdev_get blkdev_get_by_dev blkdev_open ... mutex_lock <== waiting for spa_namespace_lock spa_open_common spa_open dsl_pool_hold dmu_objset_hold_flags dmu_objset_hold dsl_prop_get dsl_prop_get_integer zvol_create_minor dmu_recv_end zfs_ioc_recv_impl <=== holds zv_suspend_lock via zvol_suspend() zfs_ioc_recv ... This commit resolves the issue by moving the acquisition of the spa_namespace_lock back to after the zv_suspend_lock which restores the original ordering. Additionally, as part of this change the error exit paths were simplified where possible. Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Rich Ercolani <rincebrain@gmail.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #12863
This commit is contained in:
parent
b327131a1f
commit
a4831fa023
|
@ -461,8 +461,7 @@ zvol_open(struct block_device *bdev, fmode_t flag)
|
||||||
{
|
{
|
||||||
zvol_state_t *zv;
|
zvol_state_t *zv;
|
||||||
int error = 0;
|
int error = 0;
|
||||||
boolean_t drop_suspend = B_TRUE;
|
boolean_t drop_suspend = B_FALSE;
|
||||||
boolean_t drop_namespace = B_FALSE;
|
|
||||||
#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
|
#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
|
||||||
hrtime_t timeout = MSEC2NSEC(zvol_open_timeout_ms);
|
hrtime_t timeout = MSEC2NSEC(zvol_open_timeout_ms);
|
||||||
hrtime_t start = gethrtime();
|
hrtime_t start = gethrtime();
|
||||||
|
@ -482,7 +481,36 @@ retry:
|
||||||
return (SET_ERROR(-ENXIO));
|
return (SET_ERROR(-ENXIO));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (zv->zv_open_count == 0 && !mutex_owned(&spa_namespace_lock)) {
|
mutex_enter(&zv->zv_state_lock);
|
||||||
|
/*
|
||||||
|
* Make sure zvol is not suspended during first open
|
||||||
|
* (hold zv_suspend_lock) and respect proper lock acquisition
|
||||||
|
* ordering - zv_suspend_lock before zv_state_lock
|
||||||
|
*/
|
||||||
|
if (zv->zv_open_count == 0) {
|
||||||
|
if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) {
|
||||||
|
mutex_exit(&zv->zv_state_lock);
|
||||||
|
rw_enter(&zv->zv_suspend_lock, RW_READER);
|
||||||
|
mutex_enter(&zv->zv_state_lock);
|
||||||
|
/* check to see if zv_suspend_lock is needed */
|
||||||
|
if (zv->zv_open_count != 0) {
|
||||||
|
rw_exit(&zv->zv_suspend_lock);
|
||||||
|
} else {
|
||||||
|
drop_suspend = B_TRUE;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
drop_suspend = B_TRUE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rw_exit(&zvol_state_lock);
|
||||||
|
|
||||||
|
ASSERT(MUTEX_HELD(&zv->zv_state_lock));
|
||||||
|
|
||||||
|
if (zv->zv_open_count == 0) {
|
||||||
|
boolean_t drop_namespace = B_FALSE;
|
||||||
|
|
||||||
|
ASSERT(RW_READ_HELD(&zv->zv_suspend_lock));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* In all other call paths the spa_namespace_lock is taken
|
* In all other call paths the spa_namespace_lock is taken
|
||||||
* before the bdev->bd_mutex lock. However, on open(2)
|
* before the bdev->bd_mutex lock. However, on open(2)
|
||||||
|
@ -507,84 +535,51 @@ retry:
|
||||||
* the kernel so the only option is to return the error for
|
* the kernel so the only option is to return the error for
|
||||||
* the caller to handle it.
|
* the caller to handle it.
|
||||||
*/
|
*/
|
||||||
if (!mutex_tryenter(&spa_namespace_lock)) {
|
if (!mutex_owned(&spa_namespace_lock)) {
|
||||||
rw_exit(&zvol_state_lock);
|
if (!mutex_tryenter(&spa_namespace_lock)) {
|
||||||
|
mutex_exit(&zv->zv_state_lock);
|
||||||
|
rw_exit(&zv->zv_suspend_lock);
|
||||||
|
|
||||||
#ifdef HAVE_BLKDEV_GET_ERESTARTSYS
|
#ifdef HAVE_BLKDEV_GET_ERESTARTSYS
|
||||||
schedule();
|
schedule();
|
||||||
return (SET_ERROR(-ERESTARTSYS));
|
|
||||||
#else
|
|
||||||
if ((gethrtime() - start) > timeout)
|
|
||||||
return (SET_ERROR(-ERESTARTSYS));
|
return (SET_ERROR(-ERESTARTSYS));
|
||||||
|
#else
|
||||||
|
if ((gethrtime() - start) > timeout)
|
||||||
|
return (SET_ERROR(-ERESTARTSYS));
|
||||||
|
|
||||||
schedule_timeout(MSEC_TO_TICK(10));
|
schedule_timeout(MSEC_TO_TICK(10));
|
||||||
goto retry;
|
goto retry;
|
||||||
#endif
|
#endif
|
||||||
} else {
|
} else {
|
||||||
drop_namespace = B_TRUE;
|
drop_namespace = B_TRUE;
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
mutex_enter(&zv->zv_state_lock);
|
|
||||||
/*
|
|
||||||
* make sure zvol is not suspended during first open
|
|
||||||
* (hold zv_suspend_lock) and respect proper lock acquisition
|
|
||||||
* ordering - zv_suspend_lock before zv_state_lock
|
|
||||||
*/
|
|
||||||
if (zv->zv_open_count == 0) {
|
|
||||||
if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) {
|
|
||||||
mutex_exit(&zv->zv_state_lock);
|
|
||||||
rw_enter(&zv->zv_suspend_lock, RW_READER);
|
|
||||||
mutex_enter(&zv->zv_state_lock);
|
|
||||||
/* check to see if zv_suspend_lock is needed */
|
|
||||||
if (zv->zv_open_count != 0) {
|
|
||||||
rw_exit(&zv->zv_suspend_lock);
|
|
||||||
drop_suspend = B_FALSE;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
drop_suspend = B_FALSE;
|
|
||||||
}
|
|
||||||
rw_exit(&zvol_state_lock);
|
|
||||||
|
|
||||||
ASSERT(MUTEX_HELD(&zv->zv_state_lock));
|
|
||||||
|
|
||||||
if (zv->zv_open_count == 0) {
|
|
||||||
ASSERT(RW_READ_HELD(&zv->zv_suspend_lock));
|
|
||||||
error = -zvol_first_open(zv, !(flag & FMODE_WRITE));
|
error = -zvol_first_open(zv, !(flag & FMODE_WRITE));
|
||||||
if (error)
|
|
||||||
goto out_mutex;
|
if (drop_namespace)
|
||||||
|
mutex_exit(&spa_namespace_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((flag & FMODE_WRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
|
if (error == 0) {
|
||||||
error = -EROFS;
|
if ((flag & FMODE_WRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
|
||||||
goto out_open_count;
|
if (zv->zv_open_count == 0)
|
||||||
}
|
zvol_last_close(zv);
|
||||||
|
|
||||||
zv->zv_open_count++;
|
error = SET_ERROR(-EROFS);
|
||||||
|
} else {
|
||||||
|
zv->zv_open_count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
mutex_exit(&zv->zv_state_lock);
|
mutex_exit(&zv->zv_state_lock);
|
||||||
if (drop_namespace)
|
|
||||||
mutex_exit(&spa_namespace_lock);
|
|
||||||
if (drop_suspend)
|
if (drop_suspend)
|
||||||
rw_exit(&zv->zv_suspend_lock);
|
rw_exit(&zv->zv_suspend_lock);
|
||||||
|
|
||||||
zfs_check_media_change(bdev);
|
if (error == 0)
|
||||||
|
zfs_check_media_change(bdev);
|
||||||
|
|
||||||
return (0);
|
return (error);
|
||||||
|
|
||||||
out_open_count:
|
|
||||||
if (zv->zv_open_count == 0)
|
|
||||||
zvol_last_close(zv);
|
|
||||||
|
|
||||||
out_mutex:
|
|
||||||
mutex_exit(&zv->zv_state_lock);
|
|
||||||
if (drop_namespace)
|
|
||||||
mutex_exit(&spa_namespace_lock);
|
|
||||||
if (drop_suspend)
|
|
||||||
rw_exit(&zv->zv_suspend_lock);
|
|
||||||
|
|
||||||
return (SET_ERROR(error));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
Loading…
Reference in New Issue