Fix zvol_open() lock inversion
When restructuring the zvol_open() logic for the Linux 5.13 kernel a lock inversion was accidentally introduced. In the updated code the spa_namespace_lock is now taken before the zv_suspend_lock allowing the following scenario to occur: down_read <=== waiting for zv_suspend_lock zvol_open <=== holds spa_namespace_lock __blkdev_get blkdev_get_by_dev blkdev_open ... mutex_lock <== waiting for spa_namespace_lock spa_open_common spa_open dsl_pool_hold dmu_objset_hold_flags dmu_objset_hold dsl_prop_get dsl_prop_get_integer zvol_create_minor dmu_recv_end zfs_ioc_recv_impl <=== holds zv_suspend_lock via zvol_suspend() zfs_ioc_recv ... This commit resolves the issue by moving the acquisition of the spa_namespace_lock back to after the zv_suspend_lock which restores the original ordering. Additionally, as part of this change the error exit paths were simplified where possible. Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Rich Ercolani <rincebrain@gmail.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #12863
This commit is contained in:
parent
4b2bac5fe9
commit
9ec630ff2c
module/os/linux/zfs
|
@ -496,8 +496,7 @@ zvol_open(struct block_device *bdev, fmode_t flag)
|
|||
{
|
||||
zvol_state_t *zv;
|
||||
int error = 0;
|
||||
boolean_t drop_suspend = B_TRUE;
|
||||
boolean_t drop_namespace = B_FALSE;
|
||||
boolean_t drop_suspend = B_FALSE;
|
||||
#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
|
||||
hrtime_t timeout = MSEC2NSEC(zvol_open_timeout_ms);
|
||||
hrtime_t start = gethrtime();
|
||||
|
@ -517,7 +516,36 @@ retry:
|
|||
return (SET_ERROR(-ENXIO));
|
||||
}
|
||||
|
||||
if (zv->zv_open_count == 0 && !mutex_owned(&spa_namespace_lock)) {
|
||||
mutex_enter(&zv->zv_state_lock);
|
||||
/*
|
||||
* Make sure zvol is not suspended during first open
|
||||
* (hold zv_suspend_lock) and respect proper lock acquisition
|
||||
* ordering - zv_suspend_lock before zv_state_lock
|
||||
*/
|
||||
if (zv->zv_open_count == 0) {
|
||||
if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) {
|
||||
mutex_exit(&zv->zv_state_lock);
|
||||
rw_enter(&zv->zv_suspend_lock, RW_READER);
|
||||
mutex_enter(&zv->zv_state_lock);
|
||||
/* check to see if zv_suspend_lock is needed */
|
||||
if (zv->zv_open_count != 0) {
|
||||
rw_exit(&zv->zv_suspend_lock);
|
||||
} else {
|
||||
drop_suspend = B_TRUE;
|
||||
}
|
||||
} else {
|
||||
drop_suspend = B_TRUE;
|
||||
}
|
||||
}
|
||||
rw_exit(&zvol_state_lock);
|
||||
|
||||
ASSERT(MUTEX_HELD(&zv->zv_state_lock));
|
||||
|
||||
if (zv->zv_open_count == 0) {
|
||||
boolean_t drop_namespace = B_FALSE;
|
||||
|
||||
ASSERT(RW_READ_HELD(&zv->zv_suspend_lock));
|
||||
|
||||
/*
|
||||
* In all other call paths the spa_namespace_lock is taken
|
||||
* before the bdev->bd_mutex lock. However, on open(2)
|
||||
|
@ -542,84 +570,51 @@ retry:
|
|||
* the kernel so the only option is to return the error for
|
||||
* the caller to handle it.
|
||||
*/
|
||||
if (!mutex_tryenter(&spa_namespace_lock)) {
|
||||
rw_exit(&zvol_state_lock);
|
||||
if (!mutex_owned(&spa_namespace_lock)) {
|
||||
if (!mutex_tryenter(&spa_namespace_lock)) {
|
||||
mutex_exit(&zv->zv_state_lock);
|
||||
rw_exit(&zv->zv_suspend_lock);
|
||||
|
||||
#ifdef HAVE_BLKDEV_GET_ERESTARTSYS
|
||||
schedule();
|
||||
return (SET_ERROR(-ERESTARTSYS));
|
||||
#else
|
||||
if ((gethrtime() - start) > timeout)
|
||||
schedule();
|
||||
return (SET_ERROR(-ERESTARTSYS));
|
||||
#else
|
||||
if ((gethrtime() - start) > timeout)
|
||||
return (SET_ERROR(-ERESTARTSYS));
|
||||
|
||||
schedule_timeout(MSEC_TO_TICK(10));
|
||||
goto retry;
|
||||
schedule_timeout(MSEC_TO_TICK(10));
|
||||
goto retry;
|
||||
#endif
|
||||
} else {
|
||||
drop_namespace = B_TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
mutex_enter(&zv->zv_state_lock);
|
||||
/*
|
||||
* make sure zvol is not suspended during first open
|
||||
* (hold zv_suspend_lock) and respect proper lock acquisition
|
||||
* ordering - zv_suspend_lock before zv_state_lock
|
||||
*/
|
||||
if (zv->zv_open_count == 0) {
|
||||
if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) {
|
||||
mutex_exit(&zv->zv_state_lock);
|
||||
rw_enter(&zv->zv_suspend_lock, RW_READER);
|
||||
mutex_enter(&zv->zv_state_lock);
|
||||
/* check to see if zv_suspend_lock is needed */
|
||||
if (zv->zv_open_count != 0) {
|
||||
rw_exit(&zv->zv_suspend_lock);
|
||||
drop_suspend = B_FALSE;
|
||||
} else {
|
||||
drop_namespace = B_TRUE;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
drop_suspend = B_FALSE;
|
||||
}
|
||||
rw_exit(&zvol_state_lock);
|
||||
|
||||
ASSERT(MUTEX_HELD(&zv->zv_state_lock));
|
||||
|
||||
if (zv->zv_open_count == 0) {
|
||||
ASSERT(RW_READ_HELD(&zv->zv_suspend_lock));
|
||||
error = -zvol_first_open(zv, !(flag & FMODE_WRITE));
|
||||
if (error)
|
||||
goto out_mutex;
|
||||
|
||||
if (drop_namespace)
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
}
|
||||
|
||||
if ((flag & FMODE_WRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
|
||||
error = -EROFS;
|
||||
goto out_open_count;
|
||||
}
|
||||
if (error == 0) {
|
||||
if ((flag & FMODE_WRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
|
||||
if (zv->zv_open_count == 0)
|
||||
zvol_last_close(zv);
|
||||
|
||||
zv->zv_open_count++;
|
||||
error = SET_ERROR(-EROFS);
|
||||
} else {
|
||||
zv->zv_open_count++;
|
||||
}
|
||||
}
|
||||
|
||||
mutex_exit(&zv->zv_state_lock);
|
||||
if (drop_namespace)
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
if (drop_suspend)
|
||||
rw_exit(&zv->zv_suspend_lock);
|
||||
|
||||
zfs_check_media_change(bdev);
|
||||
if (error == 0)
|
||||
zfs_check_media_change(bdev);
|
||||
|
||||
return (0);
|
||||
|
||||
out_open_count:
|
||||
if (zv->zv_open_count == 0)
|
||||
zvol_last_close(zv);
|
||||
|
||||
out_mutex:
|
||||
mutex_exit(&zv->zv_state_lock);
|
||||
if (drop_namespace)
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
if (drop_suspend)
|
||||
rw_exit(&zv->zv_suspend_lock);
|
||||
|
||||
return (SET_ERROR(error));
|
||||
return (error);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
Loading…
Reference in New Issue