From ac09630d8b0bf6c92084a30fdaefd03fd0adbdc1 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Wed, 11 Jul 2018 15:49:10 -0700 Subject: [PATCH] Fix zpl_mount() deadlock Commit 93b43af10 inadvertently introduced the following scenario which can result in a deadlock. This issue was most easily reproduced by LXD containers using a ZFS storage backend but should be reproducible under any workload which is frequently mounting and unmounting. -- THREAD A -- spa_sync() spa_sync_upgrades() rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); <- Waiting on B -- THREAD B -- mount_fs() zpl_mount() zpl_mount_impl() dmu_objset_hold() dmu_objset_hold_flags() dsl_pool_hold() dsl_pool_config_enter() rrw_enter(&dp->dp_config_rwlock, RW_READER, tag); sget() sget_userns() grab_super() down_write(&s->s_umount); <- Waiting on C -- THREAD C -- cleanup_mnt() deactivate_super() down_write(&s->s_umount); deactivate_locked_super() zpl_kill_sb() kill_anon_super() generic_shutdown_super() sync_filesystem() zpl_sync_fs() zfs_sync() zil_commit() txg_wait_synced() <- Waiting on A Reviewed by: Alek Pinchuk Signed-off-by: Brian Behlendorf Closes #7598 Closes #7659 Closes #7691 Closes #7693 --- include/sys/zfs_vfsops.h | 1 + module/zfs/zpl_super.c | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/include/sys/zfs_vfsops.h b/include/sys/zfs_vfsops.h index febfdff97f..31c9c6d7f7 100644 --- a/include/sys/zfs_vfsops.h +++ b/include/sys/zfs_vfsops.h @@ -32,6 +32,7 @@ #include #include #include +#include #include #ifdef __cplusplus diff --git a/module/zfs/zpl_super.c b/module/zfs/zpl_super.c index fc10271b78..5c426b0a9f 100644 --- a/module/zfs/zpl_super.c +++ b/module/zfs/zpl_super.c @@ -271,8 +271,17 @@ zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm) if (err) return (ERR_PTR(-err)); + /* + * The dsl pool lock must be released prior to calling sget(). 
+ * It is possible sget() may block on the lock in grab_super() + * while deactivate_super() holds that same lock and waits for + * a txg sync. If the dsl_pool lock is held over sget() + * this can prevent the pool sync and cause a deadlock. + */ + dsl_pool_rele(dmu_objset_pool(os), FTAG); s = zpl_sget(fs_type, zpl_test_super, set_anon_super, flags, os); - dmu_objset_rele(os, FTAG); + dsl_dataset_rele(dmu_objset_ds(os), FTAG); + if (IS_ERR(s)) return (ERR_CAST(s));