Fix deadlock in 'zfs rollback'
Currently, the 'zfs rollback' code can end up deadlocked due to the way the kernel handles unreferenced inodes on a suspended fs. Essentially, the zfs_resume_fs() code path may cause zfs to spawn new threads as it reinstantiates the suspended fs's zil. When a new thread is spawned, the kernel may attempt to free memory for that thread by freeing some unreferenced inodes. If it happens to select inodes that are a a part of the suspended fs a deadlock will occur because freeing inodes requires holding the fs's z_teardown_inactive_lock which is still held from the suspend. This patch corrects this issue by adding an additional reference to all inodes that are still present when a suspend is initiated. This prevents them from being freed by the kernel for any reason. Reviewed-by: Alek Pinchuk <apinchuk@datto.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Tom Caputi <tcaputi@datto.com> Closes #9203
This commit is contained in:
parent
33374f21f0
commit
95319fc569
|
@ -196,6 +196,7 @@ typedef struct znode {
|
|||
uint8_t z_atime_dirty; /* atime needs to be synced */
|
||||
uint8_t z_zn_prefetch; /* Prefetch znodes? */
|
||||
uint8_t z_moved; /* Has this znode been moved? */
|
||||
boolean_t z_suspended; /* extra ref from a suspend? */
|
||||
uint_t z_blksz; /* block size in bytes */
|
||||
uint_t z_seq; /* modification sequence number */
|
||||
uint64_t z_mapcnt; /* number of pages mapped to file */
|
||||
|
|
|
@ -1736,7 +1736,12 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
|
|||
* will fail with EIO since we have z_teardown_lock for writer (only
|
||||
* relevant for forced unmount).
|
||||
*
|
||||
* Release all holds on dbufs.
|
||||
* Release all holds on dbufs. We also grab an extra reference to all
|
||||
* the remaining inodes so that the kernel does not attempt to free
|
||||
* any inodes of a suspended fs. This can cause deadlocks since the
|
||||
* zfs_resume_fs() process may involve starting threads, which might
|
||||
* attempt to free unreferenced inodes to free up memory for the new
|
||||
* thread.
|
||||
*/
|
||||
if (!unmounting) {
|
||||
mutex_enter(&zfsvfs->z_znodes_lock);
|
||||
|
@ -1744,6 +1749,9 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
|
|||
zp = list_next(&zfsvfs->z_all_znodes, zp)) {
|
||||
if (zp->z_sa_hdl)
|
||||
zfs_znode_dmu_fini(zp);
|
||||
if (igrab(ZTOI(zp)) != NULL)
|
||||
zp->z_suspended = B_TRUE;
|
||||
|
||||
}
|
||||
mutex_exit(&zfsvfs->z_znodes_lock);
|
||||
}
|
||||
|
@ -2192,6 +2200,12 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
|
|||
remove_inode_hash(ZTOI(zp));
|
||||
zp->z_is_stale = B_TRUE;
|
||||
}
|
||||
|
||||
/* see comment in zfs_suspend_fs() */
|
||||
if (zp->z_suspended) {
|
||||
zfs_iput_async(ZTOI(zp));
|
||||
zp->z_suspended = B_FALSE;
|
||||
}
|
||||
}
|
||||
mutex_exit(&zfsvfs->z_znodes_lock);
|
||||
|
||||
|
|
|
@ -540,6 +540,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
|
|||
ASSERT3P(zp->z_acl_cached, ==, NULL);
|
||||
ASSERT3P(zp->z_xattr_cached, ==, NULL);
|
||||
zp->z_moved = 0;
|
||||
zp->z_suspended = B_FALSE;
|
||||
zp->z_sa_hdl = NULL;
|
||||
zp->z_unlinked = 0;
|
||||
zp->z_atime_dirty = 0;
|
||||
|
|
Loading…
Reference in New Issue