Add zfs_object_mutex_size module option

Add a zfs_object_mutex_size module option to facilitate resizing the
per-dataset znode mutex array.  Increasing this value may help
make the deadlock described in #4106 less common, but this is not a
proper fix.  This patch is primarily to aid debugging and analysis.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tim Chase <tim@chase2k.com>
Issue #4106
This commit is contained in:
Brian Behlendorf 2015-12-18 12:19:14 -08:00
parent d21f279a94
commit 0720116d4d
4 changed files with 26 additions and 12 deletions

View File

@ -112,7 +112,7 @@ typedef struct zfs_sb {
uint64_t z_groupquota_obj; uint64_t z_groupquota_obj;
uint64_t z_replay_eof; /* New end of file - replay only */ uint64_t z_replay_eof; /* New end of file - replay only */
sa_attr_type_t *z_attr_table; /* SA attr mapping->id */ sa_attr_type_t *z_attr_table; /* SA attr mapping->id */
#define ZFS_OBJ_MTX_SZ 256 uint64_t z_hold_mtx_size; /* znode hold locks size */
kmutex_t *z_hold_mtx; /* znode hold locks */ kmutex_t *z_hold_mtx; /* znode hold locks */
} zfs_sb_t; } zfs_sb_t;

View File

@ -273,9 +273,15 @@ typedef struct znode {
/* /*
* Macros for dealing with dmu_buf_hold * Macros for dealing with dmu_buf_hold
*/ */
#define ZFS_OBJ_HASH(obj_num) ((obj_num) & (ZFS_OBJ_MTX_SZ - 1)) #define ZFS_OBJ_MTX_SZ 64
#define ZFS_OBJ_MTX_MAX (1024 * 1024)
extern unsigned int zfs_object_mutex_size;
#define ZFS_OBJ_HASH(zsb, obj_num) \
((obj_num) & ((zsb->z_hold_mtx_size) - 1))
#define ZFS_OBJ_MUTEX(zsb, obj_num) \ #define ZFS_OBJ_MUTEX(zsb, obj_num) \
(&(zsb)->z_hold_mtx[ZFS_OBJ_HASH(obj_num)]) (&(zsb)->z_hold_mtx[ZFS_OBJ_HASH(zsb, obj_num)])
#define ZFS_OBJ_HOLD_ENTER(zsb, obj_num) \ #define ZFS_OBJ_HOLD_ENTER(zsb, obj_num) \
mutex_enter(ZFS_OBJ_MUTEX((zsb), (obj_num))) mutex_enter(ZFS_OBJ_MUTEX((zsb), (obj_num)))
#define ZFS_OBJ_HOLD_TRYENTER(zsb, obj_num) \ #define ZFS_OBJ_HOLD_TRYENTER(zsb, obj_num) \

View File

@ -795,9 +795,11 @@ zfs_sb_create(const char *osname, zfs_mntopts_t *zmo, zfs_sb_t **zsbp)
rw_init(&zsb->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); rw_init(&zsb->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
rw_init(&zsb->z_fuid_lock, NULL, RW_DEFAULT, NULL); rw_init(&zsb->z_fuid_lock, NULL, RW_DEFAULT, NULL);
zsb->z_hold_mtx = vmem_zalloc(sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ, zsb->z_hold_mtx_size = MIN(1 << (highbit64(zfs_object_mutex_size) - 1),
ZFS_OBJ_MTX_MAX);
zsb->z_hold_mtx = vmem_zalloc(sizeof (kmutex_t) * zsb->z_hold_mtx_size,
KM_SLEEP); KM_SLEEP);
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) for (i = 0; i != zsb->z_hold_mtx_size; i++)
mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
*zsbp = zsb; *zsbp = zsb;
@ -807,7 +809,7 @@ out:
dmu_objset_disown(os, zsb); dmu_objset_disown(os, zsb);
*zsbp = NULL; *zsbp = NULL;
vmem_free(zsb->z_hold_mtx, sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ); vmem_free(zsb->z_hold_mtx, sizeof (kmutex_t) * zsb->z_hold_mtx_size);
kmem_free(zsb, sizeof (zfs_sb_t)); kmem_free(zsb, sizeof (zfs_sb_t));
return (error); return (error);
} }
@ -909,9 +911,9 @@ zfs_sb_free(zfs_sb_t *zsb)
rrm_destroy(&zsb->z_teardown_lock); rrm_destroy(&zsb->z_teardown_lock);
rw_destroy(&zsb->z_teardown_inactive_lock); rw_destroy(&zsb->z_teardown_inactive_lock);
rw_destroy(&zsb->z_fuid_lock); rw_destroy(&zsb->z_fuid_lock);
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) for (i = 0; i != zsb->z_hold_mtx_size; i++)
mutex_destroy(&zsb->z_hold_mtx[i]); mutex_destroy(&zsb->z_hold_mtx[i]);
vmem_free(zsb->z_hold_mtx, sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ); vmem_free(zsb->z_hold_mtx, sizeof (kmutex_t) * zsb->z_hold_mtx_size);
zfs_mntopts_free(zsb->z_mntopts); zfs_mntopts_free(zsb->z_mntopts);
kmem_free(zsb, sizeof (zfs_sb_t)); kmem_free(zsb, sizeof (zfs_sb_t));
} }

View File

@ -95,6 +95,7 @@
#ifdef _KERNEL #ifdef _KERNEL
static kmem_cache_t *znode_cache = NULL; static kmem_cache_t *znode_cache = NULL;
unsigned int zfs_object_mutex_size = ZFS_OBJ_MTX_SZ;
/*ARGSUSED*/ /*ARGSUSED*/
static int static int
@ -1734,9 +1735,11 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
list_create(&zsb->z_all_znodes, sizeof (znode_t), list_create(&zsb->z_all_znodes, sizeof (znode_t),
offsetof(znode_t, z_link_node)); offsetof(znode_t, z_link_node));
zsb->z_hold_mtx = vmem_zalloc(sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ, zsb->z_hold_mtx_size = MIN(1 << (highbit64(zfs_object_mutex_size) - 1),
ZFS_OBJ_MTX_MAX);
zsb->z_hold_mtx = vmem_zalloc(sizeof (kmutex_t) * zsb->z_hold_mtx_size,
KM_SLEEP); KM_SLEEP);
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) for (i = 0; i != zsb->z_hold_mtx_size; i++)
mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr, VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
@ -1757,10 +1760,10 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
error = zfs_create_share_dir(zsb, tx); error = zfs_create_share_dir(zsb, tx);
ASSERT(error == 0); ASSERT(error == 0);
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) for (i = 0; i != zsb->z_hold_mtx_size; i++)
mutex_destroy(&zsb->z_hold_mtx[i]); mutex_destroy(&zsb->z_hold_mtx[i]);
vmem_free(zsb->z_hold_mtx, sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ); vmem_free(zsb->z_hold_mtx, sizeof (kmutex_t) * zsb->z_hold_mtx_size);
kmem_free(sb, sizeof (struct super_block)); kmem_free(sb, sizeof (struct super_block));
kmem_free(zsb, sizeof (zfs_sb_t)); kmem_free(zsb, sizeof (zfs_sb_t));
} }
@ -2023,4 +2026,7 @@ zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
#if defined(_KERNEL) && defined(HAVE_SPL) #if defined(_KERNEL) && defined(HAVE_SPL)
EXPORT_SYMBOL(zfs_create_fs); EXPORT_SYMBOL(zfs_create_fs);
EXPORT_SYMBOL(zfs_obj_to_path); EXPORT_SYMBOL(zfs_obj_to_path);
module_param(zfs_object_mutex_size, uint, 0644);
MODULE_PARM_DESC(zfs_object_mutex_size, "Size of znode hold array");
#endif #endif