From 29bc31f62f3021e703513b269d5fa629063df1da Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Wed, 9 Sep 2020 19:15:52 +0200 Subject: [PATCH] FreeBSD: convert teardown inactive lock to a read-mostly sleepable lock The lock is taken all the time and as a regular read-write lock avoidably serves as a mount point-wide contention point. This forward ports FreeBSD revision r357322. To quote aforementioned commit: Sample result doing an incremental -j 40 build: before: 173.30s user 458.97s system 2595% cpu 24.358 total after: 168.58s user 254.92s system 2211% cpu 19.147 total Reviewed-by: Alexander Motin Reviewed-by: Ryan Moeller Signed-off-by: Mateusz Guzik Closes #10896 --- include/os/freebsd/zfs/sys/zfs_vfsops_os.h | 65 +++++++++++++++++++++- module/os/freebsd/zfs/zfs_vfsops.c | 18 +++--- module/os/freebsd/zfs/zfs_vnops.c | 16 +++--- module/os/freebsd/zfs/zfs_znode.c | 2 +- 4 files changed, 82 insertions(+), 19 deletions(-) diff --git a/include/os/freebsd/zfs/sys/zfs_vfsops_os.h b/include/os/freebsd/zfs/sys/zfs_vfsops_os.h index 1b80ee7cb1..e816e39337 100644 --- a/include/os/freebsd/zfs/sys/zfs_vfsops_os.h +++ b/include/os/freebsd/zfs/sys/zfs_vfsops_os.h @@ -27,18 +27,31 @@ #ifndef _SYS_FS_ZFS_VFSOPS_H #define _SYS_FS_ZFS_VFSOPS_H +#if __FreeBSD_version >= 1300109 +#define TEARDOWN_INACTIVE_RMS +#endif + #include #include #include #include #include #include +#ifdef TEARDOWN_INACTIVE_RMS +#include +#endif #include #ifdef __cplusplus extern "C" { #endif +#ifdef TEARDOWN_INACTIVE_RMS +typedef struct rmslock zfs_teardown_lock_t; +#else +#define zfs_teardown_lock_t krwlock_t +#endif + typedef struct zfsvfs zfsvfs_t; struct znode; @@ -67,7 +80,7 @@ struct zfsvfs { boolean_t z_atime; /* enable atimes mount option */ boolean_t z_unmounted; /* unmounted */ rrmlock_t z_teardown_lock; - krwlock_t z_teardown_inactive_lock; + zfs_teardown_lock_t z_teardown_inactive_lock; list_t z_all_znodes; /* all vnodes in the fs */ uint64_t z_nr_znodes; /* number of znodes in the fs */ kmutex_t z_znodes_lock; /* lock for z_all_znodes */ @@ -98,6 +111,56 @@ struct zfsvfs { struct task z_unlinked_drain_task; }; +#ifdef TEARDOWN_INACTIVE_RMS +#define ZFS_INIT_TEARDOWN_INACTIVE(zfsvfs) \ + rms_init(&(zfsvfs)->z_teardown_inactive_lock, "zfs teardown inactive") + +#define ZFS_DESTROY_TEARDOWN_INACTIVE(zfsvfs) \ + rms_destroy(&(zfsvfs)->z_teardown_inactive_lock) + +#define ZFS_TRYRLOCK_TEARDOWN_INACTIVE(zfsvfs) \ + rms_try_rlock(&(zfsvfs)->z_teardown_inactive_lock) + +#define ZFS_RLOCK_TEARDOWN_INACTIVE(zfsvfs) \ + rms_rlock(&(zfsvfs)->z_teardown_inactive_lock) + +#define ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs) \ + rms_runlock(&(zfsvfs)->z_teardown_inactive_lock) + +#define ZFS_WLOCK_TEARDOWN_INACTIVE(zfsvfs) \ + rms_wlock(&(zfsvfs)->z_teardown_inactive_lock) + +#define ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs) \ + rms_wunlock(&(zfsvfs)->z_teardown_inactive_lock) + +#define ZFS_TEARDOWN_INACTIVE_WLOCKED(zfsvfs) \ + rms_wowned(&(zfsvfs)->z_teardown_inactive_lock) +#else +#define ZFS_INIT_TEARDOWN_INACTIVE(zfsvfs) \ + rw_init(&(zfsvfs)->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL) + +#define ZFS_DESTROY_TEARDOWN_INACTIVE(zfsvfs) \ + rw_destroy(&(zfsvfs)->z_teardown_inactive_lock) + +#define ZFS_TRYRLOCK_TEARDOWN_INACTIVE(zfsvfs) \ + rw_tryenter(&(zfsvfs)->z_teardown_inactive_lock, RW_READER) + +#define ZFS_RLOCK_TEARDOWN_INACTIVE(zfsvfs) \ + rw_enter(&(zfsvfs)->z_teardown_inactive_lock, RW_READER) + +#define ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs) \ + rw_exit(&(zfsvfs)->z_teardown_inactive_lock) + +#define ZFS_WLOCK_TEARDOWN_INACTIVE(zfsvfs) \ + rw_enter(&(zfsvfs)->z_teardown_inactive_lock, RW_WRITER) + +#define ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs) \ + rw_exit(&(zfsvfs)->z_teardown_inactive_lock) + +#define ZFS_TEARDOWN_INACTIVE_WLOCKED(zfsvfs) \ + RW_WRITE_HELD(&(zfsvfs)->z_teardown_inactive_lock) +#endif + #define ZSB_XATTR 0x0001 /* Enable user xattrs */ /* * Normal filesystems (those not under .zfs/snapshot) have a total diff --git a/module/os/freebsd/zfs/zfs_vfsops.c b/module/os/freebsd/zfs/zfs_vfsops.c index b6cf0c92b7..77812ca8d4 100644 --- a/module/os/freebsd/zfs/zfs_vfsops.c +++ b/module/os/freebsd/zfs/zfs_vfsops.c @@ -975,7 +975,7 @@ zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os) #else rrm_init(&zfsvfs->z_teardown_lock, B_FALSE); #endif - rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); + ZFS_INIT_TEARDOWN_INACTIVE(zfsvfs); rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++) mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); @@ -1126,7 +1126,7 @@ zfsvfs_free(zfsvfs_t *zfsvfs) ASSERT(zfsvfs->z_nr_znodes == 0); list_destroy(&zfsvfs->z_all_znodes); rrm_destroy(&zfsvfs->z_teardown_lock); - rw_destroy(&zfsvfs->z_teardown_inactive_lock); + ZFS_DESTROY_TEARDOWN_INACTIVE(zfsvfs); rw_destroy(&zfsvfs->z_fuid_lock); for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) mutex_destroy(&zfsvfs->z_hold_mtx[i]); @@ -1545,7 +1545,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) zfsvfs->z_log = NULL; } - rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER); + ZFS_WLOCK_TEARDOWN_INACTIVE(zfsvfs); /* * If we are not unmounting (ie: online recv) and someone already @@ -1553,7 +1553,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) * or a reopen of z_os failed then just bail out now. */ if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { - rw_exit(&zfsvfs->z_teardown_inactive_lock); + ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs); rrm_exit(&zfsvfs->z_teardown_lock, FTAG); return (SET_ERROR(EIO)); } @@ -1581,7 +1581,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) */ if (unmounting) { zfsvfs->z_unmounted = B_TRUE; - rw_exit(&zfsvfs->z_teardown_inactive_lock); + ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs); rrm_exit(&zfsvfs->z_teardown_lock, FTAG); } @@ -1901,7 +1901,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) znode_t *zp; ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock)); - ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); + ASSERT(ZFS_TEARDOWN_INACTIVE_WLOCKED(zfsvfs)); /* * We already own this, so just update the objset_t, as the one we @@ -1939,7 +1939,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) bail: /* release the VOPs */ - rw_exit(&zfsvfs->z_teardown_inactive_lock); + ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs); rrm_exit(&zfsvfs->z_teardown_lock, FTAG); if (err) { @@ -2056,7 +2056,7 @@ int zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) { ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock)); - ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); + ASSERT(ZFS_TEARDOWN_INACTIVE_WLOCKED(zfsvfs)); /* * We already own this, so just hold and rele it to update the @@ -2072,7 +2072,7 @@ zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) zfsvfs->z_os = os; /* release the VOPs */ - rw_exit(&zfsvfs->z_teardown_inactive_lock); + ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs); rrm_exit(&zfsvfs->z_teardown_lock, FTAG); /* diff --git a/module/os/freebsd/zfs/zfs_vnops.c b/module/os/freebsd/zfs/zfs_vnops.c index 2a4acf2158..2dde5b1f9d 100644 --- a/module/os/freebsd/zfs/zfs_vnops.c +++ b/module/os/freebsd/zfs/zfs_vnops.c @@ -4638,13 +4638,13 @@ zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) zfsvfs_t *zfsvfs = zp->z_zfsvfs; int error; - rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); + ZFS_RLOCK_TEARDOWN_INACTIVE(zfsvfs); if (zp->z_sa_hdl == NULL) { /* * The fs has been unmounted, or we did a * suspend/resume and this file no longer exists. */ - rw_exit(&zfsvfs->z_teardown_inactive_lock); + ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs); vrecycle(vp); return; } @@ -4653,7 +4653,7 @@ zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) /* * Fast path to recycle a vnode of a removed file. */ - rw_exit(&zfsvfs->z_teardown_inactive_lock); + ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs); vrecycle(vp); return; } @@ -4673,7 +4673,7 @@ zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) dmu_tx_commit(tx); } } - rw_exit(&zfsvfs->z_teardown_inactive_lock); + ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs); } @@ -5823,10 +5823,10 @@ zfs_freebsd_need_inactive(struct vop_need_inactive_args *ap) if (vn_need_pageq_flush(vp)) return (1); - if (!rw_tryenter(&zfsvfs->z_teardown_inactive_lock, RW_READER)) + if (!ZFS_TRYRLOCK_TEARDOWN_INACTIVE(zfsvfs)) return (1); need = (zp->z_sa_hdl == NULL || zp->z_unlinked || zp->z_atime_dirty); - rw_exit(&zfsvfs->z_teardown_inactive_lock); + ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs); return (need); } @@ -5857,12 +5857,12 @@ zfs_freebsd_reclaim(struct vop_reclaim_args *ap) * zfs_znode_dmu_fini in zfsvfs_teardown during * force unmount. */ - rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); + ZFS_RLOCK_TEARDOWN_INACTIVE(zfsvfs); if (zp->z_sa_hdl == NULL) zfs_znode_free(zp); else zfs_zinactive(zp); - rw_exit(&zfsvfs->z_teardown_inactive_lock); + ZFS_RUNLOCK_TEARDOWN_INACTIVE(zfsvfs); vp->v_data = NULL; return (0); diff --git a/module/os/freebsd/zfs/zfs_znode.c b/module/os/freebsd/zfs/zfs_znode.c index 76e24b1bdf..653f42239d 100644 --- a/module/os/freebsd/zfs/zfs_znode.c +++ b/module/os/freebsd/zfs/zfs_znode.c @@ -384,7 +384,7 @@ zfs_znode_dmu_fini(znode_t *zp) { ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) || zp->z_unlinked || - RW_WRITE_HELD(&zp->z_zfsvfs->z_teardown_inactive_lock)); + ZFS_TEARDOWN_INACTIVE_WLOCKED(zp->z_zfsvfs)); sa_handle_destroy(zp->z_sa_hdl); zp->z_sa_hdl = NULL;