Replace zf_rwlock with a mutex

The rwlock implementation on Linux does not perform as well as the mutex
implementation. We can realize a performance benefit by replacing the
zf_rwlock with a mutex.  Local microbenchmarks show ~50% improvement, and
over NFS we see ~5% improvement on several of the ZFS Performance Test
cases, especially randwrite and seq_write.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Tony Nguyen <tony.nguyen@delphix.com>
Reviewed-by: Olaf Faaland <faaland1@llnl.gov>
Signed-off-by: Matthew Ahrens <mahrens@delphix.com>
Closes #9062
commit 1ff46825e2 (parent 09276fde1c)
Matthew Ahrens authored 2019-07-25 11:57:58 -07:00; committed by Brian Behlendorf
3 changed files with 12 additions and 23 deletions
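
The commit does not include the microbenchmark behind the ~50% figure above. The sketch below is a hypothetical userspace approximation of that kind of test: it times uncontended acquire/release cycles of a pthread rwlock's read side against a pthread mutex. The kernel's krwlock_t and kmutex_t are different primitives from pthreads, so treat the output only as an illustration of the rwlock-vs-mutex gap, not as the commit's actual data.

/*
 * Hypothetical sketch, not part of the commit: userspace
 * rwlock-vs-mutex acquire/release microbenchmark.
 *
 * Build: cc -O2 -pthread lock_bench.c -o lock_bench
 */
#include <pthread.h>
#include <stdio.h>
#include <time.h>

#define	ITERS	10000000

static double
elapsed_ns(struct timespec a, struct timespec b)
{
	return ((b.tv_sec - a.tv_sec) * 1e9 + (b.tv_nsec - a.tv_nsec));
}

int
main(void)
{
	pthread_rwlock_t rwl;
	pthread_mutex_t mtx;
	struct timespec t0, t1;
	int i;

	pthread_rwlock_init(&rwl, NULL);
	pthread_mutex_init(&mtx, NULL);

	/* Read-lock and unlock the rwlock ITERS times, uncontended. */
	clock_gettime(CLOCK_MONOTONIC, &t0);
	for (i = 0; i < ITERS; i++) {
		pthread_rwlock_rdlock(&rwl);
		pthread_rwlock_unlock(&rwl);
	}
	clock_gettime(CLOCK_MONOTONIC, &t1);
	printf("rwlock: %.1f ns per acquire/release\n",
	    elapsed_ns(t0, t1) / ITERS);

	/* Same cycle count with a plain mutex. */
	clock_gettime(CLOCK_MONOTONIC, &t0);
	for (i = 0; i < ITERS; i++) {
		pthread_mutex_lock(&mtx);
		pthread_mutex_unlock(&mtx);
	}
	clock_gettime(CLOCK_MONOTONIC, &t1);
	printf("mutex:  %.1f ns per acquire/release\n",
	    elapsed_ns(t0, t1) / ITERS);

	pthread_rwlock_destroy(&rwl);
	pthread_mutex_destroy(&mtx);
	return (0);
}

Even uncontended, a rwlock's read path typically does more bookkeeping (reader counting) than a mutex's single atomic compare-and-swap, which is consistent with the gap the commit message reports.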

diff --git a/include/sys/dmu_zfetch.h b/include/sys/dmu_zfetch.h

@@ -56,7 +56,7 @@ typedef struct zstream {
 } zstream_t;
 
 typedef struct zfetch {
-	krwlock_t	zf_rwlock;	/* protects zfetch structure */
+	kmutex_t	zf_lock;	/* protects zfetch structure */
 	list_t		zf_stream;	/* list of zstream_t's */
 	struct dnode	*zf_dnode;	/* dnode that owns this zfetch */
 } zfetch_t;

diff --git a/module/zfs/dmu_zfetch.c b/module/zfs/dmu_zfetch.c

@@ -110,13 +110,13 @@ dmu_zfetch_init(zfetch_t *zf, dnode_t *dno)
 	list_create(&zf->zf_stream, sizeof (zstream_t),
 	    offsetof(zstream_t, zs_node));
 
-	rw_init(&zf->zf_rwlock, NULL, RW_DEFAULT, NULL);
+	mutex_init(&zf->zf_lock, NULL, MUTEX_DEFAULT, NULL);
 }
 
 static void
 dmu_zfetch_stream_remove(zfetch_t *zf, zstream_t *zs)
 {
-	ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));
+	ASSERT(MUTEX_HELD(&zf->zf_lock));
 	list_remove(&zf->zf_stream, zs);
 	mutex_destroy(&zs->zs_lock);
 	kmem_free(zs, sizeof (*zs));
@@ -131,14 +131,12 @@ dmu_zfetch_fini(zfetch_t *zf)
 {
 	zstream_t *zs;
 
-	ASSERT(!RW_LOCK_HELD(&zf->zf_rwlock));
-
-	rw_enter(&zf->zf_rwlock, RW_WRITER);
+	mutex_enter(&zf->zf_lock);
 	while ((zs = list_head(&zf->zf_stream)) != NULL)
 		dmu_zfetch_stream_remove(zf, zs);
-	rw_exit(&zf->zf_rwlock);
+	mutex_exit(&zf->zf_lock);
 	list_destroy(&zf->zf_stream);
-	rw_destroy(&zf->zf_rwlock);
+	mutex_destroy(&zf->zf_lock);
 
 	zf->zf_dnode = NULL;
 }
@@ -155,7 +153,7 @@ dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid)
 	zstream_t *zs_next;
 	int numstreams = 0;
 
-	ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));
+	ASSERT(MUTEX_HELD(&zf->zf_lock));
 
 	/*
 	 * Clean up old streams.
@@ -215,7 +213,6 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,
 	uint64_t end_of_access_blkid;
 	end_of_access_blkid = blkid + nblks;
 	spa_t *spa = zf->zf_dnode->dn_objset->os_spa;
-	krw_t rw = RW_READER;
 
 	if (zfs_prefetch_disable)
 		return;
@@ -236,10 +233,9 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,
 	if (blkid == 0)
 		return;
 
-retry:
 	if (!have_lock)
 		rw_enter(&zf->zf_dnode->dn_struct_rwlock, RW_READER);
-	rw_enter(&zf->zf_rwlock, rw);
+	mutex_enter(&zf->zf_lock);
 
 	/*
 	 * Find matching prefetch stream. Depending on whether the accesses
@@ -262,7 +258,7 @@ retry:
 		if (nblks == 0) {
 			/* Already prefetched this before. */
 			mutex_exit(&zs->zs_lock);
-			rw_exit(&zf->zf_rwlock);
+			mutex_exit(&zf->zf_lock);
 			if (!have_lock) {
 				rw_exit(&zf->zf_dnode->
 				    dn_struct_rwlock);
@@ -281,16 +277,9 @@ retry:
 	 * a new stream for it.
 	 */
 	ZFETCHSTAT_BUMP(zfetchstat_misses);
-	if (rw == RW_READER && !rw_tryupgrade(&zf->zf_rwlock)) {
-		rw_exit(&zf->zf_rwlock);
-		if (!have_lock)
-			rw_exit(&zf->zf_dnode->dn_struct_rwlock);
-		rw = RW_WRITER;
-		goto retry;
-	}
 
 	dmu_zfetch_stream_create(zf, end_of_access_blkid);
-	rw_exit(&zf->zf_rwlock);
+	mutex_exit(&zf->zf_lock);
 	if (!have_lock)
 		rw_exit(&zf->zf_dnode->dn_struct_rwlock);
 	return;
@@ -356,7 +345,7 @@ retry:
 	zs->zs_atime = gethrtime();
 	zs->zs_blkid = end_of_access_blkid;
 	mutex_exit(&zs->zs_lock);
-	rw_exit(&zf->zf_rwlock);
+	mutex_exit(&zf->zf_lock);
 
 	/*
 	 * dbuf_prefetch() is asynchronous (even when it needs to read

diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c

@@ -752,7 +752,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
 	ASSERT(!RW_LOCK_HELD(&odn->dn_struct_rwlock));
 	ASSERT(MUTEX_NOT_HELD(&odn->dn_mtx));
 	ASSERT(MUTEX_NOT_HELD(&odn->dn_dbufs_mtx));
-	ASSERT(!RW_LOCK_HELD(&odn->dn_zfetch.zf_rwlock));
+	ASSERT(!MUTEX_HELD(&odn->dn_zfetch.zf_lock));
 
 	/* Copy fields. */
 	ndn->dn_objset = odn->dn_objset;