Replace zf_rwlock with a mutex
The rwlock implementation on Linux does not perform as well as mutexes. We can realize a performance benefit by replacing the zf_rwlock with a mutex.

Local microbenchmarks show ~50% improvement, and over NFS we see ~5% improvement on several of the ZFS Performance Tests cases, especially randwrite and seq_write.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Tony Nguyen <tony.nguyen@delphix.com>
Reviewed-by: Olaf Faaland <faaland1@llnl.gov>
Signed-off-by: Matthew Ahrens <mahrens@delphix.com>
Closes #9062
This commit is contained in:
parent
09276fde1c
commit
1ff46825e2
|
@ -56,7 +56,7 @@ typedef struct zstream {
|
||||||
} zstream_t;
|
} zstream_t;
|
||||||
|
|
||||||
typedef struct zfetch {
|
typedef struct zfetch {
|
||||||
krwlock_t zf_rwlock; /* protects zfetch structure */
|
kmutex_t zf_lock; /* protects zfetch structure */
|
||||||
list_t zf_stream; /* list of zstream_t's */
|
list_t zf_stream; /* list of zstream_t's */
|
||||||
struct dnode *zf_dnode; /* dnode that owns this zfetch */
|
struct dnode *zf_dnode; /* dnode that owns this zfetch */
|
||||||
} zfetch_t;
|
} zfetch_t;
|
||||||
|
|
|
@ -110,13 +110,13 @@ dmu_zfetch_init(zfetch_t *zf, dnode_t *dno)
|
||||||
list_create(&zf->zf_stream, sizeof (zstream_t),
|
list_create(&zf->zf_stream, sizeof (zstream_t),
|
||||||
offsetof(zstream_t, zs_node));
|
offsetof(zstream_t, zs_node));
|
||||||
|
|
||||||
rw_init(&zf->zf_rwlock, NULL, RW_DEFAULT, NULL);
|
mutex_init(&zf->zf_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
dmu_zfetch_stream_remove(zfetch_t *zf, zstream_t *zs)
|
dmu_zfetch_stream_remove(zfetch_t *zf, zstream_t *zs)
|
||||||
{
|
{
|
||||||
ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));
|
ASSERT(MUTEX_HELD(&zf->zf_lock));
|
||||||
list_remove(&zf->zf_stream, zs);
|
list_remove(&zf->zf_stream, zs);
|
||||||
mutex_destroy(&zs->zs_lock);
|
mutex_destroy(&zs->zs_lock);
|
||||||
kmem_free(zs, sizeof (*zs));
|
kmem_free(zs, sizeof (*zs));
|
||||||
|
@ -131,14 +131,12 @@ dmu_zfetch_fini(zfetch_t *zf)
|
||||||
{
|
{
|
||||||
zstream_t *zs;
|
zstream_t *zs;
|
||||||
|
|
||||||
ASSERT(!RW_LOCK_HELD(&zf->zf_rwlock));
|
mutex_enter(&zf->zf_lock);
|
||||||
|
|
||||||
rw_enter(&zf->zf_rwlock, RW_WRITER);
|
|
||||||
while ((zs = list_head(&zf->zf_stream)) != NULL)
|
while ((zs = list_head(&zf->zf_stream)) != NULL)
|
||||||
dmu_zfetch_stream_remove(zf, zs);
|
dmu_zfetch_stream_remove(zf, zs);
|
||||||
rw_exit(&zf->zf_rwlock);
|
mutex_exit(&zf->zf_lock);
|
||||||
list_destroy(&zf->zf_stream);
|
list_destroy(&zf->zf_stream);
|
||||||
rw_destroy(&zf->zf_rwlock);
|
mutex_destroy(&zf->zf_lock);
|
||||||
|
|
||||||
zf->zf_dnode = NULL;
|
zf->zf_dnode = NULL;
|
||||||
}
|
}
|
||||||
|
@ -155,7 +153,7 @@ dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid)
|
||||||
zstream_t *zs_next;
|
zstream_t *zs_next;
|
||||||
int numstreams = 0;
|
int numstreams = 0;
|
||||||
|
|
||||||
ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));
|
ASSERT(MUTEX_HELD(&zf->zf_lock));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Clean up old streams.
|
* Clean up old streams.
|
||||||
|
@ -215,7 +213,6 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,
|
||||||
uint64_t end_of_access_blkid;
|
uint64_t end_of_access_blkid;
|
||||||
end_of_access_blkid = blkid + nblks;
|
end_of_access_blkid = blkid + nblks;
|
||||||
spa_t *spa = zf->zf_dnode->dn_objset->os_spa;
|
spa_t *spa = zf->zf_dnode->dn_objset->os_spa;
|
||||||
krw_t rw = RW_READER;
|
|
||||||
|
|
||||||
if (zfs_prefetch_disable)
|
if (zfs_prefetch_disable)
|
||||||
return;
|
return;
|
||||||
|
@ -236,10 +233,9 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,
|
||||||
if (blkid == 0)
|
if (blkid == 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
retry:
|
|
||||||
if (!have_lock)
|
if (!have_lock)
|
||||||
rw_enter(&zf->zf_dnode->dn_struct_rwlock, RW_READER);
|
rw_enter(&zf->zf_dnode->dn_struct_rwlock, RW_READER);
|
||||||
rw_enter(&zf->zf_rwlock, rw);
|
mutex_enter(&zf->zf_lock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Find matching prefetch stream. Depending on whether the accesses
|
* Find matching prefetch stream. Depending on whether the accesses
|
||||||
|
@ -262,7 +258,7 @@ retry:
|
||||||
if (nblks == 0) {
|
if (nblks == 0) {
|
||||||
/* Already prefetched this before. */
|
/* Already prefetched this before. */
|
||||||
mutex_exit(&zs->zs_lock);
|
mutex_exit(&zs->zs_lock);
|
||||||
rw_exit(&zf->zf_rwlock);
|
mutex_exit(&zf->zf_lock);
|
||||||
if (!have_lock) {
|
if (!have_lock) {
|
||||||
rw_exit(&zf->zf_dnode->
|
rw_exit(&zf->zf_dnode->
|
||||||
dn_struct_rwlock);
|
dn_struct_rwlock);
|
||||||
|
@ -281,16 +277,9 @@ retry:
|
||||||
* a new stream for it.
|
* a new stream for it.
|
||||||
*/
|
*/
|
||||||
ZFETCHSTAT_BUMP(zfetchstat_misses);
|
ZFETCHSTAT_BUMP(zfetchstat_misses);
|
||||||
if (rw == RW_READER && !rw_tryupgrade(&zf->zf_rwlock)) {
|
|
||||||
rw_exit(&zf->zf_rwlock);
|
|
||||||
if (!have_lock)
|
|
||||||
rw_exit(&zf->zf_dnode->dn_struct_rwlock);
|
|
||||||
rw = RW_WRITER;
|
|
||||||
goto retry;
|
|
||||||
}
|
|
||||||
|
|
||||||
dmu_zfetch_stream_create(zf, end_of_access_blkid);
|
dmu_zfetch_stream_create(zf, end_of_access_blkid);
|
||||||
rw_exit(&zf->zf_rwlock);
|
mutex_exit(&zf->zf_lock);
|
||||||
if (!have_lock)
|
if (!have_lock)
|
||||||
rw_exit(&zf->zf_dnode->dn_struct_rwlock);
|
rw_exit(&zf->zf_dnode->dn_struct_rwlock);
|
||||||
return;
|
return;
|
||||||
|
@ -356,7 +345,7 @@ retry:
|
||||||
zs->zs_atime = gethrtime();
|
zs->zs_atime = gethrtime();
|
||||||
zs->zs_blkid = end_of_access_blkid;
|
zs->zs_blkid = end_of_access_blkid;
|
||||||
mutex_exit(&zs->zs_lock);
|
mutex_exit(&zs->zs_lock);
|
||||||
rw_exit(&zf->zf_rwlock);
|
mutex_exit(&zf->zf_lock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* dbuf_prefetch() is asynchronous (even when it needs to read
|
* dbuf_prefetch() is asynchronous (even when it needs to read
|
||||||
|
|
|
@ -752,7 +752,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
|
||||||
ASSERT(!RW_LOCK_HELD(&odn->dn_struct_rwlock));
|
ASSERT(!RW_LOCK_HELD(&odn->dn_struct_rwlock));
|
||||||
ASSERT(MUTEX_NOT_HELD(&odn->dn_mtx));
|
ASSERT(MUTEX_NOT_HELD(&odn->dn_mtx));
|
||||||
ASSERT(MUTEX_NOT_HELD(&odn->dn_dbufs_mtx));
|
ASSERT(MUTEX_NOT_HELD(&odn->dn_dbufs_mtx));
|
||||||
ASSERT(!RW_LOCK_HELD(&odn->dn_zfetch.zf_rwlock));
|
ASSERT(!MUTEX_HELD(&odn->dn_zfetch.zf_lock));
|
||||||
|
|
||||||
/* Copy fields. */
|
/* Copy fields. */
|
||||||
ndn->dn_objset = odn->dn_objset;
|
ndn->dn_objset = odn->dn_objset;
|
||||||
|
|
Loading…
Reference in New Issue