Revert "Reduce dbuf_find() lock contention"
This reverts commit 34dbc618f5. While this
change resolved the lock contention observed for certain workloads, it
inadvertently reduced the maximum hash inserts/removes per second. This
appears to be due to the slightly higher acquisition cost of a rwlock vs
a mutex.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
parent
b66f8d3c2b
commit
91e02156dd
|
@ -321,12 +321,13 @@ typedef struct dmu_buf_impl {
|
||||||
uint8_t db_dirtycnt;
|
uint8_t db_dirtycnt;
|
||||||
} dmu_buf_impl_t;
|
} dmu_buf_impl_t;
|
||||||
|
|
||||||
#define DBUF_RWLOCKS 8192
|
/* Note: the dbuf hash table is exposed only for the mdb module */
|
||||||
#define DBUF_HASH_RWLOCK(h, idx) (&(h)->hash_rwlocks[(idx) & (DBUF_RWLOCKS-1)])
|
#define DBUF_MUTEXES 2048
|
||||||
|
#define DBUF_HASH_MUTEX(h, idx) (&(h)->hash_mutexes[(idx) & (DBUF_MUTEXES-1)])
|
||||||
typedef struct dbuf_hash_table {
|
typedef struct dbuf_hash_table {
|
||||||
uint64_t hash_table_mask;
|
uint64_t hash_table_mask;
|
||||||
dmu_buf_impl_t **hash_table;
|
dmu_buf_impl_t **hash_table;
|
||||||
krwlock_t hash_rwlocks[DBUF_RWLOCKS] ____cacheline_aligned;
|
kmutex_t hash_mutexes[DBUF_MUTEXES] ____cacheline_aligned;
|
||||||
} dbuf_hash_table_t;
|
} dbuf_hash_table_t;
|
||||||
|
|
||||||
typedef void (*dbuf_prefetch_fn)(void *, uint64_t, uint64_t, boolean_t);
|
typedef void (*dbuf_prefetch_fn)(void *, uint64_t, uint64_t, boolean_t);
|
||||||
|
|
|
@ -339,18 +339,18 @@ dbuf_find(objset_t *os, uint64_t obj, uint8_t level, uint64_t blkid)
|
||||||
hv = dbuf_hash(os, obj, level, blkid);
|
hv = dbuf_hash(os, obj, level, blkid);
|
||||||
idx = hv & h->hash_table_mask;
|
idx = hv & h->hash_table_mask;
|
||||||
|
|
||||||
rw_enter(DBUF_HASH_RWLOCK(h, idx), RW_READER);
|
mutex_enter(DBUF_HASH_MUTEX(h, idx));
|
||||||
for (db = h->hash_table[idx]; db != NULL; db = db->db_hash_next) {
|
for (db = h->hash_table[idx]; db != NULL; db = db->db_hash_next) {
|
||||||
if (DBUF_EQUAL(db, os, obj, level, blkid)) {
|
if (DBUF_EQUAL(db, os, obj, level, blkid)) {
|
||||||
mutex_enter(&db->db_mtx);
|
mutex_enter(&db->db_mtx);
|
||||||
if (db->db_state != DB_EVICTING) {
|
if (db->db_state != DB_EVICTING) {
|
||||||
rw_exit(DBUF_HASH_RWLOCK(h, idx));
|
mutex_exit(DBUF_HASH_MUTEX(h, idx));
|
||||||
return (db);
|
return (db);
|
||||||
}
|
}
|
||||||
mutex_exit(&db->db_mtx);
|
mutex_exit(&db->db_mtx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
rw_exit(DBUF_HASH_RWLOCK(h, idx));
|
mutex_exit(DBUF_HASH_MUTEX(h, idx));
|
||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -393,13 +393,13 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
|
||||||
hv = dbuf_hash(os, obj, level, blkid);
|
hv = dbuf_hash(os, obj, level, blkid);
|
||||||
idx = hv & h->hash_table_mask;
|
idx = hv & h->hash_table_mask;
|
||||||
|
|
||||||
rw_enter(DBUF_HASH_RWLOCK(h, idx), RW_WRITER);
|
mutex_enter(DBUF_HASH_MUTEX(h, idx));
|
||||||
for (dbf = h->hash_table[idx], i = 0; dbf != NULL;
|
for (dbf = h->hash_table[idx], i = 0; dbf != NULL;
|
||||||
dbf = dbf->db_hash_next, i++) {
|
dbf = dbf->db_hash_next, i++) {
|
||||||
if (DBUF_EQUAL(dbf, os, obj, level, blkid)) {
|
if (DBUF_EQUAL(dbf, os, obj, level, blkid)) {
|
||||||
mutex_enter(&dbf->db_mtx);
|
mutex_enter(&dbf->db_mtx);
|
||||||
if (dbf->db_state != DB_EVICTING) {
|
if (dbf->db_state != DB_EVICTING) {
|
||||||
rw_exit(DBUF_HASH_RWLOCK(h, idx));
|
mutex_exit(DBUF_HASH_MUTEX(h, idx));
|
||||||
return (dbf);
|
return (dbf);
|
||||||
}
|
}
|
||||||
mutex_exit(&dbf->db_mtx);
|
mutex_exit(&dbf->db_mtx);
|
||||||
|
@ -417,7 +417,7 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
|
||||||
mutex_enter(&db->db_mtx);
|
mutex_enter(&db->db_mtx);
|
||||||
db->db_hash_next = h->hash_table[idx];
|
db->db_hash_next = h->hash_table[idx];
|
||||||
h->hash_table[idx] = db;
|
h->hash_table[idx] = db;
|
||||||
rw_exit(DBUF_HASH_RWLOCK(h, idx));
|
mutex_exit(DBUF_HASH_MUTEX(h, idx));
|
||||||
uint64_t he = atomic_inc_64_nv(&dbuf_stats.hash_elements.value.ui64);
|
uint64_t he = atomic_inc_64_nv(&dbuf_stats.hash_elements.value.ui64);
|
||||||
DBUF_STAT_MAX(hash_elements_max, he);
|
DBUF_STAT_MAX(hash_elements_max, he);
|
||||||
|
|
||||||
|
@ -474,13 +474,13 @@ dbuf_hash_remove(dmu_buf_impl_t *db)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We mustn't hold db_mtx to maintain lock ordering:
|
* We mustn't hold db_mtx to maintain lock ordering:
|
||||||
* DBUF_HASH_RWLOCK > db_mtx.
|
* DBUF_HASH_MUTEX > db_mtx.
|
||||||
*/
|
*/
|
||||||
ASSERT(zfs_refcount_is_zero(&db->db_holds));
|
ASSERT(zfs_refcount_is_zero(&db->db_holds));
|
||||||
ASSERT(db->db_state == DB_EVICTING);
|
ASSERT(db->db_state == DB_EVICTING);
|
||||||
ASSERT(!MUTEX_HELD(&db->db_mtx));
|
ASSERT(!MUTEX_HELD(&db->db_mtx));
|
||||||
|
|
||||||
rw_enter(DBUF_HASH_RWLOCK(h, idx), RW_WRITER);
|
mutex_enter(DBUF_HASH_MUTEX(h, idx));
|
||||||
dbp = &h->hash_table[idx];
|
dbp = &h->hash_table[idx];
|
||||||
while ((dbf = *dbp) != db) {
|
while ((dbf = *dbp) != db) {
|
||||||
dbp = &dbf->db_hash_next;
|
dbp = &dbf->db_hash_next;
|
||||||
|
@ -491,7 +491,7 @@ dbuf_hash_remove(dmu_buf_impl_t *db)
|
||||||
if (h->hash_table[idx] &&
|
if (h->hash_table[idx] &&
|
||||||
h->hash_table[idx]->db_hash_next == NULL)
|
h->hash_table[idx]->db_hash_next == NULL)
|
||||||
DBUF_STAT_BUMPDOWN(hash_chains);
|
DBUF_STAT_BUMPDOWN(hash_chains);
|
||||||
rw_exit(DBUF_HASH_RWLOCK(h, idx));
|
mutex_exit(DBUF_HASH_MUTEX(h, idx));
|
||||||
atomic_dec_64(&dbuf_stats.hash_elements.value.ui64);
|
atomic_dec_64(&dbuf_stats.hash_elements.value.ui64);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -914,8 +914,8 @@ retry:
|
||||||
sizeof (dmu_buf_impl_t),
|
sizeof (dmu_buf_impl_t),
|
||||||
0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0);
|
0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0);
|
||||||
|
|
||||||
for (i = 0; i < DBUF_RWLOCKS; i++)
|
for (i = 0; i < DBUF_MUTEXES; i++)
|
||||||
rw_init(&h->hash_rwlocks[i], NULL, RW_DEFAULT, NULL);
|
mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL);
|
||||||
|
|
||||||
dbuf_stats_init(h);
|
dbuf_stats_init(h);
|
||||||
|
|
||||||
|
@ -981,8 +981,8 @@ dbuf_fini(void)
|
||||||
|
|
||||||
dbuf_stats_destroy();
|
dbuf_stats_destroy();
|
||||||
|
|
||||||
for (i = 0; i < DBUF_RWLOCKS; i++)
|
for (i = 0; i < DBUF_MUTEXES; i++)
|
||||||
rw_destroy(&h->hash_rwlocks[i]);
|
mutex_destroy(&h->hash_mutexes[i]);
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
/*
|
/*
|
||||||
* Large allocations which do not require contiguous pages
|
* Large allocations which do not require contiguous pages
|
||||||
|
|
|
@ -137,7 +137,7 @@ dbuf_stats_hash_table_data(char *buf, size_t size, void *data)
|
||||||
if (size)
|
if (size)
|
||||||
buf[0] = 0;
|
buf[0] = 0;
|
||||||
|
|
||||||
rw_enter(DBUF_HASH_RWLOCK(h, dsh->idx), RW_READER);
|
mutex_enter(DBUF_HASH_MUTEX(h, dsh->idx));
|
||||||
for (db = h->hash_table[dsh->idx]; db != NULL; db = db->db_hash_next) {
|
for (db = h->hash_table[dsh->idx]; db != NULL; db = db->db_hash_next) {
|
||||||
/*
|
/*
|
||||||
* Returning ENOMEM will cause the data and header functions
|
* Returning ENOMEM will cause the data and header functions
|
||||||
|
@ -158,7 +158,7 @@ dbuf_stats_hash_table_data(char *buf, size_t size, void *data)
|
||||||
|
|
||||||
mutex_exit(&db->db_mtx);
|
mutex_exit(&db->db_mtx);
|
||||||
}
|
}
|
||||||
rw_exit(DBUF_HASH_RWLOCK(h, dsh->idx));
|
mutex_exit(DBUF_HASH_MUTEX(h, dsh->idx));
|
||||||
|
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue