Remove lock from dsl_pool_need_dirty_delay()
Torn reads/writes of dp_dirty_total are unlikely: on 64-bit systems due to register size, and on 32-bit due to memory constraints. And even if we hit some race, the code implementing the delay takes the lock anyway.

Removal of the pool-wide lock acquisition saves ~1% of CPU time on an 8-thread 8KB write workload.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Sponsored by: iXsystems, Inc.
Closes #15390
parent 0ef1964c79
commit 79f7de5752
@@ -965,18 +965,18 @@ dsl_pool_need_dirty_delay(dsl_pool_t *dp)
 	uint64_t delay_min_bytes =
 	    zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100;
 
-	mutex_enter(&dp->dp_lock);
-	uint64_t dirty = dp->dp_dirty_total;
-	mutex_exit(&dp->dp_lock);
-
-	return (dirty > delay_min_bytes);
+	/*
+	 * We are not taking the dp_lock here and few other places, since torn
+	 * reads are unlikely: on 64-bit systems due to register size and on
+	 * 32-bit due to memory constraints.  Pool-wide locks in hot path may
+	 * be too expensive, while we do not need a precise result here.
+	 */
+	return (dp->dp_dirty_total > delay_min_bytes);
 }
 
 static boolean_t
 dsl_pool_need_dirty_sync(dsl_pool_t *dp, uint64_t txg)
 {
-	ASSERT(MUTEX_HELD(&dp->dp_lock));
-
 	uint64_t dirty_min_bytes =
 	    zfs_dirty_data_max * zfs_dirty_data_sync_percent / 100;
 	uint64_t dirty = dp->dp_dirty_pertxg[txg & TXG_MASK];
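For illustration only (not part of the commit): a minimal userspace C sketch of the same pattern, a 64-bit counter that writers update under a mutex but that a hot-path check reads without locking, relying on a naturally aligned word-sized load not tearing on 64-bit platforms. All names here (pool_t, pool_dirty_add, pool_need_delay) are hypothetical and are not OpenZFS APIs.

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>

typedef struct {
	pthread_mutex_t lock;
	uint64_t dirty_total;	/* written under lock, read lock-free */
} pool_t;

/* Writers still serialize updates through the lock. */
static void
pool_dirty_add(pool_t *p, uint64_t n)
{
	pthread_mutex_lock(&p->lock);
	p->dirty_total += n;
	pthread_mutex_unlock(&p->lock);
}

/*
 * Hot-path check: reads dirty_total without taking the lock.  On
 * 64-bit systems an aligned 64-bit load is a single instruction, so
 * it cannot be torn, and a slightly stale value is acceptable because
 * the result is only a heuristic; callers that need precision take
 * the lock themselves.
 */
static bool
pool_need_delay(const pool_t *p, uint64_t threshold)
{
	return (p->dirty_total > threshold);
}

In strict C11 terms an unlocked read racing with locked writes is formally a data race; a relaxed atomic load would express the same intent with no runtime cost on these platforms, which is a common way to make this idiom explicit.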