OpenZFS 9617 - too-frequent TXG sync causes excessive write inflation
Porting notes:
* Renamed zfs_dirty_data_sync_pct to zfs_dirty_data_sync_percent and changed the type to be consistent with the other dirty module params.
* Updated zfs-module-parameters.5 accordingly.

Authored by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Serapheim Dimitropoulos <serapheim.dimitro@delphix.com>
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Andrew Stormont <andyjstormont@gmail.com>
Reviewed-by: George Melikov <mail@gmelikov.ru>
Approved by: Robert Mustacchi <rm@joyent.com>
Ported-by: Brian Behlendorf <behlendorf1@llnl.gov>
OpenZFS-issue: https://illumos.org/issues/9617
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/7928f4ba
Closes #7976
This commit is contained in:
parent
58c0f374f1
commit
dfbe267503
|
@ -57,7 +57,7 @@ struct dsl_crypto_params;
|
||||||
|
|
||||||
extern unsigned long zfs_dirty_data_max;
|
extern unsigned long zfs_dirty_data_max;
|
||||||
extern unsigned long zfs_dirty_data_max_max;
|
extern unsigned long zfs_dirty_data_max_max;
|
||||||
extern unsigned long zfs_dirty_data_sync;
|
extern int zfs_dirty_data_sync_percent;
|
||||||
extern int zfs_dirty_data_max_percent;
|
extern int zfs_dirty_data_max_percent;
|
||||||
extern int zfs_dirty_data_max_max_percent;
|
extern int zfs_dirty_data_max_max_percent;
|
||||||
extern int zfs_delay_min_dirty_percent;
|
extern int zfs_delay_min_dirty_percent;
|
||||||
|
|
|
@ -1225,12 +1225,14 @@ Default value: \fB10\fR%, subject to \fBzfs_dirty_data_max_max\fR.
|
||||||
.sp
|
.sp
|
||||||
.ne 2
|
.ne 2
|
||||||
.na
|
.na
|
||||||
\fBzfs_dirty_data_sync\fR (int)
|
\fBzfs_dirty_data_sync_percent\fR (int)
|
||||||
.ad
|
.ad
|
||||||
.RS 12n
|
.RS 12n
|
||||||
Start syncing out a transaction group if there is at least this much dirty data.
|
Start syncing out a transaction group if there's at least this much dirty data
|
||||||
|
as a percentage of \fBzfs_dirty_data_max\fR. This should be less than
|
||||||
|
\fBzfs_vdev_async_write_active_min_dirty_percent\fR.
|
||||||
.sp
|
.sp
|
||||||
Default value: \fB67,108,864\fR.
|
Default value: \fB20\fR% of \fBzfs_dirty_data_max\fR.
|
||||||
.RE
|
.RE
|
||||||
|
|
||||||
.sp
|
.sp
|
||||||
|
|
|
@ -106,9 +106,11 @@ int zfs_dirty_data_max_percent = 10;
|
||||||
int zfs_dirty_data_max_max_percent = 25;
|
int zfs_dirty_data_max_max_percent = 25;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If there is at least this much dirty data, push out a txg.
|
* If there's at least this much dirty data (as a percentage of
|
||||||
|
* zfs_dirty_data_max), push out a txg. This should be less than
|
||||||
|
* zfs_vdev_async_write_active_min_dirty_percent.
|
||||||
*/
|
*/
|
||||||
unsigned long zfs_dirty_data_sync = 64 * 1024 * 1024;
|
int zfs_dirty_data_sync_percent = 20;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Once there is this amount of dirty data, the dmu_tx_delay() will kick in
|
* Once there is this amount of dirty data, the dmu_tx_delay() will kick in
|
||||||
|
@ -879,10 +881,12 @@ dsl_pool_need_dirty_delay(dsl_pool_t *dp)
|
||||||
{
|
{
|
||||||
uint64_t delay_min_bytes =
|
uint64_t delay_min_bytes =
|
||||||
zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100;
|
zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100;
|
||||||
|
uint64_t dirty_min_bytes =
|
||||||
|
zfs_dirty_data_max * zfs_dirty_data_sync_percent / 100;
|
||||||
boolean_t rv;
|
boolean_t rv;
|
||||||
|
|
||||||
mutex_enter(&dp->dp_lock);
|
mutex_enter(&dp->dp_lock);
|
||||||
if (dp->dp_dirty_total > zfs_dirty_data_sync)
|
if (dp->dp_dirty_total > dirty_min_bytes)
|
||||||
txg_kick(dp);
|
txg_kick(dp);
|
||||||
rv = (dp->dp_dirty_total > delay_min_bytes);
|
rv = (dp->dp_dirty_total > delay_min_bytes);
|
||||||
mutex_exit(&dp->dp_lock);
|
mutex_exit(&dp->dp_lock);
|
||||||
|
@ -1345,7 +1349,7 @@ module_param(zfs_dirty_data_max_max, ulong, 0444);
|
||||||
MODULE_PARM_DESC(zfs_dirty_data_max_max,
|
MODULE_PARM_DESC(zfs_dirty_data_max_max,
|
||||||
"zfs_dirty_data_max upper bound in bytes");
|
"zfs_dirty_data_max upper bound in bytes");
|
||||||
|
|
||||||
module_param(zfs_dirty_data_sync, ulong, 0644);
|
module_param(zfs_dirty_data_sync_percent, int, 0644);
|
||||||
MODULE_PARM_DESC(zfs_dirty_data_sync, "sync txg when this much dirty data");
|
/*
 * The description must name the same symbol that module_param() registers;
 * the tunable is now a percentage of zfs_dirty_data_max, not a byte count.
 */
MODULE_PARM_DESC(zfs_dirty_data_sync_percent,
	"dirty data txg sync threshold as a percentage of zfs_dirty_data_max");
|
||||||
|
|
||||||
module_param(zfs_delay_scale, ulong, 0644);
|
module_param(zfs_delay_scale, ulong, 0644);
|
||||||
|
|
|
@ -517,7 +517,8 @@ txg_sync_thread(void *arg)
|
||||||
clock_t timeout = zfs_txg_timeout * hz;
|
clock_t timeout = zfs_txg_timeout * hz;
|
||||||
clock_t timer;
|
clock_t timer;
|
||||||
uint64_t txg;
|
uint64_t txg;
|
||||||
txg_stat_t *ts;
|
uint64_t dirty_min_bytes =
|
||||||
|
zfs_dirty_data_max * zfs_dirty_data_sync_percent / 100;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We sync when we're scanning, there's someone waiting
|
* We sync when we're scanning, there's someone waiting
|
||||||
|
@ -529,7 +530,7 @@ txg_sync_thread(void *arg)
|
||||||
!tx->tx_exiting && timer > 0 &&
|
!tx->tx_exiting && timer > 0 &&
|
||||||
tx->tx_synced_txg >= tx->tx_sync_txg_waiting &&
|
tx->tx_synced_txg >= tx->tx_sync_txg_waiting &&
|
||||||
!txg_has_quiesced_to_sync(dp) &&
|
!txg_has_quiesced_to_sync(dp) &&
|
||||||
dp->dp_dirty_total < zfs_dirty_data_sync) {
|
dp->dp_dirty_total < dirty_min_bytes) {
|
||||||
dprintf("waiting; tx_synced=%llu waiting=%llu dp=%p\n",
|
dprintf("waiting; tx_synced=%llu waiting=%llu dp=%p\n",
|
||||||
tx->tx_synced_txg, tx->tx_sync_txg_waiting, dp);
|
tx->tx_synced_txg, tx->tx_sync_txg_waiting, dp);
|
||||||
txg_thread_wait(tx, &cpr, &tx->tx_sync_more_cv, timer);
|
txg_thread_wait(tx, &cpr, &tx->tx_sync_more_cv, timer);
|
||||||
|
@ -561,7 +562,7 @@ txg_sync_thread(void *arg)
|
||||||
tx->tx_quiesced_txg = 0;
|
tx->tx_quiesced_txg = 0;
|
||||||
tx->tx_syncing_txg = txg;
|
tx->tx_syncing_txg = txg;
|
||||||
DTRACE_PROBE2(txg__syncing, dsl_pool_t *, dp, uint64_t, txg);
|
DTRACE_PROBE2(txg__syncing, dsl_pool_t *, dp, uint64_t, txg);
|
||||||
ts = spa_txg_history_init_io(spa, txg, dp);
|
txg_stat_t *ts = spa_txg_history_init_io(spa, txg, dp);
|
||||||
cv_broadcast(&tx->tx_quiesce_more_cv);
|
cv_broadcast(&tx->tx_quiesce_more_cv);
|
||||||
|
|
||||||
dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
|
dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
|
||||||
|
|
Loading…
Reference in New Issue