Add Module Parameter Regarding Log Size Limit
zfs_wrlog_data_max

The upper limit of TX_WRITE log data. Once it is reached, write
operations are blocked until log data is cleared out after txg sync.
It only counts TX_WRITE log records with WR_COPIED or WR_NEED_COPY
write states.

Reviewed-by: Prakash Surya <prakash.surya@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: jxdking <lostking2008@hotmail.com>
Closes #12284
Parent: 999830a021
Commit: d05f3039f7
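At a high level, the change charges each TX_WRITE log record (in the WR_COPIED or WR_NEED_COPY state) to the transaction group it belongs to and to a pool-wide running total, refuses new write transactions with ERESTART while that total exceeds zfs_wrlog_data_max, and drops a txg's contribution once that txg has synced. The following is a minimal standalone model of that accounting, not ZFS code: plain integers stand in for aggsum_t, all locking is omitted, and the wrlog_model_* names are hypothetical.

/*
 * Simplified model of the wrlog accounting added by this commit.
 * Plain counters stand in for aggsum_t and locking is omitted;
 * the wrlog_model_* names are hypothetical, not ZFS symbols.
 */
#include <stdint.h>
#include <stdbool.h>

#define	MODEL_TXG_SIZE	4			/* mirrors TXG_SIZE */
#define	MODEL_TXG_MASK	(MODEL_TXG_SIZE - 1)

static uint64_t wrlog_pertxg[MODEL_TXG_SIZE];	/* bytes logged per open txg */
static uint64_t wrlog_total;			/* pool-wide outstanding bytes */
static uint64_t wrlog_data_max = 8ULL << 30;	/* tunable limit, e.g. 8 GiB */

/* Called when a TX_WRITE record (WR_COPIED/WR_NEED_COPY) is logged. */
static void
wrlog_model_count(uint64_t txg, uint64_t size)
{
	wrlog_pertxg[txg & MODEL_TXG_MASK] += size;
	wrlog_total += size;
}

/* Checked before assigning a new write transaction. */
static bool
wrlog_model_over_max(void)
{
	return (wrlog_total > wrlog_data_max);
}

/* Called after a txg syncs; its log data no longer counts. */
static void
wrlog_model_clear(uint64_t txg)
{
	wrlog_total -= wrlog_pertxg[txg & MODEL_TXG_MASK];
	wrlog_pertxg[txg & MODEL_TXG_MASK] = 0;
}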
@@ -124,6 +124,7 @@ typedef struct dmu_tx_stats {
 	kstat_named_t dmu_tx_dirty_throttle;
 	kstat_named_t dmu_tx_dirty_delay;
 	kstat_named_t dmu_tx_dirty_over_max;
+	kstat_named_t dmu_tx_wrlog_over_max;
 	kstat_named_t dmu_tx_dirty_frees_delay;
 	kstat_named_t dmu_tx_quota;
 } dmu_tx_stats_t;
@@ -40,6 +40,7 @@
 #include <sys/rrwlock.h>
 #include <sys/dsl_synctask.h>
 #include <sys/mmp.h>
+#include <sys/aggsum.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -58,6 +59,7 @@ struct dsl_deadlist;
 
 extern unsigned long zfs_dirty_data_max;
 extern unsigned long zfs_dirty_data_max_max;
+extern unsigned long zfs_wrlog_data_max;
 extern int zfs_dirty_data_sync_percent;
 extern int zfs_dirty_data_max_percent;
 extern int zfs_dirty_data_max_max_percent;
@@ -118,6 +120,9 @@ typedef struct dsl_pool {
 	uint64_t dp_mos_compressed_delta;
 	uint64_t dp_mos_uncompressed_delta;
 
+	aggsum_t dp_wrlog_pertxg[TXG_SIZE];
+	aggsum_t dp_wrlog_total;
+
 	/*
 	 * Time of most recently scheduled (furthest in the future)
 	 * wakeup for delayed transactions.
@@ -158,6 +163,8 @@ uint64_t dsl_pool_adjustedsize(dsl_pool_t *dp, zfs_space_check_t slop_policy);
 uint64_t dsl_pool_unreserved_space(dsl_pool_t *dp,
     zfs_space_check_t slop_policy);
 uint64_t dsl_pool_deferred_space(dsl_pool_t *dp);
+void dsl_pool_wrlog_count(dsl_pool_t *dp, int64_t size, uint64_t txg);
+boolean_t dsl_pool_wrlog_over_max(dsl_pool_t *dp);
 void dsl_pool_dirty_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
 void dsl_pool_undirty_space(dsl_pool_t *dp, int64_t space, uint64_t txg);
 void dsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp);
@@ -1096,6 +1096,18 @@ Start syncing out a transaction group if there's at least this much dirty data
 This should be less than
 .Sy zfs_vdev_async_write_active_min_dirty_percent .
 .
+.It Sy zfs_wrlog_data_max Ns = Pq int
+The upper limit of write-transaction ZIL log data, in bytes.
+Once it is reached, write operations are blocked until log data is cleared out
+after transaction group sync. Because of some overhead, it should be set
+at least twice the size of
+.Sy zfs_dirty_data_max
+.No to prevent harming normal write throughput.
+It should also be smaller than the size of the slog device, if a slog is present.
+.Pp
+Defaults to
+.Sy zfs_dirty_data_max*2
+.
 .It Sy zfs_fallocate_reserve_percent Ns = Ns Sy 110 Ns % Pq uint
 Since ZFS is a copy-on-write filesystem with snapshots, blocks cannot be
 preallocated for a file in order to guarantee that later writes will not
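To make the sizing guidance concrete, here is a small worked example under assumed values (the 4 GiB dirty-data figure and the slog size mentioned in the comments are illustrative, not universal defaults):

/* Worked sizing example; every figure here is an assumption. */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t dirty_data_max = 4ULL << 30;		/* assume 4 GiB */

	/* arc_init() picks this default when zfs_wrlog_data_max is left at 0. */
	uint64_t wrlog_default = dirty_data_max * 2;	/* 8 GiB */

	/*
	 * Per the guidance above, keep zfs_wrlog_data_max at or above this
	 * value but below the slog capacity; a pool with, say, a 6 GiB slog
	 * would call for either lowering the tunable (trading some write
	 * throughput) or provisioning a larger slog.
	 */
	printf("default zfs_wrlog_data_max = %llu bytes\n",
	    (unsigned long long)wrlog_default);
	return (0);
}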
@@ -8062,6 +8062,18 @@ arc_init(void)
 		zfs_dirty_data_max = MIN(zfs_dirty_data_max,
 		    zfs_dirty_data_max_max);
 	}
+
+	if (zfs_wrlog_data_max == 0) {
+
+		/*
+		 * dp_wrlog_total is reduced for each txg at the end of
+		 * spa_sync(). However, dp_dirty_total is reduced every time
+		 * a block is written out. Thus under normal operation,
+		 * dp_wrlog_total could grow 2 times as big as
+		 * zfs_dirty_data_max.
+		 */
+		zfs_wrlog_data_max = zfs_dirty_data_max * 2;
+	}
 }
 
 void
@@ -53,6 +53,7 @@ dmu_tx_stats_t dmu_tx_stats = {
 	{ "dmu_tx_dirty_throttle", KSTAT_DATA_UINT64 },
 	{ "dmu_tx_dirty_delay", KSTAT_DATA_UINT64 },
 	{ "dmu_tx_dirty_over_max", KSTAT_DATA_UINT64 },
+	{ "dmu_tx_wrlog_over_max", KSTAT_DATA_UINT64 },
 	{ "dmu_tx_dirty_frees_delay", KSTAT_DATA_UINT64 },
 	{ "dmu_tx_quota", KSTAT_DATA_UINT64 },
 };
@@ -884,6 +885,12 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
 		return (SET_ERROR(ERESTART));
 	}
 
+	if (!tx->tx_dirty_delayed &&
+	    dsl_pool_wrlog_over_max(tx->tx_pool)) {
+		DMU_TX_STAT_BUMP(dmu_tx_wrlog_over_max);
+		return (SET_ERROR(ERESTART));
+	}
+
 	if (!tx->tx_dirty_delayed &&
 	    dsl_pool_need_dirty_delay(tx->tx_pool)) {
 		tx->tx_wait_dirty = B_TRUE;
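For context, the ERESTART returned here surfaces to callers through dmu_tx_assign(). Write paths typically handle it with the long-standing TXG_NOWAIT retry idiom: wait for the throttling condition to clear, drop the transaction, and try again. The sketch below is an illustrative composite of that pattern rather than a copy of any particular caller; the function name and the objset/object arguments are placeholders.

#include <sys/dmu.h>
#include <sys/dmu_tx.h>

/* Illustrative only: simplified error handling, placeholder arguments. */
static int
example_write_tx(objset_t *os, uint64_t object, uint64_t off, int len)
{
	boolean_t waited = B_FALSE;
	dmu_tx_t *tx;
	int error;

top:
	tx = dmu_tx_create(os);
	dmu_tx_hold_write(tx, object, off, len);

	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
	if (error != 0) {
		if (error == ERESTART) {
			/*
			 * Throttled: too much dirty data, or (with this
			 * commit) outstanding wrlog above zfs_wrlog_data_max.
			 * Wait for the condition to clear, then retry with
			 * a fresh transaction.
			 */
			waited = B_TRUE;
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		return (error);
	}

	/* ... perform the write and log it to the ZIL here ... */

	dmu_tx_commit(tx);
	return (0);
}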
@@ -104,6 +104,14 @@ unsigned long zfs_dirty_data_max_max = 0;
 int zfs_dirty_data_max_percent = 10;
 int zfs_dirty_data_max_max_percent = 25;
 
+/*
+ * zfs_wrlog_data_max: the upper limit of TX_WRITE log data.
+ * Once it is reached, write operations are blocked
+ * until log data is cleared out after txg sync.
+ * It only counts TX_WRITE log records with WR_COPIED or WR_NEED_COPY.
+ */
+unsigned long zfs_wrlog_data_max = 0;
+
 /*
  * If there's at least this much dirty data (as a percentage of
  * zfs_dirty_data_max), push out a txg. This should be less than
@@ -220,6 +228,11 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
 	mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&dp->dp_spaceavail_cv, NULL, CV_DEFAULT, NULL);
 
+	aggsum_init(&dp->dp_wrlog_total, 0);
+	for (int i = 0; i < TXG_SIZE; i++) {
+		aggsum_init(&dp->dp_wrlog_pertxg[i], 0);
+	}
+
 	dp->dp_zrele_taskq = taskq_create("z_zrele", 100, defclsyspri,
 	    boot_ncpus * 8, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC |
 	    TASKQ_THREADS_CPU_PCT);
@@ -416,6 +429,14 @@ dsl_pool_close(dsl_pool_t *dp)
 	rrw_destroy(&dp->dp_config_rwlock);
 	mutex_destroy(&dp->dp_lock);
 	cv_destroy(&dp->dp_spaceavail_cv);
+
+	ASSERT0(aggsum_value(&dp->dp_wrlog_total));
+	aggsum_fini(&dp->dp_wrlog_total);
+	for (int i = 0; i < TXG_SIZE; i++) {
+		ASSERT0(aggsum_value(&dp->dp_wrlog_pertxg[i]));
+		aggsum_fini(&dp->dp_wrlog_pertxg[i]);
+	}
+
 	taskq_destroy(dp->dp_unlinked_drain_taskq);
 	taskq_destroy(dp->dp_zrele_taskq);
 	if (dp->dp_blkstats != NULL)
@@ -590,6 +611,36 @@ dsl_pool_dirty_delta(dsl_pool_t *dp, int64_t delta)
 		cv_signal(&dp->dp_spaceavail_cv);
 }
 
+void
+dsl_pool_wrlog_count(dsl_pool_t *dp, int64_t size, uint64_t txg)
+{
+	ASSERT3S(size, >=, 0);
+
+	aggsum_add(&dp->dp_wrlog_pertxg[txg & TXG_MASK], size);
+	aggsum_add(&dp->dp_wrlog_total, size);
+
+	/* Choose a value slightly bigger than min dirty sync bytes */
+	uint64_t sync_min =
+	    zfs_dirty_data_max * (zfs_dirty_data_sync_percent + 10) / 100;
+	if (aggsum_compare(&dp->dp_wrlog_pertxg[txg & TXG_MASK], sync_min) > 0)
+		txg_kick(dp, txg);
+}
+
+boolean_t
+dsl_pool_wrlog_over_max(dsl_pool_t *dp)
+{
+	return (aggsum_compare(&dp->dp_wrlog_total, zfs_wrlog_data_max) > 0);
+}
+
+static void
+dsl_pool_wrlog_clear(dsl_pool_t *dp, uint64_t txg)
+{
+	int64_t delta;
+	delta = -(int64_t)aggsum_value(&dp->dp_wrlog_pertxg[txg & TXG_MASK]);
+	aggsum_add(&dp->dp_wrlog_pertxg[txg & TXG_MASK], delta);
+	aggsum_add(&dp->dp_wrlog_total, delta);
+}
+
 #ifdef ZFS_DEBUG
 static boolean_t
 dsl_early_sync_task_verify(dsl_pool_t *dp, uint64_t txg)
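As a worked example of the early-kick threshold above, assume zfs_dirty_data_max resolves to 4 GiB and zfs_dirty_data_sync_percent sits at its usual 20 (both figures are assumptions for illustration):

/* Worked example of the sync_min threshold; the inputs are assumptions. */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t zfs_dirty_data_max = 4ULL << 30;	/* assume 4 GiB */
	int zfs_dirty_data_sync_percent = 20;		/* assume the usual 20% */

	/* Same formula as dsl_pool_wrlog_count() above. */
	uint64_t sync_min =
	    zfs_dirty_data_max * (zfs_dirty_data_sync_percent + 10) / 100;

	/* 4 GiB * 30 / 100 = 1288490188 bytes, roughly 1.2 GiB. */
	printf("sync_min = %llu bytes\n", (unsigned long long)sync_min);
	return (0);
}

So once a single open txg accumulates roughly 1.2 GiB of outstanding TX_WRITE log data, txg_kick() is called for it rather than waiting for the normal dirty-data sync trigger.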
@@ -814,6 +865,9 @@ dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg)
 		ASSERT(!dmu_objset_is_dirty(zilog->zl_os, txg));
 		dmu_buf_rele(ds->ds_dbuf, zilog);
 	}
+
+	dsl_pool_wrlog_clear(dp, txg);
+
 	ASSERT(!dmu_objset_is_dirty(dp->dp_meta_objset, txg));
 }
 
@@ -1409,6 +1463,9 @@ ZFS_MODULE_PARAM(zfs, zfs_, delay_min_dirty_percent, INT, ZMOD_RW,
 ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max, ULONG, ZMOD_RW,
 	"Determines the dirty space limit");
 
+ZFS_MODULE_PARAM(zfs, zfs_, wrlog_data_max, ULONG, ZMOD_RW,
+	"The size limit of write-transaction zil log data");
+
 /* zfs_dirty_data_max_max only applied at module load in arc_init(). */
 ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max_max, ULONG, ZMOD_RD,
 	"zfs_dirty_data_max upper bound in bytes");
@@ -538,6 +538,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
 	itx_wr_state_t write_state;
 	uintptr_t fsync_cnt;
 	uint64_t gen = 0;
+	ssize_t size = resid;
 
 	if (zil_replaying(zilog, tx) || zp->z_unlinked ||
 	    zfs_xattr_owner_unlinked(zp)) {
@@ -623,6 +624,10 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
 		off += len;
 		resid -= len;
 	}
+
+	if (write_state == WR_COPIED || write_state == WR_NEED_COPY) {
+		dsl_pool_wrlog_count(zilog->zl_dmu_pool, size, tx->tx_txg);
+	}
 }
 
 /*
@@ -84,10 +84,8 @@
 #include <sys/zfs_rlock.h>
 #include <sys/spa_impl.h>
 #include <sys/zvol.h>
-
 #include <sys/zvol_impl.h>
-
 
 unsigned int zvol_inhibit_dev = 0;
 unsigned int zvol_volmode = ZFS_VOLMODE_GEOM;
 
@@ -577,6 +575,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
 	uint32_t blocksize = zv->zv_volblocksize;
 	zilog_t *zilog = zv->zv_zilog;
 	itx_wr_state_t write_state;
+	uint64_t sz = size;
 
 	if (zil_replaying(zilog, tx))
 		return;
@@ -628,6 +627,10 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
 		offset += len;
 		size -= len;
 	}
+
+	if (write_state == WR_COPIED || write_state == WR_NEED_COPY) {
+		dsl_pool_wrlog_count(zilog->zl_dmu_pool, sz, tx->tx_txg);
+	}
 }
 
 /*