Add Module Parameter Regarding Log Size Limit
* Add Module Parameters Regarding Log Size Limit. zfs_wrlog_data_max: The upper limit of TX_WRITE log data. Once it is reached, write operations are blocked until the log data is cleared out after txg sync. It only counts TX_WRITE log records with WR_COPIED or WR_NEED_COPY. Reviewed-by: Prakash Surya <prakash.surya@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: jxdking <lostking2008@hotmail.com> Closes #12284
This commit is contained in:
parent
8172df643b
commit
a7bd20e309
|
@ -124,6 +124,7 @@ typedef struct dmu_tx_stats {
|
||||||
kstat_named_t dmu_tx_dirty_throttle;
|
kstat_named_t dmu_tx_dirty_throttle;
|
||||||
kstat_named_t dmu_tx_dirty_delay;
|
kstat_named_t dmu_tx_dirty_delay;
|
||||||
kstat_named_t dmu_tx_dirty_over_max;
|
kstat_named_t dmu_tx_dirty_over_max;
|
||||||
|
kstat_named_t dmu_tx_wrlog_over_max;
|
||||||
kstat_named_t dmu_tx_dirty_frees_delay;
|
kstat_named_t dmu_tx_dirty_frees_delay;
|
||||||
kstat_named_t dmu_tx_quota;
|
kstat_named_t dmu_tx_quota;
|
||||||
} dmu_tx_stats_t;
|
} dmu_tx_stats_t;
|
||||||
|
|
|
@ -40,6 +40,7 @@
|
||||||
#include <sys/rrwlock.h>
|
#include <sys/rrwlock.h>
|
||||||
#include <sys/dsl_synctask.h>
|
#include <sys/dsl_synctask.h>
|
||||||
#include <sys/mmp.h>
|
#include <sys/mmp.h>
|
||||||
|
#include <sys/aggsum.h>
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
|
@ -58,6 +59,7 @@ struct dsl_deadlist;
|
||||||
|
|
||||||
extern unsigned long zfs_dirty_data_max;
|
extern unsigned long zfs_dirty_data_max;
|
||||||
extern unsigned long zfs_dirty_data_max_max;
|
extern unsigned long zfs_dirty_data_max_max;
|
||||||
|
extern unsigned long zfs_wrlog_data_max;
|
||||||
extern int zfs_dirty_data_sync_percent;
|
extern int zfs_dirty_data_sync_percent;
|
||||||
extern int zfs_dirty_data_max_percent;
|
extern int zfs_dirty_data_max_percent;
|
||||||
extern int zfs_dirty_data_max_max_percent;
|
extern int zfs_dirty_data_max_max_percent;
|
||||||
|
@ -119,6 +121,9 @@ typedef struct dsl_pool {
|
||||||
uint64_t dp_mos_compressed_delta;
|
uint64_t dp_mos_compressed_delta;
|
||||||
uint64_t dp_mos_uncompressed_delta;
|
uint64_t dp_mos_uncompressed_delta;
|
||||||
|
|
||||||
|
aggsum_t dp_wrlog_pertxg[TXG_SIZE];
|
||||||
|
aggsum_t dp_wrlog_total;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Time of most recently scheduled (furthest in the future)
|
* Time of most recently scheduled (furthest in the future)
|
||||||
* wakeup for delayed transactions.
|
* wakeup for delayed transactions.
|
||||||
|
@ -158,6 +163,8 @@ int dsl_pool_sync_context(dsl_pool_t *dp);
|
||||||
uint64_t dsl_pool_adjustedsize(dsl_pool_t *dp, zfs_space_check_t slop_policy);
|
uint64_t dsl_pool_adjustedsize(dsl_pool_t *dp, zfs_space_check_t slop_policy);
|
||||||
uint64_t dsl_pool_unreserved_space(dsl_pool_t *dp,
|
uint64_t dsl_pool_unreserved_space(dsl_pool_t *dp,
|
||||||
zfs_space_check_t slop_policy);
|
zfs_space_check_t slop_policy);
|
||||||
|
void dsl_pool_wrlog_count(dsl_pool_t *dp, int64_t size, uint64_t txg);
|
||||||
|
boolean_t dsl_pool_wrlog_over_max(dsl_pool_t *dp);
|
||||||
void dsl_pool_dirty_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
|
void dsl_pool_dirty_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
|
||||||
void dsl_pool_undirty_space(dsl_pool_t *dp, int64_t space, uint64_t txg);
|
void dsl_pool_undirty_space(dsl_pool_t *dp, int64_t space, uint64_t txg);
|
||||||
void dsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp);
|
void dsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp);
|
||||||
|
|
|
@ -1068,6 +1068,18 @@ Start syncing out a transaction group if there's at least this much dirty data
|
||||||
This should be less than
|
This should be less than
|
||||||
.Sy zfs_vdev_async_write_active_min_dirty_percent .
|
.Sy zfs_vdev_async_write_active_min_dirty_percent .
|
||||||
.
|
.
|
||||||
|
.It Sy zfs_wrlog_data_max Ns = Pq int
|
||||||
|
The upper limit on the size of write-transaction ZIL log data, in bytes.
|
||||||
|
Once it is reached, write operations are blocked until log data is cleared out
|
||||||
|
after transaction group sync. Because of some overhead, it should be set
|
||||||
|
to at least 2 times the size of
|
||||||
|
.Sy zfs_dirty_data_max
|
||||||
|
.No to prevent harming normal write throughput.
|
||||||
|
It should also be smaller than the size of the slog device, if one is present.
|
||||||
|
.Pp
|
||||||
|
Defaults to
|
||||||
|
.Sy zfs_dirty_data_max*2
|
||||||
|
.
|
||||||
.It Sy zfs_fallocate_reserve_percent Ns = Ns Sy 110 Ns % Pq uint
|
.It Sy zfs_fallocate_reserve_percent Ns = Ns Sy 110 Ns % Pq uint
|
||||||
Since ZFS is a copy-on-write filesystem with snapshots, blocks cannot be
|
Since ZFS is a copy-on-write filesystem with snapshots, blocks cannot be
|
||||||
preallocated for a file in order to guarantee that later writes will not
|
preallocated for a file in order to guarantee that later writes will not
|
||||||
|
|
|
@ -7980,6 +7980,18 @@ arc_init(void)
|
||||||
zfs_dirty_data_max = MIN(zfs_dirty_data_max,
|
zfs_dirty_data_max = MIN(zfs_dirty_data_max,
|
||||||
zfs_dirty_data_max_max);
|
zfs_dirty_data_max_max);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (zfs_wrlog_data_max == 0) {
|
||||||
|
|
||||||
|
/*
|
||||||
|
* dp_wrlog_total is reduced for each txg at the end of
|
||||||
|
* spa_sync(). However, dp_dirty_total is reduced every time
|
||||||
|
* a block is written out. Thus under normal operation,
|
||||||
|
* dp_wrlog_total could grow 2 times as big as
|
||||||
|
* zfs_dirty_data_max.
|
||||||
|
*/
|
||||||
|
zfs_wrlog_data_max = zfs_dirty_data_max * 2;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
|
@ -53,6 +53,7 @@ dmu_tx_stats_t dmu_tx_stats = {
|
||||||
{ "dmu_tx_dirty_throttle", KSTAT_DATA_UINT64 },
|
{ "dmu_tx_dirty_throttle", KSTAT_DATA_UINT64 },
|
||||||
{ "dmu_tx_dirty_delay", KSTAT_DATA_UINT64 },
|
{ "dmu_tx_dirty_delay", KSTAT_DATA_UINT64 },
|
||||||
{ "dmu_tx_dirty_over_max", KSTAT_DATA_UINT64 },
|
{ "dmu_tx_dirty_over_max", KSTAT_DATA_UINT64 },
|
||||||
|
{ "dmu_tx_wrlog_over_max", KSTAT_DATA_UINT64 },
|
||||||
{ "dmu_tx_dirty_frees_delay", KSTAT_DATA_UINT64 },
|
{ "dmu_tx_dirty_frees_delay", KSTAT_DATA_UINT64 },
|
||||||
{ "dmu_tx_quota", KSTAT_DATA_UINT64 },
|
{ "dmu_tx_quota", KSTAT_DATA_UINT64 },
|
||||||
};
|
};
|
||||||
|
@ -884,6 +885,12 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
|
||||||
return (SET_ERROR(ERESTART));
|
return (SET_ERROR(ERESTART));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!tx->tx_dirty_delayed &&
|
||||||
|
dsl_pool_wrlog_over_max(tx->tx_pool)) {
|
||||||
|
DMU_TX_STAT_BUMP(dmu_tx_wrlog_over_max);
|
||||||
|
return (SET_ERROR(ERESTART));
|
||||||
|
}
|
||||||
|
|
||||||
if (!tx->tx_dirty_delayed &&
|
if (!tx->tx_dirty_delayed &&
|
||||||
dsl_pool_need_dirty_delay(tx->tx_pool)) {
|
dsl_pool_need_dirty_delay(tx->tx_pool)) {
|
||||||
tx->tx_wait_dirty = B_TRUE;
|
tx->tx_wait_dirty = B_TRUE;
|
||||||
|
|
|
@ -104,6 +104,14 @@ unsigned long zfs_dirty_data_max_max = 0;
|
||||||
int zfs_dirty_data_max_percent = 10;
|
int zfs_dirty_data_max_percent = 10;
|
||||||
int zfs_dirty_data_max_max_percent = 25;
|
int zfs_dirty_data_max_max_percent = 25;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* zfs_wrlog_data_max, the upper limit of TX_WRITE log data.
|
||||||
|
* Once it is reached, write operations are blocked
|
||||||
|
* until log data is cleared out after txg sync.
|
||||||
|
* It only counts TX_WRITE log with WR_COPIED or WR_NEED_COPY.
|
||||||
|
*/
|
||||||
|
unsigned long zfs_wrlog_data_max = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If there's at least this much dirty data (as a percentage of
|
* If there's at least this much dirty data (as a percentage of
|
||||||
* zfs_dirty_data_max), push out a txg. This should be less than
|
* zfs_dirty_data_max), push out a txg. This should be less than
|
||||||
|
@ -220,6 +228,11 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
|
||||||
mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
|
mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||||
cv_init(&dp->dp_spaceavail_cv, NULL, CV_DEFAULT, NULL);
|
cv_init(&dp->dp_spaceavail_cv, NULL, CV_DEFAULT, NULL);
|
||||||
|
|
||||||
|
aggsum_init(&dp->dp_wrlog_total, 0);
|
||||||
|
for (int i = 0; i < TXG_SIZE; i++) {
|
||||||
|
aggsum_init(&dp->dp_wrlog_pertxg[i], 0);
|
||||||
|
}
|
||||||
|
|
||||||
dp->dp_zrele_taskq = taskq_create("z_zrele", 100, defclsyspri,
|
dp->dp_zrele_taskq = taskq_create("z_zrele", 100, defclsyspri,
|
||||||
boot_ncpus * 8, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC |
|
boot_ncpus * 8, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC |
|
||||||
TASKQ_THREADS_CPU_PCT);
|
TASKQ_THREADS_CPU_PCT);
|
||||||
|
@ -416,6 +429,14 @@ dsl_pool_close(dsl_pool_t *dp)
|
||||||
rrw_destroy(&dp->dp_config_rwlock);
|
rrw_destroy(&dp->dp_config_rwlock);
|
||||||
mutex_destroy(&dp->dp_lock);
|
mutex_destroy(&dp->dp_lock);
|
||||||
cv_destroy(&dp->dp_spaceavail_cv);
|
cv_destroy(&dp->dp_spaceavail_cv);
|
||||||
|
|
||||||
|
ASSERT0(aggsum_value(&dp->dp_wrlog_total));
|
||||||
|
aggsum_fini(&dp->dp_wrlog_total);
|
||||||
|
for (int i = 0; i < TXG_SIZE; i++) {
|
||||||
|
ASSERT0(aggsum_value(&dp->dp_wrlog_pertxg[i]));
|
||||||
|
aggsum_fini(&dp->dp_wrlog_pertxg[i]);
|
||||||
|
}
|
||||||
|
|
||||||
taskq_destroy(dp->dp_unlinked_drain_taskq);
|
taskq_destroy(dp->dp_unlinked_drain_taskq);
|
||||||
taskq_destroy(dp->dp_zrele_taskq);
|
taskq_destroy(dp->dp_zrele_taskq);
|
||||||
if (dp->dp_blkstats != NULL) {
|
if (dp->dp_blkstats != NULL) {
|
||||||
|
@ -592,6 +613,36 @@ dsl_pool_dirty_delta(dsl_pool_t *dp, int64_t delta)
|
||||||
cv_signal(&dp->dp_spaceavail_cv);
|
cv_signal(&dp->dp_spaceavail_cv);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Account `size` bytes of write-log data against transaction group `txg`.
 *
 * The amount is added both to the per-txg counter (slot `txg & TXG_MASK`
 * of dp_wrlog_pertxg) and to the pool-wide dp_wrlog_total.  `size` must
 * be non-negative (asserted).
 *
 * After accounting, if the per-txg counter exceeds
 * zfs_dirty_data_max * (zfs_dirty_data_sync_percent + 10) / 100,
 * txg_kick() is called for that txg.
 */
void
|
||||||
|
dsl_pool_wrlog_count(dsl_pool_t *dp, int64_t size, uint64_t txg)
|
||||||
|
{
|
||||||
|
ASSERT3S(size, >=, 0);
|
||||||
|
|
||||||
|
aggsum_add(&dp->dp_wrlog_pertxg[txg & TXG_MASK], size);
|
||||||
|
aggsum_add(&dp->dp_wrlog_total, size);
|
||||||
|
|
||||||
|
/* Choose a value slightly bigger than min dirty sync bytes */
|
||||||
|
uint64_t sync_min =
|
||||||
|
zfs_dirty_data_max * (zfs_dirty_data_sync_percent + 10) / 100;
|
||||||
|
if (aggsum_compare(&dp->dp_wrlog_pertxg[txg & TXG_MASK], sync_min) > 0)
|
||||||
|
txg_kick(dp, txg);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Report whether the pool-wide write-log counter (dp_wrlog_total)
 * is strictly greater than the zfs_wrlog_data_max tunable.
 */
boolean_t
|
||||||
|
dsl_pool_wrlog_over_max(dsl_pool_t *dp)
|
||||||
|
{
|
||||||
|
return (aggsum_compare(&dp->dp_wrlog_total, zfs_wrlog_data_max) > 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Remove the write-log bytes currently recorded for `txg`: the value read
 * from the per-txg counter (slot `txg & TXG_MASK`) is subtracted from both
 * that counter and the pool-wide dp_wrlog_total.
 */
static void
|
||||||
|
dsl_pool_wrlog_clear(dsl_pool_t *dp, uint64_t txg)
|
||||||
|
{
|
||||||
|
int64_t delta;
|
||||||
|
/* Negate the current per-txg amount so the adds below subtract it. */
delta = -(int64_t)aggsum_value(&dp->dp_wrlog_pertxg[txg & TXG_MASK]);
|
||||||
|
aggsum_add(&dp->dp_wrlog_pertxg[txg & TXG_MASK], delta);
|
||||||
|
aggsum_add(&dp->dp_wrlog_total, delta);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef ZFS_DEBUG
|
#ifdef ZFS_DEBUG
|
||||||
static boolean_t
|
static boolean_t
|
||||||
dsl_early_sync_task_verify(dsl_pool_t *dp, uint64_t txg)
|
dsl_early_sync_task_verify(dsl_pool_t *dp, uint64_t txg)
|
||||||
|
@ -816,6 +867,9 @@ dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg)
|
||||||
ASSERT(!dmu_objset_is_dirty(zilog->zl_os, txg));
|
ASSERT(!dmu_objset_is_dirty(zilog->zl_os, txg));
|
||||||
dmu_buf_rele(ds->ds_dbuf, zilog);
|
dmu_buf_rele(ds->ds_dbuf, zilog);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
dsl_pool_wrlog_clear(dp, txg);
|
||||||
|
|
||||||
ASSERT(!dmu_objset_is_dirty(dp->dp_meta_objset, txg));
|
ASSERT(!dmu_objset_is_dirty(dp->dp_meta_objset, txg));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1405,6 +1459,9 @@ ZFS_MODULE_PARAM(zfs, zfs_, delay_min_dirty_percent, INT, ZMOD_RW,
|
||||||
ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max, ULONG, ZMOD_RW,
|
ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max, ULONG, ZMOD_RW,
|
||||||
"Determines the dirty space limit");
|
"Determines the dirty space limit");
|
||||||
|
|
||||||
|
ZFS_MODULE_PARAM(zfs, zfs_, wrlog_data_max, ULONG, ZMOD_RW,
|
||||||
|
"The size limit of write-transaction zil log data");
|
||||||
|
|
||||||
/* zfs_dirty_data_max_max only applied at module load in arc_init(). */
|
/* zfs_dirty_data_max_max only applied at module load in arc_init(). */
|
||||||
ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max_max, ULONG, ZMOD_RD,
|
ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max_max, ULONG, ZMOD_RD,
|
||||||
"zfs_dirty_data_max upper bound in bytes");
|
"zfs_dirty_data_max upper bound in bytes");
|
||||||
|
|
|
@ -541,6 +541,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
|
||||||
itx_wr_state_t write_state;
|
itx_wr_state_t write_state;
|
||||||
uintptr_t fsync_cnt;
|
uintptr_t fsync_cnt;
|
||||||
uint64_t gen = 0;
|
uint64_t gen = 0;
|
||||||
|
ssize_t size = resid;
|
||||||
|
|
||||||
if (zil_replaying(zilog, tx) || zp->z_unlinked ||
|
if (zil_replaying(zilog, tx) || zp->z_unlinked ||
|
||||||
zfs_xattr_owner_unlinked(zp)) {
|
zfs_xattr_owner_unlinked(zp)) {
|
||||||
|
@ -626,6 +627,10 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
|
||||||
off += len;
|
off += len;
|
||||||
resid -= len;
|
resid -= len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (write_state == WR_COPIED || write_state == WR_NEED_COPY) {
|
||||||
|
dsl_pool_wrlog_count(zilog->zl_dmu_pool, size, tx->tx_txg);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -84,10 +84,8 @@
|
||||||
#include <sys/zfs_rlock.h>
|
#include <sys/zfs_rlock.h>
|
||||||
#include <sys/spa_impl.h>
|
#include <sys/spa_impl.h>
|
||||||
#include <sys/zvol.h>
|
#include <sys/zvol.h>
|
||||||
|
|
||||||
#include <sys/zvol_impl.h>
|
#include <sys/zvol_impl.h>
|
||||||
|
|
||||||
|
|
||||||
unsigned int zvol_inhibit_dev = 0;
|
unsigned int zvol_inhibit_dev = 0;
|
||||||
unsigned int zvol_volmode = ZFS_VOLMODE_GEOM;
|
unsigned int zvol_volmode = ZFS_VOLMODE_GEOM;
|
||||||
|
|
||||||
|
@ -577,6 +575,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
|
||||||
uint32_t blocksize = zv->zv_volblocksize;
|
uint32_t blocksize = zv->zv_volblocksize;
|
||||||
zilog_t *zilog = zv->zv_zilog;
|
zilog_t *zilog = zv->zv_zilog;
|
||||||
itx_wr_state_t write_state;
|
itx_wr_state_t write_state;
|
||||||
|
uint64_t sz = size;
|
||||||
|
|
||||||
if (zil_replaying(zilog, tx))
|
if (zil_replaying(zilog, tx))
|
||||||
return;
|
return;
|
||||||
|
@ -628,6 +627,10 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
|
||||||
offset += len;
|
offset += len;
|
||||||
size -= len;
|
size -= len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (write_state == WR_COPIED || write_state == WR_NEED_COPY) {
|
||||||
|
dsl_pool_wrlog_count(zilog->zl_dmu_pool, sz, tx->tx_txg);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
Loading…
Reference in New Issue