From b6ad9671acdd245385744bcc1fe6c0f21f252570 Mon Sep 17 00:00:00 2001
From: Etienne Dechamps
Date: Fri, 15 Jun 2012 16:22:14 +0200
Subject: [PATCH] Add ZIL statistics.

The performance of the ZIL is usually the main bottleneck when dealing
with synchronous, write-heavy workloads (e.g. databases). Understanding
the behavior of the ZIL is required to diagnose performance issues for
these workloads, and to tune ZIL parameters (like zil_slog_limit)
accordingly.

This commit adds a new kstat page dedicated to the ZIL with some counters
which, hopefully, shed some light on what the ZIL is doing, and how it is
doing it.

Currently, these statistics are available in /proc/spl/kstat/zfs/zil.
A description of the fields can be found in zil.h.

Signed-off-by: Brian Behlendorf
Closes #786
---
 include/sys/zil.h | 59 ++++++++++++++++++++++++++++++++++++++++++++
 module/zfs/zil.c  | 63 +++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 120 insertions(+), 2 deletions(-)

diff --git a/include/sys/zil.h b/include/sys/zil.h
index 45900c9816..c583887cd3 100644
--- a/include/sys/zil.h
+++ b/include/sys/zil.h
@@ -372,6 +372,65 @@ typedef struct itx {
 	/* followed by type-specific part of lr_xx_t and its immediate data */
 } itx_t;
 
+/*
+ * Used for zil kstat.
+ */
+typedef struct zil_stats {
+	/*
+	 * Number of times a ZIL commit (e.g. fsync) has been requested.
+	 */
+	kstat_named_t zil_commit_count;
+
+	/*
+	 * Number of times the ZIL has been flushed to stable storage.
+	 * This is less than zil_commit_count when commits are "merged"
+	 * (see the documentation above zil_commit()).
+	 */
+	kstat_named_t zil_commit_writer_count;
+
+	/*
+	 * Number of transactions (reads, writes, renames, etc.)
+	 * that have been committed.
+	 */
+	kstat_named_t zil_itx_count;
+
+	/*
+	 * See the documentation for itx_wr_state_t above.
+	 * Note that "bytes" accumulates the length of the transactions
+	 * (i.e. data), not the actual log record sizes.
+	 */
+	kstat_named_t zil_itx_indirect_count;
+	kstat_named_t zil_itx_indirect_bytes;
+	kstat_named_t zil_itx_copied_count;
+	kstat_named_t zil_itx_copied_bytes;
+	kstat_named_t zil_itx_needcopy_count;
+	kstat_named_t zil_itx_needcopy_bytes;
+
+	/*
+	 * Transactions which have been allocated to the "normal"
+	 * (i.e. not slog) storage pool. Note that "bytes" accumulate
+	 * the actual log record sizes - which do not include the actual
+	 * data in case of indirect writes.
+	 */
+	kstat_named_t zil_itx_metaslab_normal_count;
+	kstat_named_t zil_itx_metaslab_normal_bytes;
+
+	/*
+	 * Transactions which have been allocated to the "slog" storage pool.
+	 * If there are no separate log devices, this is the same as the
+	 * "normal" pool.
+	 */
+	kstat_named_t zil_itx_metaslab_slog_count;
+	kstat_named_t zil_itx_metaslab_slog_bytes;
+} zil_stats_t;
+
+extern zil_stats_t zil_stats;
+
+#define ZIL_STAT_INCR(stat, val) \
+    atomic_add_64(&zil_stats.stat.value.ui64, (val))
+#define ZIL_STAT_BUMP(stat) \
+    ZIL_STAT_INCR(stat, 1)
+
 typedef int zil_parse_blk_func_t(zilog_t *zilog, blkptr_t *bp, void *arg,
     uint64_t txg);
 typedef int zil_parse_lr_func_t(zilog_t *zilog, lr_t *lr, void *arg,
diff --git a/module/zfs/zil.c b/module/zfs/zil.c
index fb05036285..9ab02d70c0 100644
--- a/module/zfs/zil.c
+++ b/module/zfs/zil.c
@@ -65,6 +65,27 @@
  * needed from the blocks available. Figure X shows the ZIL structure:
  */
 
+/*
+ * See zil.h for more information about these fields.
+ */
+zil_stats_t zil_stats = {
+	{ "zil_commit_count", KSTAT_DATA_UINT64 },
+	{ "zil_commit_writer_count", KSTAT_DATA_UINT64 },
+	{ "zil_itx_count", KSTAT_DATA_UINT64 },
+	{ "zil_itx_indirect_count", KSTAT_DATA_UINT64 },
+	{ "zil_itx_indirect_bytes", KSTAT_DATA_UINT64 },
+	{ "zil_itx_copied_count", KSTAT_DATA_UINT64 },
+	{ "zil_itx_copied_bytes", KSTAT_DATA_UINT64 },
+	{ "zil_itx_needcopy_count", KSTAT_DATA_UINT64 },
+	{ "zil_itx_needcopy_bytes", KSTAT_DATA_UINT64 },
+	{ "zil_itx_metaslab_normal_count", KSTAT_DATA_UINT64 },
+	{ "zil_itx_metaslab_normal_bytes", KSTAT_DATA_UINT64 },
+	{ "zil_itx_metaslab_slog_count", KSTAT_DATA_UINT64 },
+	{ "zil_itx_metaslab_slog_bytes", KSTAT_DATA_UINT64 },
+};
+
+static kstat_t *zil_ksp;
+
 /*
  * This global ZIL switch affects all pools
  */
@@ -879,6 +900,7 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb)
 	uint64_t txg;
 	uint64_t zil_blksz, wsz;
 	int i, error;
+	boolean_t use_slog;
 
 	if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2) {
 		zilc = (zil_chain_t *)lwb->lwb_buf;
@@ -935,8 +957,19 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb)
 
 	BP_ZERO(bp);
 	/* pass the old blkptr in order to spread log blocks across devs */
+	use_slog = USE_SLOG(zilog);
 	error = zio_alloc_zil(spa, txg, bp, &lwb->lwb_blk, zil_blksz,
-	    USE_SLOG(zilog));
+	    use_slog);
+	if (use_slog)
+	{
+		ZIL_STAT_BUMP(zil_itx_metaslab_slog_count);
+		ZIL_STAT_INCR(zil_itx_metaslab_slog_bytes, lwb->lwb_nused);
+	}
+	else
+	{
+		ZIL_STAT_BUMP(zil_itx_metaslab_normal_count);
+		ZIL_STAT_INCR(zil_itx_metaslab_normal_bytes, lwb->lwb_nused);
+	}
 	if (!error) {
 		ASSERT3U(bp->blk_birth, ==, txg);
 		bp->blk_cksum = lwb->lwb_blk.blk_cksum;
@@ -1022,13 +1055,18 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
 	lrc = (lr_t *)lr_buf;
 	lrw = (lr_write_t *)lrc;
 
+	ZIL_STAT_BUMP(zil_itx_count);
+
 	/*
 	 * If it's a write, fetch the data or get its blkptr as appropriate.
 	 */
 	if (lrc->lrc_txtype == TX_WRITE) {
 		if (txg > spa_freeze_txg(zilog->zl_spa))
 			txg_wait_synced(zilog->zl_dmu_pool, txg);
-		if (itx->itx_wr_state != WR_COPIED) {
+		if (itx->itx_wr_state == WR_COPIED) {
+			ZIL_STAT_BUMP(zil_itx_copied_count);
+			ZIL_STAT_INCR(zil_itx_copied_bytes, lrw->lr_length);
+		} else {
 			char *dbuf;
 			int error;
 
@@ -1036,9 +1074,13 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
 				ASSERT(itx->itx_wr_state == WR_NEED_COPY);
 				dbuf = lr_buf + reclen;
 				lrw->lr_common.lrc_reclen += dlen;
+				ZIL_STAT_BUMP(zil_itx_needcopy_count);
+				ZIL_STAT_INCR(zil_itx_needcopy_bytes, lrw->lr_length);
 			} else {
 				ASSERT(itx->itx_wr_state == WR_INDIRECT);
 				dbuf = NULL;
+				ZIL_STAT_BUMP(zil_itx_indirect_count);
+				ZIL_STAT_INCR(zil_itx_indirect_bytes, lrw->lr_length);
 			}
 			error = zilog->zl_get_data(
 			    itx->itx_private, lrw, dbuf, lwb->lwb_zio);
@@ -1497,6 +1539,8 @@ zil_commit(zilog_t *zilog, uint64_t foid)
 	if (zilog->zl_sync == ZFS_SYNC_DISABLED)
 		return;
 
+	ZIL_STAT_BUMP(zil_commit_count);
+
 	/* move the async itxs for the foid to the sync queues */
 	zil_async_to_sync(zilog, foid);
 
@@ -1512,6 +1556,7 @@ zil_commit(zilog_t *zilog, uint64_t foid)
 
 	zilog->zl_next_batch++;
 	zilog->zl_writer = B_TRUE;
+	ZIL_STAT_BUMP(zil_commit_writer_count);
 	zil_commit_writer(zilog);
 	zilog->zl_com_batch = mybatch;
 	zilog->zl_writer = B_FALSE;
@@ -1600,12 +1645,26 @@ zil_init(void)
 {
 	zil_lwb_cache = kmem_cache_create("zil_lwb_cache",
 	    sizeof (struct lwb), 0, NULL, NULL, NULL, NULL, NULL, 0);
+
+	zil_ksp = kstat_create("zfs", 0, "zil", "misc",
+	    KSTAT_TYPE_NAMED, sizeof (zil_stats) / sizeof (kstat_named_t),
+	    KSTAT_FLAG_VIRTUAL);
+
+	if (zil_ksp != NULL) {
+		zil_ksp->ks_data = &zil_stats;
+		kstat_install(zil_ksp);
+	}
 }
 
 void
 zil_fini(void)
 {
 	kmem_cache_destroy(zil_lwb_cache);
+
+	if (zil_ksp != NULL) {
+		kstat_delete(zil_ksp);
+		zil_ksp = NULL;
+	}
 }
 
 void