zil: Add some more statistics.

In addition to a number of actual log bytes written, account also a
total written bytes including padding and total allocated bytes (bytes
<= write <= alloc).  It should allow to monitor zil traffic and space
efficiency.

Add dtrace probe for zil block size selection.

Make zilstat report more information and fit it into less width.

Reviewed-by: Ameer Hamza <ahamza@ixsystems.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by:  Alexander Motin <mav@FreeBSD.org>
Sponsored by:   iXsystems, Inc.
Closes #14863
This commit is contained in:
Alexander Motin 2023-05-25 16:51:53 -04:00 committed by GitHub
parent f63811f072
commit b6fbe61fa6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 213 additions and 52 deletions

View File

@ -36,31 +36,49 @@ import argparse
from argparse import RawTextHelpFormatter
cols = {
# hdr: [size, scale, kstat name]
# hdr: [size, scale, kstat name]
"time": [8, -1, "time"],
"pool": [12, -1, "pool"],
"ds": [12, -1, "dataset_name"],
"obj": [12, -1, "objset"],
"zcc": [10, 1000, "zil_commit_count"],
"zcwc": [10, 1000, "zil_commit_writer_count"],
"ziic": [10, 1000, "zil_itx_indirect_count"],
"zic": [10, 1000, "zil_itx_count"],
"ziib": [10, 1024, "zil_itx_indirect_bytes"],
"zicc": [10, 1000, "zil_itx_copied_count"],
"zicb": [10, 1024, "zil_itx_copied_bytes"],
"zinc": [10, 1000, "zil_itx_needcopy_count"],
"zinb": [10, 1024, "zil_itx_needcopy_bytes"],
"zimnc": [10, 1000, "zil_itx_metaslab_normal_count"],
"zimnb": [10, 1024, "zil_itx_metaslab_normal_bytes"],
"zimsc": [10, 1000, "zil_itx_metaslab_slog_count"],
"zimsb": [10, 1024, "zil_itx_metaslab_slog_bytes"],
"cc": [5, 1000, "zil_commit_count"],
"cwc": [5, 1000, "zil_commit_writer_count"],
"ic": [5, 1000, "zil_itx_count"],
"iic": [5, 1000, "zil_itx_indirect_count"],
"iib": [5, 1024, "zil_itx_indirect_bytes"],
"icc": [5, 1000, "zil_itx_copied_count"],
"icb": [5, 1024, "zil_itx_copied_bytes"],
"inc": [5, 1000, "zil_itx_needcopy_count"],
"inb": [5, 1024, "zil_itx_needcopy_bytes"],
"idc": [5, 1000, "icc+inc"],
"idb": [5, 1024, "icb+inb"],
"iwc": [5, 1000, "iic+idc"],
"iwb": [5, 1024, "iib+idb"],
"imnc": [6, 1000, "zil_itx_metaslab_normal_count"],
"imnb": [6, 1024, "zil_itx_metaslab_normal_bytes"],
"imnw": [6, 1024, "zil_itx_metaslab_normal_write"],
"imna": [6, 1024, "zil_itx_metaslab_normal_alloc"],
"imsc": [6, 1000, "zil_itx_metaslab_slog_count"],
"imsb": [6, 1024, "zil_itx_metaslab_slog_bytes"],
"imsw": [6, 1024, "zil_itx_metaslab_slog_write"],
"imsa": [6, 1024, "zil_itx_metaslab_slog_alloc"],
"imc": [5, 1000, "imnc+imsc"],
"imb": [5, 1024, "imnb+imsb"],
"imw": [5, 1024, "imnw+imsw"],
"ima": [5, 1024, "imna+imsa"],
"se%": [3, 100, "imb/ima"],
"sen%": [4, 100, "imnb/imna"],
"ses%": [4, 100, "imsb/imsa"],
"te%": [3, 100, "imb/imw"],
"ten%": [4, 100, "imnb/imnw"],
"tes%": [4, 100, "imsb/imsw"],
}
hdr = ["time", "pool", "ds", "obj", "zcc", "zcwc", "ziic", "zic", "ziib", \
"zicc", "zicb", "zinc", "zinb", "zimnc", "zimnb", "zimsc", "zimsb"]
hdr = ["time", "ds", "cc", "ic", "idc", "idb", "iic", "iib",
"imnc", "imnw", "imsc", "imsw"]
ghdr = ["time", "zcc", "zcwc", "ziic", "zic", "ziib", "zicc", "zicb",
"zinc", "zinb", "zimnc", "zimnb", "zimsc", "zimsb"]
ghdr = ["time", "cc", "ic", "idc", "idb", "iic", "iib",
"imnc", "imnw", "imsc", "imsw"]
cmd = ("Usage: zilstat [-hgdv] [-i interval] [-p pool_name]")
@ -105,7 +123,7 @@ def print_header():
global sep
for col in hdr:
new_col = col
if interval > 0 and col not in ['time', 'pool', 'ds', 'obj']:
if interval > 0 and cols[col][1] > 100:
new_col += "/s"
sys.stdout.write("%*s%s" % (cols[col][0], new_col, sep))
sys.stdout.write("\n")
@ -115,7 +133,7 @@ def print_values(v):
global sep
for col in hdr:
val = v[cols[col][2]]
if col not in ['time', 'pool', 'ds', 'obj'] and interval > 0:
if interval > 0 and cols[col][1] > 100:
val = v[cols[col][2]] // interval
sys.stdout.write("%s%s" % (
prettynum(cols[col][0], cols[col][1], val), sep))
@ -237,9 +255,7 @@ def init():
invalid = []
for ele in hdr:
if gFlag and ele not in ghdr:
invalid.append(ele)
elif ele not in cols:
if ele not in cols:
invalid.append(ele)
if len(invalid) > 0:
@ -403,17 +419,17 @@ def calculate_diff():
diff = copy.deepcopy(curr)
for pool in curr:
for objset in curr[pool]:
for col in hdr:
if col not in ['time', 'pool', 'ds', 'obj']:
key = cols[col][2]
# If prev is NULL, this is the
# first time we are here
if not prev:
diff[pool][objset][key] = 0
else:
diff[pool][objset][key] \
= curr[pool][objset][key] \
- prev[pool][objset][key]
for key in curr[pool][objset]:
if not isinstance(diff[pool][objset][key], int):
continue
# If prev is NULL, this is the
# first time we are here
if not prev:
diff[pool][objset][key] = 0
else:
diff[pool][objset][key] \
= curr[pool][objset][key] \
- prev[pool][objset][key]
def zil_build_dict(pool = "GLOBAL"):
global kstat
@ -425,10 +441,77 @@ def zil_build_dict(pool = "GLOBAL"):
if objset not in curr[pool]:
curr[pool][objset] = dict()
curr[pool][objset][key] = val
curr[pool][objset]["pool"] = pool
curr[pool][objset]["objset"] = objset
curr[pool][objset]["time"] = time.strftime("%H:%M:%S", \
time.localtime())
def zil_extend_dict():
global diff
for pool in diff:
for objset in diff[pool]:
diff[pool][objset]["pool"] = pool
diff[pool][objset]["objset"] = objset
diff[pool][objset]["time"] = time.strftime("%H:%M:%S", \
time.localtime())
diff[pool][objset]["icc+inc"] = \
diff[pool][objset]["zil_itx_copied_count"] + \
diff[pool][objset]["zil_itx_needcopy_count"]
diff[pool][objset]["icb+inb"] = \
diff[pool][objset]["zil_itx_copied_bytes"] + \
diff[pool][objset]["zil_itx_needcopy_bytes"]
diff[pool][objset]["iic+idc"] = \
diff[pool][objset]["zil_itx_indirect_count"] + \
diff[pool][objset]["zil_itx_copied_count"] + \
diff[pool][objset]["zil_itx_needcopy_count"]
diff[pool][objset]["iib+idb"] = \
diff[pool][objset]["zil_itx_indirect_bytes"] + \
diff[pool][objset]["zil_itx_copied_bytes"] + \
diff[pool][objset]["zil_itx_needcopy_bytes"]
diff[pool][objset]["imnc+imsc"] = \
diff[pool][objset]["zil_itx_metaslab_normal_count"] + \
diff[pool][objset]["zil_itx_metaslab_slog_count"]
diff[pool][objset]["imnb+imsb"] = \
diff[pool][objset]["zil_itx_metaslab_normal_bytes"] + \
diff[pool][objset]["zil_itx_metaslab_slog_bytes"]
diff[pool][objset]["imnw+imsw"] = \
diff[pool][objset]["zil_itx_metaslab_normal_write"] + \
diff[pool][objset]["zil_itx_metaslab_slog_write"]
diff[pool][objset]["imna+imsa"] = \
diff[pool][objset]["zil_itx_metaslab_normal_alloc"] + \
diff[pool][objset]["zil_itx_metaslab_slog_alloc"]
if diff[pool][objset]["imna+imsa"] > 0:
diff[pool][objset]["imb/ima"] = 100 * \
diff[pool][objset]["imnb+imsb"] // \
diff[pool][objset]["imna+imsa"]
else:
diff[pool][objset]["imb/ima"] = 100
if diff[pool][objset]["zil_itx_metaslab_normal_alloc"] > 0:
diff[pool][objset]["imnb/imna"] = 100 * \
diff[pool][objset]["zil_itx_metaslab_normal_bytes"] // \
diff[pool][objset]["zil_itx_metaslab_normal_alloc"]
else:
diff[pool][objset]["imnb/imna"] = 100
if diff[pool][objset]["zil_itx_metaslab_slog_alloc"] > 0:
diff[pool][objset]["imsb/imsa"] = 100 * \
diff[pool][objset]["zil_itx_metaslab_slog_bytes"] // \
diff[pool][objset]["zil_itx_metaslab_slog_alloc"]
else:
diff[pool][objset]["imsb/imsa"] = 100
if diff[pool][objset]["imnw+imsw"] > 0:
diff[pool][objset]["imb/imw"] = 100 * \
diff[pool][objset]["imnb+imsb"] // \
diff[pool][objset]["imnw+imsw"]
else:
diff[pool][objset]["imb/imw"] = 100
if diff[pool][objset]["zil_itx_metaslab_normal_alloc"] > 0:
diff[pool][objset]["imnb/imnw"] = 100 * \
diff[pool][objset]["zil_itx_metaslab_normal_bytes"] // \
diff[pool][objset]["zil_itx_metaslab_normal_write"]
else:
diff[pool][objset]["imnb/imnw"] = 100
if diff[pool][objset]["zil_itx_metaslab_slog_alloc"] > 0:
diff[pool][objset]["imsb/imsw"] = 100 * \
diff[pool][objset]["zil_itx_metaslab_slog_bytes"] // \
diff[pool][objset]["zil_itx_metaslab_slog_write"]
else:
diff[pool][objset]["imsb/imsw"] = 100
def sign_handler_epipe(sig, frame):
print("Caught EPIPE signal: " + str(frame))
@ -437,30 +520,31 @@ def sign_handler_epipe(sig, frame):
def main():
global interval
global curr
global curr, diff
hprint = False
init()
signal.signal(signal.SIGINT, signal.SIG_DFL)
signal.signal(signal.SIGPIPE, sign_handler_epipe)
zil_process_kstat()
if not curr:
print ("Error: No stats to show")
sys.exit(0)
print_header()
if interval > 0:
time.sleep(interval)
while True:
calculate_diff()
if not diff:
print ("Error: No stats to show")
sys.exit(0)
if hprint == False:
print_header()
hprint = True
zil_extend_dict()
print_dict(diff)
time.sleep(interval)
else:
zil_process_kstat()
if not curr:
print ("Error: No stats to show")
sys.exit(0)
print_header()
print_dict(curr)
diff = curr
zil_extend_dict()
print_dict(diff)
if __name__ == '__main__':
main()

View File

@ -215,6 +215,39 @@ DEFINE_EVENT(zfs_zil_commit_io_error_class, name, \
TP_ARGS(zilog, zcw))
DEFINE_ZIL_COMMIT_IO_ERROR_EVENT(zfs_zil__commit__io__error);
/*
* Generic support for three argument tracepoints of the form:
*
* DTRACE_PROBE3(...,
* zilog_t *, ...,
* uint64_t, ...,
* uint64_t, ...);
*/
/* BEGIN CSTYLED */
DECLARE_EVENT_CLASS(zfs_zil_block_size_class,
TP_PROTO(zilog_t *zilog, uint64_t res, uint64_t s1),
TP_ARGS(zilog, res, s1),
TP_STRUCT__entry(
ZILOG_TP_STRUCT_ENTRY
__field(uint64_t, res)
__field(uint64_t, s1)
),
TP_fast_assign(
ZILOG_TP_FAST_ASSIGN
__entry->res = res;
__entry->s1 = s1;
),
TP_printk(
ZILOG_TP_PRINTK_FMT " res %llu s1 %llu",
ZILOG_TP_PRINTK_ARGS, __entry->res, __entry->s1)
);
#define DEFINE_ZIL_BLOCK_SIZE_EVENT(name) \
DEFINE_EVENT(zfs_zil_block_size_class, name, \
TP_PROTO(zilog_t *zilog, uint64_t res, uint64_t s1), \
TP_ARGS(zilog, res, s1))
DEFINE_ZIL_BLOCK_SIZE_EVENT(zfs_zil__block__size);
#endif /* _TRACE_ZIL_H */
#undef TRACE_INCLUDE_PATH
@ -228,6 +261,7 @@ DEFINE_ZIL_COMMIT_IO_ERROR_EVENT(zfs_zil__commit__io__error);
DEFINE_DTRACE_PROBE2(zil__process__commit__itx);
DEFINE_DTRACE_PROBE2(zil__process__normal__itx);
DEFINE_DTRACE_PROBE2(zil__commit__io__error);
DEFINE_DTRACE_PROBE3(zil__block__size);
#endif /* HAVE_DECLARE_EVENT_CLASS */
#endif /* _KERNEL */

View File

@ -489,18 +489,22 @@ typedef struct zil_stats {
* Transactions which have been allocated to the "normal"
* (i.e. not slog) storage pool. Note that "bytes" accumulate
* the actual log record sizes - which do not include the actual
* data in case of indirect writes.
* data in case of indirect writes. bytes <= write <= alloc.
*/
kstat_named_t zil_itx_metaslab_normal_count;
kstat_named_t zil_itx_metaslab_normal_bytes;
kstat_named_t zil_itx_metaslab_normal_write;
kstat_named_t zil_itx_metaslab_normal_alloc;
/*
* Transactions which have been allocated to the "slog" storage pool.
* If there are no separate log devices, this is the same as the
* "normal" pool.
* "normal" pool. bytes <= write <= alloc.
*/
kstat_named_t zil_itx_metaslab_slog_count;
kstat_named_t zil_itx_metaslab_slog_bytes;
kstat_named_t zil_itx_metaslab_slog_write;
kstat_named_t zil_itx_metaslab_slog_alloc;
} zil_kstat_values_t;
typedef struct zil_sums {
@ -515,8 +519,12 @@ typedef struct zil_sums {
wmsum_t zil_itx_needcopy_bytes;
wmsum_t zil_itx_metaslab_normal_count;
wmsum_t zil_itx_metaslab_normal_bytes;
wmsum_t zil_itx_metaslab_normal_write;
wmsum_t zil_itx_metaslab_normal_alloc;
wmsum_t zil_itx_metaslab_slog_count;
wmsum_t zil_itx_metaslab_slog_bytes;
wmsum_t zil_itx_metaslab_slog_write;
wmsum_t zil_itx_metaslab_slog_alloc;
} zil_sums_t;
#define ZIL_STAT_INCR(zil, stat, val) \

View File

@ -49,8 +49,12 @@ static dataset_kstat_values_t empty_dataset_kstats = {
{ "zil_itx_needcopy_bytes", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_normal_count", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_normal_bytes", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_normal_write", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_normal_alloc", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_slog_count", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_slog_bytes", KSTAT_DATA_UINT64 }
{ "zil_itx_metaslab_slog_bytes", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_slog_write", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_slog_alloc", KSTAT_DATA_UINT64 }
}
};

View File

@ -116,8 +116,12 @@ static zil_kstat_values_t zil_stats = {
{ "zil_itx_needcopy_bytes", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_normal_count", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_normal_bytes", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_normal_write", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_normal_alloc", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_slog_count", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_slog_bytes", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_slog_write", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_slog_alloc", KSTAT_DATA_UINT64 },
};
static zil_sums_t zil_sums_global;
@ -378,8 +382,12 @@ zil_sums_init(zil_sums_t *zs)
wmsum_init(&zs->zil_itx_needcopy_bytes, 0);
wmsum_init(&zs->zil_itx_metaslab_normal_count, 0);
wmsum_init(&zs->zil_itx_metaslab_normal_bytes, 0);
wmsum_init(&zs->zil_itx_metaslab_normal_write, 0);
wmsum_init(&zs->zil_itx_metaslab_normal_alloc, 0);
wmsum_init(&zs->zil_itx_metaslab_slog_count, 0);
wmsum_init(&zs->zil_itx_metaslab_slog_bytes, 0);
wmsum_init(&zs->zil_itx_metaslab_slog_write, 0);
wmsum_init(&zs->zil_itx_metaslab_slog_alloc, 0);
}
void
@ -396,8 +404,12 @@ zil_sums_fini(zil_sums_t *zs)
wmsum_fini(&zs->zil_itx_needcopy_bytes);
wmsum_fini(&zs->zil_itx_metaslab_normal_count);
wmsum_fini(&zs->zil_itx_metaslab_normal_bytes);
wmsum_fini(&zs->zil_itx_metaslab_normal_write);
wmsum_fini(&zs->zil_itx_metaslab_normal_alloc);
wmsum_fini(&zs->zil_itx_metaslab_slog_count);
wmsum_fini(&zs->zil_itx_metaslab_slog_bytes);
wmsum_fini(&zs->zil_itx_metaslab_slog_write);
wmsum_fini(&zs->zil_itx_metaslab_slog_alloc);
}
void
@ -425,10 +437,18 @@ zil_kstat_values_update(zil_kstat_values_t *zs, zil_sums_t *zil_sums)
wmsum_value(&zil_sums->zil_itx_metaslab_normal_count);
zs->zil_itx_metaslab_normal_bytes.value.ui64 =
wmsum_value(&zil_sums->zil_itx_metaslab_normal_bytes);
zs->zil_itx_metaslab_normal_write.value.ui64 =
wmsum_value(&zil_sums->zil_itx_metaslab_normal_write);
zs->zil_itx_metaslab_normal_alloc.value.ui64 =
wmsum_value(&zil_sums->zil_itx_metaslab_normal_alloc);
zs->zil_itx_metaslab_slog_count.value.ui64 =
wmsum_value(&zil_sums->zil_itx_metaslab_slog_count);
zs->zil_itx_metaslab_slog_bytes.value.ui64 =
wmsum_value(&zil_sums->zil_itx_metaslab_slog_bytes);
zs->zil_itx_metaslab_slog_write.value.ui64 =
wmsum_value(&zil_sums->zil_itx_metaslab_slog_write);
zs->zil_itx_metaslab_slog_alloc.value.ui64 =
wmsum_value(&zil_sums->zil_itx_metaslab_slog_alloc);
}
/*
@ -1814,6 +1834,9 @@ zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb)
zilog->zl_prev_blks[zilog->zl_prev_rotor] = zil_blksz;
for (i = 0; i < ZIL_PREV_BLKS; i++)
zil_blksz = MAX(zil_blksz, zilog->zl_prev_blks[i]);
DTRACE_PROBE3(zil__block__size, zilog_t *, zilog,
uint64_t, zil_blksz,
uint64_t, zilog->zl_prev_blks[zilog->zl_prev_rotor]);
zilog->zl_prev_rotor = (zilog->zl_prev_rotor + 1) & (ZIL_PREV_BLKS - 1);
if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2)
@ -1888,10 +1911,18 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
ZIL_STAT_BUMP(zilog, zil_itx_metaslab_slog_count);
ZIL_STAT_INCR(zilog, zil_itx_metaslab_slog_bytes,
lwb->lwb_nused);
ZIL_STAT_INCR(zilog, zil_itx_metaslab_slog_write,
wsz);
ZIL_STAT_INCR(zilog, zil_itx_metaslab_slog_alloc,
BP_GET_LSIZE(&lwb->lwb_blk));
} else {
ZIL_STAT_BUMP(zilog, zil_itx_metaslab_normal_count);
ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_bytes,
lwb->lwb_nused);
ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_write,
wsz);
ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_alloc,
BP_GET_LSIZE(&lwb->lwb_blk));
}
spa_config_enter(zilog->zl_spa, SCL_STATE, lwb, RW_READER);
zil_lwb_add_block(lwb, &lwb->lwb_blk);

View File

@ -25,7 +25,7 @@
is_freebsd && ! python3 -c 'import sysctl' 2>/dev/null && log_unsupported "python3 sysctl module missing"
set -A args "" "-s \",\"" "-v" \
"-f time,zcwc,zimnb,zimsb"
"-f time,cwc,imnb,imsb"
log_assert "zilstat generates output and doesn't return an error code"