2019-09-12 20:32:32 +00:00
|
|
|
#!/usr/bin/env python3
|
2018-02-28 16:52:34 +00:00
|
|
|
#
|
|
|
|
# Copyright (c) 2008 Ben Rockwood <benr@cuddletech.com>,
|
|
|
|
# Copyright (c) 2010 Martin Matuska <mm@FreeBSD.org>,
|
|
|
|
# Copyright (c) 2010-2011 Jason J. Hellenthal <jhell@DataIX.net>,
|
|
|
|
# Copyright (c) 2017 Scot W. Stevenson <scot.stevenson@gmail.com>
|
|
|
|
# All rights reserved.
|
|
|
|
#
|
|
|
|
# Redistribution and use in source and binary forms, with or without
|
|
|
|
# modification, are permitted provided that the following conditions
|
|
|
|
# are met:
|
|
|
|
#
|
|
|
|
# 1. Redistributions of source code must retain the above copyright
|
|
|
|
# notice, this list of conditions and the following disclaimer.
|
|
|
|
# 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
# notice, this list of conditions and the following disclaimer in the
|
|
|
|
# documentation and/or other materials provided with the distribution.
|
|
|
|
#
|
|
|
|
# THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
# ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
|
|
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
# SUCH DAMAGE.
|
|
|
|
"""Print statistics on the ZFS ARC Cache and other information
|
|
|
|
|
|
|
|
Provides basic information on the ARC, its efficiency, the L2ARC (if present),
|
|
|
|
the Data Management Unit (DMU), Virtual Devices (VDEVs), and tunables. See
|
|
|
|
the in-source documentation and code at
|
2020-10-09 03:10:13 +00:00
|
|
|
https://github.com/openzfs/zfs/blob/master/module/zfs/arc.c for details.
|
2018-02-28 16:52:34 +00:00
|
|
|
The original introduction to arc_summary can be found at
|
|
|
|
http://cuddletech.com/?p=454
|
|
|
|
"""
|
|
|
|
|
|
|
|
import argparse
|
|
|
|
import os
|
|
|
|
import subprocess
|
|
|
|
import sys
|
|
|
|
import time
|
2021-06-01 21:20:50 +00:00
|
|
|
import errno
|
2018-02-28 16:52:34 +00:00
|
|
|
|
2021-05-26 02:02:01 +00:00
|
|
|
# We can't use env -S portably, and we need python3 -u to handle pipes in
|
|
|
|
# the shell abruptly closing the way we want to, so...
|
|
|
|
import io
|
|
|
|
# Replace this process with "python3 -u <original argv>" while stderr is
# still backed by a buffered writer (see the note above about env -S).
if isinstance(sys.__stderr__.buffer, io.BufferedWriter):
    unbuffered_argv = [sys.executable, "-u"]
    unbuffered_argv.extend(sys.argv)
    os.execv(sys.executable, unbuffered_argv)
|
|
|
|
|
2020-10-09 03:10:13 +00:00
|
|
|
# Text shown by argparse and in the report header
DESCRIPTION = 'Print ARC and other statistics for OpenZFS'
TITLE = 'ZFS Subsystem Report'
DATE_FORMAT = '%a %b %d %H:%M:%S %Y'

# Layout of the report lines
LINE_LENGTH = 72
INDENT = 8 * ' '

# Section names accepted by the --section option
SECTIONS = ['arc', 'archits', 'dmu', 'l2arc',
            'spl', 'tunables', 'vdev', 'zil']
SECTION_HELP = 'print info from one section ({0})'.format(' '.join(SECTIONS))

# Maps a section name to the kstat it is read from. Tunables and SPL are
# handled separately because they come from different sources.
SECTION_PATHS = dict(
    arc='arcstats',
    dmu='dmu_tx',
    l2arc='arcstats',  # L2ARC stuff lives in arcstats
    vdev='vdev_cache_stats',
    zfetch='zfetchstats',
    zil='zil',
)
|
|
|
|
|
2019-08-30 16:43:30 +00:00
|
|
|
parser = argparse.ArgumentParser(description=DESCRIPTION)


def _add_switch(short_opt, long_opt, dest, help_text):
    """Register one boolean command line switch that defaults to False."""
    parser.add_argument(short_opt, long_opt, dest=dest, help=help_text,
                        action='store_true', default=False)


# Options are registered in the order they appear in the --help output.
_add_switch('-a', '--alternate', 'alt',
            'use alternate formatting for tunables and SPL')
_add_switch('-d', '--description', 'desc',
            'print descriptions with tunables and SPL')
_add_switch('-g', '--graph', 'graph',
            'print graph on ARC use and exit')
parser.add_argument('-p', '--page', dest='page', type=int,
                    help='print page by number (DEPRECATED, use "-s")')
_add_switch('-r', '--raw', 'raw',
            'dump all available data with minimal formatting')
parser.add_argument('-s', '--section', dest='section', help=SECTION_HELP)

ARGS = parser.parse_args()
|
|
|
|
|
|
|
|
|
2019-11-30 23:43:23 +00:00
|
|
|
if sys.platform.startswith('freebsd'):
    # Requires py36-sysctl on FreeBSD
    import sysctl

    VDEV_CACHE_SIZE = 'vdev.cache_size'

    def is_value(ctl):
        """Return True if the sysctl entry is not of type CTLTYPE_NODE."""
        return ctl.type != sysctl.CTLTYPE_NODE

    def namefmt(ctl, base='vfs.zfs.'):
        """Return the name of the sysctl entry with len(base) leading
        characters removed.
        """
        return ctl.name[len(base):]

    def load_kstats(section):
        """Return the entries below kstat.zfs.misc.<section> as a list of
        'name : value' strings, skipping node entries.
        """
        base = 'kstat.zfs.misc.{section}.'.format(section=section)
        return ['{name} : {value}'.format(name=namefmt(kstat, base),
                                          value=kstat.value)
                for kstat in sysctl.filter(base) if is_value(kstat)]

    def get_params(base):
        """Return a dict of shortened name -> value (as string) for all
        non-node sysctl entries matched by base.
        """
        ctls = sysctl.filter(base)
        return {namefmt(ctl): str(ctl.value) for ctl in ctls if is_value(ctl)}

    def get_tunable_params():
        """Return the parameters found below vfs.zfs."""
        return get_params('vfs.zfs')

    def get_vdev_params():
        """Return the parameters found below vfs.zfs.vdev."""
        return get_params('vfs.zfs.vdev')

    def get_version_impl(request):
        """Return a '<name> version <value>' string for 'zfs' or 'spl'.
        Any other request raises KeyError.
        """
        # FreeBSD reports versions for zpl and spa instead of zfs and spl.
        name = {'zfs': 'zpl',
                'spl': 'spa'}[request]
        mib = 'vfs.zfs.version.{}'.format(name)
        version = sysctl.filter(mib)[0].value
        return '{} version {}'.format(name, version)

    def get_descriptions(_request):
        """Return a dict of shortened name -> description for all non-node
        sysctl entries below vfs.zfs. The argument is ignored.
        """
        ctls = sysctl.filter('vfs.zfs')
        return {namefmt(ctl): ctl.description for ctl in ctls if is_value(ctl)}


elif sys.platform.startswith('linux'):
    KSTAT_PATH = '/proc/spl/kstat/zfs'
    SPL_PATH = '/sys/module/spl/parameters'
    TUNABLES_PATH = '/sys/module/zfs/parameters'

    VDEV_CACHE_SIZE = 'zfs_vdev_cache_size'

    def load_kstats(section):
        """Return the lines of KSTAT_PATH/<section> without the first two
        (header) lines.
        """
        path = os.path.join(KSTAT_PATH, section)
        with open(path) as f:
            return list(f)[2:]  # Get rid of header

    def get_params(basepath):
        """Collect information on the Solaris Porting Layer (SPL) or the
        tunables, depending on the PATH given. Does not check if PATH is
        legal.

        Returns a dict mapping each file name in basepath to the stripped
        content of that file.
        """
        result = {}
        for name in os.listdir(basepath):
            path = os.path.join(basepath, name)
            with open(path) as f:
                value = f.read()
            result[name] = value.strip()
        return result

    def get_spl_params():
        """Return the parameters found in SPL_PATH."""
        return get_params(SPL_PATH)

    def get_tunable_params():
        """Return the parameters found in TUNABLES_PATH."""
        return get_params(TUNABLES_PATH)

    def get_vdev_params():
        """Return the parameters found in TUNABLES_PATH (the same source
        as get_tunable_params() on this platform).
        """
        return get_params(TUNABLES_PATH)

    def get_version_impl(request):
        """Return the stripped content of /sys/module/<request>/version,
        or '(unknown)' if that file cannot be opened or read.
        """
        # The original arc_summary called /sbin/modinfo/{spl,zfs} to get
        # the version information. We switch to /sys/module/{spl,zfs}/version
        # to make sure we get what is really loaded in the kernel
        try:
            with open("/sys/module/{}/version".format(request)) as f:
                return f.read().strip()
        except OSError:
            # Only I/O failures mean "unknown"; a bare except would also
            # swallow KeyboardInterrupt and SystemExit.
            return "(unknown)"

    def get_descriptions(request):
        """Get the descriptions of the Solaris Porting Layer (SPL) or the
        tunables, return with minimal formatting.

        request must be 'spl' or 'zfs'. Returns a dict mapping parameter
        name to description. Prints an error and exits with status 1 if
        the request is invalid or modinfo cannot be run successfully.
        """
        if request not in ('spl', 'zfs'):
            print('ERROR: description of "{0}" requested'.format(request))
            sys.exit(1)

        target_prefix = 'parm:'

        # We would prefer to do this with /sys/modules -- see the discussion
        # at get_version() -- but there isn't a way to get the descriptions
        # from there, so we fall back on modinfo
        command = ["/sbin/modinfo", request, "-0"]

        try:
            info = subprocess.run(command, stdout=subprocess.PIPE,
                                  check=True, universal_newlines=True)
        except (subprocess.CalledProcessError, OSError):
            # CalledProcessError: modinfo returned a non-zero status.
            # OSError: the modinfo binary itself could not be executed.
            print("Error: Descriptions not available",
                  "(can't access kernel module)")
            sys.exit(1)

        descs = {}

        # With -0 the fields are separated by NUL characters
        for line in info.stdout.split('\0'):

            if not line.startswith(target_prefix):
                continue

            line = line[len(target_prefix):].strip()
            # partition() tolerates an entry without a ':' separator
            name, _, raw_desc = line.partition(':')
            # Drop the trailing "(type)" part; strip before testing so a
            # whitespace-only description counts as missing
            desc = raw_desc.rsplit('(', 1)[0].strip()

            if not desc:
                desc = '(No description found)'

            descs[name.strip()] = desc

        return descs
|
|
|
|
|
2021-05-26 02:02:01 +00:00
|
|
|
def handle_unraisableException(exc_type, exc_value=None, exc_traceback=None,
                               err_msg=None, object=None):
    """Hook for sys.unraisablehook; forwards to handle_Exception().

    Python calls sys.unraisablehook with ONE argument, an object carrying
    the attributes exc_type, exc_value, exc_traceback, err_msg and object.
    That object arrives here in the first parameter, so it is unpacked
    before forwarding. Calling this function with the individual values
    spelled out keeps working as well.
    """
    if hasattr(exc_type, 'exc_type'):
        unraisable = exc_type
        exc_type = unraisable.exc_type
        exc_value = unraisable.exc_value
        exc_traceback = unraisable.exc_traceback

    # Forward the exception value (not the object that triggered the
    # problem) so handle_Exception() can inspect and re-raise it.
    handle_Exception(exc_type, exc_value, exc_traceback)


def handle_Exception(ex_cls, ex, tb):
    """Hook for sys.excepthook: exit quietly on conditions that are
    expected when the output pipe goes away, re-raise everything else.

    ex_cls is the exception class, ex the exception instance and tb the
    traceback (unused).
    """
    if ex_cls is KeyboardInterrupt:
        sys.exit()

    if ex_cls is BrokenPipeError:
        # It turns out that while sys.exit() triggers an exception
        # not handled message on Python 3.8+, os._exit() does not.
        os._exit(0)

    if ex_cls is OSError:
        if ex.errno == errno.ENOTCONN:
            sys.exit()

    raise ex
|
|
|
|
|
|
|
|
# Route uncaught exceptions through our handler; the hook for unraisable
# exceptions is only installed where the interpreter provides it.
sys.excepthook = handle_Exception
if hasattr(sys, 'unraisablehook'):  # Python 3.8+
    sys.unraisablehook = handle_unraisableException
|
|
|
|
|
2019-09-10 20:27:53 +00:00
|
|
|
|
2018-02-28 16:52:34 +00:00
|
|
|
def cleanup_line(single_line):
    """Split one raw kstat line into its three whitespace-separated
    fields and return the first and last as a (name, value) tuple. The
    middle field is dropped, so "arc_no_grow 4 0" yields
    ("arc_no_grow", "0"). A line that does not consist of exactly three
    fields raises ValueError.
    """
    fields = single_line.split()
    name, _middle, value = fields
    return (name, value)
|
|
|
|
|
|
|
|
|
|
|
|
def draw_graph(kstats_dict):
    """Draw a primitive graph representing the basic information on the
    ARC -- its size and the proportion used by MFU and MRU -- and print
    it. We use max size of the ARC (c_max) to calculate how full it is.
    This is a very rough representation.

    kstats_dict is the dict of raw kstat lines; only the 'arcstats'
    section is used.
    """

    arc_stats = isolate_section('arcstats', kstats_dict)

    GRAPH_INDENT = ' '*4
    GRAPH_WIDTH = 60
    # Columns available between the two '|' border characters
    inner_width = GRAPH_WIDTH-2

    arc_size = f_bytes(arc_stats['size'])
    arc_perc = f_perc(arc_stats['size'], arc_stats['c_max'])
    mfu_size = f_bytes(arc_stats['mfu_size'])
    mru_size = f_bytes(arc_stats['mru_size'])
    meta_size = f_bytes(arc_stats['arc_meta_used'])
    dnode_limit = f_bytes(arc_stats['arc_dnode_limit'])
    dnode_size = f_bytes(arc_stats['dnode_size'])

    info_form = ('ARC: {0} ({1}) MFU: {2} MRU: {3} META: {4} '
                 'DNODE {5} ({6})')
    info_line = info_form.format(arc_size, arc_perc, mfu_size, mru_size,
                                 meta_size, dnode_size, dnode_limit)
    # Center the text over the graph; a too-long line gets no padding
    info_spc = ' '*int((GRAPH_WIDTH-len(info_line))/2)
    info_line = GRAPH_INDENT+info_spc+info_line

    graph_line = GRAPH_INDENT+'+'+('-'*inner_width)+'+'

    c_max = int(arc_stats['c_max'])

    def fraction_of_max(key):
        """Return the kstat as a fraction of c_max (0.0 if c_max is 0)"""
        return int(arc_stats[key])/c_max if c_max else 0.0

    total_ticks = fraction_of_max('size')*GRAPH_WIDTH
    mfu_ticks = fraction_of_max('mfu_size')*GRAPH_WIDTH
    mru_ticks = fraction_of_max('mru_size')*GRAPH_WIDTH
    other_ticks = total_ticks-(mfu_ticks+mru_ticks)

    core_form = 'F'*int(mfu_ticks)+'R'*int(mru_ticks)+'O'*int(other_ticks)
    # A (nearly) full or overfull ARC yields more ticks than fit between
    # the borders; cut the bar so the right border stays aligned.
    core_form = core_form[:inner_width]
    core_spc = ' '*(inner_width-len(core_form))
    core_line = GRAPH_INDENT+'|'+core_form+core_spc+'|'

    for line in ('', info_line, graph_line, core_line, graph_line, ''):
        print(line)
|
|
|
|
|
|
|
|
|
|
|
|
def f_bytes(byte_string):
    """Return human-readable representation of a byte value in
    powers of 2 (eg "KiB" for "kibibytes", etc) with one decimal
    place. Values smaller than one KiB are returned as a plain
    integer followed by "Bytes". The argument is converted with int().
    """

    binary_units = ((80, "YiB"),  # yobibytes (yotta)
                    (70, "ZiB"),  # zebibytes (zetta)
                    (60, "EiB"),  # exbibytes (exa)
                    (50, "PiB"),  # pebibytes (peta)
                    (40, "TiB"),  # tebibytes (tera)
                    (30, "GiB"),  # gibibytes (giga)
                    (20, "MiB"),  # mebibytes (mega)
                    (10, "KiB"))  # kibibytes (kilo)

    count = int(byte_string)

    # Largest unit first, so the first threshold reached is the right one
    for exponent, suffix in binary_units:
        threshold = 2**exponent

        if count >= threshold:
            return '{0:.1f} {1}'.format(count / threshold, suffix)

    return '{0} Bytes'.format(count)
|
|
|
|
|
|
|
|
|
|
|
|
def f_hits(hits_string):
    """Create a human-readable representation of the number of hits.
    The single-letter symbols used are SI to avoid the confusion caused
    by the different "short scale" and "long scale" representations in
    English, which use the same words for different values. See
    https://en.wikipedia.org/wiki/Names_of_large_numbers and:
    https://physics.nist.gov/cuu/Units/prefixes.html

    Values of 1000 and above get one decimal place and a symbol; smaller
    values are returned as a plain integer.
    """

    si_symbols = ((24, 'Y'),  # yotta (septillion)
                  (21, 'Z'),  # zetta (sextillion)
                  (18, 'E'),  # exa (quintillion)
                  (15, 'P'),  # peta (quadrillion)
                  (12, 'T'),  # tera (trillion)
                  (9, 'G'),   # giga (billion)
                  (6, 'M'),   # mega (million)
                  (3, 'k'))   # kilo (thousand)

    count = int(hits_string)

    # Largest magnitude first, so the first threshold reached wins
    for exponent, symbol in si_symbols:
        threshold = 10**exponent

        if count >= threshold:
            return "%0.1f%s" % (count/threshold, symbol)

    return "%d" % count
|
|
|
|
|
|
|
|
|
|
|
|
def f_perc(value1, value2):
    """Return value1 as a percentage of value2 in human-readable form,
    with one decimal place. Both arguments are converted with float().
    A zero divisor gives 'n/a' instead of an error. If the result
    rounds to '0.0 %' although value1 is greater than zero, '< 0.1 %'
    is returned to avoid confusion.
    """

    numerator = float(value1)
    denominator = float(value2)

    # Float division raises ZeroDivisionError exactly for a zero divisor
    if denominator == 0:
        return 'n/a'

    text = '{0:0.1f} %'.format(100 * numerator/denominator)

    if numerator > 0 and text == '0.0 %':
        return '< 0.1 %'

    return text
|
|
|
|
|
|
|
|
|
|
|
|
def format_raw_line(name, value):
    """Build one output line for the --raw listing of tunables and SPL
    parameters. With --alternate the line is "<indent>name=value";
    otherwise the value is right-aligned behind the indented name.
    """

    if ARGS.alt:
        return '{0}{1}={2}'.format(INDENT, name, value)

    # Right-align the value within the line length if it fits,
    # otherwise just separate it from the name by a single space.
    width = max(LINE_LENGTH - len(INDENT) - len(name), len(value) + 1)

    return '{0}{1}{2:>{w}}'.format(INDENT, name, value, w=width)
|
|
|
|
|
|
|
|
|
|
|
|
def get_kstats():
    """Load the raw kstat data for every distinct source named in
    SECTION_PATHS and return it as a dict keyed by source name. Each
    source is loaded only once, and no further processing is done here,
    giving us the option to only work on what is actually needed. The
    name "kstat" is a holdover from the Solaris utility of the same name.
    """

    # dict.fromkeys() drops duplicate sources but keeps their order
    distinct_sources = dict.fromkeys(SECTION_PATHS.values())

    return {source: load_kstats(source) for source in distinct_sources}
|
|
|
|
|
|
|
|
|
|
|
|
def get_version(request):
    """Return the version of ZFS or SPL on this machine for the header.
    request must be 'zfs' or 'spl'; any other value produces an error
    string instead of raising an error.
    """

    if request in ('spl', 'zfs'):
        return get_version_impl(request)

    return '(ERROR: "{0}" requested)'.format(request)
|
2018-02-28 16:52:34 +00:00
|
|
|
|
|
|
|
|
|
|
|
def print_header():
    """Print the initial heading: a separator line, then three lines
    that pair the title with the date, the kernel name and release with
    the ZFS version, and the machine with the SPL version. This is not
    called for the graph.
    """

    def emit(left, right, end='\n'):
        """Print left and right with right ending at LINE_LENGTH"""
        width = LINE_LENGTH-len(right)
        print('{0:<{spc}}{1}'.format(left, right, spc=width), end=end)

    # datetime is now recommended over time but we keep the exact formatting
    # from the older version of arc_summary in case there are scripts
    # that expect it in this way
    timestamp = time.strftime(DATE_FORMAT)
    uname = os.uname()

    kernel_text = '{0} {1}'.format(uname.sysname, uname.release)
    zfs_version = get_version('zfs')

    machine_text = 'Machine: {0} ({1})'.format(uname.nodename, uname.machine)
    spl_version = get_version('spl')

    print('\n'+('-'*LINE_LENGTH))
    emit(TITLE, timestamp)
    emit(kernel_text, zfs_version)
    # The header ends with one empty line
    emit(machine_text, spl_version, end='\n\n')
|
|
|
|
|
|
|
|
|
|
|
|
def print_raw(kstats_dict):
    """Print all available data with minimal formatting: every section of
    kstats_dict in sorted order with its lines sorted as well, followed
    by the SPL and tunables sections. This can be used as a source to be
    piped through 'grep'.
    """

    for section in sorted(kstats_dict):

        print('\n{0}:'.format(section.upper()))

        for raw_line in sorted(kstats_dict[section]):
            name, value = cleanup_line(raw_line)
            print(format_raw_line(name, value))

    # Tunables and SPL must be handled separately because they come from a
    # different source and have descriptions the user might request
    print()
    section_spl()
    section_tunables()
|
|
|
|
|
|
|
|
|
|
|
|
def isolate_section(section_name, kstats_dict):
    """From the complete information on all sections, retrieve only those
    for one section.

    Returns a dict mapping name to value, built by passing every raw line
    of the section through cleanup_line(). If kstats_dict has no entry for
    section_name, an error is printed and the program exits with status 1.
    """

    try:
        section_data = kstats_dict[section_name]
    except KeyError:
        # Report the requested name; section_data is not bound when the
        # lookup fails
        print('ERROR: Data on {0} not available'.format(section_name))
        sys.exit(1)

    return dict(cleanup_line(line) for line in section_data)
|
|
|
|
|
|
|
|
|
|
|
|
# Formatted output helper functions
|
|
|
|
|
|
|
|
|
|
|
|
def prt_1(text, value):
    """Print text and one value, no indent; the gap between them is
    padded with spaces so the line is LINE_LENGTH long.
    """
    gap = LINE_LENGTH-len(text)-len(value)
    print('{0}{1}{2}'.format(text, ' '*gap, value))
|
|
|
|
|
|
|
|
|
|
|
|
def prt_i1(text, value):
    """Print text and one value, with indent; the gap between them is
    padded with spaces so the line, indent included, is LINE_LENGTH long.
    """
    gap = LINE_LENGTH-len(INDENT)-len(text)-len(value)
    print('{0}{1}{2}{3}'.format(INDENT, text, ' '*gap, value))
|
|
|
|
|
|
|
|
|
|
|
|
def prt_2(text, value1, value2):
    """Print text and two values, no indent. Each value is right-aligned
    in a field of nine characters.
    """
    pair = '{0:>9} {1:>9}'.format(value1, value2)
    gap = LINE_LENGTH-len(text)-len(pair)-2
    print('{0}{1} {2}'.format(text, ' '*gap, pair))
|
|
|
|
|
|
|
|
|
|
|
|
def prt_i2(text, value1, value2):
    """Print text and two values, with indent. Each value is
    right-aligned in a field of nine characters.
    """
    pair = '{0:>9} {1:>9}'.format(value1, value2)
    gap = LINE_LENGTH-len(INDENT)-len(text)-len(pair)-2
    print('{0}{1}{2} {3}'.format(INDENT, text, ' '*gap, pair))
|
|
|
|
|
|
|
|
|
|
|
|
# The section output concentrates on important parameters instead of
|
|
|
|
# being exhaustive (that is what the --raw parameter is for)
|
|
|
|
|
|
|
|
|
|
|
|
def section_arc(kstats_dict):
|
|
|
|
"""Give basic information on the ARC, MRU and MFU. This is the first
|
|
|
|
and most used section.
|
|
|
|
"""
|
|
|
|
|
|
|
|
arc_stats = isolate_section('arcstats', kstats_dict)
|
|
|
|
|
|
|
|
throttle = arc_stats['memory_throttle_count']
|
|
|
|
|
|
|
|
if throttle == '0':
|
|
|
|
health = 'HEALTHY'
|
|
|
|
else:
|
|
|
|
health = 'THROTTLED'
|
|
|
|
|
|
|
|
prt_1('ARC status:', health)
|
|
|
|
prt_i1('Memory throttle count:', throttle)
|
|
|
|
print()
|
|
|
|
|
|
|
|
arc_size = arc_stats['size']
|
|
|
|
arc_target_size = arc_stats['c']
|
|
|
|
arc_max = arc_stats['c_max']
|
|
|
|
arc_min = arc_stats['c_min']
|
More adaptive ARC eviction
Traditionally ARC adaptation was limited to MRU/MFU distribution. But
for years people with metadata-centric workload demanded mechanisms to
also manage data/metadata distribution, that in original ZFS was just
a FIFO. As result ZFS effectively got separate states for data and
metadata, minimum and maximum metadata limits etc, but it all required
manual tuning, was not adaptive and in its heart remained a bad FIFO.
This change removes most of existing eviction logic, rewriting it from
scratch. This makes MRU/MFU adaptation individual for data and meta-
data, same as the distribution between data and metadata themselves.
Since most of required states separation was already done, it only
required to make arcs_size state field specific per data/metadata.
The adaptation logic is still based on previous concept of ghost hits,
just now it balances ARC capacity between 4 states: MRU data, MRU
metadata, MFU data and MFU metadata. To simplify arc_c changes instead
of arc_p measured in bytes, this code uses 3 variable arc_meta, arc_pd
and arc_pm, representing ARC balance between metadata and data, MRU and
MFU for data, and MRU and MFU for metadata respectively as 32-bit fixed
point fractions. Since we care about the math result only when need to
evict, this moves all the logic from arc_adapt() to arc_evict(), that
reduces per-block overhead, since per-block operations are limited to
stats collection, now moved from arc_adapt() to arc_access() and using
cheaper wmsums. This also allows to remove ugly ARC_HDR_DO_ADAPT flag
from many places.
This change also removes number of metadata specific tunables, part of
which were actually not functioning correctly, since not all metadata
are equal and some (like L2ARC headers) are not really evictable.
Instead it introduced single opaque knob zfs_arc_meta_balance, tuning
ARC's reaction on ghost hits, allowing administrator give more or less
preference to metadata without setting strict limits.
Some of old code parts like arc_evict_meta() are just removed, because
since introduction of ABD ARC they really make no sense: only headers
referenced by small number of buffers are not evictable, and they are
really not evictable no matter what this code do. Instead just call
arc_prune_async() if too much metadata appear not evictable.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Allan Jude <allan@klarasystems.com>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Sponsored by: iXsystems, Inc.
Closes #14359
2023-03-08 19:17:23 +00:00
|
|
|
meta = arc_stats['meta']
|
|
|
|
pd = arc_stats['pd']
|
|
|
|
pm = arc_stats['pm']
|
|
|
|
anon_data = arc_stats['anon_data']
|
|
|
|
anon_metadata = arc_stats['anon_metadata']
|
|
|
|
mfu_data = arc_stats['mfu_data']
|
|
|
|
mfu_metadata = arc_stats['mfu_metadata']
|
|
|
|
mru_data = arc_stats['mru_data']
|
|
|
|
mru_metadata = arc_stats['mru_metadata']
|
|
|
|
mfug_data = arc_stats['mfu_ghost_data']
|
|
|
|
mfug_metadata = arc_stats['mfu_ghost_metadata']
|
|
|
|
mrug_data = arc_stats['mru_ghost_data']
|
|
|
|
mrug_metadata = arc_stats['mru_ghost_metadata']
|
|
|
|
unc_data = arc_stats['uncached_data']
|
|
|
|
unc_metadata = arc_stats['uncached_metadata']
|
|
|
|
bonus_size = arc_stats['bonus_size']
|
2018-08-22 16:35:20 +00:00
|
|
|
dnode_limit = arc_stats['arc_dnode_limit']
|
|
|
|
dnode_size = arc_stats['dnode_size']
|
More adaptive ARC eviction
Traditionally ARC adaptation was limited to MRU/MFU distribution. But
for years people with metadata-centric workload demanded mechanisms to
also manage data/metadata distribution, that in original ZFS was just
a FIFO. As result ZFS effectively got separate states for data and
metadata, minimum and maximum metadata limits etc, but it all required
manual tuning, was not adaptive and in its heart remained a bad FIFO.
This change removes most of existing eviction logic, rewriting it from
scratch. This makes MRU/MFU adaptation individual for data and meta-
data, same as the distribution between data and metadata themselves.
Since most of required states separation was already done, it only
required to make arcs_size state field specific per data/metadata.
The adaptation logic is still based on previous concept of ghost hits,
just now it balances ARC capacity between 4 states: MRU data, MRU
metadata, MFU data and MFU metadata. To simplify arc_c changes instead
of arc_p measured in bytes, this code uses 3 variable arc_meta, arc_pd
and arc_pm, representing ARC balance between metadata and data, MRU and
MFU for data, and MRU and MFU for metadata respectively as 32-bit fixed
point fractions. Since we care about the math result only when need to
evict, this moves all the logic from arc_adapt() to arc_evict(), that
reduces per-block overhead, since per-block operations are limited to
stats collection, now moved from arc_adapt() to arc_access() and using
cheaper wmsums. This also allows to remove ugly ARC_HDR_DO_ADAPT flag
from many places.
This change also removes number of metadata specific tunables, part of
which were actually not functioning correctly, since not all metadata
are equal and some (like L2ARC headers) are not really evictable.
Instead it introduced single opaque knob zfs_arc_meta_balance, tuning
ARC's reaction on ghost hits, allowing administrator give more or less
preference to metadata without setting strict limits.
Some of old code parts like arc_evict_meta() are just removed, because
since introduction of ABD ARC they really make no sense: only headers
referenced by small number of buffers are not evictable, and they are
really not evictable no matter what this code do. Instead just call
arc_prune_async() if too much metadata appear not evictable.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Allan Jude <allan@klarasystems.com>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Sponsored by: iXsystems, Inc.
Closes #14359
2023-03-08 19:17:23 +00:00
|
|
|
dbuf_size = arc_stats['dbuf_size']
|
|
|
|
hdr_size = arc_stats['hdr_size']
|
|
|
|
l2_hdr_size = arc_stats['l2_hdr_size']
|
|
|
|
abd_chunk_waste_size = arc_stats['abd_chunk_waste_size']
|
2018-02-28 16:52:34 +00:00
|
|
|
target_size_ratio = '{0}:1'.format(int(arc_max) // int(arc_min))
|
|
|
|
|
|
|
|
prt_2('ARC size (current):',
|
|
|
|
f_perc(arc_size, arc_max), f_bytes(arc_size))
|
|
|
|
prt_i2('Target size (adaptive):',
|
|
|
|
f_perc(arc_target_size, arc_max), f_bytes(arc_target_size))
|
|
|
|
prt_i2('Min size (hard limit):',
|
|
|
|
f_perc(arc_min, arc_max), f_bytes(arc_min))
|
|
|
|
prt_i2('Max size (high water):',
|
|
|
|
target_size_ratio, f_bytes(arc_max))
|
More adaptive ARC eviction
Traditionally ARC adaptation was limited to MRU/MFU distribution. But
for years people with metadata-centric workload demanded mechanisms to
also manage data/metadata distribution, that in original ZFS was just
a FIFO. As result ZFS effectively got separate states for data and
metadata, minimum and maximum metadata limits etc, but it all required
manual tuning, was not adaptive and in its heart remained a bad FIFO.
This change removes most of existing eviction logic, rewriting it from
scratch. This makes MRU/MFU adaptation individual for data and meta-
data, same as the distribution between data and metadata themselves.
Since most of required states separation was already done, it only
required to make arcs_size state field specific per data/metadata.
The adaptation logic is still based on previous concept of ghost hits,
just now it balances ARC capacity between 4 states: MRU data, MRU
metadata, MFU data and MFU metadata. To simplify arc_c changes instead
of arc_p measured in bytes, this code uses 3 variable arc_meta, arc_pd
and arc_pm, representing ARC balance between metadata and data, MRU and
MFU for data, and MRU and MFU for metadata respectively as 32-bit fixed
point fractions. Since we care about the math result only when need to
evict, this moves all the logic from arc_adapt() to arc_evict(), that
reduces per-block overhead, since per-block operations are limited to
stats collection, now moved from arc_adapt() to arc_access() and using
cheaper wmsums. This also allows to remove ugly ARC_HDR_DO_ADAPT flag
from many places.
This change also removes number of metadata specific tunables, part of
which were actually not functioning correctly, since not all metadata
are equal and some (like L2ARC headers) are not really evictable.
Instead it introduced single opaque knob zfs_arc_meta_balance, tuning
ARC's reaction on ghost hits, allowing administrator give more or less
preference to metadata without setting strict limits.
Some of old code parts like arc_evict_meta() are just removed, because
since introduction of ABD ARC they really make no sense: only headers
referenced by small number of buffers are not evictable, and they are
really not evictable no matter what this code do. Instead just call
arc_prune_async() if too much metadata appear not evictable.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Allan Jude <allan@klarasystems.com>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Sponsored by: iXsystems, Inc.
Closes #14359
2023-03-08 19:17:23 +00:00
|
|
|
caches_size = int(anon_data)+int(anon_metadata)+\
|
|
|
|
int(mfu_data)+int(mfu_metadata)+int(mru_data)+int(mru_metadata)+\
|
|
|
|
int(unc_data)+int(unc_metadata)
|
|
|
|
prt_i2('Anonymous data size:',
|
|
|
|
f_perc(anon_data, caches_size), f_bytes(anon_data))
|
|
|
|
prt_i2('Anonymous metadata size:',
|
|
|
|
f_perc(anon_metadata, caches_size), f_bytes(anon_metadata))
|
|
|
|
s = 4294967296
|
|
|
|
v = (s-int(pd))*(s-int(meta))/s
|
|
|
|
prt_i2('MFU data target:', f_perc(v, s),
|
|
|
|
f_bytes(v / 65536 * caches_size / 65536))
|
|
|
|
prt_i2('MFU data size:',
|
|
|
|
f_perc(mfu_data, caches_size), f_bytes(mfu_data))
|
|
|
|
prt_i1('MFU ghost data size:', f_bytes(mfug_data))
|
|
|
|
v = (s-int(pm))*int(meta)/s
|
|
|
|
prt_i2('MFU metadata target:', f_perc(v, s),
|
|
|
|
f_bytes(v / 65536 * caches_size / 65536))
|
|
|
|
prt_i2('MFU metadata size:',
|
|
|
|
f_perc(mfu_metadata, caches_size), f_bytes(mfu_metadata))
|
|
|
|
prt_i1('MFU ghost metadata size:', f_bytes(mfug_metadata))
|
|
|
|
v = int(pd)*(s-int(meta))/s
|
|
|
|
prt_i2('MRU data target:', f_perc(v, s),
|
|
|
|
f_bytes(v / 65536 * caches_size / 65536))
|
|
|
|
prt_i2('MRU data size:',
|
|
|
|
f_perc(mru_data, caches_size), f_bytes(mru_data))
|
|
|
|
prt_i1('MRU ghost data size:', f_bytes(mrug_data))
|
|
|
|
v = int(pm)*int(meta)/s
|
|
|
|
prt_i2('MRU metadata target:', f_perc(v, s),
|
|
|
|
f_bytes(v / 65536 * caches_size / 65536))
|
|
|
|
prt_i2('MRU metadata size:',
|
|
|
|
f_perc(mru_metadata, caches_size), f_bytes(mru_metadata))
|
|
|
|
prt_i1('MRU ghost metadata size:', f_bytes(mrug_metadata))
|
2023-01-05 17:29:13 +00:00
|
|
|
prt_i2('Uncached data size:',
|
More adaptive ARC eviction
Traditionally ARC adaptation was limited to MRU/MFU distribution. But
for years people with metadata-centric workload demanded mechanisms to
also manage data/metadata distribution, that in original ZFS was just
a FIFO. As result ZFS effectively got separate states for data and
metadata, minimum and maximum metadata limits etc, but it all required
manual tuning, was not adaptive and in its heart remained a bad FIFO.
This change removes most of existing eviction logic, rewriting it from
scratch. This makes MRU/MFU adaptation individual for data and meta-
data, same as the distribution between data and metadata themselves.
Since most of required states separation was already done, it only
required to make arcs_size state field specific per data/metadata.
The adaptation logic is still based on previous concept of ghost hits,
just now it balances ARC capacity between 4 states: MRU data, MRU
metadata, MFU data and MFU metadata. To simplify arc_c changes instead
of arc_p measured in bytes, this code uses 3 variable arc_meta, arc_pd
and arc_pm, representing ARC balance between metadata and data, MRU and
MFU for data, and MRU and MFU for metadata respectively as 32-bit fixed
point fractions. Since we care about the math result only when need to
evict, this moves all the logic from arc_adapt() to arc_evict(), that
reduces per-block overhead, since per-block operations are limited to
stats collection, now moved from arc_adapt() to arc_access() and using
cheaper wmsums. This also allows to remove ugly ARC_HDR_DO_ADAPT flag
from many places.
This change also removes number of metadata specific tunables, part of
which were actually not functioning correctly, since not all metadata
are equal and some (like L2ARC headers) are not really evictable.
Instead it introduced single opaque knob zfs_arc_meta_balance, tuning
ARC's reaction on ghost hits, allowing administrator give more or less
preference to metadata without setting strict limits.
Some of old code parts like arc_evict_meta() are just removed, because
since introduction of ABD ARC they really make no sense: only headers
referenced by small number of buffers are not evictable, and they are
really not evictable no matter what this code do. Instead just call
arc_prune_async() if too much metadata appear not evictable.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Allan Jude <allan@klarasystems.com>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Sponsored by: iXsystems, Inc.
Closes #14359
2023-03-08 19:17:23 +00:00
|
|
|
f_perc(unc_data, caches_size), f_bytes(unc_data))
|
|
|
|
prt_i2('Uncached metadata size:',
|
|
|
|
f_perc(unc_metadata, caches_size), f_bytes(unc_metadata))
|
|
|
|
prt_i2('Bonus size:',
|
|
|
|
f_perc(bonus_size, arc_size), f_bytes(bonus_size))
|
|
|
|
prt_i2('Dnode cache target:',
|
|
|
|
f_perc(dnode_limit, arc_max), f_bytes(dnode_limit))
|
|
|
|
prt_i2('Dnode cache size:',
|
2018-08-22 16:35:20 +00:00
|
|
|
f_perc(dnode_size, dnode_limit), f_bytes(dnode_size))
|
More adaptive ARC eviction
Traditionally ARC adaptation was limited to MRU/MFU distribution. But
for years people with metadata-centric workload demanded mechanisms to
also manage data/metadata distribution, that in original ZFS was just
a FIFO. As result ZFS effectively got separate states for data and
metadata, minimum and maximum metadata limits etc, but it all required
manual tuning, was not adaptive and in its heart remained a bad FIFO.
This change removes most of existing eviction logic, rewriting it from
scratch. This makes MRU/MFU adaptation individual for data and meta-
data, same as the distribution between data and metadata themselves.
Since most of required states separation was already done, it only
required to make arcs_size state field specific per data/metadata.
The adaptation logic is still based on previous concept of ghost hits,
just now it balances ARC capacity between 4 states: MRU data, MRU
metadata, MFU data and MFU metadata. To simplify arc_c changes instead
of arc_p measured in bytes, this code uses 3 variable arc_meta, arc_pd
and arc_pm, representing ARC balance between metadata and data, MRU and
MFU for data, and MRU and MFU for metadata respectively as 32-bit fixed
point fractions. Since we care about the math result only when need to
evict, this moves all the logic from arc_adapt() to arc_evict(), that
reduces per-block overhead, since per-block operations are limited to
stats collection, now moved from arc_adapt() to arc_access() and using
cheaper wmsums. This also allows to remove ugly ARC_HDR_DO_ADAPT flag
from many places.
This change also removes number of metadata specific tunables, part of
which were actually not functioning correctly, since not all metadata
are equal and some (like L2ARC headers) are not really evictable.
Instead it introduced single opaque knob zfs_arc_meta_balance, tuning
ARC's reaction on ghost hits, allowing administrator give more or less
preference to metadata without setting strict limits.
Some of old code parts like arc_evict_meta() are just removed, because
since introduction of ABD ARC they really make no sense: only headers
referenced by small number of buffers are not evictable, and they are
really not evictable no matter what this code do. Instead just call
arc_prune_async() if too much metadata appear not evictable.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Allan Jude <allan@klarasystems.com>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Sponsored by: iXsystems, Inc.
Closes #14359
2023-03-08 19:17:23 +00:00
|
|
|
prt_i2('Dbuf size:',
|
|
|
|
f_perc(dbuf_size, arc_size), f_bytes(dbuf_size))
|
|
|
|
prt_i2('Header size:',
|
|
|
|
f_perc(hdr_size, arc_size), f_bytes(hdr_size))
|
|
|
|
prt_i2('L2 header size:',
|
|
|
|
f_perc(l2_hdr_size, arc_size), f_bytes(l2_hdr_size))
|
|
|
|
prt_i2('ABD chunk waste size:',
|
|
|
|
f_perc(abd_chunk_waste_size, arc_size), f_bytes(abd_chunk_waste_size))
|
2018-02-28 16:52:34 +00:00
|
|
|
print()
|
|
|
|
|
|
|
|
print('ARC hash breakdown:')
|
|
|
|
prt_i1('Elements max:', f_hits(arc_stats['hash_elements_max']))
|
|
|
|
prt_i2('Elements current:',
|
|
|
|
f_perc(arc_stats['hash_elements'], arc_stats['hash_elements_max']),
|
|
|
|
f_hits(arc_stats['hash_elements']))
|
|
|
|
prt_i1('Collisions:', f_hits(arc_stats['hash_collisions']))
|
|
|
|
|
|
|
|
prt_i1('Chain max:', f_hits(arc_stats['hash_chain_max']))
|
|
|
|
prt_i1('Chains:', f_hits(arc_stats['hash_chains']))
|
|
|
|
print()
|
|
|
|
|
|
|
|
print('ARC misc:')
|
|
|
|
prt_i1('Deleted:', f_hits(arc_stats['deleted']))
|
|
|
|
prt_i1('Mutex misses:', f_hits(arc_stats['mutex_miss']))
|
|
|
|
prt_i1('Eviction skips:', f_hits(arc_stats['evict_skip']))
|
Add L2ARC arcstats for MFU/MRU buffers and buffer content type
Currently the ARC state (MFU/MRU) of cached L2ARC buffer and their
content type is unknown. Knowing this information may prove beneficial
in adjusting the L2ARC caching policy.
This commit adds L2ARC arcstats that display the aligned size
(in bytes) of L2ARC buffers according to their content type
(data/metadata) and according to their ARC state (MRU/MFU or
prefetch). It also expands the existing evict_l2_eligible arcstat to
differentiate between MFU and MRU buffers.
L2ARC caches buffers from the MRU and MFU lists of ARC. Upon caching a
buffer, its ARC state (MRU/MFU) is stored in the L2 header
(b_arcs_state). The l2_m{f,r}u_asize arcstats reflect the aligned size
(in bytes) of L2ARC buffers according to their ARC state (based on
b_arcs_state). We also account for the case where an L2ARC and ARC
cached MRU or MRU_ghost buffer transitions to MFU. The l2_prefetch_asize
reflects the alinged size (in bytes) of L2ARC buffers that were cached
while they had the prefetch flag set in ARC. This is dynamically updated
as the prefetch flag of L2ARC buffers changes.
When buffers are evicted from ARC, if they are determined to be L2ARC
eligible then their logical size is recorded in
evict_l2_eligible_m{r,f}u arcstats according to their ARC state upon
eviction.
Persistent L2ARC:
When committing an L2ARC buffer to a log block (L2ARC metadata) its
b_arcs_state and prefetch flag is also stored. If the buffer changes
its arcstate or prefetch flag this is reflected in the above arcstats.
However, the L2ARC metadata cannot currently be updated to reflect this
change.
Example: L2ARC caches an MRU buffer. L2ARC metadata and arcstats count
this as an MRU buffer. The buffer transitions to MFU. The arcstats are
updated to reflect this. Upon pool re-import or on/offlining the L2ARC
device the arcstats are cleared and the buffer will now be counted as an
MRU buffer, as the L2ARC metadata were not updated.
Bug fix:
- If l2arc_noprefetch is set, arc_read_done clears the L2CACHE flag of
an ARC buffer. However, prefetches may be issued in a way that
arc_read_done() is bypassed. Instead, move the related code in
l2arc_write_eligible() to account for those cases too.
Also add a test and update manpages for l2arc_mfuonly module parameter,
and update the manpages and code comments for l2arc_noprefetch.
Move persist_l2arc tests to l2arc.
Reviewed-by: Ryan Moeller <freqlabs@FreeBSD.org>
Reviewed-by: Richard Elling <Richard.Elling@RichardElling.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: George Amanakis <gamanakis@gmail.com>
Closes #10743
2020-09-14 17:10:44 +00:00
|
|
|
prt_i1('Eviction skips due to L2 writes:',
|
|
|
|
f_hits(arc_stats['evict_l2_skip']))
|
|
|
|
prt_i1('L2 cached evictions:', f_bytes(arc_stats['evict_l2_cached']))
|
|
|
|
prt_i1('L2 eligible evictions:', f_bytes(arc_stats['evict_l2_eligible']))
|
|
|
|
prt_i2('L2 eligible MFU evictions:',
|
|
|
|
f_perc(arc_stats['evict_l2_eligible_mfu'],
|
|
|
|
arc_stats['evict_l2_eligible']),
|
|
|
|
f_bytes(arc_stats['evict_l2_eligible_mfu']))
|
|
|
|
prt_i2('L2 eligible MRU evictions:',
|
|
|
|
f_perc(arc_stats['evict_l2_eligible_mru'],
|
|
|
|
arc_stats['evict_l2_eligible']),
|
|
|
|
f_bytes(arc_stats['evict_l2_eligible_mru']))
|
|
|
|
prt_i1('L2 ineligible evictions:',
|
|
|
|
f_bytes(arc_stats['evict_l2_ineligible']))
|
2018-02-28 16:52:34 +00:00
|
|
|
print()
|
|
|
|
|
|
|
|
|
|
|
|
def section_archits(kstats_dict):
    """Print information on how the caches are accessed ("arc hits").

    The 'arcstats' section is isolated from kstats_dict and the following
    groups are printed, each followed by an empty line: total accesses,
    demand data, demand metadata, prefetch data, prefetch metadata,
    predictive prefetches, prescient prefetches and the hits per ARC
    state. Nothing is returned.
    """

    arc_stats = isolate_section('arcstats', kstats_dict)

    # Every access is counted as exactly one of hit, I/O hit or miss
    all_accesses = int(arc_stats['hits'])+int(arc_stats['iohits'])+\
        int(arc_stats['misses'])

    prt_1('ARC total accesses:', f_hits(all_accesses))
    ta_todo = (('Total hits:', arc_stats['hits']),
               ('Total I/O hits:', arc_stats['iohits']),
               ('Total misses:', arc_stats['misses']))
    for title, value in ta_todo:
        prt_i2(title, f_perc(value, all_accesses), f_hits(value))
    print()

    # Demand data: group total relative to all accesses, the individual
    # counters relative to the group total
    dd_total = int(arc_stats['demand_data_hits']) +\
        int(arc_stats['demand_data_iohits']) +\
        int(arc_stats['demand_data_misses'])
    prt_2('ARC demand data accesses:', f_perc(dd_total, all_accesses),
          f_hits(dd_total))
    dd_todo = (('Demand data hits:', arc_stats['demand_data_hits']),
               ('Demand data I/O hits:', arc_stats['demand_data_iohits']),
               ('Demand data misses:', arc_stats['demand_data_misses']))
    for title, value in dd_todo:
        prt_i2(title, f_perc(value, dd_total), f_hits(value))
    print()

    # Demand metadata
    dm_total = int(arc_stats['demand_metadata_hits']) +\
        int(arc_stats['demand_metadata_iohits']) +\
        int(arc_stats['demand_metadata_misses'])
    prt_2('ARC demand metadata accesses:', f_perc(dm_total, all_accesses),
          f_hits(dm_total))
    dm_todo = (('Demand metadata hits:', arc_stats['demand_metadata_hits']),
               ('Demand metadata I/O hits:',
                arc_stats['demand_metadata_iohits']),
               ('Demand metadata misses:', arc_stats['demand_metadata_misses']))
    for title, value in dm_todo:
        prt_i2(title, f_perc(value, dm_total), f_hits(value))
    print()

    # Prefetch data. The heading used to read "prefetch metadata", which
    # duplicated the heading of the next group and mislabeled this one.
    pd_total = int(arc_stats['prefetch_data_hits']) +\
        int(arc_stats['prefetch_data_iohits']) +\
        int(arc_stats['prefetch_data_misses'])
    prt_2('ARC prefetch data accesses:', f_perc(pd_total, all_accesses),
          f_hits(pd_total))
    pd_todo = (('Prefetch data hits:', arc_stats['prefetch_data_hits']),
               ('Prefetch data I/O hits:', arc_stats['prefetch_data_iohits']),
               ('Prefetch data misses:', arc_stats['prefetch_data_misses']))
    for title, value in pd_todo:
        prt_i2(title, f_perc(value, pd_total), f_hits(value))
    print()

    # Prefetch metadata
    pm_total = int(arc_stats['prefetch_metadata_hits']) +\
        int(arc_stats['prefetch_metadata_iohits']) +\
        int(arc_stats['prefetch_metadata_misses'])
    prt_2('ARC prefetch metadata accesses:', f_perc(pm_total, all_accesses),
          f_hits(pm_total))
    pm_todo = (('Prefetch metadata hits:',
                arc_stats['prefetch_metadata_hits']),
               ('Prefetch metadata I/O hits:',
                arc_stats['prefetch_metadata_iohits']),
               ('Prefetch metadata misses:',
                arc_stats['prefetch_metadata_misses']))
    for title, value in pm_todo:
        prt_i2(title, f_perc(value, pm_total), f_hits(value))
    print()

    # Predictive prefetches: share of all prefetches, then how many of
    # them were later hit by a demand access (or never were)
    all_prefetches = int(arc_stats['predictive_prefetch'])+\
        int(arc_stats['prescient_prefetch'])
    prt_2('ARC predictive prefetches:',
          f_perc(arc_stats['predictive_prefetch'], all_prefetches),
          f_hits(arc_stats['predictive_prefetch']))
    prt_i2('Demand hits after predictive:',
           f_perc(arc_stats['demand_hit_predictive_prefetch'],
                  arc_stats['predictive_prefetch']),
           f_hits(arc_stats['demand_hit_predictive_prefetch']))
    prt_i2('Demand I/O hits after predictive:',
           f_perc(arc_stats['demand_iohit_predictive_prefetch'],
                  arc_stats['predictive_prefetch']),
           f_hits(arc_stats['demand_iohit_predictive_prefetch']))
    never = int(arc_stats['predictive_prefetch']) -\
        int(arc_stats['demand_hit_predictive_prefetch']) -\
        int(arc_stats['demand_iohit_predictive_prefetch'])
    prt_i2('Never demanded after predictive:',
           f_perc(never, arc_stats['predictive_prefetch']),
           f_hits(never))
    print()

    # Prescient prefetches, same layout as the predictive ones
    prt_2('ARC prescient prefetches:',
          f_perc(arc_stats['prescient_prefetch'], all_prefetches),
          f_hits(arc_stats['prescient_prefetch']))
    prt_i2('Demand hits after prescient:',
           f_perc(arc_stats['demand_hit_prescient_prefetch'],
                  arc_stats['prescient_prefetch']),
           f_hits(arc_stats['demand_hit_prescient_prefetch']))
    prt_i2('Demand I/O hits after prescient:',
           f_perc(arc_stats['demand_iohit_prescient_prefetch'],
                  arc_stats['prescient_prefetch']),
           f_hits(arc_stats['demand_iohit_prescient_prefetch']))
    never = int(arc_stats['prescient_prefetch'])-\
        int(arc_stats['demand_hit_prescient_prefetch'])-\
        int(arc_stats['demand_iohit_prescient_prefetch'])
    prt_i2('Never demanded after prescient:',
           f_perc(never, arc_stats['prescient_prefetch']),
           f_hits(never))
    print()

    # Hits per ARC state, each relative to all accesses
    print('ARC states hits of all accesses:')
    cl_todo = (('Most frequently used (MFU):', arc_stats['mfu_hits']),
               ('Most recently used (MRU):', arc_stats['mru_hits']),
               ('Most frequently used (MFU) ghost:',
                arc_stats['mfu_ghost_hits']),
               ('Most recently used (MRU) ghost:',
                arc_stats['mru_ghost_hits']),
               ('Uncached:', arc_stats['uncached_hits']))
    for title, value in cl_todo:
        prt_i2(title, f_perc(value, all_accesses), f_hits(value))
    print()
|
|
|
|
|
|
|
|
|
|
|
|
def section_dmu(kstats_dict):
    """Print information on the DMU predictive prefetcher.

    The 'zfetchstats' section is isolated from kstats_dict. Stream hits
    and misses are printed relative to their sum, the "streams limit
    reached" counter relative to the misses, followed by the number of
    prefetches issued and an empty line. Nothing is returned.
    """

    zfetch_stats = isolate_section('zfetchstats', kstats_dict)

    zfetch_access_total = int(zfetch_stats['hits'])+int(zfetch_stats['misses'])

    prt_1('DMU predictive prefetcher calls:', f_hits(zfetch_access_total))
    prt_i2('Stream hits:',
           f_perc(zfetch_stats['hits'], zfetch_access_total),
           f_hits(zfetch_stats['hits']))
    prt_i2('Stream misses:',
           f_perc(zfetch_stats['misses'], zfetch_access_total),
           f_hits(zfetch_stats['misses']))
    prt_i2('Streams limit reached:',
           f_perc(zfetch_stats['max_streams'], zfetch_stats['misses']),
           f_hits(zfetch_stats['max_streams']))
    # Trailing colon added so this label matches every other label here
    prt_i1('Prefetches issued:', f_hits(zfetch_stats['io_issued']))
    print()
|
|
|
|
|
|
|
|
|
|
|
|
def section_l2arc(kstats_dict):
    """Print the L2ARC statistics, or a notice that the section is
    skipped when the reported L2ARC size is zero.
    """

    # L2ARC counters are part of the regular 'arcstats' section
    arc_stats = isolate_section('arcstats', kstats_dict)

    if arc_stats['l2_size'] == '0':
        print('L2ARC not detected, skipping section\n')
        return

    error_keys = ('l2_writes_error', 'l2_cksum_bad', 'l2_io_error')
    l2_errors = sum(int(arc_stats[key]) for key in error_keys)
    l2_access_total = int(arc_stats['l2_hits'])+int(arc_stats['l2_misses'])

    # Any write error, bad checksum or I/O error marks the device degraded
    health = 'DEGRADED' if l2_errors > 0 else 'HEALTHY'
    prt_1('L2ARC status:', health)

    counter_rows = (('Low memory aborts:', 'l2_abort_lowmem'),
                    ('Free on write:', 'l2_free_on_write'),
                    ('R/W clashes:', 'l2_rw_clash'),
                    ('Bad checksums:', 'l2_cksum_bad'),
                    ('Read errors:', 'l2_io_error'),
                    ('Write errors:', 'l2_writes_error'))
    for label, key in counter_rows:
        prt_i1(label, f_hits(arc_stats[key]))
    print()

    # (label, stat key, key of the stat the percentage is relative to)
    size_rows = (
        ('Compressed:', 'l2_asize', 'l2_size'),
        ('Header size:', 'l2_hdr_size', 'l2_size'),
        ('MFU allocated size:', 'l2_mfu_asize', 'l2_asize'),
        ('MRU allocated size:', 'l2_mru_asize', 'l2_asize'),
        ('Prefetch allocated size:', 'l2_prefetch_asize', 'l2_asize'),
        ('Data (buffer content) allocated size:',
         'l2_bufc_data_asize', 'l2_asize'),
        ('Metadata (buffer content) allocated size:',
         'l2_bufc_metadata_asize', 'l2_asize'))
    prt_1('L2ARC size (adaptive):', f_bytes(arc_stats['l2_size']))
    for label, key, base in size_rows:
        prt_i2(label, f_perc(arc_stats[key], arc_stats[base]),
               f_bytes(arc_stats[key]))
    print()

    prt_1('L2ARC breakdown:', f_hits(l2_access_total))
    for label, key in (('Hit ratio:', 'l2_hits'),
                       ('Miss ratio:', 'l2_misses')):
        prt_i2(label, f_perc(arc_stats[key], l2_access_total),
               f_hits(arc_stats[key]))
    print()

    print('L2ARC I/O:')
    for label, bytes_key, count_key in (
            ('Reads:', 'l2_read_bytes', 'l2_hits'),
            ('Writes:', 'l2_write_bytes', 'l2_writes_sent')):
        prt_i2(label, f_bytes(arc_stats[bytes_key]),
               f_hits(arc_stats[count_key]))
    print()

    print('L2ARC evicts:')
    for label, key in (('L1 cached:', 'l2_evict_l1cached'),
                       ('While reading:', 'l2_evict_reading')):
        prt_i1(label, f_hits(arc_stats[key]))
    print()
|
|
|
|
|
|
|
|
|
|
|
|
def section_spl(*_):
    """Print the Solaris Porting Layer (SPL) parameters, sorted by name.

    When ARGS.desc is set, every parameter line is preceded by a comment
    line holding its description. Positional arguments are accepted and
    ignored; this section does not use kstats. Nothing is printed on
    FreeBSD.
    """

    # No SPL support in FreeBSD
    if sys.platform.startswith('freebsd'):
        return

    params = get_spl_params()
    names = sorted(params)
    print('Solaris Porting Layer (SPL):')

    # Descriptions are only fetched when the user asked for them
    descriptions = get_descriptions('spl') if ARGS.desc else None

    for name in names:
        if ARGS.desc:
            try:
                text = descriptions[name]
            except KeyError:
                text = '(No description found)'  # paranoid
            print(INDENT+'#', text)

        print(format_raw_line(name, params[name]))

    print()
|
|
|
|
|
|
|
|
|
|
|
|
def section_tunables(*_):
    """Print the ZFS tunables, sorted by name.

    When ARGS.desc is set, every tunable line is preceded by a comment
    line holding its description. Positional arguments are accepted and
    ignored; this section does not use kstats.
    """

    settings = get_tunable_params()
    ordered_names = sorted(settings)
    print('Tunables:')

    want_desc = ARGS.desc
    if want_desc:
        descriptions = get_descriptions('zfs')

    for name in ordered_names:
        setting = settings[name]

        if want_desc:
            # A tunable without a description still gets a placeholder line
            try:
                note = descriptions[name]
            except KeyError:
                note = '(No description found)'  # paranoid
            print(INDENT+'#', note)

        print(format_raw_line(name, setting))

    print()
|
|
|
|
|
|
|
|
|
|
|
|
def section_vdev(kstats_dict):
    """Print hit, miss and delegation statistics of the VDEV cache.

    The section is skipped with a short notice when the VDEV cache size
    tunable is '0'.
    """

    # Currently [Nov 2017] the VDEV cache is disabled, because it is actually
    # harmful. When this is the case, we just skip the whole entry. See
    # https://github.com/openzfs/zfs/blob/master/module/zfs/vdev_cache.c
    # for details
    if get_vdev_params()[VDEV_CACHE_SIZE] == '0':
        print('VDEV cache disabled, skipping section\n')
        return

    stats = isolate_section('vdev_cache_stats', kstats_dict)

    # (label, kstat name) pairs in the order they are reported
    rows = (('Hit ratio:', 'hits'),
            ('Miss ratio:', 'misses'),
            ('Delegations:', 'delegations'))

    # Every percentage below is relative to the sum of all three counters
    total = sum(int(stats[counter]) for _, counter in rows)

    prt_1('VDEV cache summary:', f_hits(total))

    for label, counter in rows:
        prt_i2(label, f_perc(stats[counter], total), f_hits(stats[counter]))

    print()
|
|
|
|
|
|
|
|
|
|
|
|
def section_zil(kstats_dict):
    """Print statistics on the ZFS Intent Log (ZIL). Some of the
    information is taken from
    https://github.com/openzfs/zfs/blob/master/include/sys/zil.h
    """

    zil = isolate_section('zil', kstats_dict)

    prt_1('ZIL committed transactions:', f_hits(zil['zil_itx_count']))

    # Plain counters, one value per line
    counters = (('Commit requests:', 'zil_commit_count'),
                ('Flushes to stable storage:', 'zil_commit_writer_count'))

    for label, count_key in counters:
        prt_i1(label, f_hits(zil[count_key]))

    # Transactions per target pool, reported as bytes plus a count
    targets = (('Transactions to SLOG storage pool:',
                'zil_itx_metaslab_slog_bytes',
                'zil_itx_metaslab_slog_count'),
               ('Transactions to non-SLOG storage pool:',
                'zil_itx_metaslab_normal_bytes',
                'zil_itx_metaslab_normal_count'))

    for label, bytes_key, count_key in targets:
        prt_i2(label, f_bytes(zil[bytes_key]), f_hits(zil[count_key]))

    print()
|
|
|
|
|
|
|
|
|
|
|
|
# Maps each section name accepted on the command line to the function that
# prints it. main() calls every entry with the kstats as its only argument.
section_calls = {
    'arc': section_arc,
    'archits': section_archits,
    'dmu': section_dmu,
    'l2arc': section_l2arc,
    'spl': section_spl,
    'tunables': section_tunables,
    'vdev': section_vdev,
    'zil': section_zil,
}
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Run program. The options to draw a graph and to print all data raw are
    treated separately because they come with their own call.

    Always terminates through sys.exit(): status 0 on success, status 1 if
    the requested section or page is unknown.
    """

    kstats = get_kstats()

    if ARGS.graph:
        draw_graph(kstats)
        sys.exit(0)

    print_header()

    if ARGS.raw:
        print_raw(kstats)

    elif ARGS.section:

        # Only the lookup is guarded. A KeyError raised inside the section
        # function itself (e.g. a kstat that is missing on this system) must
        # not be misreported as an unknown section name.
        try:
            section_func = section_calls[ARGS.section]
        except KeyError:
            print('Error: Section "{0}" unknown'.format(ARGS.section))
            sys.exit(1)
        else:
            section_func(kstats)

    elif ARGS.page:
        print('WARNING: Pages are deprecated, please use "--section"\n')

        pages_to_calls = {1: 'arc',
                          2: 'archits',
                          3: 'l2arc',
                          4: 'dmu',
                          5: 'vdev',
                          6: 'tunables'}

        try:
            call = pages_to_calls[ARGS.page]
        except KeyError:
            print('Error: Page "{0}" not supported'.format(ARGS.page))
            sys.exit(1)
        else:
            section_calls[call](kstats)

    else:
        # If no parameters were given, we print all sections. We might want to
        # change the sequence by hand
        for section in sorted(section_calls):
            section_calls[section](kstats)

    sys.exit(0)
|
|
|
|
|
|
|
|
|
|
|
|
# Produce the report only when executed as a script, not when imported
if __name__ == '__main__':
    main()
|