Add dbuf hash and dbuf cache kstats

Introduce kstats about the dbuf hash and dbuf cache
to make it easier to inspect state. This should help
with debugging and understanding of these portions
of the codebase.

Correct format of dbuf kstat file.

Introduce a dbc column to dbufs kstat to indicate if
a dbuf is in the dbuf cache.

Introduce field filtering in the dbufstat python script.

Introduce a no header option to the dbufstat python script.

Introduce a test case to test basic mru->mfu list movement
in the ARC.

Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Giuseppe Di Natale <dinatale2@llnl.gov>
Closes #6906
This commit is contained in:
Giuseppe Di Natale 2018-01-29 10:24:52 -08:00 committed by Brian Behlendorf
parent 0735ecb334
commit 5e021f56d3
12 changed files with 508 additions and 44 deletions

View File

@ -31,10 +31,11 @@
import sys import sys
import getopt import getopt
import errno import errno
import re
bhdr = ["pool", "objset", "object", "level", "blkid", "offset", "dbsize"] bhdr = ["pool", "objset", "object", "level", "blkid", "offset", "dbsize"]
bxhdr = ["pool", "objset", "object", "level", "blkid", "offset", "dbsize", bxhdr = ["pool", "objset", "object", "level", "blkid", "offset", "dbsize",
"meta", "state", "dbholds", "list", "atype", "flags", "meta", "state", "dbholds", "dbc", "list", "atype", "flags",
"count", "asize", "access", "mru", "gmru", "mfu", "gmfu", "l2", "count", "asize", "access", "mru", "gmru", "mfu", "gmfu", "l2",
"l2_dattr", "l2_asize", "l2_comp", "aholds", "dtype", "btype", "l2_dattr", "l2_asize", "l2_comp", "aholds", "dtype", "btype",
"data_bs", "meta_bs", "bsize", "lvls", "dholds", "blocks", "dsize"] "data_bs", "meta_bs", "bsize", "lvls", "dholds", "blocks", "dsize"]
@ -45,7 +46,7 @@ dxhdr = ["pool", "objset", "object", "dtype", "btype", "data_bs", "meta_bs",
"bsize", "lvls", "dholds", "blocks", "dsize", "cached", "direct", "bsize", "lvls", "dholds", "blocks", "dsize", "cached", "direct",
"indirect", "bonus", "spill"] "indirect", "bonus", "spill"]
dincompat = ["level", "blkid", "offset", "dbsize", "meta", "state", "dbholds", dincompat = ["level", "blkid", "offset", "dbsize", "meta", "state", "dbholds",
"list", "atype", "flags", "count", "asize", "access", "dbc", "list", "atype", "flags", "count", "asize", "access",
"mru", "gmru", "mfu", "gmfu", "l2", "l2_dattr", "l2_asize", "mru", "gmru", "mfu", "gmfu", "l2", "l2_dattr", "l2_asize",
"l2_comp", "aholds"] "l2_comp", "aholds"]
@ -53,7 +54,7 @@ thdr = ["pool", "objset", "dtype", "cached"]
txhdr = ["pool", "objset", "dtype", "cached", "direct", "indirect", txhdr = ["pool", "objset", "dtype", "cached", "direct", "indirect",
"bonus", "spill"] "bonus", "spill"]
tincompat = ["object", "level", "blkid", "offset", "dbsize", "meta", "state", tincompat = ["object", "level", "blkid", "offset", "dbsize", "meta", "state",
"dbholds", "list", "atype", "flags", "count", "asize", "dbc", "dbholds", "list", "atype", "flags", "count", "asize",
"access", "mru", "gmru", "mfu", "gmfu", "l2", "l2_dattr", "access", "mru", "gmru", "mfu", "gmfu", "l2", "l2_dattr",
"l2_asize", "l2_comp", "aholds", "btype", "data_bs", "meta_bs", "l2_asize", "l2_comp", "aholds", "btype", "data_bs", "meta_bs",
"bsize", "lvls", "dholds", "blocks", "dsize"] "bsize", "lvls", "dholds", "blocks", "dsize"]
@ -70,9 +71,10 @@ cols = {
"meta": [4, -1, "is this buffer metadata?"], "meta": [4, -1, "is this buffer metadata?"],
"state": [5, -1, "state of buffer (read, cached, etc)"], "state": [5, -1, "state of buffer (read, cached, etc)"],
"dbholds": [7, 1000, "number of holds on buffer"], "dbholds": [7, 1000, "number of holds on buffer"],
"dbc": [3, -1, "in dbuf cache"],
"list": [4, -1, "which ARC list contains this buffer"], "list": [4, -1, "which ARC list contains this buffer"],
"atype": [7, -1, "ARC header type (data or metadata)"], "atype": [7, -1, "ARC header type (data or metadata)"],
"flags": [8, -1, "ARC read flags"], "flags": [9, -1, "ARC read flags"],
"count": [5, -1, "ARC data count"], "count": [5, -1, "ARC data count"],
"asize": [7, 1024, "size of this ARC buffer"], "asize": [7, 1024, "size of this ARC buffer"],
"access": [10, -1, "time this ARC buffer was last accessed"], "access": [10, -1, "time this ARC buffer was last accessed"],
@ -104,8 +106,8 @@ cols = {
hdr = None hdr = None
xhdr = None xhdr = None
sep = " " # Default separator is 2 spaces sep = " " # Default separator is 2 spaces
cmd = ("Usage: dbufstat.py [-bdhrtvx] [-i file] [-f fields] [-o file] " cmd = ("Usage: dbufstat.py [-bdhnrtvx] [-i file] [-f fields] [-o file] "
"[-s string]\n") "[-s string] [-F filter]\n")
raw = 0 raw = 0
@ -151,6 +153,7 @@ def usage():
sys.stderr.write("\t -b : Print table of information for each dbuf\n") sys.stderr.write("\t -b : Print table of information for each dbuf\n")
sys.stderr.write("\t -d : Print table of information for each dnode\n") sys.stderr.write("\t -d : Print table of information for each dnode\n")
sys.stderr.write("\t -h : Print this help message\n") sys.stderr.write("\t -h : Print this help message\n")
sys.stderr.write("\t -n : Exclude header from output\n")
sys.stderr.write("\t -r : Print raw values\n") sys.stderr.write("\t -r : Print raw values\n")
sys.stderr.write("\t -t : Print table of information for each dnode type" sys.stderr.write("\t -t : Print table of information for each dnode type"
"\n") "\n")
@ -162,11 +165,13 @@ def usage():
sys.stderr.write("\t -o : Redirect output to the specified file\n") sys.stderr.write("\t -o : Redirect output to the specified file\n")
sys.stderr.write("\t -s : Override default field separator with custom " sys.stderr.write("\t -s : Override default field separator with custom "
"character or string\n") "character or string\n")
sys.stderr.write("\t -F : Filter output by value or regex\n")
sys.stderr.write("\nExamples:\n") sys.stderr.write("\nExamples:\n")
sys.stderr.write("\tdbufstat.py -d -o /tmp/d.log\n") sys.stderr.write("\tdbufstat.py -d -o /tmp/d.log\n")
sys.stderr.write("\tdbufstat.py -t -s \",\" -o /tmp/t.log\n") sys.stderr.write("\tdbufstat.py -t -s \",\" -o /tmp/t.log\n")
sys.stderr.write("\tdbufstat.py -v\n") sys.stderr.write("\tdbufstat.py -v\n")
sys.stderr.write("\tdbufstat.py -d -f pool,object,objset,dsize,cached\n") sys.stderr.write("\tdbufstat.py -d -f pool,object,objset,dsize,cached\n")
sys.stderr.write("\tdbufstat.py -bx -F dbc=1,objset=54,pool=testpool\n")
sys.stderr.write("\n") sys.stderr.write("\n")
sys.exit(1) sys.exit(1)
@ -409,12 +414,32 @@ def update_dict(d, k, line, labels):
return d return d
def print_dict(d): def skip_line(vals, filters):
print_header() '''
Determines if a line should be skipped during printing
based on a set of filters
'''
if len(filters) == 0:
return False
for key in vals:
if key in filters:
val = prettynum(cols[key][0], cols[key][1], vals[key]).strip()
# we want a full match here
if re.match("(?:" + filters[key] + r")\Z", val) is None:
return True
return False
def print_dict(d, filters, noheader):
if not noheader:
print_header()
for pool in list(d.keys()): for pool in list(d.keys()):
for objset in list(d[pool].keys()): for objset in list(d[pool].keys()):
for v in list(d[pool][objset].values()): for v in list(d[pool][objset].values()):
print_values(v) if not skip_line(v, filters):
print_values(v)
def dnodes_build_dict(filehandle): def dnodes_build_dict(filehandle):
@ -455,7 +480,7 @@ def types_build_dict(filehandle):
return types return types
def buffers_print_all(filehandle): def buffers_print_all(filehandle, filters, noheader):
labels = dict() labels = dict()
# First 3 lines are header information, skip the first two # First 3 lines are header information, skip the first two
@ -466,11 +491,14 @@ def buffers_print_all(filehandle):
for i, v in enumerate(next(filehandle).split()): for i, v in enumerate(next(filehandle).split()):
labels[v] = i labels[v] = i
print_header() if not noheader:
print_header()
# The rest of the file is buffer information # The rest of the file is buffer information
for line in filehandle: for line in filehandle:
print_values(parse_line(line.split(), labels)) vals = parse_line(line.split(), labels)
if not skip_line(vals, filters):
print_values(vals)
def main(): def main():
@ -487,11 +515,13 @@ def main():
tflag = False tflag = False
vflag = False vflag = False
xflag = False xflag = False
nflag = False
filters = dict()
try: try:
opts, args = getopt.getopt( opts, args = getopt.getopt(
sys.argv[1:], sys.argv[1:],
"bdf:hi:o:rs:tvx", "bdf:hi:o:rs:tvxF:n",
[ [
"buffers", "buffers",
"dnodes", "dnodes",
@ -502,7 +532,8 @@ def main():
"seperator", "seperator",
"types", "types",
"verbose", "verbose",
"extended" "extended",
"filter"
] ]
) )
except getopt.error: except getopt.error:
@ -532,6 +563,35 @@ def main():
vflag = True vflag = True
if opt in ('-x', '--extended'): if opt in ('-x', '--extended'):
xflag = True xflag = True
if opt in ('-n', '--noheader'):
nflag = True
if opt in ('-F', '--filter'):
fils = [x.strip() for x in arg.split(",")]
for fil in fils:
f = [x.strip() for x in fil.split("=")]
if len(f) != 2:
sys.stderr.write("Invalid filter '%s'.\n" % fil)
sys.exit(1)
if f[0] not in cols:
sys.stderr.write("Invalid field '%s' in filter.\n" % f[0])
sys.exit(1)
if f[0] in filters:
sys.stderr.write("Field '%s' specified multiple times in "
"filter.\n" % f[0])
sys.exit(1)
try:
re.compile("(?:" + f[1] + r")\Z")
except re.error:
sys.stderr.write("Invalid regex for field '%s' in "
"filter.\n" % f[0])
sys.exit(1)
filters[f[0]] = f[1]
if hflag or (xflag and desired_cols): if hflag or (xflag and desired_cols):
usage() usage()
@ -594,13 +654,13 @@ def main():
sys.exit(1) sys.exit(1)
if bflag: if bflag:
buffers_print_all(sys.stdin) buffers_print_all(sys.stdin, filters, nflag)
if dflag: if dflag:
print_dict(dnodes_build_dict(sys.stdin)) print_dict(dnodes_build_dict(sys.stdin), filters, nflag)
if tflag: if tflag:
print_dict(types_build_dict(sys.stdin)) print_dict(types_build_dict(sys.stdin), filters, nflag)
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -175,6 +175,7 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/Makefile tests/zfs-tests/tests/functional/Makefile
tests/zfs-tests/tests/functional/acl/Makefile tests/zfs-tests/tests/functional/acl/Makefile
tests/zfs-tests/tests/functional/acl/posix/Makefile tests/zfs-tests/tests/functional/acl/posix/Makefile
tests/zfs-tests/tests/functional/arc/Makefile
tests/zfs-tests/tests/functional/atime/Makefile tests/zfs-tests/tests/functional/atime/Makefile
tests/zfs-tests/tests/functional/bootfs/Makefile tests/zfs-tests/tests/functional/bootfs/Makefile
tests/zfs-tests/tests/functional/cache/Makefile tests/zfs-tests/tests/functional/cache/Makefile

View File

@ -48,6 +48,87 @@
#include <sys/callb.h> #include <sys/callb.h>
#include <sys/abd.h> #include <sys/abd.h>
kstat_t *dbuf_ksp;
typedef struct dbuf_stats {
/*
* Various statistics about the size of the dbuf cache.
*/
kstat_named_t cache_count;
kstat_named_t cache_size_bytes;
kstat_named_t cache_size_bytes_max;
/*
* Statistics regarding the bounds on the dbuf cache size.
*/
kstat_named_t cache_target_bytes;
kstat_named_t cache_lowater_bytes;
kstat_named_t cache_hiwater_bytes;
/*
* Total number of dbuf cache evictions that have occurred.
*/
kstat_named_t cache_total_evicts;
/*
* The distribution of dbuf levels in the dbuf cache and
* the total size of all dbufs at each level.
*/
kstat_named_t cache_levels[DN_MAX_LEVELS];
kstat_named_t cache_levels_bytes[DN_MAX_LEVELS];
/*
* Statistics about the dbuf hash table.
*/
kstat_named_t hash_hits;
kstat_named_t hash_misses;
kstat_named_t hash_collisions;
kstat_named_t hash_elements;
kstat_named_t hash_elements_max;
/*
* Number of sublists containing more than one dbuf in the dbuf
* hash table. Keep track of the longest hash chain.
*/
kstat_named_t hash_chains;
kstat_named_t hash_chain_max;
/*
* Number of times a dbuf_create() discovers that a dbuf was
* already created and in the dbuf hash table.
*/
kstat_named_t hash_insert_race;
} dbuf_stats_t;
dbuf_stats_t dbuf_stats = {
{ "cache_count", KSTAT_DATA_UINT64 },
{ "cache_size_bytes", KSTAT_DATA_UINT64 },
{ "cache_size_bytes_max", KSTAT_DATA_UINT64 },
{ "cache_target_bytes", KSTAT_DATA_UINT64 },
{ "cache_lowater_bytes", KSTAT_DATA_UINT64 },
{ "cache_hiwater_bytes", KSTAT_DATA_UINT64 },
{ "cache_total_evicts", KSTAT_DATA_UINT64 },
{ { "cache_levels_N", KSTAT_DATA_UINT64 } },
{ { "cache_levels_bytes_N", KSTAT_DATA_UINT64 } },
{ "hash_hits", KSTAT_DATA_UINT64 },
{ "hash_misses", KSTAT_DATA_UINT64 },
{ "hash_collisions", KSTAT_DATA_UINT64 },
{ "hash_elements", KSTAT_DATA_UINT64 },
{ "hash_elements_max", KSTAT_DATA_UINT64 },
{ "hash_chains", KSTAT_DATA_UINT64 },
{ "hash_chain_max", KSTAT_DATA_UINT64 },
{ "hash_insert_race", KSTAT_DATA_UINT64 }
};
/*
 * Helpers for updating the dbuf_stats kstat counters.  All updates go
 * through 64-bit atomics, so callers need not hold any lock.
 */
#define DBUF_STAT_INCR(stat, val) \
	atomic_add_64(&dbuf_stats.stat.value.ui64, (val));
#define DBUF_STAT_DECR(stat, val) \
	DBUF_STAT_INCR(stat, -(val));
#define DBUF_STAT_BUMP(stat) \
	DBUF_STAT_INCR(stat, 1);
#define DBUF_STAT_BUMPDOWN(stat) \
	DBUF_STAT_INCR(stat, -1);
/*
 * NOTE(review): the trailing semicolons inside the four macros above
 * emit an extra empty statement at each use site.  Harmless in the
 * statement contexts used in this file, but it would break use inside
 * an if/else without braces -- worth confirming/cleaning up.
 */
/*
 * Atomically raise a running maximum: lock-free compare-and-swap loop
 * that retries until either the stored value is >= v or the CAS lands.
 */
#define DBUF_STAT_MAX(stat, v) { \
	uint64_t _m; \
	while ((v) > (_m = dbuf_stats.stat.value.ui64) && \
	    (_m != atomic_cas_64(&dbuf_stats.stat.value.ui64, _m, (v))))\
		continue; \
}
struct dbuf_hold_impl_data { struct dbuf_hold_impl_data {
/* Function arguments */ /* Function arguments */
dnode_t *dh_dn; dnode_t *dh_dn;
@ -272,13 +353,15 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
int level = db->db_level; int level = db->db_level;
uint64_t blkid, hv, idx; uint64_t blkid, hv, idx;
dmu_buf_impl_t *dbf; dmu_buf_impl_t *dbf;
uint32_t i;
blkid = db->db_blkid; blkid = db->db_blkid;
hv = dbuf_hash(os, obj, level, blkid); hv = dbuf_hash(os, obj, level, blkid);
idx = hv & h->hash_table_mask; idx = hv & h->hash_table_mask;
mutex_enter(DBUF_HASH_MUTEX(h, idx)); mutex_enter(DBUF_HASH_MUTEX(h, idx));
for (dbf = h->hash_table[idx]; dbf != NULL; dbf = dbf->db_hash_next) { for (dbf = h->hash_table[idx], i = 0; dbf != NULL;
dbf = dbf->db_hash_next, i++) {
if (DBUF_EQUAL(dbf, os, obj, level, blkid)) { if (DBUF_EQUAL(dbf, os, obj, level, blkid)) {
mutex_enter(&dbf->db_mtx); mutex_enter(&dbf->db_mtx);
if (dbf->db_state != DB_EVICTING) { if (dbf->db_state != DB_EVICTING) {
@ -289,11 +372,20 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
} }
} }
if (i > 0) {
DBUF_STAT_BUMP(hash_collisions);
if (i == 1)
DBUF_STAT_BUMP(hash_chains);
DBUF_STAT_MAX(hash_chain_max, i);
}
mutex_enter(&db->db_mtx); mutex_enter(&db->db_mtx);
db->db_hash_next = h->hash_table[idx]; db->db_hash_next = h->hash_table[idx];
h->hash_table[idx] = db; h->hash_table[idx] = db;
mutex_exit(DBUF_HASH_MUTEX(h, idx)); mutex_exit(DBUF_HASH_MUTEX(h, idx));
atomic_inc_64(&dbuf_hash_count); atomic_inc_64(&dbuf_hash_count);
DBUF_STAT_MAX(hash_elements_max, dbuf_hash_count);
return (NULL); return (NULL);
} }
@ -328,6 +420,9 @@ dbuf_hash_remove(dmu_buf_impl_t *db)
} }
*dbp = db->db_hash_next; *dbp = db->db_hash_next;
db->db_hash_next = NULL; db->db_hash_next = NULL;
if (h->hash_table[idx] &&
h->hash_table[idx]->db_hash_next == NULL)
DBUF_STAT_BUMPDOWN(hash_chains);
mutex_exit(DBUF_HASH_MUTEX(h, idx)); mutex_exit(DBUF_HASH_MUTEX(h, idx));
atomic_dec_64(&dbuf_hash_count); atomic_dec_64(&dbuf_hash_count);
} }
@ -469,28 +564,32 @@ dbuf_cache_target_bytes(void)
arc_target_bytes() >> dbuf_cache_max_shift); arc_target_bytes() >> dbuf_cache_max_shift);
} }
/*
 * Upper bound on the dbuf cache size in bytes: the current target size
 * plus dbuf_cache_hiwater_pct percent of it.
 */
static inline uint64_t
dbuf_cache_hiwater_bytes(void)
{
	uint64_t dbuf_cache_target = dbuf_cache_target_bytes();

	return (dbuf_cache_target +
	    (dbuf_cache_target * dbuf_cache_hiwater_pct) / 100);
}
/*
 * Lower bound on the dbuf cache size in bytes: the current target size
 * minus dbuf_cache_lowater_pct percent of it.
 * NOTE(review): assumes dbuf_cache_lowater_pct <= 100; a larger value
 * would wrap this unsigned subtraction -- confirm the tunable is
 * clamped elsewhere.
 */
static inline uint64_t
dbuf_cache_lowater_bytes(void)
{
	uint64_t dbuf_cache_target = dbuf_cache_target_bytes();

	return (dbuf_cache_target -
	    (dbuf_cache_target * dbuf_cache_lowater_pct) / 100);
}
static inline boolean_t static inline boolean_t
dbuf_cache_above_hiwater(void) dbuf_cache_above_hiwater(void)
{ {
uint64_t dbuf_cache_target = dbuf_cache_target_bytes(); return (refcount_count(&dbuf_cache_size) > dbuf_cache_hiwater_bytes());
uint64_t dbuf_cache_hiwater_bytes =
(dbuf_cache_target * dbuf_cache_hiwater_pct) / 100;
return (refcount_count(&dbuf_cache_size) >
dbuf_cache_target + dbuf_cache_hiwater_bytes);
} }
static inline boolean_t static inline boolean_t
dbuf_cache_above_lowater(void) dbuf_cache_above_lowater(void)
{ {
uint64_t dbuf_cache_target = dbuf_cache_target_bytes(); return (refcount_count(&dbuf_cache_size) > dbuf_cache_lowater_bytes());
uint64_t dbuf_cache_lowater_bytes =
(dbuf_cache_target * dbuf_cache_lowater_pct) / 100;
return (refcount_count(&dbuf_cache_size) >
dbuf_cache_target - dbuf_cache_lowater_bytes);
} }
/* /*
@ -525,7 +624,14 @@ dbuf_evict_one(void)
multilist_sublist_unlock(mls); multilist_sublist_unlock(mls);
(void) refcount_remove_many(&dbuf_cache_size, (void) refcount_remove_many(&dbuf_cache_size,
db->db.db_size, db); db->db.db_size, db);
DBUF_STAT_BUMPDOWN(cache_levels[db->db_level]);
DBUF_STAT_BUMPDOWN(cache_count);
DBUF_STAT_DECR(cache_levels_bytes[db->db_level],
db->db.db_size);
dbuf_destroy(db); dbuf_destroy(db);
DBUF_STAT_MAX(cache_size_bytes_max,
refcount_count(&dbuf_cache_size));
DBUF_STAT_BUMP(cache_total_evicts);
} else { } else {
multilist_sublist_unlock(mls); multilist_sublist_unlock(mls);
} }
@ -618,7 +724,24 @@ dbuf_evict_notify(void)
} }
} }
/*
 * ks_update callback for the "dbufstats" kstat.
 *
 * Writes are rejected with EACCES.  On read, the values that are cheap
 * to derive on demand (current cache size, the target/hiwater/lowater
 * bounds, and the hash element count) are refreshed from their live
 * sources; every other field is maintained incrementally by the
 * DBUF_STAT_* macros and needs no refresh here.
 */
static int
dbuf_kstat_update(kstat_t *ksp, int rw)
{
	dbuf_stats_t *ds = ksp->ks_data;

	if (rw == KSTAT_WRITE) {
		return (SET_ERROR(EACCES));
	} else {
		ds->cache_size_bytes.value.ui64 =
		    refcount_count(&dbuf_cache_size);
		ds->cache_target_bytes.value.ui64 = dbuf_cache_target_bytes();
		ds->cache_hiwater_bytes.value.ui64 = dbuf_cache_hiwater_bytes();
		ds->cache_lowater_bytes.value.ui64 = dbuf_cache_lowater_bytes();
		/* Unlocked read of an atomically maintained counter. */
		ds->hash_elements.value.ui64 = dbuf_hash_count;
	}

	return (0);
}
void void
dbuf_init(void) dbuf_init(void)
@ -687,6 +810,26 @@ retry:
cv_init(&dbuf_evict_cv, NULL, CV_DEFAULT, NULL); cv_init(&dbuf_evict_cv, NULL, CV_DEFAULT, NULL);
dbuf_cache_evict_thread = thread_create(NULL, 0, dbuf_evict_thread, dbuf_cache_evict_thread = thread_create(NULL, 0, dbuf_evict_thread,
NULL, 0, &p0, TS_RUN, minclsyspri); NULL, 0, &p0, TS_RUN, minclsyspri);
dbuf_ksp = kstat_create("zfs", 0, "dbufstats", "misc",
KSTAT_TYPE_NAMED, sizeof (dbuf_stats) / sizeof (kstat_named_t),
KSTAT_FLAG_VIRTUAL);
if (dbuf_ksp != NULL) {
dbuf_ksp->ks_data = &dbuf_stats;
dbuf_ksp->ks_update = dbuf_kstat_update;
kstat_install(dbuf_ksp);
for (i = 0; i < DN_MAX_LEVELS; i++) {
snprintf(dbuf_stats.cache_levels[i].name,
KSTAT_STRLEN, "cache_level_%d", i);
dbuf_stats.cache_levels[i].data_type =
KSTAT_DATA_UINT64;
snprintf(dbuf_stats.cache_levels_bytes[i].name,
KSTAT_STRLEN, "cache_level_%d_bytes", i);
dbuf_stats.cache_levels_bytes[i].data_type =
KSTAT_DATA_UINT64;
}
}
} }
void void
@ -725,6 +868,11 @@ dbuf_fini(void)
refcount_destroy(&dbuf_cache_size); refcount_destroy(&dbuf_cache_size);
multilist_destroy(dbuf_cache); multilist_destroy(dbuf_cache);
if (dbuf_ksp != NULL) {
kstat_delete(dbuf_ksp);
dbuf_ksp = NULL;
}
} }
/* /*
@ -1268,6 +1416,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
if ((flags & DB_RF_HAVESTRUCT) == 0) if ((flags & DB_RF_HAVESTRUCT) == 0)
rw_exit(&dn->dn_struct_rwlock); rw_exit(&dn->dn_struct_rwlock);
DB_DNODE_EXIT(db); DB_DNODE_EXIT(db);
DBUF_STAT_BUMP(hash_hits);
} else if (db->db_state == DB_UNCACHED) { } else if (db->db_state == DB_UNCACHED) {
spa_t *spa = dn->dn_objset->os_spa; spa_t *spa = dn->dn_objset->os_spa;
boolean_t need_wait = B_FALSE; boolean_t need_wait = B_FALSE;
@ -1287,6 +1436,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
if ((flags & DB_RF_HAVESTRUCT) == 0) if ((flags & DB_RF_HAVESTRUCT) == 0)
rw_exit(&dn->dn_struct_rwlock); rw_exit(&dn->dn_struct_rwlock);
DB_DNODE_EXIT(db); DB_DNODE_EXIT(db);
DBUF_STAT_BUMP(hash_misses);
if (!err && need_wait) if (!err && need_wait)
err = zio_wait(zio); err = zio_wait(zio);
@ -1305,6 +1455,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
if ((flags & DB_RF_HAVESTRUCT) == 0) if ((flags & DB_RF_HAVESTRUCT) == 0)
rw_exit(&dn->dn_struct_rwlock); rw_exit(&dn->dn_struct_rwlock);
DB_DNODE_EXIT(db); DB_DNODE_EXIT(db);
DBUF_STAT_BUMP(hash_misses);
/* Skip the wait per the caller's request. */ /* Skip the wait per the caller's request. */
mutex_enter(&db->db_mtx); mutex_enter(&db->db_mtx);
@ -2231,6 +2382,10 @@ dbuf_destroy(dmu_buf_impl_t *db)
multilist_remove(dbuf_cache, db); multilist_remove(dbuf_cache, db);
(void) refcount_remove_many(&dbuf_cache_size, (void) refcount_remove_many(&dbuf_cache_size,
db->db.db_size, db); db->db.db_size, db);
DBUF_STAT_BUMPDOWN(cache_levels[db->db_level]);
DBUF_STAT_BUMPDOWN(cache_count);
DBUF_STAT_DECR(cache_levels_bytes[db->db_level],
db->db.db_size);
} }
ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL); ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL);
@ -2458,6 +2613,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
/* someone else inserted it first */ /* someone else inserted it first */
kmem_cache_free(dbuf_kmem_cache, db); kmem_cache_free(dbuf_kmem_cache, db);
mutex_exit(&dn->dn_dbufs_mtx); mutex_exit(&dn->dn_dbufs_mtx);
DBUF_STAT_BUMP(hash_insert_race);
return (odb); return (odb);
} }
avl_add(&dn->dn_dbufs, db); avl_add(&dn->dn_dbufs, db);
@ -2847,6 +3003,10 @@ __dbuf_hold_impl(struct dbuf_hold_impl_data *dh)
multilist_remove(dbuf_cache, dh->dh_db); multilist_remove(dbuf_cache, dh->dh_db);
(void) refcount_remove_many(&dbuf_cache_size, (void) refcount_remove_many(&dbuf_cache_size,
dh->dh_db->db.db_size, dh->dh_db); dh->dh_db->db.db_size, dh->dh_db);
DBUF_STAT_BUMPDOWN(cache_levels[dh->dh_db->db_level]);
DBUF_STAT_BUMPDOWN(cache_count);
DBUF_STAT_DECR(cache_levels_bytes[dh->dh_db->db_level],
dh->dh_db->db.db_size);
} }
(void) refcount_add(&dh->dh_db->db_holds, dh->dh_tag); (void) refcount_add(&dh->dh_db->db_holds, dh->dh_tag);
DBUF_VERIFY(dh->dh_db); DBUF_VERIFY(dh->dh_db);
@ -3118,6 +3278,12 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
multilist_insert(dbuf_cache, db); multilist_insert(dbuf_cache, db);
(void) refcount_add_many(&dbuf_cache_size, (void) refcount_add_many(&dbuf_cache_size,
db->db.db_size, db); db->db.db_size, db);
DBUF_STAT_BUMP(cache_levels[db->db_level]);
DBUF_STAT_BUMP(cache_count);
DBUF_STAT_INCR(cache_levels_bytes[db->db_level],
db->db.db_size);
DBUF_STAT_MAX(cache_size_bytes_max,
refcount_count(&dbuf_cache_size));
mutex_exit(&db->db_mtx); mutex_exit(&db->db_mtx);
dbuf_evict_notify(); dbuf_evict_notify();

View File

@ -46,14 +46,14 @@ static int
dbuf_stats_hash_table_headers(char *buf, size_t size) dbuf_stats_hash_table_headers(char *buf, size_t size)
{ {
(void) snprintf(buf, size, (void) snprintf(buf, size,
"%-88s | %-124s | %s\n" "%-96s | %-119s | %s\n"
"%-16s %-8s %-8s %-8s %-8s %-8s %-8s %-5s %-5s %5s | " "%-16s %-8s %-8s %-8s %-8s %-10s %-8s %-5s %-5s %-7s %3s | "
"%-5s %-5s %-8s %-6s %-8s %-12s " "%-5s %-5s %-9s %-6s %-8s %-12s "
"%-6s %-6s %-6s %-6s %-6s %-8s %-8s %-8s %-5s | " "%-6s %-6s %-6s %-6s %-6s %-8s %-8s %-8s %-6s | "
"%-6s %-6s %-8s %-8s %-6s %-6s %-5s %-8s %-8s\n", "%-6s %-6s %-8s %-8s %-6s %-6s %-6s %-8s %-8s\n",
"dbuf", "arcbuf", "dnode", "pool", "objset", "object", "level", "dbuf", "arcbuf", "dnode", "pool", "objset", "object", "level",
"blkid", "offset", "dbsize", "meta", "state", "dbholds", "list", "blkid", "offset", "dbsize", "meta", "state", "dbholds", "dbc",
"atype", "flags", "count", "asize", "access", "list", "atype", "flags", "count", "asize", "access",
"mru", "gmru", "mfu", "gmfu", "l2", "l2_dattr", "l2_asize", "mru", "gmru", "mfu", "gmfu", "l2", "l2_dattr", "l2_asize",
"l2_comp", "aholds", "dtype", "btype", "data_bs", "meta_bs", "l2_comp", "aholds", "dtype", "btype", "data_bs", "meta_bs",
"bsize", "lvls", "dholds", "blocks", "dsize"); "bsize", "lvls", "dholds", "blocks", "dsize");
@ -75,10 +75,10 @@ __dbuf_stats_hash_table_data(char *buf, size_t size, dmu_buf_impl_t *db)
__dmu_object_info_from_dnode(dn, &doi); __dmu_object_info_from_dnode(dn, &doi);
nwritten = snprintf(buf, size, nwritten = snprintf(buf, size,
"%-16s %-8llu %-8lld %-8lld %-8lld %-8llu %-8llu %-5d %-5d %-5lu | " "%-16s %-8llu %-8lld %-8lld %-8lld %-10llu %-8llu %-5d %-5d "
"%-5d %-5d 0x%-6x %-6lu %-8llu %-12llu " "%-7lu %-3d | %-5d %-5d 0x%-7x %-6lu %-8llu %-12llu "
"%-6lu %-6lu %-6lu %-6lu %-6lu %-8llu %-8llu %-8d %-5lu | " "%-6lu %-6lu %-6lu %-6lu %-6lu %-8llu %-8llu %-8d %-6lu | "
"%-6d %-6d %-8lu %-8lu %-6llu %-6lu %-5lu %-8llu %-8llu\n", "%-6d %-6d %-8lu %-8lu %-6llu %-6lu %-6lu %-8llu %-8llu\n",
/* dmu_buf_impl_t */ /* dmu_buf_impl_t */
spa_name(dn->dn_objset->os_spa), spa_name(dn->dn_objset->os_spa),
(u_longlong_t)dmu_objset_id(db->db_objset), (u_longlong_t)dmu_objset_id(db->db_objset),
@ -90,6 +90,7 @@ __dbuf_stats_hash_table_data(char *buf, size_t size, dmu_buf_impl_t *db)
!!dbuf_is_metadata(db), !!dbuf_is_metadata(db),
db->db_state, db->db_state,
(ulong_t)refcount_count(&db->db_holds), (ulong_t)refcount_count(&db->db_holds),
multilist_link_active(&db->db_cache_link),
/* arc_buf_info_t */ /* arc_buf_info_t */
abi.abi_state_type, abi.abi_state_type,
abi.abi_state_contents, abi.abi_state_contents,

View File

@ -27,6 +27,10 @@ tags = ['functional']
tests = ['posix_003_pos'] tests = ['posix_003_pos']
tags = ['functional', 'acl', 'posix'] tags = ['functional', 'acl', 'posix']
[tests/functional/arc]
tests = ['dbufstats_001_pos', 'dbufstats_002_pos']
tags = ['functional', 'arc']
[tests/functional/atime] [tests/functional/atime]
tests = ['atime_001_pos', 'atime_002_neg', 'atime_003_pos'] tests = ['atime_001_pos', 'atime_002_neg', 'atime_003_pos']
tags = ['functional', 'atime'] tags = ['functional', 'atime']

View File

@ -1,5 +1,6 @@
SUBDIRS = \ SUBDIRS = \
acl \ acl \
arc \
atime \ atime \
bootfs \ bootfs \
cache \ cache \

View File

@ -0,0 +1,6 @@
# Automake fragment for the ARC functional tests: install the setup,
# cleanup, and dbufstats test scripts into the per-suite pkgdata dir.
pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/arc
dist_pkgdata_SCRIPTS = \
	cleanup.ksh \
	setup.ksh \
	dbufstats_001_pos.ksh \
	dbufstats_002_pos.ksh

View File

@ -0,0 +1,29 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2017, Lawrence Livermore National Security, LLC.
#
. $STF_SUITE/include/libtest.shlib
default_cleanup

View File

@ -0,0 +1,83 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2017, Lawrence Livermore National Security, LLC.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/include/math.shlib
#
# DESCRIPTION:
# Ensure stats presented in /proc/spl/kstat/zfs/dbufstats are correct
# based on /proc/spl/kstat/zfs/dbufs.
#
# STRATEGY:
# 1. Generate a file with random data in it
# 2. Store output from dbufs kstat
# 3. Store output from dbufstats kstat
# 4. Compare stats presented in dbufstats with stat generated using
# dbufstat.py and the dbufs kstat output
#
DBUFSTATS_FILE=$(mktemp $TEST_BASE_DIR/dbufstats.out.XXXXXX)
DBUFS_FILE=$(mktemp $TEST_BASE_DIR/dbufs.out.XXXXXX)
# Remove the generated data file and both captured kstat snapshots.
function cleanup
{
	log_must rm -f $TESTDIR/file $DBUFS_FILE $DBUFSTATS_FILE
}
# testdbufstat <stat_name> <dbufstat_filter>
#
# Compare the value reported for <stat_name> in the dbufstats kstat
# snapshot against the number of dbufs lines matching the given
# dbufstat.py -F filter in the dbufs kstat snapshot.
function testdbufstat # stat_name dbufstat_filter
{
	typeset name=$1
	typeset filter=""

	[[ -n "$2" ]] && filter="-F $2"

	# $filter is intentionally unquoted so it word-splits into
	# "-F" and its value (filter strings contain no whitespace),
	# or disappears entirely when no filter was given.  Quoting it
	# would hand dbufstat.py a single "-F <value>" (or an empty
	# string) argument instead.
	verify_eq \
	    $(grep -w "$name" "$DBUFSTATS_FILE" | awk '{ print $3 }') \
	    $(dbufstat.py -bxn -i "$DBUFS_FILE" $filter | wc -l) \
	    "$name"
}
verify_runnable "both"

log_assert "dbufstats produces correct statistics"
log_onexit cleanup

# Populate the dbuf cache with 20 MiB of random data, then sync so the
# dbufs leave the ARC anon state.
log_must file_write -o create -f "$TESTDIR/file" -b 1048576 -c 20 -d R
log_must zpool sync

# Snapshot both kstats as close together as possible so the per-dbuf
# listing and the aggregate counters describe the same state.
log_must eval "cat /proc/spl/kstat/zfs/dbufs > $DBUFS_FILE"
log_must eval "cat /proc/spl/kstat/zfs/dbufstats > $DBUFSTATS_FILE"

# Cross-check each per-level cache counter against a filtered count of
# cached dbufs at that level.  NOTE(review): assumes levels 0..11 cover
# every kstat-exported cache_level_N entry -- confirm against
# DN_MAX_LEVELS in the kernel source.
for level in {0..11}; do
	testdbufstat "cache_level_$level" "dbc=1,level=$level"
done

# Aggregate checks: total cached dbufs and total hash table elements.
testdbufstat "cache_count" "dbc=1"
testdbufstat "hash_elements" ""

log_pass "dbufstats produces correct statistics passed"

View File

@ -0,0 +1,80 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2017, Lawrence Livermore National Security, LLC.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/include/math.shlib
#
# DESCRIPTION:
# Ensure that dbufs move from mru to mfu as expected.
#
# STRATEGY:
# 1. Set dbuf cache size to a small size (10M for this test)
# 2. Generate a file with random data (small enough to fit in cache)
# 3. zpool sync to remove dbufs from anon list in ARC
# 4. Obtain the object ID using linux stat command
# 5. Ensure that all dbufs are on the mru list in the ARC
# 6. Generate another random file large enough to flush dbuf cache
# 7. cat the first generated file
# 8. Ensure that at least some dbufs moved to the mfu list in the ARC
#
DBUFS_FILE=$(mktemp $TEST_BASE_DIR/dbufs.out.XXXXXX)
# Remove the generated data files and the captured dbufs kstat snapshot.
# NOTE(review): $TESTDIR/file2 is removed here but no visible code in
# this script creates it -- confirm against the STRATEGY comment's
# "generate another random file" step.
function cleanup
{
	log_must rm -f $TESTDIR/file $TESTDIR/file2 $DBUFS_FILE
}
verify_runnable "both"

log_assert "dbufs move from mru to mfu list"
log_onexit cleanup

# Write one 1 MiB block of random data and sync it out so its dbufs
# settle onto an ARC list.
log_must file_write -o create -f "$TESTDIR/file" -b 1048576 -c 1 -d R
log_must zpool sync

# The inode number doubles as the ZFS object ID used to filter dbufs.
objid=$(stat --format="%i" "$TESTDIR/file")
log_note "Object ID for $TESTDIR/file is $objid"

# First snapshot: after a single access everything should be on the
# mru list and nothing on mfu.  NOTE(review): assumes list=1 encodes
# ARC mru and list=3 mfu in the dbufs kstat -- confirm against the
# arc_state list numbering.
log_must eval "cat /proc/spl/kstat/zfs/dbufs > $DBUFS_FILE"
dbuf=$(dbufstat.py -bxn -i "$DBUFS_FILE" -F "object=$objid" | wc -l)
mru=$(dbufstat.py -bxn -i "$DBUFS_FILE" -F "object=$objid,list=1" | wc -l)
mfu=$(dbufstat.py -bxn -i "$DBUFS_FILE" -F "object=$objid,list=3" | wc -l)
log_note "dbuf count is $dbuf, mru count is $mru, mfu count is $mfu"
verify_ne "0" "$mru" "mru count"
verify_eq "0" "$mfu" "mfu count"

# Second access promotes the buffers; re-snapshot and require that at
# least some dbufs moved to the mfu list.
# NOTE(review): STRATEGY steps 1 (shrink dbuf cache) and 6 (write a
# second, cache-flushing file) have no corresponding code here --
# confirm whether the test still exercises the intended path.
log_must eval "cat $TESTDIR/file > /dev/null"

log_must eval "cat /proc/spl/kstat/zfs/dbufs > $DBUFS_FILE"
dbuf=$(dbufstat.py -bxn -i "$DBUFS_FILE" -F "object=$objid" | wc -l)
mru=$(dbufstat.py -bxn -i "$DBUFS_FILE" -F "object=$objid,list=1" | wc -l)
mfu=$(dbufstat.py -bxn -i "$DBUFS_FILE" -F "object=$objid,list=3" | wc -l)
log_note "dbuf count is $dbuf, mru count is $mru, mfu count is $mfu"
verify_ne "0" "$mfu" "mfu count"

log_pass "dbufs move from mru to mfu list passed"

View File

@ -0,0 +1,30 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2017, Lawrence Livermore National Security, LLC.
#
. $STF_SUITE/include/libtest.shlib
DISK=${DISKS%% *}
default_setup $DISK

View File

@ -27,7 +27,7 @@
. $STF_SUITE/include/libtest.shlib . $STF_SUITE/include/libtest.shlib
set -A args "" "-b" "-d" "-r" "-v" "-s \",\"" "-x" set -A args "" "-b" "-d" "-r" "-v" "-s \",\"" "-x" "-n"
log_assert "dbufstat.py generates output and doesn't return an error code" log_assert "dbufstat.py generates output and doesn't return an error code"
@ -37,4 +37,7 @@ while [[ $i -lt ${#args[*]} ]]; do
((i = i + 1)) ((i = i + 1))
done done
# A simple test of dbufstat.py filter functionality
log_must eval "dbufstat.py -F object=10,dbc=1,pool=$TESTPOOL > /dev/null"
log_pass "dbufstat.py generates output and doesn't return an error code" log_pass "dbufstat.py generates output and doesn't return an error code"