From a69765ea5b563e0cd4d15fac4b1ac08c6ccf12d1 Mon Sep 17 00:00:00 2001 From: Damian Szuberski Date: Thu, 24 Feb 2022 19:21:13 +0100 Subject: [PATCH] Add Linux kmemleak support to ZTS - Kmemleak `clear` is invoked right before every test case run. - Kmemleak `scan` is requested right after each test case is finished. - Kmemleak instrumentation is not used for setup/cleanup/pretest/posttest/failsafe stages to shorten the test case execution time. - Kmemleak periodic scan is disabled (`scan=0`) before the test suite run to avoid interfering with the on-demand scan results. - There are unavoidable potential false positives coming from kernel areas other than OpenZFS module. - The ZTS with kmemleak enabled duration is increased by ~50%. Example run ``` Running Time: 07:12:13 Percent passed: 98.3% unreferenced object 0xffff9da82aea5410 (size 80): comm "kworker/u32:10", pid 942206, jiffies 4296749716 (age 2615.516s) hex dump (first 32 bytes): 00 30 30 00 00 00 00 00 ff 8f 30 00 00 00 00 00 .00.......0..... 51 e6 77 05 a8 9d ff ff 00 00 00 00 00 00 00 00 Q.w............. backtrace: [<000000005cf1fea2>] alloc_extent_state+0x1d/0xb0 [btrfs] [<0000000083f78ae5>] set_extent_bit+0x2ff/0x670 [btrfs] [<00000000de29249e>] lock_extent_bits+0x6b/0xa0 [btrfs] [<00000000b241f424>] lock_and_cleanup_extent_if_need+0xaf/0x1c0 [btrfs] [<0000000093ca72b5>] btrfs_buffered_write+0x297/0x7d0 [btrfs] [<000000002c2938c8>] btrfs_file_write_iter+0x127/0x390 [btrfs] [<00000000b888f720>] do_iter_readv_writev+0x152/0x1b0 [<00000000320f0bcc>] do_iter_write+0x7c/0x1c0 [<000000000b5a8fe0>] lo_write_bvec+0x62/0x150 [loop] [<000000009aa03c73>] loop_process_work+0x250/0xbd0 [loop] [<00000000c7487d8a>] process_one_work+0x1f1/0x390 [<000000000b236831>] worker_thread+0x53/0x3e0 [<0000000023cb3e57>] kthread+0x127/0x150 [<000000002d48676a>] ret_from_fork+0x22/0x30 ``` Reviewed-by: Brian Behlendorf Reviewed-by: Ryan Moeller Signed-off-by: szubersk Closes #13084 --- scripts/zfs-tests.sh | 15 +++++-- tests/test-runner/bin/test-runner.py.in | 54 ++++++++++++++++++++----- tests/test-runner/man/test-runner.1 | 2 + 3 files changed, 58 insertions(+), 13 deletions(-) diff --git a/scripts/zfs-tests.sh b/scripts/zfs-tests.sh index aa0829b283..70d1e39442 100755 --- a/scripts/zfs-tests.sh +++ b/scripts/zfs-tests.sh @@ -53,6 +53,7 @@ ZFS_DBGMSG="$STF_SUITE/callbacks/zfs_dbgmsg.ksh" ZFS_DMESG="$STF_SUITE/callbacks/zfs_dmesg.ksh" UNAME=$(uname -s) RERUN="" +KMEMLEAK="" # Override some defaults if on FreeBSD if [ "$UNAME" = "FreeBSD" ] ; then @@ -328,6 +329,7 @@ OPTIONS: -S Enable stack tracer (negative performance impact) -c Only create and populate constrained path -R Automatically rerun failing tests + -m Enable kmemleak reporting (Linux only) -n NFSFILE Use the nfsfile to determine the NFS configuration -I NUM Number of iterations -d DIR Use DIR for files and loopback devices @@ -354,7 +356,7 @@ $0 -x EOF } -while getopts 'hvqxkfScRn:d:s:r:?t:T:u:I:' OPTION; do +while getopts 'hvqxkfScRmn:d:s:r:?t:T:u:I:' OPTION; do case $OPTION in h) usage @@ -385,6 +387,9 @@ while getopts 'hvqxkfScRn:d:s:r:?t:T:u:I:' OPTION; do R) RERUN="yes" ;; + m) + KMEMLEAK="yes" + ;; n) nfsfile=$OPTARG [ -f "$nfsfile" ] || fail "Cannot read file: $nfsfile" @@ -694,12 +699,14 @@ REPORT_FILE=$(mktemp_file zts-report) # # Run all the tests as specified. # -msg "${TEST_RUNNER} ${QUIET:+-q}" \ +msg "${TEST_RUNNER}" \ + "${QUIET:+-q}" \ + "${KMEMLEAK:+-m}" \ "-c \"${RUNFILES}\"" \ "-T \"${TAGS}\"" \ "-i \"${STF_SUITE}\"" \ "-I \"${ITERATIONS}\"" -${TEST_RUNNER} ${QUIET:+-q} \ +${TEST_RUNNER} ${QUIET:+-q} ${KMEMLEAK:+-m} \ -c "${RUNFILES}" \ -T "${TAGS}" \ -i "${STF_SUITE}" \ @@ -719,7 +726,7 @@ if [ "$RESULT" -eq "2" ] && [ -n "$RERUN" ]; then for test_name in $MAYBES; do grep "$test_name " "$TEMP_RESULTS_FILE" >>"$TEST_LIST" done - ${TEST_RUNNER} ${QUIET:+-q} \ + ${TEST_RUNNER} ${QUIET:+-q} ${KMEMLEAK:+-m} \ -c "${RUNFILES}" \ -T "${TAGS}" \ -i "${STF_SUITE}" \ diff --git a/tests/test-runner/bin/test-runner.py.in b/tests/test-runner/bin/test-runner.py.in index d32e05c453..b40b8de728 100755 --- a/tests/test-runner/bin/test-runner.py.in +++ b/tests/test-runner/bin/test-runner.py.in @@ -36,11 +36,13 @@ from pwd import getpwuid from select import select from subprocess import PIPE from subprocess import Popen +from subprocess import check_output from threading import Timer from time import time BASEDIR = '/var/tmp/test_results' TESTDIR = '/usr/share/zfs/' +KMEMLEAK_FILE = '/sys/kernel/debug/kmemleak' KILL = 'kill' TRUE = 'true' SUDO = 'sudo' @@ -83,6 +85,7 @@ class Result(object): self.runtime = '' self.stdout = [] self.stderr = [] + self.kmemleak = '' self.result = '' def done(self, proc, killed, reran): @@ -98,6 +101,9 @@ class Result(object): if killed: self.result = 'KILLED' Result.runresults['KILLED'] += 1 + elif len(self.kmemleak) > 0: + self.result = 'FAIL' + Result.runresults['FAIL'] += 1 elif self.returncode == 0: self.result = 'PASS' Result.runresults['PASS'] += 1 @@ -258,7 +264,7 @@ User: %s return out.lines, err.lines - def run(self, dryrun): + def run(self, dryrun, kmemleak): """ This is the main function that runs each individual test. Determine whether or not the command requires sudo, and modify it @@ -278,6 +284,11 @@ User: %s fail('%s' % e) self.result.starttime = monotonic_time() + + if kmemleak: + cmd = f'echo clear | {SUDO} tee {KMEMLEAK_FILE}' + check_output(cmd, shell=True) + proc = Popen(privcmd, stdout=PIPE, stderr=PIPE) # Allow a special timeout value of 0 to mean infinity if int(self.timeout) == 0: @@ -287,6 +298,12 @@ User: %s try: t.start() self.result.stdout, self.result.stderr = self.collect_output(proc) + + if kmemleak: + cmd = f'echo scan | {SUDO} tee {KMEMLEAK_FILE}' + check_output(cmd, shell=True) + cmd = f'{SUDO} cat {KMEMLEAK_FILE}' + self.result.kmemleak = check_output(cmd, shell=True) except KeyboardInterrupt: self.kill_cmd(proc, True) fail('\nRun terminated at user request.') @@ -363,6 +380,9 @@ User: %s with open(os.path.join(self.outputdir, 'merged'), 'wb') as merged: for _, line in lines: os.write(merged.fileno(), b'%s\n' % line) + if len(self.result.kmemleak): + with open(os.path.join(self.outputdir, 'kmemleak'), 'wb') as kmem: + kmem.write(self.result.kmemleak) class Test(Cmd): @@ -447,14 +467,14 @@ Tags: %s cont = True if len(pretest.pathname): - pretest.run(options.dryrun) + pretest.run(options.dryrun, False) cont = pretest.result.result == 'PASS' pretest.log(options) if cont: - test.run(options.dryrun) + test.run(options.dryrun, options.kmemleak) if test.result.result == 'KILLED' and len(failsafe.pathname): - failsafe.run(options.dryrun) + failsafe.run(options.dryrun, False) failsafe.log(options, suppress_console=True) else: test.skip() @@ -462,7 +482,7 @@ Tags: %s test.log(options) if len(posttest.pathname): - posttest.run(options.dryrun) + posttest.run(options.dryrun, False) posttest.log(options) @@ -565,7 +585,7 @@ Tags: %s cont = True if len(pretest.pathname): - pretest.run(options.dryrun) + pretest.run(options.dryrun, False) cont = pretest.result.result == 'PASS' pretest.log(options) @@ -578,9 +598,9 @@ Tags: %s failsafe = Cmd(self.failsafe, outputdir=odir, timeout=self.timeout, user=self.failsafe_user, identifier=self.identifier) if cont: - test.run(options.dryrun) + test.run(options.dryrun, options.kmemleak) if test.result.result == 'KILLED' and len(failsafe.pathname): - failsafe.run(options.dryrun) + failsafe.run(options.dryrun, False) failsafe.log(options, suppress_console=True) else: test.skip() @@ -588,7 +608,7 @@ Tags: %s test.log(options) if len(posttest.pathname): - posttest.run(options.dryrun) + posttest.run(options.dryrun, False) posttest.log(options) @@ -853,6 +873,11 @@ class TestRun(object): else: write_log('Could not make a symlink to directory %s\n' % self.outputdir, LOG_ERR) + + if options.kmemleak: + cmd = f'echo scan=0 | {SUDO} tee {KMEMLEAK_FILE}' + check_output(cmd, shell=True) + iteration = 0 while iteration < options.iterations: for test in sorted(self.tests.keys()): @@ -998,6 +1023,14 @@ def fail(retstr, ret=1): exit(ret) +def kmemleak_cb(option, opt_str, value, parser): + if not os.path.exists(KMEMLEAK_FILE): + fail(f"File '{KMEMLEAK_FILE}' doesn't exist. " + + "Enable CONFIG_DEBUG_KMEMLEAK in kernel configuration.") + + setattr(parser.values, option.dest, True) + + def options_cb(option, opt_str, value, parser): path_options = ['outputdir', 'template', 'testdir', 'logfile'] @@ -1035,6 +1068,9 @@ def parse_args(): parser.add_option('-i', action='callback', callback=options_cb, default=TESTDIR, dest='testdir', type='string', metavar='testdir', help='Specify a test directory.') + parser.add_option('-m', action='callback', callback=kmemleak_cb, + default=False, dest='kmemleak', + help='Enable kmemleak reporting (Linux only)') parser.add_option('-p', action='callback', callback=options_cb, default='', dest='pre', metavar='script', type='string', help='Specify a pre script.') diff --git a/tests/test-runner/man/test-runner.1 b/tests/test-runner/man/test-runner.1 index f7cbcbc5b9..b823aaa3e1 100644 --- a/tests/test-runner/man/test-runner.1 +++ b/tests/test-runner/man/test-runner.1 @@ -210,6 +210,8 @@ to be consumed by the run command. .It Fl d Dry run mode. Execute no tests, but print a description of each test that would have been run. +.It Fl m +Enable kmemleak reporting (Linux only) .It Fl g Create test groups from any directories found while searching for tests. .It Fl o Ar outputdir