Add tracepoints for taskq entry lifetime events
This adds some new DTRACE_PROBE* endpoints so that we can observe taskq latencies on a system. Additionally, a new "taskqlatency.bt" script is added to do this observation via "bpftrace". Lastly, a "zfs-trace.sh" script is added to wrap "bpftrace" with the proper options required to run and use "taskqlatency.bt". For example, with these changes in place, a user can run the following: $ cd ./contrib/bpftrace $ sudo ./zfs-trace.sh taskqlatency.bt Attaching 6 probes... ^C Here's some example output, showing latency information for time spent executing the taskq entry's function: @exec_lat_us[dp_sync_taskq, userquota_updates_task]: [2, 4) 5 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| [4, 8) 0 | | [8, 16) 1 |@@@@@@@@@@ | [16, 32) 2 |@@@@@@@@@@@@@@@@@@@@ | @exec_lat_us[z_wr_int_h, zio_execute]: [8, 16) 16 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| [16, 32) 2 |@@@@@@ | @exec_lat_us[z_wr_iss_h, zio_execute]: [16, 32) 4 |@@@@@@@@@@@@@@@@ | [32, 64) 13 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| [64, 128) 1 |@@@@ | @exec_lat_us[z_ioctl_int, zio_execute]: [2, 4) 1 |@@@@ | [4, 8) 11 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| [8, 16) 8 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | @exec_lat_us[dp_sync_taskq, sync_dnodes_task]: [2, 4) 1 |@@@@@@ | [4, 8) 7 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | [8, 16) 8 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| [16, 32) 2 |@@@@@@@@@@@@@ | [32, 64) 4 |@@@@@@@@@@@@@@@@@@@@@@@@@@ | [64, 128) 1 |@@@@@@ | [128, 256) 0 | | [256, 512) 1 |@@@@@@ Here's some example output, showing latency information for time spent waiting on the taskq, prior to starting execution of entry's function: @queue_lat_us[dp_sync_taskq]: [2, 4) 1 |@@@@ | [4, 8) 7 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | [8, 16) 2 |@@@@@@@@ | [16, 32) 3 |@@@@@@@@@@@@@ | [32, 64) 12 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| [64, 128) 6 |@@@@@@@@@@@@@@@@@@@@@@@@@@ | [128, 256) 0 | | [256, 512) 1 |@@@@ | @queue_lat_us[z_wr_iss]: [4, 8) 4 |@@@@ | [8, 16) 13 |@@@@@@@@@@@@@@@ | [16, 32) 6 |@@@@@@@ | [32, 64) 2 |@@ | [64, 128) 12 |@@@@@@@@@@@@@@ | [128, 256) 15 |@@@@@@@@@@@@@@@@@@ | [256, 512) 33 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | [512, 1K) 27 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | [1K, 2K) 7 |@@@@@@@@ | [2K, 4K) 14 |@@@@@@@@@@@@@@@@ | [4K, 8K) 14 |@@@@@@@@@@@@@@@@ | [8K, 16K) 23 |@@@@@@@@@@@@@@@@@@@@@@@@@@@ | [16K, 32K) 43 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| @queue_lat_us[z_wr_int]: [2, 4) 10 |@@@@@ | [4, 8) 71 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | [8, 16) 88 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| [16, 32) 50 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | [32, 64) 65 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | [64, 128) 43 |@@@@@@@@@@@@@@@@@@@@@@@@@ | [128, 256) 19 |@@@@@@@@@@@ | [256, 512) 3 |@ | [512, 1K) 1 | | Reviewed by: Brad Lewis <brad.lewis@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Prakash Surya <prakash.surya@delphix.com> Closes #9525
This commit is contained in:
parent
e5d1c27e30
commit
ae38e00968
|
@ -86,6 +86,7 @@ AC_CONFIG_FILES([
|
||||||
cmd/zvol_wait/Makefile
|
cmd/zvol_wait/Makefile
|
||||||
contrib/Makefile
|
contrib/Makefile
|
||||||
contrib/bash_completion.d/Makefile
|
contrib/bash_completion.d/Makefile
|
||||||
|
contrib/bpftrace/Makefile
|
||||||
contrib/dracut/02zfsexpandknowledge/Makefile
|
contrib/dracut/02zfsexpandknowledge/Makefile
|
||||||
contrib/dracut/90zfs/Makefile
|
contrib/dracut/90zfs/Makefile
|
||||||
contrib/dracut/Makefile
|
contrib/dracut/Makefile
|
||||||
|
|
|
@ -1,2 +1,2 @@
|
||||||
SUBDIRS = bash_completion.d dracut initramfs pyzfs zcp
|
SUBDIRS = bash_completion.d bpftrace dracut initramfs pyzfs zcp
|
||||||
DIST_SUBDIRS = bash_completion.d dracut initramfs pyzfs zcp
|
DIST_SUBDIRS = bash_completion.d bpftrace dracut initramfs pyzfs zcp
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
EXTRA_DIST = \
|
||||||
|
$(top_srcdir)/contrib/bpftrace/taskqlatency.bt \
|
||||||
|
$(top_srcdir)/contrib/bpftrace/zfs-trace.sh
|
|
@ -0,0 +1,54 @@
|
||||||
|
#include <sys/taskq.h>
|
||||||
|
|
||||||
|
kprobe:trace_zfs_taskq_ent__birth
|
||||||
|
{
|
||||||
|
$tqent = (struct taskq_ent *)arg0;
|
||||||
|
|
||||||
|
$tqent_id = $tqent->tqent_id;
|
||||||
|
$tq_name = str($tqent->tqent_taskq->tq_name);
|
||||||
|
|
||||||
|
@birth[$tq_name, $tqent_id] = nsecs;
|
||||||
|
}
|
||||||
|
|
||||||
|
kprobe:trace_zfs_taskq_ent__start
|
||||||
|
{
|
||||||
|
$tqent = (struct taskq_ent *)arg0;
|
||||||
|
|
||||||
|
@tqent_id[tid] = $tqent->tqent_id;
|
||||||
|
@tq_name[tid] = str($tqent->tqent_taskq->tq_name);
|
||||||
|
|
||||||
|
@start[@tq_name[tid], @tqent_id[tid]] = nsecs;
|
||||||
|
}
|
||||||
|
|
||||||
|
kprobe:trace_zfs_taskq_ent__start
|
||||||
|
/ @birth[@tq_name[tid], @tqent_id[tid]] /
|
||||||
|
{
|
||||||
|
@queue_lat_us[@tq_name[tid]] =
|
||||||
|
hist((nsecs - @birth[@tq_name[tid], @tqent_id[tid]])/1000);
|
||||||
|
delete(@birth[@tq_name[tid], @tqent_id[tid]]);
|
||||||
|
}
|
||||||
|
|
||||||
|
kprobe:trace_zfs_taskq_ent__finish
|
||||||
|
/ @start[@tq_name[tid], @tqent_id[tid]] /
|
||||||
|
{
|
||||||
|
$tqent = (struct taskq_ent *)arg0;
|
||||||
|
|
||||||
|
@exec_lat_us[@tq_name[tid], ksym($tqent->tqent_func)] =
|
||||||
|
hist((nsecs - @start[@tq_name[tid], @tqent_id[tid]])/1000);
|
||||||
|
delete(@start[@tq_name[tid], @tqent_id[tid]]);
|
||||||
|
}
|
||||||
|
|
||||||
|
kprobe:trace_zfs_taskq_ent__finish
|
||||||
|
{
|
||||||
|
delete(@tq_name[tid]);
|
||||||
|
delete(@tqent_id[tid]);
|
||||||
|
}
|
||||||
|
|
||||||
|
END
|
||||||
|
{
|
||||||
|
clear(@birth);
|
||||||
|
clear(@start);
|
||||||
|
|
||||||
|
clear(@tq_name);
|
||||||
|
clear(@tqent_id);
|
||||||
|
}
|
|
@ -0,0 +1,10 @@
|
||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
ZVER=$(cat /sys/module/zfs/version | cut -f 1 -d '-')
|
||||||
|
KVER=$(uname -r)
|
||||||
|
|
||||||
|
bpftrace \
|
||||||
|
--include "/usr/src/zfs-$ZVER/$KVER/zfs_config.h" \
|
||||||
|
-I "/usr/src/zfs-$ZVER/include" \
|
||||||
|
-I "/usr/src/zfs-$ZVER/include/spl" \
|
||||||
|
"$@"
|
|
@ -46,6 +46,7 @@ KERNEL_H = \
|
||||||
$(top_srcdir)/include/os/linux/spl/sys/timer.h \
|
$(top_srcdir)/include/os/linux/spl/sys/timer.h \
|
||||||
$(top_srcdir)/include/os/linux/spl/sys/trace.h \
|
$(top_srcdir)/include/os/linux/spl/sys/trace.h \
|
||||||
$(top_srcdir)/include/os/linux/spl/sys/trace_spl.h \
|
$(top_srcdir)/include/os/linux/spl/sys/trace_spl.h \
|
||||||
|
$(top_srcdir)/include/os/linux/spl/sys/trace_taskq.h \
|
||||||
$(top_srcdir)/include/os/linux/spl/sys/tsd.h \
|
$(top_srcdir)/include/os/linux/spl/sys/tsd.h \
|
||||||
$(top_srcdir)/include/os/linux/spl/sys/types32.h \
|
$(top_srcdir)/include/os/linux/spl/sys/types32.h \
|
||||||
$(top_srcdir)/include/os/linux/spl/sys/types.h \
|
$(top_srcdir)/include/os/linux/spl/sys/types.h \
|
||||||
|
|
|
@ -22,6 +22,9 @@
|
||||||
#ifndef _OS_LINUX_SPL_TRACE_H
|
#ifndef _OS_LINUX_SPL_TRACE_H
|
||||||
#define _OS_LINUX_SPL_TRACE_H
|
#define _OS_LINUX_SPL_TRACE_H
|
||||||
|
|
||||||
|
#include <sys/taskq.h>
|
||||||
|
|
||||||
#include <sys/trace.h>
|
#include <sys/trace.h>
|
||||||
|
#include <sys/trace_taskq.h>
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -0,0 +1,89 @@
|
||||||
|
/*
|
||||||
|
* CDDL HEADER START
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the terms of the
|
||||||
|
* Common Development and Distribution License (the "License").
|
||||||
|
* You may not use this file except in compliance with the License.
|
||||||
|
*
|
||||||
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||||
|
* or http://www.opensolaris.org/os/licensing.
|
||||||
|
* See the License for the specific language governing permissions
|
||||||
|
* and limitations under the License.
|
||||||
|
*
|
||||||
|
* When distributing Covered Code, include this CDDL HEADER in each
|
||||||
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||||
|
* If applicable, add the following below this CDDL HEADER, with the
|
||||||
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||||
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||||
|
*
|
||||||
|
* CDDL HEADER END
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if defined(_KERNEL)
|
||||||
|
#if defined(HAVE_DECLARE_EVENT_CLASS)
|
||||||
|
|
||||||
|
#undef TRACE_SYSTEM
|
||||||
|
#define TRACE_SYSTEM zfs
|
||||||
|
|
||||||
|
#undef TRACE_SYSTEM_VAR
|
||||||
|
#define TRACE_SYSTEM_VAR zfs_taskq
|
||||||
|
|
||||||
|
#if !defined(_TRACE_TASKQ_H) || defined(TRACE_HEADER_MULTI_READ)
|
||||||
|
#define _TRACE_TASKQ_H
|
||||||
|
|
||||||
|
#include <linux/tracepoint.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Generic support for single argument tracepoints of the form:
|
||||||
|
*
|
||||||
|
* DTRACE_PROBE1(...,
|
||||||
|
* taskq_ent_t *, ...);
|
||||||
|
*/
|
||||||
|
/* BEGIN CSTYLED */
|
||||||
|
DECLARE_EVENT_CLASS(zfs_taskq_ent_class,
|
||||||
|
TP_PROTO(taskq_ent_t *taskq_ent),
|
||||||
|
TP_ARGS(taskq_ent),
|
||||||
|
TP_STRUCT__entry(
|
||||||
|
__field(taskq_ent_t *, taskq_ent)
|
||||||
|
),
|
||||||
|
TP_fast_assign(
|
||||||
|
__entry->taskq_ent = taskq_ent;
|
||||||
|
),
|
||||||
|
TP_printk("taskq_ent %p", __entry->taskq_ent)
|
||||||
|
);
|
||||||
|
/* END CSTYLED */
|
||||||
|
|
||||||
|
/* BEGIN CSTYLED */
|
||||||
|
#define DEFINE_TASKQ_EVENT(name) \
|
||||||
|
DEFINE_EVENT(zfs_taskq_ent_class, name, \
|
||||||
|
TP_PROTO(taskq_ent_t *taskq_ent), \
|
||||||
|
TP_ARGS(taskq_ent))
|
||||||
|
/* END CSTYLED */
|
||||||
|
DEFINE_TASKQ_EVENT(zfs_taskq_ent__birth);
|
||||||
|
DEFINE_TASKQ_EVENT(zfs_taskq_ent__start);
|
||||||
|
DEFINE_TASKQ_EVENT(zfs_taskq_ent__finish);
|
||||||
|
|
||||||
|
#endif /* _TRACE_TASKQ_H */
|
||||||
|
|
||||||
|
#undef TRACE_INCLUDE_PATH
|
||||||
|
#undef TRACE_INCLUDE_FILE
|
||||||
|
#define TRACE_INCLUDE_PATH sys
|
||||||
|
#define TRACE_INCLUDE_FILE trace_taskq
|
||||||
|
#include <trace/define_trace.h>
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
/*
|
||||||
|
* When tracepoints are not available, a DEFINE_DTRACE_PROBE* macro is
|
||||||
|
* needed for each DTRACE_PROBE. These will be used to generate stub
|
||||||
|
* tracing functions and prototypes for those functions. See
|
||||||
|
* include/os/linux/spl/sys/trace.h.
|
||||||
|
*/
|
||||||
|
|
||||||
|
DEFINE_DTRACE_PROBE1(taskq_ent__birth);
|
||||||
|
DEFINE_DTRACE_PROBE1(taskq_ent__start);
|
||||||
|
DEFINE_DTRACE_PROBE1(taskq_ent__finish);
|
||||||
|
|
||||||
|
#endif /* HAVE_DECLARE_EVENT_CLASS */
|
||||||
|
#endif /* _KERNEL */
|
|
@ -28,6 +28,7 @@
|
||||||
#include <sys/taskq.h>
|
#include <sys/taskq.h>
|
||||||
#include <sys/kmem.h>
|
#include <sys/kmem.h>
|
||||||
#include <sys/tsd.h>
|
#include <sys/tsd.h>
|
||||||
|
#include <sys/trace_spl.h>
|
||||||
|
|
||||||
int spl_taskq_thread_bind = 0;
|
int spl_taskq_thread_bind = 0;
|
||||||
module_param(spl_taskq_thread_bind, int, 0644);
|
module_param(spl_taskq_thread_bind, int, 0644);
|
||||||
|
@ -223,6 +224,8 @@ task_expire_impl(taskq_ent_t *t)
|
||||||
}
|
}
|
||||||
|
|
||||||
t->tqent_birth = jiffies;
|
t->tqent_birth = jiffies;
|
||||||
|
DTRACE_PROBE1(taskq_ent__birth, taskq_ent_t *, t);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The priority list must be maintained in strict task id order
|
* The priority list must be maintained in strict task id order
|
||||||
* from lowest to highest for lowest_id to be easily calculable.
|
* from lowest to highest for lowest_id to be easily calculable.
|
||||||
|
@ -593,7 +596,9 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
|
||||||
t->tqent_taskq = tq;
|
t->tqent_taskq = tq;
|
||||||
t->tqent_timer.function = NULL;
|
t->tqent_timer.function = NULL;
|
||||||
t->tqent_timer.expires = 0;
|
t->tqent_timer.expires = 0;
|
||||||
|
|
||||||
t->tqent_birth = jiffies;
|
t->tqent_birth = jiffies;
|
||||||
|
DTRACE_PROBE1(taskq_ent__birth, taskq_ent_t *, t);
|
||||||
|
|
||||||
ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
|
ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
|
||||||
|
|
||||||
|
@ -706,7 +711,9 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
|
||||||
t->tqent_func = func;
|
t->tqent_func = func;
|
||||||
t->tqent_arg = arg;
|
t->tqent_arg = arg;
|
||||||
t->tqent_taskq = tq;
|
t->tqent_taskq = tq;
|
||||||
|
|
||||||
t->tqent_birth = jiffies;
|
t->tqent_birth = jiffies;
|
||||||
|
DTRACE_PROBE1(taskq_ent__birth, taskq_ent_t *, t);
|
||||||
|
|
||||||
spin_unlock(&t->tqent_lock);
|
spin_unlock(&t->tqent_lock);
|
||||||
|
|
||||||
|
@ -906,15 +913,15 @@ taskq_thread(void *args)
|
||||||
* tqent_flags here.
|
* tqent_flags here.
|
||||||
*
|
*
|
||||||
* Also use an on stack taskq_ent_t for tqt_task
|
* Also use an on stack taskq_ent_t for tqt_task
|
||||||
* assignment in this case. We only populate the two
|
* assignment in this case; we want to make sure
|
||||||
* fields used by the only user in taskq proc file.
|
* to duplicate all fields, so the values are
|
||||||
|
* correct when it's accessed via DTRACE_PROBE*.
|
||||||
*/
|
*/
|
||||||
tqt->tqt_id = t->tqent_id;
|
tqt->tqt_id = t->tqent_id;
|
||||||
tqt->tqt_flags = t->tqent_flags;
|
tqt->tqt_flags = t->tqent_flags;
|
||||||
|
|
||||||
if (t->tqent_flags & TQENT_FLAG_PREALLOC) {
|
if (t->tqent_flags & TQENT_FLAG_PREALLOC) {
|
||||||
dup_task.tqent_func = t->tqent_func;
|
dup_task = *t;
|
||||||
dup_task.tqent_arg = t->tqent_arg;
|
|
||||||
t = &dup_task;
|
t = &dup_task;
|
||||||
}
|
}
|
||||||
tqt->tqt_task = t;
|
tqt->tqt_task = t;
|
||||||
|
@ -923,9 +930,13 @@ taskq_thread(void *args)
|
||||||
tq->tq_nactive++;
|
tq->tq_nactive++;
|
||||||
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
||||||
|
|
||||||
|
DTRACE_PROBE1(taskq_ent__start, taskq_ent_t *, t);
|
||||||
|
|
||||||
/* Perform the requested task */
|
/* Perform the requested task */
|
||||||
t->tqent_func(t->tqent_arg);
|
t->tqent_func(t->tqent_arg);
|
||||||
|
|
||||||
|
DTRACE_PROBE1(taskq_ent__finish, taskq_ent_t *, t);
|
||||||
|
|
||||||
spin_lock_irqsave_nested(&tq->tq_lock, flags,
|
spin_lock_irqsave_nested(&tq->tq_lock, flags,
|
||||||
tq->tq_lock_class);
|
tq->tq_lock_class);
|
||||||
tq->tq_nactive--;
|
tq->tq_nactive--;
|
||||||
|
|
|
@ -24,7 +24,10 @@
|
||||||
* source file, so this dummy file exists for that purpose.
|
* source file, so this dummy file exists for that purpose.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <sys/taskq.h>
|
||||||
|
|
||||||
#ifdef _KERNEL
|
#ifdef _KERNEL
|
||||||
#define CREATE_TRACE_POINTS
|
#define CREATE_TRACE_POINTS
|
||||||
#include <sys/trace.h>
|
#include <sys/trace.h>
|
||||||
|
#include <sys/trace_taskq.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue