2015-12-12 00:15:50 +00:00
|
|
|
/*
|
2010-05-17 22:18:00 +00:00
|
|
|
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
|
|
|
|
* Copyright (C) 2007 The Regents of the University of California.
|
|
|
|
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
|
|
|
|
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
|
2008-05-26 04:38:26 +00:00
|
|
|
* UCRL-CODE-235197
|
|
|
|
*
|
2010-05-17 22:18:00 +00:00
|
|
|
* This file is part of the SPL, Solaris Porting Layer.
|
2008-05-26 04:38:26 +00:00
|
|
|
*
|
2010-05-17 22:18:00 +00:00
|
|
|
* The SPL is free software; you can redistribute it and/or modify it
|
|
|
|
* under the terms of the GNU General Public License as published by the
|
|
|
|
* Free Software Foundation; either version 2 of the License, or (at your
|
|
|
|
* option) any later version.
|
|
|
|
*
|
|
|
|
* The SPL is distributed in the hope that it will be useful, but WITHOUT
|
2008-05-26 04:38:26 +00:00
|
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
|
|
* for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License along
|
2010-05-17 22:18:00 +00:00
|
|
|
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
|
2015-12-12 00:15:50 +00:00
|
|
|
*
|
2010-05-17 22:18:00 +00:00
|
|
|
* Solaris Porting Layer (SPL) Task Queue Implementation.
|
2015-12-12 00:15:50 +00:00
|
|
|
*/
|
2008-05-26 04:38:26 +00:00
|
|
|
|
2019-05-23 21:40:28 +00:00
|
|
|
#include <sys/timer.h>
|
2008-03-01 00:45:59 +00:00
|
|
|
#include <sys/taskq.h>
|
2008-08-11 22:13:47 +00:00
|
|
|
#include <sys/kmem.h>
|
2015-12-02 22:52:46 +00:00
|
|
|
#include <sys/tsd.h>
|
Add tracepoints for taskq entry lifetime events
This adds some new DTRACE_PROBE* endpoints so that we can observe taskq
latencies on a system. Additionally, a new "taskqlatency.bt" script is
added to do this observation via "bpftrace". Lastly, a "zfs-trace.sh"
script is added to wrap "bpftrace" with the proper options required to
run and use "taskqlatency.bt".
For example, with these changes in place, a user can run the following:
$ cd ./contrib/bpftrace
$ sudo ./zfs-trace.sh taskqlatency.bt
Attaching 6 probes...
^C
Here's some example output, showing latency information for time spent
executing the taskq entry's function:
@exec_lat_us[dp_sync_taskq, userquota_updates_task]:
[2, 4) 5 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[4, 8) 0 | |
[8, 16) 1 |@@@@@@@@@@ |
[16, 32) 2 |@@@@@@@@@@@@@@@@@@@@ |
@exec_lat_us[z_wr_int_h, zio_execute]:
[8, 16) 16 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 2 |@@@@@@ |
@exec_lat_us[z_wr_iss_h, zio_execute]:
[16, 32) 4 |@@@@@@@@@@@@@@@@ |
[32, 64) 13 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[64, 128) 1 |@@@@ |
@exec_lat_us[z_ioctl_int, zio_execute]:
[2, 4) 1 |@@@@ |
[4, 8) 11 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[8, 16) 8 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
@exec_lat_us[dp_sync_taskq, sync_dnodes_task]:
[2, 4) 1 |@@@@@@ |
[4, 8) 7 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 8 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 2 |@@@@@@@@@@@@@ |
[32, 64) 4 |@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[64, 128) 1 |@@@@@@ |
[128, 256) 0 | |
[256, 512) 1 |@@@@@@
Here's some example output, showing latency information for time spent
waiting on the taskq, prior to starting execution of entry's function:
@queue_lat_us[dp_sync_taskq]:
[2, 4) 1 |@@@@ |
[4, 8) 7 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 2 |@@@@@@@@ |
[16, 32) 3 |@@@@@@@@@@@@@ |
[32, 64) 12 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[64, 128) 6 |@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[128, 256) 0 | |
[256, 512) 1 |@@@@ |
@queue_lat_us[z_wr_iss]:
[4, 8) 4 |@@@@ |
[8, 16) 13 |@@@@@@@@@@@@@@@ |
[16, 32) 6 |@@@@@@@ |
[32, 64) 2 |@@ |
[64, 128) 12 |@@@@@@@@@@@@@@ |
[128, 256) 15 |@@@@@@@@@@@@@@@@@@ |
[256, 512) 33 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[512, 1K) 27 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[1K, 2K) 7 |@@@@@@@@ |
[2K, 4K) 14 |@@@@@@@@@@@@@@@@ |
[4K, 8K) 14 |@@@@@@@@@@@@@@@@ |
[8K, 16K) 23 |@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[16K, 32K) 43 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
@queue_lat_us[z_wr_int]:
[2, 4) 10 |@@@@@ |
[4, 8) 71 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 88 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 50 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[32, 64) 65 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[64, 128) 43 |@@@@@@@@@@@@@@@@@@@@@@@@@ |
[128, 256) 19 |@@@@@@@@@@@ |
[256, 512) 3 |@ |
[512, 1K) 1 | |
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Prakash Surya <prakash.surya@delphix.com>
Closes #9525
2019-10-24 21:13:41 +00:00
|
|
|
#include <sys/trace_spl.h>
|
2020-12-10 22:09:23 +00:00
|
|
|
#ifdef HAVE_CPU_HOTPLUG
|
|
|
|
#include <linux/cpuhotplug.h>
|
|
|
|
#endif
|
2008-04-21 17:29:47 +00:00
|
|
|
|
2022-01-14 23:37:55 +00:00
|
|
|
static int spl_taskq_thread_bind = 0;
|
2013-08-28 02:09:25 +00:00
|
|
|
module_param(spl_taskq_thread_bind, int, 0644);
|
|
|
|
MODULE_PARM_DESC(spl_taskq_thread_bind, "Bind taskq thread to CPU by default");
|
|
|
|
|
2023-06-26 20:57:12 +00:00
|
|
|
static uint_t spl_taskq_thread_timeout_ms = 10000;
|
|
|
|
/* BEGIN CSTYLED */
|
|
|
|
module_param(spl_taskq_thread_timeout_ms, uint, 0644);
|
|
|
|
/* END CSTYLED */
|
|
|
|
MODULE_PARM_DESC(spl_taskq_thread_timeout_ms,
|
|
|
|
"Time to require a dynamic thread be idle before it gets cleaned up");
|
2015-06-08 21:36:27 +00:00
|
|
|
|
2022-01-14 23:37:55 +00:00
|
|
|
static int spl_taskq_thread_dynamic = 1;
|
2020-12-10 22:09:23 +00:00
|
|
|
module_param(spl_taskq_thread_dynamic, int, 0444);
|
2015-06-08 21:36:27 +00:00
|
|
|
MODULE_PARM_DESC(spl_taskq_thread_dynamic, "Allow dynamic taskq threads");
|
|
|
|
|
2022-01-14 23:37:55 +00:00
|
|
|
static int spl_taskq_thread_priority = 1;
|
2015-07-23 18:21:08 +00:00
|
|
|
module_param(spl_taskq_thread_priority, int, 0644);
|
|
|
|
MODULE_PARM_DESC(spl_taskq_thread_priority,
|
2015-12-12 00:15:50 +00:00
|
|
|
"Allow non-default priority for taskq threads");
|
2015-07-23 18:21:08 +00:00
|
|
|
|
Cleanup: Specify unsignedness on things that should not be signed
In #13871, zfs_vdev_aggregation_limit_non_rotating and
zfs_vdev_aggregation_limit being signed was pointed out as a possible
reason not to eliminate an unnecessary MAX(unsigned, 0) since the
unsigned value was assigned from them.
There is no reason for these module parameters to be signed and upon
inspection, it was found that there are a number of other module
parameters that are signed, but should not be, so we make them unsigned.
Making them unsigned made it clear that some other variables in the code
should also be unsigned, so we also make those unsigned. This prevents
users from setting negative values that could potentially cause bad
behaviors. It also makes the code slightly easier to understand.
Mostly module parameters that deal with timeouts, limits, bitshifts and
percentages are made unsigned by this. Any that are boolean are left
signed, since whether booleans should be considered signed or unsigned
does not matter.
Making zfs_arc_lotsfree_percent unsigned caused a
`zfs_arc_lotsfree_percent >= 0` check to become redundant, so it was
removed. Removing the check was also necessary to prevent a compiler
error from -Werror=type-limits.
Several end of line comments had to be moved to their own lines because
replacing int with uint_t caused us to exceed the 80 character limit
enforced by cstyle.pl.
The following were kept signed because they are passed to
taskq_create(), which expects signed values and modifying the
OpenSolaris/Illumos DDI is out of scope of this patch:
* metaslab_load_pct
* zfs_sync_taskq_batch_pct
* zfs_zil_clean_taskq_nthr_pct
* zfs_zil_clean_taskq_minalloc
* zfs_zil_clean_taskq_maxalloc
* zfs_arc_prune_task_threads
Also, negative values in those parameters was found to be harmless.
The following were left signed because either negative values make
sense, or more analysis was needed to determine whether negative values
should be disallowed:
* zfs_metaslab_switch_threshold
* zfs_pd_bytes_max
* zfs_livelist_min_percent_shared
zfs_multihost_history was made static to be consistent with other
parameters.
A number of module parameters were marked as signed, but in reality
referenced unsigned variables. upgrade_errlog_limit is one of the
numerous examples. In the case of zfs_vdev_async_read_max_active, it was
already uint32_t, but zdb had an extern int declaration for it.
Interestingly, the documentation in zfs.4 was right for
upgrade_errlog_limit despite the module parameter being wrongly marked,
while the documentation for zfs_vdev_async_read_max_active (and friends)
was wrong. It was also wrong for zstd_abort_size, which was unsigned,
but was documented as signed.
Also, the documentation in zfs.4 incorrectly described the following
parameters as ulong when they were int:
* zfs_arc_meta_adjust_restarts
* zfs_override_estimate_recordsize
They are now uint_t as of this patch and thus the man page has been
updated to describe them as uint.
dbuf_state_index was left alone since it does nothing and perhaps should
be removed in another patch.
If any module parameters were missed, they were not found by `grep -r
'ZFS_MODULE_PARAM' | grep ', INT'`. I did find a few that grep missed,
but only because they were in files that had hits.
This patch intentionally did not attempt to address whether some of
these module parameters should be elevated to 64-bit parameters, because
the length of a long on 32-bit is 32-bit.
Lastly, it was pointed out during review that uint_t is a better match
for these variables than uint32_t because FreeBSD kernel parameter
definitions are designed for uint_t, whose bit width can change in
future memory models. As a result, we change the existing parameters
that are uint32_t to use uint_t.
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Neal Gompa <ngompa@datto.com>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13875
2022-09-27 23:42:41 +00:00
|
|
|
static uint_t spl_taskq_thread_sequential = 4;
|
|
|
|
/* BEGIN CSTYLED */
|
|
|
|
module_param(spl_taskq_thread_sequential, uint, 0644);
|
|
|
|
/* END CSTYLED */
|
2015-06-08 21:36:27 +00:00
|
|
|
MODULE_PARM_DESC(spl_taskq_thread_sequential,
|
2015-12-12 00:15:50 +00:00
|
|
|
"Create new taskq threads after N sequential tasks");
|
2015-06-08 21:36:27 +00:00
|
|
|
|
2021-11-19 17:02:45 +00:00
|
|
|
/*
|
|
|
|
* Global system-wide dynamic task queue available for all consumers. This
|
|
|
|
* taskq is not intended for long-running tasks; instead, a dedicated taskq
|
|
|
|
* should be created.
|
|
|
|
*/
|
2009-01-05 23:08:03 +00:00
|
|
|
taskq_t *system_taskq;
|
|
|
|
EXPORT_SYMBOL(system_taskq);
|
2016-12-08 21:00:20 +00:00
|
|
|
/* Global dynamic task queue for long delay */
|
|
|
|
taskq_t *system_delay_taskq;
|
|
|
|
EXPORT_SYMBOL(system_delay_taskq);
|
2009-01-05 23:08:03 +00:00
|
|
|
|
2015-06-08 21:36:27 +00:00
|
|
|
/* Private dedicated taskq for creating new taskq threads on demand. */
|
|
|
|
static taskq_t *dynamic_taskq;
|
|
|
|
static taskq_thread_t *taskq_thread_create(taskq_t *);
|
|
|
|
|
2020-12-10 22:09:23 +00:00
|
|
|
#ifdef HAVE_CPU_HOTPLUG
|
|
|
|
/* Multi-callback id for cpu hotplugging. */
|
|
|
|
static int spl_taskq_cpuhp_state;
|
|
|
|
#endif
|
|
|
|
|
2015-10-19 12:47:52 +00:00
|
|
|
/* List of all taskqs */
|
|
|
|
LIST_HEAD(tq_list);
|
2018-02-16 01:53:18 +00:00
|
|
|
struct rw_semaphore tq_list_sem;
|
2015-12-02 22:52:46 +00:00
|
|
|
static uint_t taskq_tsd;
|
2015-10-19 12:47:52 +00:00
|
|
|
|
2012-09-12 18:31:39 +00:00
|
|
|
static int
|
|
|
|
task_km_flags(uint_t flags)
|
|
|
|
{
|
|
|
|
if (flags & TQ_NOSLEEP)
|
2015-12-12 00:15:50 +00:00
|
|
|
return (KM_NOSLEEP);
|
2012-09-12 18:31:39 +00:00
|
|
|
|
|
|
|
if (flags & TQ_PUSHPAGE)
|
2015-12-12 00:15:50 +00:00
|
|
|
return (KM_PUSHPAGE);
|
2012-09-12 18:31:39 +00:00
|
|
|
|
2015-12-12 00:15:50 +00:00
|
|
|
return (KM_SLEEP);
|
2012-09-12 18:31:39 +00:00
|
|
|
}
|
|
|
|
|
2015-10-19 12:47:52 +00:00
|
|
|
/*
|
|
|
|
* taskq_find_by_name - Find the largest instance number of a named taskq.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
taskq_find_by_name(const char *name)
|
|
|
|
{
|
2019-12-14 00:07:48 +00:00
|
|
|
struct list_head *tql = NULL;
|
2015-10-19 12:47:52 +00:00
|
|
|
taskq_t *tq;
|
|
|
|
|
|
|
|
list_for_each_prev(tql, &tq_list) {
|
|
|
|
tq = list_entry(tql, taskq_t, tq_taskqs);
|
|
|
|
if (strcmp(name, tq->tq_name) == 0)
|
2018-02-07 19:49:38 +00:00
|
|
|
return (tq->tq_instance);
|
2015-10-19 12:47:52 +00:00
|
|
|
}
|
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
|
2010-01-04 23:52:26 +00:00
|
|
|
/*
|
|
|
|
* NOTE: Must be called with tq->tq_lock held, returns a list_t which
|
2008-04-25 22:10:47 +00:00
|
|
|
* is not attached to the free, work, or pending taskq lists.
|
2008-02-26 20:36:04 +00:00
|
|
|
*/
|
2011-11-11 23:06:35 +00:00
|
|
|
static taskq_ent_t *
|
2015-12-03 23:06:03 +00:00
|
|
|
task_alloc(taskq_t *tq, uint_t flags, unsigned long *irqflags)
|
2008-04-25 22:10:47 +00:00
|
|
|
{
|
2012-12-06 20:57:42 +00:00
|
|
|
taskq_ent_t *t;
|
|
|
|
int count = 0;
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2012-12-06 20:57:42 +00:00
|
|
|
ASSERT(tq);
|
2008-04-25 22:10:47 +00:00
|
|
|
retry:
|
2012-12-06 20:57:42 +00:00
|
|
|
/* Acquire taskq_ent_t's from free list if available */
|
|
|
|
if (!list_empty(&tq->tq_free_list) && !(flags & TQ_NEW)) {
|
|
|
|
t = list_entry(tq->tq_free_list.next, taskq_ent_t, tqent_list);
|
|
|
|
|
|
|
|
ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
|
2012-12-06 20:38:19 +00:00
|
|
|
ASSERT(!(t->tqent_flags & TQENT_FLAG_CANCEL));
|
|
|
|
ASSERT(!timer_pending(&t->tqent_timer));
|
2012-12-06 20:57:42 +00:00
|
|
|
|
|
|
|
list_del_init(&t->tqent_list);
|
2014-11-05 22:30:35 +00:00
|
|
|
return (t);
|
2012-12-06 20:57:42 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Free list is empty and memory allocations are prohibited */
|
|
|
|
if (flags & TQ_NOALLOC)
|
2014-11-05 22:30:35 +00:00
|
|
|
return (NULL);
|
2012-12-06 20:57:42 +00:00
|
|
|
|
|
|
|
/* Hit maximum taskq_ent_t pool size */
|
|
|
|
if (tq->tq_nalloc >= tq->tq_maxalloc) {
|
|
|
|
if (flags & TQ_NOSLEEP)
|
2014-11-05 22:30:35 +00:00
|
|
|
return (NULL);
|
2012-12-06 20:57:42 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Sleep periodically polling the free list for an available
|
|
|
|
* taskq_ent_t. Dispatching with TQ_SLEEP should always succeed
|
|
|
|
* but we cannot block forever waiting for an taskq_ent_t to
|
|
|
|
* show up in the free list, otherwise a deadlock can happen.
|
|
|
|
*
|
|
|
|
* Therefore, we need to allocate a new task even if the number
|
|
|
|
* of allocated tasks is above tq->tq_maxalloc, but we still
|
|
|
|
* end up delaying the task allocation by one second, thereby
|
|
|
|
* throttling the task dispatch rate.
|
|
|
|
*/
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, *irqflags);
|
2012-12-06 20:57:42 +00:00
|
|
|
schedule_timeout(HZ / 100);
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_lock_irqsave_nested(&tq->tq_lock, *irqflags,
|
2015-10-13 23:56:51 +00:00
|
|
|
tq->tq_lock_class);
|
2014-11-05 22:30:35 +00:00
|
|
|
if (count < 100) {
|
|
|
|
count++;
|
|
|
|
goto retry;
|
|
|
|
}
|
2012-12-06 20:57:42 +00:00
|
|
|
}
|
|
|
|
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, *irqflags);
|
2015-12-12 00:15:50 +00:00
|
|
|
t = kmem_alloc(sizeof (taskq_ent_t), task_km_flags(flags));
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_lock_irqsave_nested(&tq->tq_lock, *irqflags, tq->tq_lock_class);
|
2012-12-06 20:57:42 +00:00
|
|
|
|
|
|
|
if (t) {
|
|
|
|
taskq_init_ent(t);
|
|
|
|
tq->tq_nalloc++;
|
|
|
|
}
|
|
|
|
|
2014-11-05 22:30:35 +00:00
|
|
|
return (t);
|
2008-04-25 22:10:47 +00:00
|
|
|
}
|
|
|
|
|
2010-01-04 23:52:26 +00:00
|
|
|
/*
|
2011-11-11 23:06:35 +00:00
|
|
|
* NOTE: Must be called with tq->tq_lock held, expects the taskq_ent_t
|
2008-04-25 22:10:47 +00:00
|
|
|
* to already be removed from the free, work, or pending taskq lists.
|
|
|
|
*/
|
|
|
|
static void
|
2011-11-11 23:06:35 +00:00
|
|
|
task_free(taskq_t *tq, taskq_ent_t *t)
|
2008-04-25 22:10:47 +00:00
|
|
|
{
|
2012-12-06 20:57:42 +00:00
|
|
|
ASSERT(tq);
|
|
|
|
ASSERT(t);
|
2011-11-11 23:06:35 +00:00
|
|
|
ASSERT(list_empty(&t->tqent_list));
|
2012-12-06 20:38:19 +00:00
|
|
|
ASSERT(!timer_pending(&t->tqent_timer));
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2015-12-12 00:15:50 +00:00
|
|
|
kmem_free(t, sizeof (taskq_ent_t));
|
2012-12-06 20:57:42 +00:00
|
|
|
tq->tq_nalloc--;
|
2008-04-25 22:10:47 +00:00
|
|
|
}
|
|
|
|
|
2010-01-04 23:52:26 +00:00
|
|
|
/*
|
|
|
|
* NOTE: Must be called with tq->tq_lock held, either destroys the
|
2011-11-11 23:06:35 +00:00
|
|
|
* taskq_ent_t if too many exist or moves it to the free list for later use.
|
2008-04-25 22:10:47 +00:00
|
|
|
*/
|
2008-02-26 20:36:04 +00:00
|
|
|
static void
|
2011-11-11 23:06:35 +00:00
|
|
|
task_done(taskq_t *tq, taskq_ent_t *t)
|
2008-02-26 20:36:04 +00:00
|
|
|
{
|
2008-04-25 22:10:47 +00:00
|
|
|
ASSERT(tq);
|
|
|
|
ASSERT(t);
|
|
|
|
|
2012-12-06 20:38:19 +00:00
|
|
|
/* Wake tasks blocked in taskq_wait_id() */
|
|
|
|
wake_up_all(&t->tqent_waitq);
|
|
|
|
|
2011-11-11 23:06:35 +00:00
|
|
|
list_del_init(&t->tqent_list);
|
2008-02-26 20:36:04 +00:00
|
|
|
|
2012-12-06 20:57:42 +00:00
|
|
|
if (tq->tq_nalloc <= tq->tq_minalloc) {
|
2016-10-28 21:23:30 +00:00
|
|
|
t->tqent_id = TASKQID_INVALID;
|
2011-11-11 23:06:35 +00:00
|
|
|
t->tqent_func = NULL;
|
|
|
|
t->tqent_arg = NULL;
|
2011-12-06 18:04:51 +00:00
|
|
|
t->tqent_flags = 0;
|
Store copy of tqent_flags prior to servicing task
A preallocated taskq_ent_t's tqent_flags must be checked prior to
servicing the taskq_ent_t. Once a preallocated taskq entry is serviced,
the ownership of the entry is handed back to the caller of
taskq_dispatch, thus the entry's contents can potentially be mangled.
In particular, this is a problem in the case where a preallocated taskq
entry is serviced, and the caller clears it's tqent_flags field. Thus,
when the function returns and task_done is called, it looks as though
the entry is **not** a preallocated task (when in fact it **is** a
preallocated task).
In this situation, task_done will place the preallocated taskq_ent_t
structure onto the taskq_t's free list. This is a **huge** mistake. If
the taskq_ent_t is then freed by the caller of taskq_dispatch, the
taskq_t's free list will hold a pointer to garbage data. Even worse, if
nothing has over written the freed memory before the pointer is
dereferenced, it may still look as though it points to a valid list_head
belonging to a taskq_ent_t structure.
Thus, the task entry's flags are now copied prior to servicing the task.
This copy is then checked to see if it is a preallocated task, and
determine if the entry needs to be passed down to the task_done
function.
Signed-off-by: Prakash Surya <surya1@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #71
2011-12-16 22:57:31 +00:00
|
|
|
|
2012-12-06 20:57:42 +00:00
|
|
|
list_add_tail(&t->tqent_list, &tq->tq_free_list);
|
2008-04-25 22:10:47 +00:00
|
|
|
} else {
|
|
|
|
task_free(tq, t);
|
|
|
|
}
|
2008-02-26 20:36:04 +00:00
|
|
|
}
|
|
|
|
|
2010-01-04 23:52:26 +00:00
|
|
|
/*
|
2012-12-06 20:38:19 +00:00
|
|
|
* When a delayed task timer expires remove it from the delay list and
|
|
|
|
* add it to the priority list in order for immediate processing.
|
2008-04-25 22:10:47 +00:00
|
|
|
*/
|
2012-12-06 20:38:19 +00:00
|
|
|
static void
|
2017-12-21 18:56:32 +00:00
|
|
|
task_expire_impl(taskq_ent_t *t)
|
2008-04-25 22:10:47 +00:00
|
|
|
{
|
2017-12-21 18:56:32 +00:00
|
|
|
taskq_ent_t *w;
|
2012-12-06 20:38:19 +00:00
|
|
|
taskq_t *tq = t->tqent_taskq;
|
2019-12-14 00:07:48 +00:00
|
|
|
struct list_head *l = NULL;
|
2015-12-03 23:06:03 +00:00
|
|
|
unsigned long flags;
|
2009-03-15 22:13:49 +00:00
|
|
|
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
|
2012-12-06 20:38:19 +00:00
|
|
|
|
|
|
|
if (t->tqent_flags & TQENT_FLAG_CANCEL) {
|
|
|
|
ASSERT(list_empty(&t->tqent_list));
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
2012-12-06 20:38:19 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2016-01-28 00:55:14 +00:00
|
|
|
t->tqent_birth = jiffies;
|
Add tracepoints for taskq entry lifetime events
This adds some new DTRACE_PROBE* endpoints so that we can observe taskq
latencies on a system. Additionally, a new "taskqlatency.bt" script is
added to do this observation via "bpftrace". Lastly, a "zfs-trace.sh"
script is added to wrap "bpftrace" with the proper options required to
run and use "taskqlatency.bt".
For example, with these changes in place, a user can run the following:
$ cd ./contrib/bpftrace
$ sudo ./zfs-trace.sh taskqlatency.bt
Attaching 6 probes...
^C
Here's some example output, showing latency information for time spent
executing the taskq entry's function:
@exec_lat_us[dp_sync_taskq, userquota_updates_task]:
[2, 4) 5 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[4, 8) 0 | |
[8, 16) 1 |@@@@@@@@@@ |
[16, 32) 2 |@@@@@@@@@@@@@@@@@@@@ |
@exec_lat_us[z_wr_int_h, zio_execute]:
[8, 16) 16 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 2 |@@@@@@ |
@exec_lat_us[z_wr_iss_h, zio_execute]:
[16, 32) 4 |@@@@@@@@@@@@@@@@ |
[32, 64) 13 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[64, 128) 1 |@@@@ |
@exec_lat_us[z_ioctl_int, zio_execute]:
[2, 4) 1 |@@@@ |
[4, 8) 11 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[8, 16) 8 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
@exec_lat_us[dp_sync_taskq, sync_dnodes_task]:
[2, 4) 1 |@@@@@@ |
[4, 8) 7 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 8 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 2 |@@@@@@@@@@@@@ |
[32, 64) 4 |@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[64, 128) 1 |@@@@@@ |
[128, 256) 0 | |
[256, 512) 1 |@@@@@@
Here's some example output, showing latency information for time spent
waiting on the taskq, prior to starting execution of entry's function:
@queue_lat_us[dp_sync_taskq]:
[2, 4) 1 |@@@@ |
[4, 8) 7 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 2 |@@@@@@@@ |
[16, 32) 3 |@@@@@@@@@@@@@ |
[32, 64) 12 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[64, 128) 6 |@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[128, 256) 0 | |
[256, 512) 1 |@@@@ |
@queue_lat_us[z_wr_iss]:
[4, 8) 4 |@@@@ |
[8, 16) 13 |@@@@@@@@@@@@@@@ |
[16, 32) 6 |@@@@@@@ |
[32, 64) 2 |@@ |
[64, 128) 12 |@@@@@@@@@@@@@@ |
[128, 256) 15 |@@@@@@@@@@@@@@@@@@ |
[256, 512) 33 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[512, 1K) 27 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[1K, 2K) 7 |@@@@@@@@ |
[2K, 4K) 14 |@@@@@@@@@@@@@@@@ |
[4K, 8K) 14 |@@@@@@@@@@@@@@@@ |
[8K, 16K) 23 |@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[16K, 32K) 43 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
@queue_lat_us[z_wr_int]:
[2, 4) 10 |@@@@@ |
[4, 8) 71 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 88 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 50 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[32, 64) 65 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[64, 128) 43 |@@@@@@@@@@@@@@@@@@@@@@@@@ |
[128, 256) 19 |@@@@@@@@@@@ |
[256, 512) 3 |@ |
[512, 1K) 1 | |
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Prakash Surya <prakash.surya@delphix.com>
Closes #9525
2019-10-24 21:13:41 +00:00
|
|
|
DTRACE_PROBE1(taskq_ent__birth, taskq_ent_t *, t);
|
|
|
|
|
2012-12-06 20:38:19 +00:00
|
|
|
/*
|
|
|
|
* The priority list must be maintained in strict task id order
|
|
|
|
* from lowest to highest for lowest_id to be easily calculable.
|
|
|
|
*/
|
|
|
|
list_del(&t->tqent_list);
|
|
|
|
list_for_each_prev(l, &tq->tq_prio_list) {
|
|
|
|
w = list_entry(l, taskq_ent_t, tqent_list);
|
|
|
|
if (w->tqent_id < t->tqent_id) {
|
|
|
|
list_add(&t->tqent_list, l);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (l == &tq->tq_prio_list)
|
|
|
|
list_add(&t->tqent_list, &tq->tq_prio_list);
|
|
|
|
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
2009-03-15 22:13:49 +00:00
|
|
|
|
2012-12-06 20:38:19 +00:00
|
|
|
wake_up(&tq->tq_work_waitq);
|
|
|
|
}
|
|
|
|
|
2017-12-21 18:56:32 +00:00
|
|
|
static void
|
2019-05-23 21:40:28 +00:00
|
|
|
task_expire(spl_timer_list_t tl)
|
2017-12-21 18:56:32 +00:00
|
|
|
{
|
2019-05-23 21:40:28 +00:00
|
|
|
struct timer_list *tmr = (struct timer_list *)tl;
|
|
|
|
taskq_ent_t *t = from_timer(t, tmr, tqent_timer);
|
2017-12-21 18:56:32 +00:00
|
|
|
task_expire_impl(t);
|
|
|
|
}
|
|
|
|
|
2012-12-06 20:38:19 +00:00
|
|
|
/*
|
|
|
|
* Returns the lowest incomplete taskqid_t. The taskqid_t may
|
|
|
|
* be queued on the pending list, on the priority list, on the
|
|
|
|
* delay list, or on the work list currently being handled, but
|
|
|
|
* it is not 100% complete yet.
|
|
|
|
*/
|
|
|
|
static taskqid_t
|
|
|
|
taskq_lowest_id(taskq_t *tq)
|
|
|
|
{
|
|
|
|
taskqid_t lowest_id = tq->tq_next_id;
|
|
|
|
taskq_ent_t *t;
|
|
|
|
taskq_thread_t *tqt;
|
|
|
|
|
|
|
|
if (!list_empty(&tq->tq_pend_list)) {
|
|
|
|
t = list_entry(tq->tq_pend_list.next, taskq_ent_t, tqent_list);
|
|
|
|
lowest_id = MIN(lowest_id, t->tqent_id);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!list_empty(&tq->tq_prio_list)) {
|
|
|
|
t = list_entry(tq->tq_prio_list.next, taskq_ent_t, tqent_list);
|
|
|
|
lowest_id = MIN(lowest_id, t->tqent_id);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!list_empty(&tq->tq_delay_list)) {
|
|
|
|
t = list_entry(tq->tq_delay_list.next, taskq_ent_t, tqent_list);
|
|
|
|
lowest_id = MIN(lowest_id, t->tqent_id);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!list_empty(&tq->tq_active_list)) {
|
|
|
|
tqt = list_entry(tq->tq_active_list.next, taskq_thread_t,
|
|
|
|
tqt_active_list);
|
2016-10-28 21:23:30 +00:00
|
|
|
ASSERT(tqt->tqt_id != TASKQID_INVALID);
|
2012-12-06 20:38:19 +00:00
|
|
|
lowest_id = MIN(lowest_id, tqt->tqt_id);
|
|
|
|
}
|
|
|
|
|
2014-11-05 22:30:35 +00:00
|
|
|
return (lowest_id);
|
2012-12-06 20:38:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Insert a task into a list keeping the list sorted by increasing taskqid.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
taskq_insert_in_order(taskq_t *tq, taskq_thread_t *tqt)
|
|
|
|
{
|
|
|
|
taskq_thread_t *w;
|
2019-12-14 00:07:48 +00:00
|
|
|
struct list_head *l = NULL;
|
2012-12-06 20:38:19 +00:00
|
|
|
|
|
|
|
ASSERT(tq);
|
|
|
|
ASSERT(tqt);
|
|
|
|
|
|
|
|
list_for_each_prev(l, &tq->tq_active_list) {
|
|
|
|
w = list_entry(l, taskq_thread_t, tqt_active_list);
|
|
|
|
if (w->tqt_id < tqt->tqt_id) {
|
|
|
|
list_add(&tqt->tqt_active_list, l);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (l == &tq->tq_active_list)
|
|
|
|
list_add(&tqt->tqt_active_list, &tq->tq_active_list);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Find and return a task from the given list if it exists. The list
|
|
|
|
* must be in lowest to highest task id order.
|
|
|
|
*/
|
|
|
|
static taskq_ent_t *
|
|
|
|
taskq_find_list(taskq_t *tq, struct list_head *lh, taskqid_t id)
|
|
|
|
{
|
2019-12-14 00:07:48 +00:00
|
|
|
struct list_head *l = NULL;
|
2012-12-06 20:38:19 +00:00
|
|
|
taskq_ent_t *t;
|
|
|
|
|
|
|
|
list_for_each(l, lh) {
|
|
|
|
t = list_entry(l, taskq_ent_t, tqent_list);
|
|
|
|
|
|
|
|
if (t->tqent_id == id)
|
2014-11-05 22:30:35 +00:00
|
|
|
return (t);
|
2012-12-06 20:38:19 +00:00
|
|
|
|
|
|
|
if (t->tqent_id > id)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2014-11-05 22:30:35 +00:00
|
|
|
return (NULL);
|
2008-04-25 22:10:47 +00:00
|
|
|
}
|
|
|
|
|
2012-12-06 20:38:19 +00:00
|
|
|
/*
|
|
|
|
* Find an already dispatched task given the task id regardless of what
|
2017-08-04 16:57:58 +00:00
|
|
|
* state it is in. If a task is still pending it will be returned.
|
|
|
|
* If a task is executing, then -EBUSY will be returned instead.
|
|
|
|
* If the task has already been run then NULL is returned.
|
2012-12-06 20:38:19 +00:00
|
|
|
*/
|
|
|
|
static taskq_ent_t *
|
2017-08-04 16:57:58 +00:00
|
|
|
taskq_find(taskq_t *tq, taskqid_t id)
|
2012-12-06 20:38:19 +00:00
|
|
|
{
|
|
|
|
taskq_thread_t *tqt;
|
2019-12-14 00:07:48 +00:00
|
|
|
struct list_head *l = NULL;
|
2012-12-06 20:38:19 +00:00
|
|
|
taskq_ent_t *t;
|
|
|
|
|
|
|
|
t = taskq_find_list(tq, &tq->tq_delay_list, id);
|
|
|
|
if (t)
|
2014-11-05 22:30:35 +00:00
|
|
|
return (t);
|
2012-12-06 20:38:19 +00:00
|
|
|
|
|
|
|
t = taskq_find_list(tq, &tq->tq_prio_list, id);
|
|
|
|
if (t)
|
2014-11-05 22:30:35 +00:00
|
|
|
return (t);
|
2012-12-06 20:38:19 +00:00
|
|
|
|
|
|
|
t = taskq_find_list(tq, &tq->tq_pend_list, id);
|
|
|
|
if (t)
|
2014-11-05 22:30:35 +00:00
|
|
|
return (t);
|
2012-12-06 20:38:19 +00:00
|
|
|
|
|
|
|
list_for_each(l, &tq->tq_active_list) {
|
|
|
|
tqt = list_entry(l, taskq_thread_t, tqt_active_list);
|
|
|
|
if (tqt->tqt_id == id) {
|
2017-08-04 16:57:58 +00:00
|
|
|
/*
|
|
|
|
* Instead of returning tqt_task, we just return a non
|
|
|
|
* NULL value to prevent misuse, since tqt_task only
|
|
|
|
* has two valid fields.
|
|
|
|
*/
|
|
|
|
return (ERR_PTR(-EBUSY));
|
2012-12-06 20:38:19 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-11-05 22:30:35 +00:00
|
|
|
return (NULL);
|
2012-12-06 20:38:19 +00:00
|
|
|
}
|
|
|
|
|
2015-05-14 19:26:51 +00:00
|
|
|
/*
|
|
|
|
* Theory for the taskq_wait_id(), taskq_wait_outstanding(), and
|
|
|
|
* taskq_wait() functions below.
|
|
|
|
*
|
|
|
|
* Taskq waiting is accomplished by tracking the lowest outstanding task
|
|
|
|
* id and the next available task id. As tasks are dispatched they are
|
|
|
|
* added to the tail of the pending, priority, or delay lists. As worker
|
|
|
|
* threads become available the tasks are removed from the heads of these
|
|
|
|
* lists and linked to the worker threads. This ensures the lists are
|
|
|
|
* kept sorted by lowest to highest task id.
|
|
|
|
*
|
|
|
|
* Therefore the lowest outstanding task id can be quickly determined by
|
|
|
|
* checking the head item from all of these lists. This value is stored
|
|
|
|
* with the taskq as the lowest id. It only needs to be recalculated when
|
|
|
|
* either the task with the current lowest id completes or is canceled.
|
|
|
|
*
|
|
|
|
* By blocking until the lowest task id exceeds the passed task id the
|
|
|
|
* taskq_wait_outstanding() function can be easily implemented. Similarly,
|
|
|
|
* by blocking until the lowest task id matches the next task id taskq_wait()
|
|
|
|
* can be implemented.
|
|
|
|
*
|
|
|
|
* Callers should be aware that when there are multiple worker threads it
|
|
|
|
* is possible for larger task ids to complete before smaller ones. Also
|
|
|
|
* when the taskq contains delay tasks with small task ids callers may
|
|
|
|
* block for a considerable length of time waiting for them to expire and
|
|
|
|
* execute.
|
|
|
|
*/
|
2013-04-29 20:47:59 +00:00
|
|
|
static int
|
|
|
|
taskq_wait_id_check(taskq_t *tq, taskqid_t id)
|
2008-02-26 20:36:04 +00:00
|
|
|
{
|
2013-04-29 20:47:59 +00:00
|
|
|
int rc;
|
2015-12-03 23:06:03 +00:00
|
|
|
unsigned long flags;
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
|
2017-08-04 16:57:58 +00:00
|
|
|
rc = (taskq_find(tq, id) == NULL);
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
2012-12-06 20:38:19 +00:00
|
|
|
|
2013-04-29 20:47:59 +00:00
|
|
|
return (rc);
|
|
|
|
}
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2013-04-29 20:47:59 +00:00
|
|
|
/*
|
|
|
|
* The taskq_wait_id() function blocks until the passed task id completes.
|
|
|
|
* This does not guarantee that all lower task ids have completed.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
taskq_wait_id(taskq_t *tq, taskqid_t id)
|
|
|
|
{
|
|
|
|
wait_event(tq->tq_wait_waitq, taskq_wait_id_check(tq, id));
|
2008-04-25 22:10:47 +00:00
|
|
|
}
|
2012-12-06 21:04:27 +00:00
|
|
|
EXPORT_SYMBOL(taskq_wait_id);
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2012-12-06 20:38:19 +00:00
|
|
|
static int
|
2015-05-14 19:26:51 +00:00
|
|
|
taskq_wait_outstanding_check(taskq_t *tq, taskqid_t id)
|
2012-12-06 20:38:19 +00:00
|
|
|
{
|
|
|
|
int rc;
|
2015-12-03 23:06:03 +00:00
|
|
|
unsigned long flags;
|
2012-12-06 20:38:19 +00:00
|
|
|
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
|
2012-12-06 20:38:19 +00:00
|
|
|
rc = (id < tq->tq_lowest_id);
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
2012-12-06 20:38:19 +00:00
|
|
|
|
2014-11-05 22:30:35 +00:00
|
|
|
return (rc);
|
2012-12-06 20:38:19 +00:00
|
|
|
}
|
|
|
|
|
2015-05-14 19:26:51 +00:00
|
|
|
/*
|
|
|
|
* The taskq_wait_outstanding() function will block until all tasks with a
|
|
|
|
* lower taskqid than the passed 'id' have been completed. Note that all
|
|
|
|
* task id's are assigned monotonically at dispatch time. Zero may be
|
|
|
|
* passed for the id to indicate all tasks dispatch up to this point,
|
|
|
|
* but not after, should be waited for.
|
|
|
|
*/
|
2012-12-06 20:38:19 +00:00
|
|
|
void
|
2015-05-14 19:26:51 +00:00
|
|
|
taskq_wait_outstanding(taskq_t *tq, taskqid_t id)
|
2012-12-06 20:38:19 +00:00
|
|
|
{
|
2016-05-23 21:12:22 +00:00
|
|
|
id = id ? id : tq->tq_next_id - 1;
|
|
|
|
wait_event(tq->tq_wait_waitq, taskq_wait_outstanding_check(tq, id));
|
2012-12-06 20:38:19 +00:00
|
|
|
}
|
2015-05-14 19:26:51 +00:00
|
|
|
EXPORT_SYMBOL(taskq_wait_outstanding);
|
2012-12-06 20:38:19 +00:00
|
|
|
|
2015-05-14 19:26:51 +00:00
|
|
|
static int
|
|
|
|
taskq_wait_check(taskq_t *tq)
|
2008-04-25 22:10:47 +00:00
|
|
|
{
|
2015-05-14 19:26:51 +00:00
|
|
|
int rc;
|
2015-12-03 23:06:03 +00:00
|
|
|
unsigned long flags;
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
|
2015-05-14 19:26:51 +00:00
|
|
|
rc = (tq->tq_lowest_id == tq->tq_next_id);
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2015-05-14 19:26:51 +00:00
|
|
|
return (rc);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The taskq_wait() function will block until the taskq is empty.
|
|
|
|
* This means that if a taskq re-dispatches work to itself taskq_wait()
|
|
|
|
* callers will block indefinitely.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
taskq_wait(taskq_t *tq)
|
|
|
|
{
|
|
|
|
wait_event(tq->tq_wait_waitq, taskq_wait_check(tq));
|
2008-04-25 22:10:47 +00:00
|
|
|
}
|
2012-12-06 21:04:27 +00:00
|
|
|
EXPORT_SYMBOL(taskq_wait);
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2015-12-09 01:04:31 +00:00
|
|
|
int
|
2015-12-02 22:52:46 +00:00
|
|
|
taskq_member(taskq_t *tq, kthread_t *t)
|
2015-12-09 01:04:31 +00:00
|
|
|
{
|
2015-12-02 22:52:46 +00:00
|
|
|
return (tq == (taskq_t *)tsd_get_by_thread(taskq_tsd, t));
|
2015-12-09 01:04:31 +00:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(taskq_member);
|
|
|
|
|
2020-03-03 18:29:38 +00:00
|
|
|
taskq_t *
|
|
|
|
taskq_of_curthread(void)
|
|
|
|
{
|
|
|
|
return (tsd_get(taskq_tsd));
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(taskq_of_curthread);
|
|
|
|
|
2012-12-06 20:38:19 +00:00
|
|
|
/*
|
|
|
|
* Cancel an already dispatched task given the task id. Still pending tasks
|
|
|
|
* will be immediately canceled, and if the task is active the function will
|
|
|
|
* block until it completes. Preallocated tasks which are canceled must be
|
|
|
|
* freed by the caller.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
taskq_cancel_id(taskq_t *tq, taskqid_t id)
|
|
|
|
{
|
|
|
|
taskq_ent_t *t;
|
|
|
|
int rc = ENOENT;
|
2015-12-03 23:06:03 +00:00
|
|
|
unsigned long flags;
|
2012-12-06 20:38:19 +00:00
|
|
|
|
|
|
|
ASSERT(tq);
|
|
|
|
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
|
2017-08-04 16:57:58 +00:00
|
|
|
t = taskq_find(tq, id);
|
|
|
|
if (t && t != ERR_PTR(-EBUSY)) {
|
2012-12-06 20:38:19 +00:00
|
|
|
list_del_init(&t->tqent_list);
|
|
|
|
t->tqent_flags |= TQENT_FLAG_CANCEL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* When canceling the lowest outstanding task id we
|
|
|
|
* must recalculate the new lowest outstanding id.
|
|
|
|
*/
|
|
|
|
if (tq->tq_lowest_id == t->tqent_id) {
|
|
|
|
tq->tq_lowest_id = taskq_lowest_id(tq);
|
|
|
|
ASSERT3S(tq->tq_lowest_id, >, t->tqent_id);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The task_expire() function takes the tq->tq_lock so drop
|
|
|
|
* drop the lock before synchronously cancelling the timer.
|
|
|
|
*/
|
|
|
|
if (timer_pending(&t->tqent_timer)) {
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
2012-12-06 20:38:19 +00:00
|
|
|
del_timer_sync(&t->tqent_timer);
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_lock_irqsave_nested(&tq->tq_lock, flags,
|
|
|
|
tq->tq_lock_class);
|
2012-12-06 20:38:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (!(t->tqent_flags & TQENT_FLAG_PREALLOC))
|
|
|
|
task_done(tq, t);
|
|
|
|
|
|
|
|
rc = 0;
|
|
|
|
}
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
2012-12-06 20:38:19 +00:00
|
|
|
|
2017-08-04 16:57:58 +00:00
|
|
|
if (t == ERR_PTR(-EBUSY)) {
|
2012-12-06 20:38:19 +00:00
|
|
|
taskq_wait_id(tq, id);
|
|
|
|
rc = EBUSY;
|
|
|
|
}
|
|
|
|
|
2014-11-05 22:30:35 +00:00
|
|
|
return (rc);
|
2012-12-06 20:38:19 +00:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(taskq_cancel_id);
|
|
|
|
|
2015-11-06 23:00:55 +00:00
|
|
|
/*
 * Forward declaration: taskq_thread_spawn() is defined further down in this
 * file but is called by the dispatch functions that follow.
 */
static int taskq_thread_spawn(taskq_t *tq);
|
2015-08-27 16:13:20 +00:00
|
|
|
|
2008-04-25 22:10:47 +00:00
|
|
|
taskqid_t
|
2012-12-06 21:04:27 +00:00
|
|
|
taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
|
2008-04-25 22:10:47 +00:00
|
|
|
{
|
2012-12-06 20:57:42 +00:00
|
|
|
taskq_ent_t *t;
|
2016-10-28 21:23:30 +00:00
|
|
|
taskqid_t rc = TASKQID_INVALID;
|
2015-12-03 23:06:03 +00:00
|
|
|
unsigned long irqflags;
|
2008-02-26 20:36:04 +00:00
|
|
|
|
2012-12-06 20:57:42 +00:00
|
|
|
ASSERT(tq);
|
|
|
|
ASSERT(func);
|
2010-04-23 21:39:47 +00:00
|
|
|
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class);
|
2008-02-26 20:36:04 +00:00
|
|
|
|
2008-04-25 22:10:47 +00:00
|
|
|
/* Taskq being destroyed and all tasks drained */
|
2015-06-08 21:36:27 +00:00
|
|
|
if (!(tq->tq_flags & TASKQ_ACTIVE))
|
2014-11-05 22:30:35 +00:00
|
|
|
goto out;
|
2008-02-26 20:36:04 +00:00
|
|
|
|
2008-04-25 22:10:47 +00:00
|
|
|
/* Do not queue the task unless there is idle thread for it */
|
|
|
|
ASSERT(tq->tq_nactive <= tq->tq_nthreads);
|
2016-02-08 19:20:05 +00:00
|
|
|
if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads)) {
|
|
|
|
/* Dynamic taskq may be able to spawn another thread */
|
2018-02-07 19:49:38 +00:00
|
|
|
if (!(tq->tq_flags & TASKQ_DYNAMIC) ||
|
|
|
|
taskq_thread_spawn(tq) == 0)
|
2016-02-08 19:20:05 +00:00
|
|
|
goto out;
|
|
|
|
}
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2015-12-03 23:06:03 +00:00
|
|
|
if ((t = task_alloc(tq, flags, &irqflags)) == NULL)
|
2014-11-05 22:30:35 +00:00
|
|
|
goto out;
|
2008-02-26 20:36:04 +00:00
|
|
|
|
2011-11-11 23:06:35 +00:00
|
|
|
spin_lock(&t->tqent_lock);
|
2010-07-01 17:07:51 +00:00
|
|
|
|
2016-02-08 19:20:05 +00:00
|
|
|
/* Queue to the front of the list to enforce TQ_NOQUEUE semantics */
|
|
|
|
if (flags & TQ_NOQUEUE)
|
|
|
|
list_add(&t->tqent_list, &tq->tq_prio_list);
|
2010-07-01 17:07:51 +00:00
|
|
|
/* Queue to the priority list instead of the pending list */
|
2016-02-08 19:20:05 +00:00
|
|
|
else if (flags & TQ_FRONT)
|
2011-11-11 23:06:35 +00:00
|
|
|
list_add_tail(&t->tqent_list, &tq->tq_prio_list);
|
2010-07-01 17:07:51 +00:00
|
|
|
else
|
2011-11-11 23:06:35 +00:00
|
|
|
list_add_tail(&t->tqent_list, &tq->tq_pend_list);
|
2010-07-01 17:07:51 +00:00
|
|
|
|
2011-11-11 23:06:35 +00:00
|
|
|
t->tqent_id = rc = tq->tq_next_id;
|
2008-04-25 22:10:47 +00:00
|
|
|
tq->tq_next_id++;
|
2012-12-06 20:57:42 +00:00
|
|
|
t->tqent_func = func;
|
|
|
|
t->tqent_arg = arg;
|
2012-12-06 20:38:19 +00:00
|
|
|
t->tqent_taskq = tq;
|
|
|
|
t->tqent_timer.function = NULL;
|
|
|
|
t->tqent_timer.expires = 0;
|
Add tracepoints for taskq entry lifetime events
This adds some new DTRACE_PROBE* endpoints so that we can observe taskq
latencies on a system. Additionally, a new "taskqlatency.bt" script is
added to do this observation via "bpftrace". Lastly, a "zfs-trace.sh"
script is added to wrap "bpftrace" with the proper options required to
run and use "taskqlatency.bt".
For example, with these changes in place, a user can run the following:
$ cd ./contrib/bpftrace
$ sudo ./zfs-trace.sh taskqlatency.bt
Attaching 6 probes...
^C
Here's some example output, showing latency information for time spent
executing the taskq entry's function:
@exec_lat_us[dp_sync_taskq, userquota_updates_task]:
[2, 4) 5 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[4, 8) 0 | |
[8, 16) 1 |@@@@@@@@@@ |
[16, 32) 2 |@@@@@@@@@@@@@@@@@@@@ |
@exec_lat_us[z_wr_int_h, zio_execute]:
[8, 16) 16 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 2 |@@@@@@ |
@exec_lat_us[z_wr_iss_h, zio_execute]:
[16, 32) 4 |@@@@@@@@@@@@@@@@ |
[32, 64) 13 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[64, 128) 1 |@@@@ |
@exec_lat_us[z_ioctl_int, zio_execute]:
[2, 4) 1 |@@@@ |
[4, 8) 11 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[8, 16) 8 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
@exec_lat_us[dp_sync_taskq, sync_dnodes_task]:
[2, 4) 1 |@@@@@@ |
[4, 8) 7 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 8 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 2 |@@@@@@@@@@@@@ |
[32, 64) 4 |@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[64, 128) 1 |@@@@@@ |
[128, 256) 0 | |
[256, 512) 1 |@@@@@@
Here's some example output, showing latency information for time spent
waiting on the taskq, prior to starting execution of entry's function:
@queue_lat_us[dp_sync_taskq]:
[2, 4) 1 |@@@@ |
[4, 8) 7 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 2 |@@@@@@@@ |
[16, 32) 3 |@@@@@@@@@@@@@ |
[32, 64) 12 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[64, 128) 6 |@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[128, 256) 0 | |
[256, 512) 1 |@@@@ |
@queue_lat_us[z_wr_iss]:
[4, 8) 4 |@@@@ |
[8, 16) 13 |@@@@@@@@@@@@@@@ |
[16, 32) 6 |@@@@@@@ |
[32, 64) 2 |@@ |
[64, 128) 12 |@@@@@@@@@@@@@@ |
[128, 256) 15 |@@@@@@@@@@@@@@@@@@ |
[256, 512) 33 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[512, 1K) 27 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[1K, 2K) 7 |@@@@@@@@ |
[2K, 4K) 14 |@@@@@@@@@@@@@@@@ |
[4K, 8K) 14 |@@@@@@@@@@@@@@@@ |
[8K, 16K) 23 |@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[16K, 32K) 43 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
@queue_lat_us[z_wr_int]:
[2, 4) 10 |@@@@@ |
[4, 8) 71 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 88 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 50 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[32, 64) 65 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[64, 128) 43 |@@@@@@@@@@@@@@@@@@@@@@@@@ |
[128, 256) 19 |@@@@@@@@@@@ |
[256, 512) 3 |@ |
[512, 1K) 1 | |
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Prakash Surya <prakash.surya@delphix.com>
Closes #9525
2019-10-24 21:13:41 +00:00
|
|
|
|
2016-01-28 00:55:14 +00:00
|
|
|
t->tqent_birth = jiffies;
|
Add tracepoints for taskq entry lifetime events
This adds some new DTRACE_PROBE* endpoints so that we can observe taskq
latencies on a system. Additionally, a new "taskqlatency.bt" script is
added to do this observation via "bpftrace". Lastly, a "zfs-trace.sh"
script is added to wrap "bpftrace" with the proper options required to
run and use "taskqlatency.bt".
For example, with these changes in place, a user can run the following:
$ cd ./contrib/bpftrace
$ sudo ./zfs-trace.sh taskqlatency.bt
Attaching 6 probes...
^C
Here's some example output, showing latency information for time spent
executing the taskq entry's function:
@exec_lat_us[dp_sync_taskq, userquota_updates_task]:
[2, 4) 5 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[4, 8) 0 | |
[8, 16) 1 |@@@@@@@@@@ |
[16, 32) 2 |@@@@@@@@@@@@@@@@@@@@ |
@exec_lat_us[z_wr_int_h, zio_execute]:
[8, 16) 16 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 2 |@@@@@@ |
@exec_lat_us[z_wr_iss_h, zio_execute]:
[16, 32) 4 |@@@@@@@@@@@@@@@@ |
[32, 64) 13 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[64, 128) 1 |@@@@ |
@exec_lat_us[z_ioctl_int, zio_execute]:
[2, 4) 1 |@@@@ |
[4, 8) 11 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[8, 16) 8 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
@exec_lat_us[dp_sync_taskq, sync_dnodes_task]:
[2, 4) 1 |@@@@@@ |
[4, 8) 7 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 8 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 2 |@@@@@@@@@@@@@ |
[32, 64) 4 |@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[64, 128) 1 |@@@@@@ |
[128, 256) 0 | |
[256, 512) 1 |@@@@@@
Here's some example output, showing latency information for time spent
waiting on the taskq, prior to starting execution of entry's function:
@queue_lat_us[dp_sync_taskq]:
[2, 4) 1 |@@@@ |
[4, 8) 7 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 2 |@@@@@@@@ |
[16, 32) 3 |@@@@@@@@@@@@@ |
[32, 64) 12 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[64, 128) 6 |@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[128, 256) 0 | |
[256, 512) 1 |@@@@ |
@queue_lat_us[z_wr_iss]:
[4, 8) 4 |@@@@ |
[8, 16) 13 |@@@@@@@@@@@@@@@ |
[16, 32) 6 |@@@@@@@ |
[32, 64) 2 |@@ |
[64, 128) 12 |@@@@@@@@@@@@@@ |
[128, 256) 15 |@@@@@@@@@@@@@@@@@@ |
[256, 512) 33 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[512, 1K) 27 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[1K, 2K) 7 |@@@@@@@@ |
[2K, 4K) 14 |@@@@@@@@@@@@@@@@ |
[4K, 8K) 14 |@@@@@@@@@@@@@@@@ |
[8K, 16K) 23 |@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[16K, 32K) 43 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
@queue_lat_us[z_wr_int]:
[2, 4) 10 |@@@@@ |
[4, 8) 71 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 88 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 50 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[32, 64) 65 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[64, 128) 43 |@@@@@@@@@@@@@@@@@@@@@@@@@ |
[128, 256) 19 |@@@@@@@@@@@ |
[256, 512) 3 |@ |
[512, 1K) 1 | |
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Prakash Surya <prakash.surya@delphix.com>
Closes #9525
2019-10-24 21:13:41 +00:00
|
|
|
DTRACE_PROBE1(taskq_ent__birth, taskq_ent_t *, t);
|
2011-12-06 18:04:51 +00:00
|
|
|
|
|
|
|
ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
|
|
|
|
|
2011-11-11 23:06:35 +00:00
|
|
|
spin_unlock(&t->tqent_lock);
|
2012-01-19 19:36:27 +00:00
|
|
|
|
|
|
|
wake_up(&tq->tq_work_waitq);
|
2008-04-25 22:10:47 +00:00
|
|
|
out:
|
2015-08-27 16:13:20 +00:00
|
|
|
/* Spawn additional taskq threads if required. */
|
2016-02-08 19:20:05 +00:00
|
|
|
if (!(flags & TQ_NOQUEUE) && tq->tq_nactive == tq->tq_nthreads)
|
2015-11-06 23:00:55 +00:00
|
|
|
(void) taskq_thread_spawn(tq);
|
2015-08-27 16:13:20 +00:00
|
|
|
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, irqflags);
|
2014-11-05 22:30:35 +00:00
|
|
|
return (rc);
|
2008-02-26 20:36:04 +00:00
|
|
|
}
|
2012-12-06 21:04:27 +00:00
|
|
|
EXPORT_SYMBOL(taskq_dispatch);
|
2011-12-06 18:04:51 +00:00
|
|
|
|
2012-12-06 20:38:19 +00:00
|
|
|
taskqid_t
|
|
|
|
taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg,
|
|
|
|
uint_t flags, clock_t expire_time)
|
|
|
|
{
|
2016-10-28 21:23:30 +00:00
|
|
|
taskqid_t rc = TASKQID_INVALID;
|
2014-11-05 22:30:35 +00:00
|
|
|
taskq_ent_t *t;
|
2015-12-03 23:06:03 +00:00
|
|
|
unsigned long irqflags;
|
2012-12-06 20:38:19 +00:00
|
|
|
|
|
|
|
ASSERT(tq);
|
|
|
|
ASSERT(func);
|
|
|
|
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class);
|
2012-12-06 20:38:19 +00:00
|
|
|
|
|
|
|
/* Taskq being destroyed and all tasks drained */
|
2015-06-08 21:36:27 +00:00
|
|
|
if (!(tq->tq_flags & TASKQ_ACTIVE))
|
2014-11-05 22:30:35 +00:00
|
|
|
goto out;
|
2012-12-06 20:38:19 +00:00
|
|
|
|
2015-12-03 23:06:03 +00:00
|
|
|
if ((t = task_alloc(tq, flags, &irqflags)) == NULL)
|
2014-11-05 22:30:35 +00:00
|
|
|
goto out;
|
2012-12-06 20:38:19 +00:00
|
|
|
|
|
|
|
spin_lock(&t->tqent_lock);
|
|
|
|
|
|
|
|
/* Queue to the delay list for subsequent execution */
|
|
|
|
list_add_tail(&t->tqent_list, &tq->tq_delay_list);
|
|
|
|
|
|
|
|
t->tqent_id = rc = tq->tq_next_id;
|
|
|
|
tq->tq_next_id++;
|
|
|
|
t->tqent_func = func;
|
|
|
|
t->tqent_arg = arg;
|
|
|
|
t->tqent_taskq = tq;
|
|
|
|
t->tqent_timer.function = task_expire;
|
|
|
|
t->tqent_timer.expires = (unsigned long)expire_time;
|
|
|
|
add_timer(&t->tqent_timer);
|
|
|
|
|
|
|
|
ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
|
|
|
|
|
|
|
|
spin_unlock(&t->tqent_lock);
|
|
|
|
out:
|
2015-08-27 16:13:20 +00:00
|
|
|
/* Spawn additional taskq threads if required. */
|
2015-11-06 23:00:55 +00:00
|
|
|
if (tq->tq_nactive == tq->tq_nthreads)
|
|
|
|
(void) taskq_thread_spawn(tq);
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, irqflags);
|
2014-11-05 22:30:35 +00:00
|
|
|
return (rc);
|
2012-12-06 20:38:19 +00:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(taskq_dispatch_delay);
|
|
|
|
|
2011-12-06 18:04:51 +00:00
|
|
|
void
|
2012-12-06 21:04:27 +00:00
|
|
|
taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
|
2015-12-12 00:15:50 +00:00
|
|
|
taskq_ent_t *t)
|
2011-12-06 18:04:51 +00:00
|
|
|
{
|
2015-12-03 23:06:03 +00:00
|
|
|
unsigned long irqflags;
|
2011-12-06 18:04:51 +00:00
|
|
|
ASSERT(tq);
|
|
|
|
ASSERT(func);
|
|
|
|
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_lock_irqsave_nested(&tq->tq_lock, irqflags,
|
2015-10-13 23:56:51 +00:00
|
|
|
tq->tq_lock_class);
|
2011-12-06 18:04:51 +00:00
|
|
|
|
|
|
|
/* Taskq being destroyed and all tasks drained */
|
2015-06-08 21:36:27 +00:00
|
|
|
if (!(tq->tq_flags & TASKQ_ACTIVE)) {
|
2016-10-28 21:23:30 +00:00
|
|
|
t->tqent_id = TASKQID_INVALID;
|
2011-12-06 18:04:51 +00:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2016-02-08 19:20:05 +00:00
|
|
|
if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads)) {
|
|
|
|
/* Dynamic taskq may be able to spawn another thread */
|
2018-02-07 19:49:38 +00:00
|
|
|
if (!(tq->tq_flags & TASKQ_DYNAMIC) ||
|
|
|
|
taskq_thread_spawn(tq) == 0)
|
2016-02-08 19:20:05 +00:00
|
|
|
goto out2;
|
|
|
|
flags |= TQ_FRONT;
|
|
|
|
}
|
|
|
|
|
2011-12-06 18:04:51 +00:00
|
|
|
spin_lock(&t->tqent_lock);
|
|
|
|
|
2017-08-08 15:31:52 +00:00
|
|
|
/*
|
|
|
|
* Make sure the entry is not on some other taskq; it is important to
|
|
|
|
* ASSERT() under lock
|
|
|
|
*/
|
|
|
|
ASSERT(taskq_empty_ent(t));
|
|
|
|
|
2011-12-06 18:04:51 +00:00
|
|
|
/*
|
|
|
|
* Mark it as a prealloc'd task. This is important
|
|
|
|
* to ensure that we don't free it later.
|
|
|
|
*/
|
|
|
|
t->tqent_flags |= TQENT_FLAG_PREALLOC;
|
|
|
|
|
|
|
|
/* Queue to the priority list instead of the pending list */
|
|
|
|
if (flags & TQ_FRONT)
|
|
|
|
list_add_tail(&t->tqent_list, &tq->tq_prio_list);
|
|
|
|
else
|
|
|
|
list_add_tail(&t->tqent_list, &tq->tq_pend_list);
|
|
|
|
|
|
|
|
t->tqent_id = tq->tq_next_id;
|
|
|
|
tq->tq_next_id++;
|
|
|
|
t->tqent_func = func;
|
|
|
|
t->tqent_arg = arg;
|
2012-12-06 20:38:19 +00:00
|
|
|
t->tqent_taskq = tq;
|
Add tracepoints for taskq entry lifetime events
This adds some new DTRACE_PROBE* endpoints so that we can observe taskq
latencies on a system. Additionally, a new "taskqlatency.bt" script is
added to do this observation via "bpftrace". Lastly, a "zfs-trace.sh"
script is added to wrap "bpftrace" with the proper options required to
run and use "taskqlatency.bt".
For example, with these changes in place, a user can run the following:
$ cd ./contrib/bpftrace
$ sudo ./zfs-trace.sh taskqlatency.bt
Attaching 6 probes...
^C
Here's some example output, showing latency information for time spent
executing the taskq entry's function:
@exec_lat_us[dp_sync_taskq, userquota_updates_task]:
[2, 4) 5 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[4, 8) 0 | |
[8, 16) 1 |@@@@@@@@@@ |
[16, 32) 2 |@@@@@@@@@@@@@@@@@@@@ |
@exec_lat_us[z_wr_int_h, zio_execute]:
[8, 16) 16 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 2 |@@@@@@ |
@exec_lat_us[z_wr_iss_h, zio_execute]:
[16, 32) 4 |@@@@@@@@@@@@@@@@ |
[32, 64) 13 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[64, 128) 1 |@@@@ |
@exec_lat_us[z_ioctl_int, zio_execute]:
[2, 4) 1 |@@@@ |
[4, 8) 11 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[8, 16) 8 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
@exec_lat_us[dp_sync_taskq, sync_dnodes_task]:
[2, 4) 1 |@@@@@@ |
[4, 8) 7 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 8 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 2 |@@@@@@@@@@@@@ |
[32, 64) 4 |@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[64, 128) 1 |@@@@@@ |
[128, 256) 0 | |
[256, 512) 1 |@@@@@@
Here's some example output, showing latency information for time spent
waiting on the taskq, prior to starting execution of entry's function:
@queue_lat_us[dp_sync_taskq]:
[2, 4) 1 |@@@@ |
[4, 8) 7 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 2 |@@@@@@@@ |
[16, 32) 3 |@@@@@@@@@@@@@ |
[32, 64) 12 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[64, 128) 6 |@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[128, 256) 0 | |
[256, 512) 1 |@@@@ |
@queue_lat_us[z_wr_iss]:
[4, 8) 4 |@@@@ |
[8, 16) 13 |@@@@@@@@@@@@@@@ |
[16, 32) 6 |@@@@@@@ |
[32, 64) 2 |@@ |
[64, 128) 12 |@@@@@@@@@@@@@@ |
[128, 256) 15 |@@@@@@@@@@@@@@@@@@ |
[256, 512) 33 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[512, 1K) 27 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[1K, 2K) 7 |@@@@@@@@ |
[2K, 4K) 14 |@@@@@@@@@@@@@@@@ |
[4K, 8K) 14 |@@@@@@@@@@@@@@@@ |
[8K, 16K) 23 |@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[16K, 32K) 43 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
@queue_lat_us[z_wr_int]:
[2, 4) 10 |@@@@@ |
[4, 8) 71 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 88 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 50 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[32, 64) 65 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[64, 128) 43 |@@@@@@@@@@@@@@@@@@@@@@@@@ |
[128, 256) 19 |@@@@@@@@@@@ |
[256, 512) 3 |@ |
[512, 1K) 1 | |
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Prakash Surya <prakash.surya@delphix.com>
Closes #9525
2019-10-24 21:13:41 +00:00
|
|
|
|
2016-01-28 00:55:14 +00:00
|
|
|
t->tqent_birth = jiffies;
|
Add tracepoints for taskq entry lifetime events
This adds some new DTRACE_PROBE* endpoints so that we can observe taskq
latencies on a system. Additionally, a new "taskqlatency.bt" script is
added to do this observation via "bpftrace". Lastly, a "zfs-trace.sh"
script is added to wrap "bpftrace" with the proper options required to
run and use "taskqlatency.bt".
For example, with these changes in place, a user can run the following:
$ cd ./contrib/bpftrace
$ sudo ./zfs-trace.sh taskqlatency.bt
Attaching 6 probes...
^C
Here's some example output, showing latency information for time spent
executing the taskq entry's function:
@exec_lat_us[dp_sync_taskq, userquota_updates_task]:
[2, 4) 5 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[4, 8) 0 | |
[8, 16) 1 |@@@@@@@@@@ |
[16, 32) 2 |@@@@@@@@@@@@@@@@@@@@ |
@exec_lat_us[z_wr_int_h, zio_execute]:
[8, 16) 16 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 2 |@@@@@@ |
@exec_lat_us[z_wr_iss_h, zio_execute]:
[16, 32) 4 |@@@@@@@@@@@@@@@@ |
[32, 64) 13 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[64, 128) 1 |@@@@ |
@exec_lat_us[z_ioctl_int, zio_execute]:
[2, 4) 1 |@@@@ |
[4, 8) 11 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[8, 16) 8 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
@exec_lat_us[dp_sync_taskq, sync_dnodes_task]:
[2, 4) 1 |@@@@@@ |
[4, 8) 7 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 8 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 2 |@@@@@@@@@@@@@ |
[32, 64) 4 |@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[64, 128) 1 |@@@@@@ |
[128, 256) 0 | |
[256, 512) 1 |@@@@@@
Here's some example output, showing latency information for time spent
waiting on the taskq, prior to starting execution of entry's function:
@queue_lat_us[dp_sync_taskq]:
[2, 4) 1 |@@@@ |
[4, 8) 7 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 2 |@@@@@@@@ |
[16, 32) 3 |@@@@@@@@@@@@@ |
[32, 64) 12 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[64, 128) 6 |@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[128, 256) 0 | |
[256, 512) 1 |@@@@ |
@queue_lat_us[z_wr_iss]:
[4, 8) 4 |@@@@ |
[8, 16) 13 |@@@@@@@@@@@@@@@ |
[16, 32) 6 |@@@@@@@ |
[32, 64) 2 |@@ |
[64, 128) 12 |@@@@@@@@@@@@@@ |
[128, 256) 15 |@@@@@@@@@@@@@@@@@@ |
[256, 512) 33 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[512, 1K) 27 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[1K, 2K) 7 |@@@@@@@@ |
[2K, 4K) 14 |@@@@@@@@@@@@@@@@ |
[4K, 8K) 14 |@@@@@@@@@@@@@@@@ |
[8K, 16K) 23 |@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[16K, 32K) 43 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
@queue_lat_us[z_wr_int]:
[2, 4) 10 |@@@@@ |
[4, 8) 71 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 88 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 50 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[32, 64) 65 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[64, 128) 43 |@@@@@@@@@@@@@@@@@@@@@@@@@ |
[128, 256) 19 |@@@@@@@@@@@ |
[256, 512) 3 |@ |
[512, 1K) 1 | |
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Prakash Surya <prakash.surya@delphix.com>
Closes #9525
2019-10-24 21:13:41 +00:00
|
|
|
DTRACE_PROBE1(taskq_ent__birth, taskq_ent_t *, t);
|
2011-12-06 18:04:51 +00:00
|
|
|
|
|
|
|
spin_unlock(&t->tqent_lock);
|
|
|
|
|
|
|
|
wake_up(&tq->tq_work_waitq);
|
|
|
|
out:
|
2015-08-27 16:13:20 +00:00
|
|
|
/* Spawn additional taskq threads if required. */
|
2015-11-06 23:00:55 +00:00
|
|
|
if (tq->tq_nactive == tq->tq_nthreads)
|
|
|
|
(void) taskq_thread_spawn(tq);
|
2016-02-08 19:20:05 +00:00
|
|
|
out2:
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, irqflags);
|
2011-12-06 18:04:51 +00:00
|
|
|
}
|
2012-12-06 21:04:27 +00:00
|
|
|
EXPORT_SYMBOL(taskq_dispatch_ent);
|
2011-12-06 18:04:51 +00:00
|
|
|
|
|
|
|
int
|
2012-12-06 21:04:27 +00:00
|
|
|
taskq_empty_ent(taskq_ent_t *t)
|
2011-12-06 18:04:51 +00:00
|
|
|
{
|
2015-12-12 00:15:50 +00:00
|
|
|
return (list_empty(&t->tqent_list));
|
2011-12-06 18:04:51 +00:00
|
|
|
}
|
2012-12-06 21:04:27 +00:00
|
|
|
EXPORT_SYMBOL(taskq_empty_ent);
|
2011-12-06 18:04:51 +00:00
|
|
|
|
|
|
|
void
|
2012-12-06 21:04:27 +00:00
|
|
|
taskq_init_ent(taskq_ent_t *t)
|
2011-12-06 18:04:51 +00:00
|
|
|
{
|
|
|
|
spin_lock_init(&t->tqent_lock);
|
2012-12-06 20:38:19 +00:00
|
|
|
init_waitqueue_head(&t->tqent_waitq);
|
2017-12-21 18:56:32 +00:00
|
|
|
timer_setup(&t->tqent_timer, NULL, 0);
|
2011-12-06 18:04:51 +00:00
|
|
|
INIT_LIST_HEAD(&t->tqent_list);
|
|
|
|
t->tqent_id = 0;
|
|
|
|
t->tqent_func = NULL;
|
|
|
|
t->tqent_arg = NULL;
|
|
|
|
t->tqent_flags = 0;
|
2012-12-06 20:38:19 +00:00
|
|
|
t->tqent_taskq = NULL;
|
2011-12-06 18:04:51 +00:00
|
|
|
}
|
2012-12-06 21:04:27 +00:00
|
|
|
EXPORT_SYMBOL(taskq_init_ent);
|
2011-12-06 18:04:51 +00:00
|
|
|
|
2015-06-08 21:36:27 +00:00
|
|
|
/*
|
|
|
|
* Return the next pending task, preference is given to tasks on the
|
|
|
|
* priority list which were dispatched with TQ_FRONT.
|
|
|
|
*/
|
|
|
|
static taskq_ent_t *
|
|
|
|
taskq_next_ent(taskq_t *tq)
|
|
|
|
{
|
|
|
|
struct list_head *list;
|
|
|
|
|
|
|
|
if (!list_empty(&tq->tq_prio_list))
|
|
|
|
list = &tq->tq_prio_list;
|
|
|
|
else if (!list_empty(&tq->tq_pend_list))
|
|
|
|
list = &tq->tq_pend_list;
|
|
|
|
else
|
|
|
|
return (NULL);
|
|
|
|
|
|
|
|
return (list_entry(list->next, taskq_ent_t, tqent_list));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Spawns a new thread for the specified taskq.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
taskq_thread_spawn_task(void *arg)
|
|
|
|
{
|
|
|
|
taskq_t *tq = (taskq_t *)arg;
|
2015-12-03 23:06:03 +00:00
|
|
|
unsigned long flags;
|
2015-06-08 21:36:27 +00:00
|
|
|
|
2016-05-21 01:04:03 +00:00
|
|
|
if (taskq_thread_create(tq) == NULL) {
|
|
|
|
/* restore spawning count if failed */
|
2018-02-07 19:49:38 +00:00
|
|
|
spin_lock_irqsave_nested(&tq->tq_lock, flags,
|
|
|
|
tq->tq_lock_class);
|
2016-05-21 01:04:03 +00:00
|
|
|
tq->tq_nspawn--;
|
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
|
|
|
}
|
2015-06-08 21:36:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2015-10-13 23:56:51 +00:00
|
|
|
* Spawn addition threads for dynamic taskqs (TASKQ_DYNAMIC) the current
|
2015-06-08 21:36:27 +00:00
|
|
|
* number of threads is insufficient to handle the pending tasks. These
|
|
|
|
* new threads must be created by the dedicated dynamic_taskq to avoid
|
|
|
|
* deadlocks between thread creation and memory reclaim. The system_taskq
|
|
|
|
* which is also a dynamic taskq cannot be safely used for this.
|
|
|
|
*/
|
|
|
|
static int
|
2015-11-06 23:00:55 +00:00
|
|
|
taskq_thread_spawn(taskq_t *tq)
|
2015-06-08 21:36:27 +00:00
|
|
|
{
|
|
|
|
int spawning = 0;
|
|
|
|
|
|
|
|
if (!(tq->tq_flags & TASKQ_DYNAMIC))
|
|
|
|
return (0);
|
|
|
|
|
2015-11-06 23:00:55 +00:00
|
|
|
if ((tq->tq_nthreads + tq->tq_nspawn < tq->tq_maxthreads) &&
|
2015-06-08 21:36:27 +00:00
|
|
|
(tq->tq_flags & TASKQ_ACTIVE)) {
|
|
|
|
spawning = (++tq->tq_nspawn);
|
|
|
|
taskq_dispatch(dynamic_taskq, taskq_thread_spawn_task,
|
|
|
|
tq, TQ_NOSLEEP);
|
|
|
|
}
|
|
|
|
|
|
|
|
return (spawning);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Threads in a dynamic taskq should only exit once it has been completely
|
|
|
|
* drained and no other threads are actively servicing tasks. This prevents
|
|
|
|
* threads from being created and destroyed more than is required.
|
|
|
|
*
|
|
|
|
* The first thread is the thread list is treated as the primary thread.
|
|
|
|
* There is nothing special about the primary thread but in order to avoid
|
|
|
|
* all the taskq pids from changing we opt to make it long running.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
taskq_thread_should_stop(taskq_t *tq, taskq_thread_t *tqt)
|
|
|
|
{
|
|
|
|
if (!(tq->tq_flags & TASKQ_DYNAMIC))
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
if (list_first_entry(&(tq->tq_thread_list), taskq_thread_t,
|
|
|
|
tqt_thread_list) == tqt)
|
|
|
|
return (0);
|
|
|
|
|
2023-06-26 20:57:12 +00:00
|
|
|
int no_work =
|
2015-06-08 21:36:27 +00:00
|
|
|
((tq->tq_nspawn == 0) && /* No threads are being spawned */
|
|
|
|
(tq->tq_nactive == 0) && /* No threads are handling tasks */
|
|
|
|
(tq->tq_nthreads > 1) && /* More than 1 thread is running */
|
|
|
|
(!taskq_next_ent(tq)) && /* There are no pending tasks */
|
2015-12-12 00:15:50 +00:00
|
|
|
(spl_taskq_thread_dynamic)); /* Dynamic taskqs are allowed */
|
2023-06-26 20:57:12 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If we would have said stop before, let's instead wait a bit, maybe
|
|
|
|
* we'll see more work come our way soon...
|
|
|
|
*/
|
|
|
|
if (no_work) {
|
|
|
|
/* if it's 0, we want the old behavior. */
|
|
|
|
/* if the taskq is being torn down, we also want to go away. */
|
|
|
|
if (spl_taskq_thread_timeout_ms == 0 ||
|
|
|
|
!(tq->tq_flags & TASKQ_ACTIVE))
|
|
|
|
return (1);
|
|
|
|
unsigned long lasttime = tq->lastshouldstop;
|
|
|
|
if (lasttime > 0) {
|
|
|
|
if (time_after(jiffies, lasttime +
|
|
|
|
msecs_to_jiffies(spl_taskq_thread_timeout_ms)))
|
|
|
|
return (1);
|
|
|
|
else
|
|
|
|
return (0);
|
|
|
|
} else {
|
|
|
|
tq->lastshouldstop = jiffies;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
tq->lastshouldstop = 0;
|
|
|
|
}
|
|
|
|
return (0);
|
2015-06-08 21:36:27 +00:00
|
|
|
}
|
|
|
|
|
2008-04-25 22:10:47 +00:00
|
|
|
static int
|
|
|
|
taskq_thread(void *args)
|
|
|
|
{
|
2012-12-06 20:57:42 +00:00
|
|
|
DECLARE_WAITQUEUE(wait, current);
|
|
|
|
sigset_t blocked;
|
2011-12-06 01:32:48 +00:00
|
|
|
taskq_thread_t *tqt = args;
|
2012-12-06 20:57:42 +00:00
|
|
|
taskq_t *tq;
|
|
|
|
taskq_ent_t *t;
|
2015-06-08 21:36:27 +00:00
|
|
|
int seq_tasks = 0;
|
2015-12-03 23:06:03 +00:00
|
|
|
unsigned long flags;
|
2017-08-04 16:57:58 +00:00
|
|
|
taskq_ent_t dup_task = {};
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2012-12-06 20:57:42 +00:00
|
|
|
ASSERT(tqt);
|
2015-10-13 23:56:51 +00:00
|
|
|
ASSERT(tqt->tqt_tq);
|
2011-12-06 01:32:48 +00:00
|
|
|
tq = tqt->tqt_tq;
|
2012-12-06 20:57:42 +00:00
|
|
|
current->flags |= PF_NOFREEZE;
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2015-12-18 02:31:58 +00:00
|
|
|
(void) spl_fstrans_mark();
|
2015-09-07 16:35:21 +00:00
|
|
|
|
2012-12-06 20:57:42 +00:00
|
|
|
sigfillset(&blocked);
|
|
|
|
sigprocmask(SIG_BLOCK, &blocked, NULL);
|
|
|
|
flush_signals(current);
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2015-12-02 22:52:46 +00:00
|
|
|
tsd_set(taskq_tsd, tq);
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
|
2016-05-21 01:04:03 +00:00
|
|
|
/*
|
|
|
|
* If we are dynamically spawned, decrease spawning count. Note that
|
|
|
|
* we could be created during taskq_create, in which case we shouldn't
|
|
|
|
* do the decrement. But it's fine because taskq_create will reset
|
|
|
|
* tq_nspawn later.
|
|
|
|
*/
|
|
|
|
if (tq->tq_flags & TASKQ_DYNAMIC)
|
|
|
|
tq->tq_nspawn--;
|
2015-06-08 21:36:27 +00:00
|
|
|
|
|
|
|
/* Immediately exit if more threads than allowed were created. */
|
|
|
|
if (tq->tq_nthreads >= tq->tq_maxthreads)
|
|
|
|
goto error;
|
|
|
|
|
2012-12-06 20:57:42 +00:00
|
|
|
tq->tq_nthreads++;
|
2015-06-08 21:36:27 +00:00
|
|
|
list_add_tail(&tqt->tqt_thread_list, &tq->tq_thread_list);
|
2012-12-06 20:57:42 +00:00
|
|
|
wake_up(&tq->tq_wait_waitq);
|
|
|
|
set_current_state(TASK_INTERRUPTIBLE);
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2012-12-06 20:57:42 +00:00
|
|
|
while (!kthread_should_stop()) {
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2010-07-01 17:07:51 +00:00
|
|
|
if (list_empty(&tq->tq_pend_list) &&
|
|
|
|
list_empty(&tq->tq_prio_list)) {
|
2015-06-08 21:36:27 +00:00
|
|
|
|
|
|
|
if (taskq_thread_should_stop(tq, tqt)) {
|
|
|
|
wake_up_all(&tq->tq_wait_waitq);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2012-01-19 18:33:19 +00:00
|
|
|
add_wait_queue_exclusive(&tq->tq_work_waitq, &wait);
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
2015-06-08 21:36:27 +00:00
|
|
|
|
2008-04-25 22:10:47 +00:00
|
|
|
schedule();
|
2015-06-08 21:36:27 +00:00
|
|
|
seq_tasks = 0;
|
|
|
|
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_lock_irqsave_nested(&tq->tq_lock, flags,
|
|
|
|
tq->tq_lock_class);
|
2012-01-19 18:33:19 +00:00
|
|
|
remove_wait_queue(&tq->tq_work_waitq, &wait);
|
2008-04-25 22:10:47 +00:00
|
|
|
} else {
|
|
|
|
__set_current_state(TASK_RUNNING);
|
|
|
|
}
|
|
|
|
|
2015-06-08 21:36:27 +00:00
|
|
|
if ((t = taskq_next_ent(tq)) != NULL) {
|
2012-12-06 20:57:42 +00:00
|
|
|
list_del_init(&t->tqent_list);
|
Store copy of tqent_flags prior to servicing task
A preallocated taskq_ent_t's tqent_flags must be checked prior to
servicing the taskq_ent_t. Once a preallocated taskq entry is serviced,
the ownership of the entry is handed back to the caller of
taskq_dispatch, thus the entry's contents can potentially be mangled.
In particular, this is a problem in the case where a preallocated taskq
entry is serviced, and the caller clears it's tqent_flags field. Thus,
when the function returns and task_done is called, it looks as though
the entry is **not** a preallocated task (when in fact it **is** a
preallocated task).
In this situation, task_done will place the preallocated taskq_ent_t
structure onto the taskq_t's free list. This is a **huge** mistake. If
the taskq_ent_t is then freed by the caller of taskq_dispatch, the
taskq_t's free list will hold a pointer to garbage data. Even worse, if
nothing has over written the freed memory before the pointer is
dereferenced, it may still look as though it points to a valid list_head
belonging to a taskq_ent_t structure.
Thus, the task entry's flags are now copied prior to servicing the task.
This copy is then checked to see if it is a preallocated task, and
determine if the entry needs to be passed down to the task_done
function.
Signed-off-by: Prakash Surya <surya1@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #71
2011-12-16 22:57:31 +00:00
|
|
|
|
2015-12-12 00:15:50 +00:00
|
|
|
/*
|
2017-08-04 16:57:58 +00:00
|
|
|
* A TQENT_FLAG_PREALLOC task may be reused or freed
|
|
|
|
* during the task function call. Store tqent_id and
|
|
|
|
* tqent_flags here.
|
|
|
|
*
|
|
|
|
* Also use an on stack taskq_ent_t for tqt_task
|
Add tracepoints for taskq entry lifetime events
This adds some new DTRACE_PROBE* endpoints so that we can observe taskq
latencies on a system. Additionally, a new "taskqlatency.bt" script is
added to do this observation via "bpftrace". Lastly, a "zfs-trace.sh"
script is added to wrap "bpftrace" with the proper options required to
run and use "taskqlatency.bt".
For example, with these changes in place, a user can run the following:
$ cd ./contrib/bpftrace
$ sudo ./zfs-trace.sh taskqlatency.bt
Attaching 6 probes...
^C
Here's some example output, showing latency information for time spent
executing the taskq entry's function:
@exec_lat_us[dp_sync_taskq, userquota_updates_task]:
[2, 4) 5 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[4, 8) 0 | |
[8, 16) 1 |@@@@@@@@@@ |
[16, 32) 2 |@@@@@@@@@@@@@@@@@@@@ |
@exec_lat_us[z_wr_int_h, zio_execute]:
[8, 16) 16 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 2 |@@@@@@ |
@exec_lat_us[z_wr_iss_h, zio_execute]:
[16, 32) 4 |@@@@@@@@@@@@@@@@ |
[32, 64) 13 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[64, 128) 1 |@@@@ |
@exec_lat_us[z_ioctl_int, zio_execute]:
[2, 4) 1 |@@@@ |
[4, 8) 11 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[8, 16) 8 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
@exec_lat_us[dp_sync_taskq, sync_dnodes_task]:
[2, 4) 1 |@@@@@@ |
[4, 8) 7 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 8 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 2 |@@@@@@@@@@@@@ |
[32, 64) 4 |@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[64, 128) 1 |@@@@@@ |
[128, 256) 0 | |
[256, 512) 1 |@@@@@@
Here's some example output, showing latency information for time spent
waiting on the taskq, prior to starting execution of entry's function:
@queue_lat_us[dp_sync_taskq]:
[2, 4) 1 |@@@@ |
[4, 8) 7 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 2 |@@@@@@@@ |
[16, 32) 3 |@@@@@@@@@@@@@ |
[32, 64) 12 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[64, 128) 6 |@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[128, 256) 0 | |
[256, 512) 1 |@@@@ |
@queue_lat_us[z_wr_iss]:
[4, 8) 4 |@@@@ |
[8, 16) 13 |@@@@@@@@@@@@@@@ |
[16, 32) 6 |@@@@@@@ |
[32, 64) 2 |@@ |
[64, 128) 12 |@@@@@@@@@@@@@@ |
[128, 256) 15 |@@@@@@@@@@@@@@@@@@ |
[256, 512) 33 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[512, 1K) 27 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[1K, 2K) 7 |@@@@@@@@ |
[2K, 4K) 14 |@@@@@@@@@@@@@@@@ |
[4K, 8K) 14 |@@@@@@@@@@@@@@@@ |
[8K, 16K) 23 |@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[16K, 32K) 43 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
@queue_lat_us[z_wr_int]:
[2, 4) 10 |@@@@@ |
[4, 8) 71 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 88 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 50 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[32, 64) 65 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[64, 128) 43 |@@@@@@@@@@@@@@@@@@@@@@@@@ |
[128, 256) 19 |@@@@@@@@@@@ |
[256, 512) 3 |@ |
[512, 1K) 1 | |
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Prakash Surya <prakash.surya@delphix.com>
Closes #9525
2019-10-24 21:13:41 +00:00
|
|
|
* assignment in this case; we want to make sure
|
|
|
|
* to duplicate all fields, so the values are
|
|
|
|
* correct when it's accessed via DTRACE_PROBE*.
|
2015-12-12 00:15:50 +00:00
|
|
|
*/
|
Swap taskq_ent_t with taskqid_t in taskq_thread_t
The taskq_t's active thread list is sorted based on its
tqt_ent->tqent_id field. The list is kept sorted solely by inserting
new taskq_thread_t's in their correct sorted location; no other
means is used. This means that once inserted, if a taskq_thread_t's
tqt_ent->tqent_id field changes, the list runs the risk of no
longer being sorted.
Prior to the introduction of the taskq_dispatch_prealloc() interface,
this was not a problem as a taskq_ent_t actively being serviced under
the old interface should always have a static tqent_id field. Thus,
once the taskq_thread_t is added to the taskq_t's active thread list,
the taskq_thread_t's tqt_ent->tqent_id field would remain constant.
Now, this is no longer the case. Currently, if using the
taskq_dispatch_prealloc() interface, any given taskq_ent_t actively
being serviced _may_ have its tqent_id value incremented. This happens
when the preallocated taskq_ent_t structure is recursively dispatched.
Thus, a taskq_thread_t could potentially have its tqt_ent->tqent_id
field silently modified from under its feet. If this were to happen
to a taskq_thread_t on a taskq_t's active thread list, this would
compromise the integrity of the order of the list (as the list
_may_ no longer be sorted).
To get around this, the taskq_thread_t's taskq_ent_t pointer was
replaced with its own static copy of the tqent_id. So, as a taskq_ent_t
is pulled off of the taskq_t's pending list, a static copy of its
tqent_id is made and this copy is used to sort the active thread
list. Using a static copy is key in ensuring the integrity of the
order of the active thread list. Even if the underlying taskq_ent_t
is recursively dispatched (as has its tqent_id modified), this
static copy stored inside the taskq_thread_t will remain constant.
Signed-off-by: Prakash Surya <surya1@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #71
2011-12-16 17:44:31 +00:00
|
|
|
tqt->tqt_id = t->tqent_id;
|
Store copy of tqent_flags prior to servicing task
A preallocated taskq_ent_t's tqent_flags must be checked prior to
servicing the taskq_ent_t. Once a preallocated taskq entry is serviced,
the ownership of the entry is handed back to the caller of
taskq_dispatch, thus the entry's contents can potentially be mangled.
In particular, this is a problem in the case where a preallocated taskq
entry is serviced, and the caller clears it's tqent_flags field. Thus,
when the function returns and task_done is called, it looks as though
the entry is **not** a preallocated task (when in fact it **is** a
preallocated task).
In this situation, task_done will place the preallocated taskq_ent_t
structure onto the taskq_t's free list. This is a **huge** mistake. If
the taskq_ent_t is then freed by the caller of taskq_dispatch, the
taskq_t's free list will hold a pointer to garbage data. Even worse, if
nothing has over written the freed memory before the pointer is
dereferenced, it may still look as though it points to a valid list_head
belonging to a taskq_ent_t structure.
Thus, the task entry's flags are now copied prior to servicing the task.
This copy is then checked to see if it is a preallocated task, and
determine if the entry needs to be passed down to the task_done
function.
Signed-off-by: Prakash Surya <surya1@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #71
2011-12-16 22:57:31 +00:00
|
|
|
tqt->tqt_flags = t->tqent_flags;
|
|
|
|
|
2017-08-04 16:57:58 +00:00
|
|
|
if (t->tqent_flags & TQENT_FLAG_PREALLOC) {
|
Add tracepoints for taskq entry lifetime events
This adds some new DTRACE_PROBE* endpoints so that we can observe taskq
latencies on a system. Additionally, a new "taskqlatency.bt" script is
added to do this observation via "bpftrace". Lastly, a "zfs-trace.sh"
script is added to wrap "bpftrace" with the proper options required to
run and use "taskqlatency.bt".
For example, with these changes in place, a user can run the following:
$ cd ./contrib/bpftrace
$ sudo ./zfs-trace.sh taskqlatency.bt
Attaching 6 probes...
^C
Here's some example output, showing latency information for time spent
executing the taskq entry's function:
@exec_lat_us[dp_sync_taskq, userquota_updates_task]:
[2, 4) 5 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[4, 8) 0 | |
[8, 16) 1 |@@@@@@@@@@ |
[16, 32) 2 |@@@@@@@@@@@@@@@@@@@@ |
@exec_lat_us[z_wr_int_h, zio_execute]:
[8, 16) 16 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 2 |@@@@@@ |
@exec_lat_us[z_wr_iss_h, zio_execute]:
[16, 32) 4 |@@@@@@@@@@@@@@@@ |
[32, 64) 13 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[64, 128) 1 |@@@@ |
@exec_lat_us[z_ioctl_int, zio_execute]:
[2, 4) 1 |@@@@ |
[4, 8) 11 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[8, 16) 8 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
@exec_lat_us[dp_sync_taskq, sync_dnodes_task]:
[2, 4) 1 |@@@@@@ |
[4, 8) 7 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 8 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 2 |@@@@@@@@@@@@@ |
[32, 64) 4 |@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[64, 128) 1 |@@@@@@ |
[128, 256) 0 | |
[256, 512) 1 |@@@@@@
Here's some example output, showing latency information for time spent
waiting on the taskq, prior to starting execution of entry's function:
@queue_lat_us[dp_sync_taskq]:
[2, 4) 1 |@@@@ |
[4, 8) 7 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 2 |@@@@@@@@ |
[16, 32) 3 |@@@@@@@@@@@@@ |
[32, 64) 12 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[64, 128) 6 |@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[128, 256) 0 | |
[256, 512) 1 |@@@@ |
@queue_lat_us[z_wr_iss]:
[4, 8) 4 |@@@@ |
[8, 16) 13 |@@@@@@@@@@@@@@@ |
[16, 32) 6 |@@@@@@@ |
[32, 64) 2 |@@ |
[64, 128) 12 |@@@@@@@@@@@@@@ |
[128, 256) 15 |@@@@@@@@@@@@@@@@@@ |
[256, 512) 33 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[512, 1K) 27 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[1K, 2K) 7 |@@@@@@@@ |
[2K, 4K) 14 |@@@@@@@@@@@@@@@@ |
[4K, 8K) 14 |@@@@@@@@@@@@@@@@ |
[8K, 16K) 23 |@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[16K, 32K) 43 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
@queue_lat_us[z_wr_int]:
[2, 4) 10 |@@@@@ |
[4, 8) 71 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 88 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 50 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[32, 64) 65 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[64, 128) 43 |@@@@@@@@@@@@@@@@@@@@@@@@@ |
[128, 256) 19 |@@@@@@@@@@@ |
[256, 512) 3 |@ |
[512, 1K) 1 | |
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Prakash Surya <prakash.surya@delphix.com>
Closes #9525
2019-10-24 21:13:41 +00:00
|
|
|
dup_task = *t;
|
2017-08-04 16:57:58 +00:00
|
|
|
t = &dup_task;
|
|
|
|
}
|
|
|
|
tqt->tqt_task = t;
|
|
|
|
|
2011-12-06 01:32:48 +00:00
|
|
|
taskq_insert_in_order(tq, tqt);
|
2012-12-06 20:57:42 +00:00
|
|
|
tq->tq_nactive++;
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
2008-04-25 22:10:47 +00:00
|
|
|
|
Add tracepoints for taskq entry lifetime events
This adds some new DTRACE_PROBE* endpoints so that we can observe taskq
latencies on a system. Additionally, a new "taskqlatency.bt" script is
added to do this observation via "bpftrace". Lastly, a "zfs-trace.sh"
script is added to wrap "bpftrace" with the proper options required to
run and use "taskqlatency.bt".
For example, with these changes in place, a user can run the following:
$ cd ./contrib/bpftrace
$ sudo ./zfs-trace.sh taskqlatency.bt
Attaching 6 probes...
^C
Here's some example output, showing latency information for time spent
executing the taskq entry's function:
@exec_lat_us[dp_sync_taskq, userquota_updates_task]:
[2, 4) 5 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[4, 8) 0 | |
[8, 16) 1 |@@@@@@@@@@ |
[16, 32) 2 |@@@@@@@@@@@@@@@@@@@@ |
@exec_lat_us[z_wr_int_h, zio_execute]:
[8, 16) 16 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 2 |@@@@@@ |
@exec_lat_us[z_wr_iss_h, zio_execute]:
[16, 32) 4 |@@@@@@@@@@@@@@@@ |
[32, 64) 13 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[64, 128) 1 |@@@@ |
@exec_lat_us[z_ioctl_int, zio_execute]:
[2, 4) 1 |@@@@ |
[4, 8) 11 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[8, 16) 8 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
@exec_lat_us[dp_sync_taskq, sync_dnodes_task]:
[2, 4) 1 |@@@@@@ |
[4, 8) 7 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 8 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 2 |@@@@@@@@@@@@@ |
[32, 64) 4 |@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[64, 128) 1 |@@@@@@ |
[128, 256) 0 | |
[256, 512) 1 |@@@@@@
Here's some example output, showing latency information for time spent
waiting on the taskq, prior to starting execution of entry's function:
@queue_lat_us[dp_sync_taskq]:
[2, 4) 1 |@@@@ |
[4, 8) 7 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 2 |@@@@@@@@ |
[16, 32) 3 |@@@@@@@@@@@@@ |
[32, 64) 12 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[64, 128) 6 |@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[128, 256) 0 | |
[256, 512) 1 |@@@@ |
@queue_lat_us[z_wr_iss]:
[4, 8) 4 |@@@@ |
[8, 16) 13 |@@@@@@@@@@@@@@@ |
[16, 32) 6 |@@@@@@@ |
[32, 64) 2 |@@ |
[64, 128) 12 |@@@@@@@@@@@@@@ |
[128, 256) 15 |@@@@@@@@@@@@@@@@@@ |
[256, 512) 33 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[512, 1K) 27 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[1K, 2K) 7 |@@@@@@@@ |
[2K, 4K) 14 |@@@@@@@@@@@@@@@@ |
[4K, 8K) 14 |@@@@@@@@@@@@@@@@ |
[8K, 16K) 23 |@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[16K, 32K) 43 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
@queue_lat_us[z_wr_int]:
[2, 4) 10 |@@@@@ |
[4, 8) 71 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 88 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 50 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[32, 64) 65 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[64, 128) 43 |@@@@@@@@@@@@@@@@@@@@@@@@@ |
[128, 256) 19 |@@@@@@@@@@@ |
[256, 512) 3 |@ |
[512, 1K) 1 | |
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Prakash Surya <prakash.surya@delphix.com>
Closes #9525
2019-10-24 21:13:41 +00:00
|
|
|
DTRACE_PROBE1(taskq_ent__start, taskq_ent_t *, t);
|
|
|
|
|
2008-04-25 22:10:47 +00:00
|
|
|
/* Perform the requested task */
|
2012-12-06 20:57:42 +00:00
|
|
|
t->tqent_func(t->tqent_arg);
|
2008-04-25 22:10:47 +00:00
|
|
|
|
Add tracepoints for taskq entry lifetime events
This adds some new DTRACE_PROBE* endpoints so that we can observe taskq
latencies on a system. Additionally, a new "taskqlatency.bt" script is
added to do this observation via "bpftrace". Lastly, a "zfs-trace.sh"
script is added to wrap "bpftrace" with the proper options required to
run and use "taskqlatency.bt".
For example, with these changes in place, a user can run the following:
$ cd ./contrib/bpftrace
$ sudo ./zfs-trace.sh taskqlatency.bt
Attaching 6 probes...
^C
Here's some example output, showing latency information for time spent
executing the taskq entry's function:
@exec_lat_us[dp_sync_taskq, userquota_updates_task]:
[2, 4) 5 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[4, 8) 0 | |
[8, 16) 1 |@@@@@@@@@@ |
[16, 32) 2 |@@@@@@@@@@@@@@@@@@@@ |
@exec_lat_us[z_wr_int_h, zio_execute]:
[8, 16) 16 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 2 |@@@@@@ |
@exec_lat_us[z_wr_iss_h, zio_execute]:
[16, 32) 4 |@@@@@@@@@@@@@@@@ |
[32, 64) 13 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[64, 128) 1 |@@@@ |
@exec_lat_us[z_ioctl_int, zio_execute]:
[2, 4) 1 |@@@@ |
[4, 8) 11 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[8, 16) 8 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
@exec_lat_us[dp_sync_taskq, sync_dnodes_task]:
[2, 4) 1 |@@@@@@ |
[4, 8) 7 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 8 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 2 |@@@@@@@@@@@@@ |
[32, 64) 4 |@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[64, 128) 1 |@@@@@@ |
[128, 256) 0 | |
[256, 512) 1 |@@@@@@
Here's some example output, showing latency information for time spent
waiting on the taskq, prior to starting execution of entry's function:
@queue_lat_us[dp_sync_taskq]:
[2, 4) 1 |@@@@ |
[4, 8) 7 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 2 |@@@@@@@@ |
[16, 32) 3 |@@@@@@@@@@@@@ |
[32, 64) 12 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[64, 128) 6 |@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[128, 256) 0 | |
[256, 512) 1 |@@@@ |
@queue_lat_us[z_wr_iss]:
[4, 8) 4 |@@@@ |
[8, 16) 13 |@@@@@@@@@@@@@@@ |
[16, 32) 6 |@@@@@@@ |
[32, 64) 2 |@@ |
[64, 128) 12 |@@@@@@@@@@@@@@ |
[128, 256) 15 |@@@@@@@@@@@@@@@@@@ |
[256, 512) 33 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[512, 1K) 27 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[1K, 2K) 7 |@@@@@@@@ |
[2K, 4K) 14 |@@@@@@@@@@@@@@@@ |
[4K, 8K) 14 |@@@@@@@@@@@@@@@@ |
[8K, 16K) 23 |@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[16K, 32K) 43 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
@queue_lat_us[z_wr_int]:
[2, 4) 10 |@@@@@ |
[4, 8) 71 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 88 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 50 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[32, 64) 65 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[64, 128) 43 |@@@@@@@@@@@@@@@@@@@@@@@@@ |
[128, 256) 19 |@@@@@@@@@@@ |
[256, 512) 3 |@ |
[512, 1K) 1 | |
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Prakash Surya <prakash.surya@delphix.com>
Closes #9525
2019-10-24 21:13:41 +00:00
|
|
|
DTRACE_PROBE1(taskq_ent__finish, taskq_ent_t *, t);
|
|
|
|
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_lock_irqsave_nested(&tq->tq_lock, flags,
|
|
|
|
tq->tq_lock_class);
|
2012-12-06 20:57:42 +00:00
|
|
|
tq->tq_nactive--;
|
2011-12-06 01:32:48 +00:00
|
|
|
list_del_init(&tqt->tqt_active_list);
|
2012-12-06 20:38:19 +00:00
|
|
|
tqt->tqt_task = NULL;
|
Store copy of tqent_flags prior to servicing task
A preallocated taskq_ent_t's tqent_flags must be checked prior to
servicing the taskq_ent_t. Once a preallocated taskq entry is serviced,
the ownership of the entry is handed back to the caller of
taskq_dispatch, thus the entry's contents can potentially be mangled.
In particular, this is a problem in the case where a preallocated taskq
entry is serviced, and the caller clears it's tqent_flags field. Thus,
when the function returns and task_done is called, it looks as though
the entry is **not** a preallocated task (when in fact it **is** a
preallocated task).
In this situation, task_done will place the preallocated taskq_ent_t
structure onto the taskq_t's free list. This is a **huge** mistake. If
the taskq_ent_t is then freed by the caller of taskq_dispatch, the
taskq_t's free list will hold a pointer to garbage data. Even worse, if
nothing has over written the freed memory before the pointer is
dereferenced, it may still look as though it points to a valid list_head
belonging to a taskq_ent_t structure.
Thus, the task entry's flags are now copied prior to servicing the task.
This copy is then checked to see if it is a preallocated task, and
determine if the entry needs to be passed down to the task_done
function.
Signed-off-by: Prakash Surya <surya1@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #71
2011-12-16 22:57:31 +00:00
|
|
|
|
|
|
|
/* For prealloc'd tasks, we don't free anything. */
|
2015-06-08 21:36:27 +00:00
|
|
|
if (!(tqt->tqt_flags & TQENT_FLAG_PREALLOC))
|
Store copy of tqent_flags prior to servicing task
A preallocated taskq_ent_t's tqent_flags must be checked prior to
servicing the taskq_ent_t. Once a preallocated taskq entry is serviced,
the ownership of the entry is handed back to the caller of
taskq_dispatch, thus the entry's contents can potentially be mangled.
In particular, this is a problem in the case where a preallocated taskq
entry is serviced, and the caller clears it's tqent_flags field. Thus,
when the function returns and task_done is called, it looks as though
the entry is **not** a preallocated task (when in fact it **is** a
preallocated task).
In this situation, task_done will place the preallocated taskq_ent_t
structure onto the taskq_t's free list. This is a **huge** mistake. If
the taskq_ent_t is then freed by the caller of taskq_dispatch, the
taskq_t's free list will hold a pointer to garbage data. Even worse, if
nothing has over written the freed memory before the pointer is
dereferenced, it may still look as though it points to a valid list_head
belonging to a taskq_ent_t structure.
Thus, the task entry's flags are now copied prior to servicing the task.
This copy is then checked to see if it is a preallocated task, and
determine if the entry needs to be passed down to the task_done
function.
Signed-off-by: Prakash Surya <surya1@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #71
2011-12-16 22:57:31 +00:00
|
|
|
task_done(tq, t);
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2015-12-12 00:15:50 +00:00
|
|
|
/*
|
|
|
|
* When the current lowest outstanding taskqid is
|
|
|
|
* done calculate the new lowest outstanding id
|
|
|
|
*/
|
Swap taskq_ent_t with taskqid_t in taskq_thread_t
The taskq_t's active thread list is sorted based on its
tqt_ent->tqent_id field. The list is kept sorted solely by inserting
new taskq_thread_t's in their correct sorted location; no other
means is used. This means that once inserted, if a taskq_thread_t's
tqt_ent->tqent_id field changes, the list runs the risk of no
longer being sorted.
Prior to the introduction of the taskq_dispatch_prealloc() interface,
this was not a problem as a taskq_ent_t actively being serviced under
the old interface should always have a static tqent_id field. Thus,
once the taskq_thread_t is added to the taskq_t's active thread list,
the taskq_thread_t's tqt_ent->tqent_id field would remain constant.
Now, this is no longer the case. Currently, if using the
taskq_dispatch_prealloc() interface, any given taskq_ent_t actively
being serviced _may_ have its tqent_id value incremented. This happens
when the preallocated taskq_ent_t structure is recursively dispatched.
Thus, a taskq_thread_t could potentially have its tqt_ent->tqent_id
field silently modified from under its feet. If this were to happen
to a taskq_thread_t on a taskq_t's active thread list, this would
compromise the integrity of the order of the list (as the list
_may_ no longer be sorted).
To get around this, the taskq_thread_t's taskq_ent_t pointer was
replaced with its own static copy of the tqent_id. So, as a taskq_ent_t
is pulled off of the taskq_t's pending list, a static copy of its
tqent_id is made and this copy is used to sort the active thread
list. Using a static copy is key in ensuring the integrity of the
order of the active thread list. Even if the underlying taskq_ent_t
is recursively dispatched (as has its tqent_id modified), this
static copy stored inside the taskq_thread_t will remain constant.
Signed-off-by: Prakash Surya <surya1@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #71
2011-12-16 17:44:31 +00:00
|
|
|
if (tq->tq_lowest_id == tqt->tqt_id) {
|
2008-04-25 22:10:47 +00:00
|
|
|
tq->tq_lowest_id = taskq_lowest_id(tq);
|
Swap taskq_ent_t with taskqid_t in taskq_thread_t
The taskq_t's active thread list is sorted based on its
tqt_ent->tqent_id field. The list is kept sorted solely by inserting
new taskq_thread_t's in their correct sorted location; no other
means is used. This means that once inserted, if a taskq_thread_t's
tqt_ent->tqent_id field changes, the list runs the risk of no
longer being sorted.
Prior to the introduction of the taskq_dispatch_prealloc() interface,
this was not a problem as a taskq_ent_t actively being serviced under
the old interface should always have a static tqent_id field. Thus,
once the taskq_thread_t is added to the taskq_t's active thread list,
the taskq_thread_t's tqt_ent->tqent_id field would remain constant.
Now, this is no longer the case. Currently, if using the
taskq_dispatch_prealloc() interface, any given taskq_ent_t actively
being serviced _may_ have its tqent_id value incremented. This happens
when the preallocated taskq_ent_t structure is recursively dispatched.
Thus, a taskq_thread_t could potentially have its tqt_ent->tqent_id
field silently modified from under its feet. If this were to happen
to a taskq_thread_t on a taskq_t's active thread list, this would
compromise the integrity of the order of the list (as the list
_may_ no longer be sorted).
To get around this, the taskq_thread_t's taskq_ent_t pointer was
replaced with its own static copy of the tqent_id. So, as a taskq_ent_t
is pulled off of the taskq_t's pending list, a static copy of its
tqent_id is made and this copy is used to sort the active thread
list. Using a static copy is key in ensuring the integrity of the
order of the active thread list. Even if the underlying taskq_ent_t
is recursively dispatched (as has its tqent_id modified), this
static copy stored inside the taskq_thread_t will remain constant.
Signed-off-by: Prakash Surya <surya1@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #71
2011-12-16 17:44:31 +00:00
|
|
|
ASSERT3S(tq->tq_lowest_id, >, tqt->tqt_id);
|
2008-04-25 22:10:47 +00:00
|
|
|
}
|
|
|
|
|
2015-06-08 21:36:27 +00:00
|
|
|
/* Spawn additional taskq threads if required. */
|
2015-11-06 23:00:55 +00:00
|
|
|
if ((++seq_tasks) > spl_taskq_thread_sequential &&
|
|
|
|
taskq_thread_spawn(tq))
|
2015-06-08 21:36:27 +00:00
|
|
|
seq_tasks = 0;
|
|
|
|
|
2016-10-28 21:23:30 +00:00
|
|
|
tqt->tqt_id = TASKQID_INVALID;
|
Store copy of tqent_flags prior to servicing task
A preallocated taskq_ent_t's tqent_flags must be checked prior to
servicing the taskq_ent_t. Once a preallocated taskq entry is serviced,
the ownership of the entry is handed back to the caller of
taskq_dispatch, thus the entry's contents can potentially be mangled.
In particular, this is a problem in the case where a preallocated taskq
entry is serviced, and the caller clears it's tqent_flags field. Thus,
when the function returns and task_done is called, it looks as though
the entry is **not** a preallocated task (when in fact it **is** a
preallocated task).
In this situation, task_done will place the preallocated taskq_ent_t
structure onto the taskq_t's free list. This is a **huge** mistake. If
the taskq_ent_t is then freed by the caller of taskq_dispatch, the
taskq_t's free list will hold a pointer to garbage data. Even worse, if
nothing has over written the freed memory before the pointer is
dereferenced, it may still look as though it points to a valid list_head
belonging to a taskq_ent_t structure.
Thus, the task entry's flags are now copied prior to servicing the task.
This copy is then checked to see if it is a preallocated task, and
determine if the entry needs to be passed down to the task_done
function.
Signed-off-by: Prakash Surya <surya1@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #71
2011-12-16 22:57:31 +00:00
|
|
|
tqt->tqt_flags = 0;
|
2012-12-06 20:57:42 +00:00
|
|
|
wake_up_all(&tq->tq_wait_waitq);
|
2015-06-08 21:36:27 +00:00
|
|
|
} else {
|
|
|
|
if (taskq_thread_should_stop(tq, tqt))
|
|
|
|
break;
|
2008-04-25 22:10:47 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
set_current_state(TASK_INTERRUPTIBLE);
|
|
|
|
|
2012-12-06 20:57:42 +00:00
|
|
|
}
|
2008-04-25 22:10:47 +00:00
|
|
|
|
|
|
|
__set_current_state(TASK_RUNNING);
|
2012-12-06 20:57:42 +00:00
|
|
|
tq->tq_nthreads--;
|
2011-12-06 01:32:48 +00:00
|
|
|
list_del_init(&tqt->tqt_thread_list);
|
2015-06-08 21:36:27 +00:00
|
|
|
error:
|
|
|
|
kmem_free(tqt, sizeof (taskq_thread_t));
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2015-12-02 22:52:46 +00:00
|
|
|
tsd_set(taskq_tsd, NULL);
|
2021-01-25 19:18:28 +00:00
|
|
|
thread_exit();
|
2015-12-02 22:52:46 +00:00
|
|
|
|
2014-11-05 22:30:35 +00:00
|
|
|
return (0);
|
2008-04-25 22:10:47 +00:00
|
|
|
}
|
|
|
|
|
2015-06-08 21:36:27 +00:00
|
|
|
static taskq_thread_t *
|
|
|
|
taskq_thread_create(taskq_t *tq)
|
|
|
|
{
|
|
|
|
static int last_used_cpu = 0;
|
|
|
|
taskq_thread_t *tqt;
|
|
|
|
|
|
|
|
tqt = kmem_alloc(sizeof (*tqt), KM_PUSHPAGE);
|
|
|
|
INIT_LIST_HEAD(&tqt->tqt_thread_list);
|
|
|
|
INIT_LIST_HEAD(&tqt->tqt_active_list);
|
|
|
|
tqt->tqt_tq = tq;
|
2016-10-28 21:23:30 +00:00
|
|
|
tqt->tqt_id = TASKQID_INVALID;
|
2015-06-08 21:36:27 +00:00
|
|
|
|
|
|
|
tqt->tqt_thread = spl_kthread_create(taskq_thread, tqt,
|
|
|
|
"%s", tq->tq_name);
|
|
|
|
if (tqt->tqt_thread == NULL) {
|
|
|
|
kmem_free(tqt, sizeof (taskq_thread_t));
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (spl_taskq_thread_bind) {
|
|
|
|
last_used_cpu = (last_used_cpu + 1) % num_online_cpus();
|
|
|
|
kthread_bind(tqt->tqt_thread, last_used_cpu);
|
|
|
|
}
|
|
|
|
|
2015-07-23 18:21:08 +00:00
|
|
|
if (spl_taskq_thread_priority)
|
|
|
|
set_user_nice(tqt->tqt_thread, PRIO_TO_NICE(tq->tq_pri));
|
|
|
|
|
2015-06-08 21:36:27 +00:00
|
|
|
wake_up_process(tqt->tqt_thread);
|
|
|
|
|
|
|
|
return (tqt);
|
|
|
|
}
|
|
|
|
|
2008-02-26 20:36:04 +00:00
|
|
|
taskq_t *
|
2020-12-10 22:09:23 +00:00
|
|
|
taskq_create(const char *name, int threads_arg, pri_t pri,
|
2012-12-06 20:57:42 +00:00
|
|
|
int minalloc, int maxalloc, uint_t flags)
|
2008-02-26 20:36:04 +00:00
|
|
|
{
|
2012-12-06 20:57:42 +00:00
|
|
|
taskq_t *tq;
|
2011-12-06 01:32:48 +00:00
|
|
|
taskq_thread_t *tqt;
|
2015-06-08 21:36:27 +00:00
|
|
|
int count = 0, rc = 0, i;
|
2015-12-03 23:06:03 +00:00
|
|
|
unsigned long irqflags;
|
2020-12-10 22:09:23 +00:00
|
|
|
int nthreads = threads_arg;
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2012-12-06 20:57:42 +00:00
|
|
|
ASSERT(name != NULL);
|
|
|
|
ASSERT(minalloc >= 0);
|
2015-06-08 21:36:27 +00:00
|
|
|
ASSERT(!(flags & (TASKQ_CPR_SAFE))); /* Unsupported */
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2009-07-09 17:07:52 +00:00
|
|
|
/* Scale the number of threads using nthreads as a percentage */
|
|
|
|
if (flags & TASKQ_THREADS_CPU_PCT) {
|
|
|
|
ASSERT(nthreads <= 100);
|
|
|
|
ASSERT(nthreads >= 0);
|
2020-12-10 22:09:23 +00:00
|
|
|
nthreads = MIN(threads_arg, 100);
|
2009-07-09 17:07:52 +00:00
|
|
|
nthreads = MAX(nthreads, 0);
|
2020-12-10 22:09:23 +00:00
|
|
|
nthreads = MAX((num_online_cpus() * nthreads) /100, 1);
|
2009-07-09 17:07:52 +00:00
|
|
|
}
|
|
|
|
|
2015-06-08 21:36:27 +00:00
|
|
|
tq = kmem_alloc(sizeof (*tq), KM_PUSHPAGE);
|
2012-12-06 20:57:42 +00:00
|
|
|
if (tq == NULL)
|
2014-11-05 22:30:35 +00:00
|
|
|
return (NULL);
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2020-12-10 22:09:23 +00:00
|
|
|
tq->tq_hp_support = B_FALSE;
|
|
|
|
#ifdef HAVE_CPU_HOTPLUG
|
|
|
|
if (flags & TASKQ_THREADS_CPU_PCT) {
|
|
|
|
tq->tq_hp_support = B_TRUE;
|
|
|
|
if (cpuhp_state_add_instance_nocalls(spl_taskq_cpuhp_state,
|
|
|
|
&tq->tq_hp_cb_node) != 0) {
|
|
|
|
kmem_free(tq, sizeof (*tq));
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2012-12-06 20:57:42 +00:00
|
|
|
spin_lock_init(&tq->tq_lock);
|
|
|
|
INIT_LIST_HEAD(&tq->tq_thread_list);
|
|
|
|
INIT_LIST_HEAD(&tq->tq_active_list);
|
2019-10-10 16:47:06 +00:00
|
|
|
tq->tq_name = kmem_strdup(name);
|
2015-12-12 00:15:50 +00:00
|
|
|
tq->tq_nactive = 0;
|
|
|
|
tq->tq_nthreads = 0;
|
|
|
|
tq->tq_nspawn = 0;
|
2015-06-08 21:36:27 +00:00
|
|
|
tq->tq_maxthreads = nthreads;
|
2020-12-10 22:09:23 +00:00
|
|
|
tq->tq_cpu_pct = threads_arg;
|
2015-12-12 00:15:50 +00:00
|
|
|
tq->tq_pri = pri;
|
|
|
|
tq->tq_minalloc = minalloc;
|
|
|
|
tq->tq_maxalloc = maxalloc;
|
|
|
|
tq->tq_nalloc = 0;
|
|
|
|
tq->tq_flags = (flags | TASKQ_ACTIVE);
|
2016-10-28 21:23:30 +00:00
|
|
|
tq->tq_next_id = TASKQID_INITIAL;
|
|
|
|
tq->tq_lowest_id = TASKQID_INITIAL;
|
2023-06-26 20:57:12 +00:00
|
|
|
tq->lastshouldstop = 0;
|
2012-12-06 20:57:42 +00:00
|
|
|
INIT_LIST_HEAD(&tq->tq_free_list);
|
|
|
|
INIT_LIST_HEAD(&tq->tq_pend_list);
|
|
|
|
INIT_LIST_HEAD(&tq->tq_prio_list);
|
2012-12-06 20:38:19 +00:00
|
|
|
INIT_LIST_HEAD(&tq->tq_delay_list);
|
2012-12-06 20:57:42 +00:00
|
|
|
init_waitqueue_head(&tq->tq_work_waitq);
|
|
|
|
init_waitqueue_head(&tq->tq_wait_waitq);
|
2015-10-13 23:56:51 +00:00
|
|
|
tq->tq_lock_class = TQ_LOCK_GENERAL;
|
2015-10-19 12:47:52 +00:00
|
|
|
INIT_LIST_HEAD(&tq->tq_taskqs);
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2015-06-08 21:36:27 +00:00
|
|
|
if (flags & TASKQ_PREPOPULATE) {
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_lock_irqsave_nested(&tq->tq_lock, irqflags,
|
2015-10-13 23:56:51 +00:00
|
|
|
tq->tq_lock_class);
|
2015-06-08 21:36:27 +00:00
|
|
|
|
2012-12-06 20:57:42 +00:00
|
|
|
for (i = 0; i < minalloc; i++)
|
2015-12-03 23:06:03 +00:00
|
|
|
task_done(tq, task_alloc(tq, TQ_PUSHPAGE | TQ_NEW,
|
|
|
|
&irqflags));
|
2008-04-23 21:19:47 +00:00
|
|
|
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, irqflags);
|
2015-06-08 21:36:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if ((flags & TASKQ_DYNAMIC) && spl_taskq_thread_dynamic)
|
|
|
|
nthreads = 1;
|
2008-04-23 21:19:47 +00:00
|
|
|
|
2011-12-06 01:32:48 +00:00
|
|
|
for (i = 0; i < nthreads; i++) {
|
2015-06-08 21:36:27 +00:00
|
|
|
tqt = taskq_thread_create(tq);
|
|
|
|
if (tqt == NULL)
|
2011-12-06 01:32:48 +00:00
|
|
|
rc = 1;
|
2015-06-08 21:36:27 +00:00
|
|
|
else
|
|
|
|
count++;
|
2011-12-06 01:32:48 +00:00
|
|
|
}
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2012-12-06 20:57:42 +00:00
|
|
|
/* Wait for all threads to be started before potential destroy */
|
2015-06-08 21:36:27 +00:00
|
|
|
wait_event(tq->tq_wait_waitq, tq->tq_nthreads == count);
|
2016-05-21 01:04:03 +00:00
|
|
|
/*
|
|
|
|
* taskq_thread might have touched nspawn, but we don't want them to
|
|
|
|
* because they're not dynamically spawned. So we reset it to 0
|
|
|
|
*/
|
|
|
|
tq->tq_nspawn = 0;
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2012-12-06 20:57:42 +00:00
|
|
|
if (rc) {
|
2012-12-06 21:04:27 +00:00
|
|
|
taskq_destroy(tq);
|
2012-12-06 20:57:42 +00:00
|
|
|
tq = NULL;
|
2015-10-19 12:47:52 +00:00
|
|
|
} else {
|
|
|
|
down_write(&tq_list_sem);
|
|
|
|
tq->tq_instance = taskq_find_by_name(name) + 1;
|
|
|
|
list_add_tail(&tq->tq_taskqs, &tq_list);
|
|
|
|
up_write(&tq_list_sem);
|
2012-12-06 20:57:42 +00:00
|
|
|
}
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2014-11-05 22:30:35 +00:00
|
|
|
return (tq);
|
2008-02-26 20:36:04 +00:00
|
|
|
}
|
2012-12-06 21:04:27 +00:00
|
|
|
EXPORT_SYMBOL(taskq_create);
|
2008-03-11 02:08:57 +00:00
|
|
|
|
|
|
|
void
|
2012-12-06 21:04:27 +00:00
|
|
|
taskq_destroy(taskq_t *tq)
|
2008-03-11 02:08:57 +00:00
|
|
|
{
|
2011-12-06 01:32:48 +00:00
|
|
|
struct task_struct *thread;
|
|
|
|
taskq_thread_t *tqt;
|
2011-11-11 23:06:35 +00:00
|
|
|
taskq_ent_t *t;
|
2015-12-03 23:06:03 +00:00
|
|
|
unsigned long flags;
|
2008-03-11 02:08:57 +00:00
|
|
|
|
2008-04-25 22:10:47 +00:00
|
|
|
ASSERT(tq);
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
|
2015-06-08 21:36:27 +00:00
|
|
|
tq->tq_flags &= ~TASKQ_ACTIVE;
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2020-12-10 22:09:23 +00:00
|
|
|
#ifdef HAVE_CPU_HOTPLUG
|
|
|
|
if (tq->tq_hp_support) {
|
|
|
|
VERIFY0(cpuhp_state_remove_instance_nocalls(
|
|
|
|
spl_taskq_cpuhp_state, &tq->tq_hp_cb_node));
|
|
|
|
}
|
|
|
|
#endif
|
2015-06-08 21:36:27 +00:00
|
|
|
/*
|
|
|
|
* When TASKQ_ACTIVE is clear new tasks may not be added nor may
|
|
|
|
* new worker threads be spawned for dynamic taskq.
|
|
|
|
*/
|
|
|
|
if (dynamic_taskq != NULL)
|
|
|
|
taskq_wait_outstanding(dynamic_taskq, 0);
|
|
|
|
|
2012-12-06 21:04:27 +00:00
|
|
|
taskq_wait(tq);
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2015-10-19 12:47:52 +00:00
|
|
|
/* remove taskq from global list used by the kstats */
|
|
|
|
down_write(&tq_list_sem);
|
|
|
|
list_del(&tq->tq_taskqs);
|
|
|
|
up_write(&tq_list_sem);
|
|
|
|
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
|
2016-05-21 01:04:03 +00:00
|
|
|
/* wait for spawning threads to insert themselves to the list */
|
|
|
|
while (tq->tq_nspawn) {
|
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
|
|
|
schedule_timeout_interruptible(1);
|
2018-02-07 19:49:38 +00:00
|
|
|
spin_lock_irqsave_nested(&tq->tq_lock, flags,
|
|
|
|
tq->tq_lock_class);
|
2016-05-21 01:04:03 +00:00
|
|
|
}
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2011-12-06 01:32:48 +00:00
|
|
|
/*
|
|
|
|
* Signal each thread to exit and block until it does. Each thread
|
|
|
|
* is responsible for removing itself from the list and freeing its
|
|
|
|
* taskq_thread_t. This allows for idle threads to opt to remove
|
|
|
|
* themselves from the taskq. They can be recreated as needed.
|
|
|
|
*/
|
|
|
|
while (!list_empty(&tq->tq_thread_list)) {
|
|
|
|
tqt = list_entry(tq->tq_thread_list.next,
|
2015-06-08 21:36:27 +00:00
|
|
|
taskq_thread_t, tqt_thread_list);
|
2011-12-06 01:32:48 +00:00
|
|
|
thread = tqt->tqt_thread;
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
2011-12-06 01:32:48 +00:00
|
|
|
|
|
|
|
kthread_stop(thread);
|
|
|
|
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_lock_irqsave_nested(&tq->tq_lock, flags,
|
2015-10-13 23:56:51 +00:00
|
|
|
tq->tq_lock_class);
|
2011-12-06 01:32:48 +00:00
|
|
|
}
|
|
|
|
|
2012-12-06 20:57:42 +00:00
|
|
|
while (!list_empty(&tq->tq_free_list)) {
|
2011-11-11 23:06:35 +00:00
|
|
|
t = list_entry(tq->tq_free_list.next, taskq_ent_t, tqent_list);
|
2011-12-06 18:04:51 +00:00
|
|
|
|
|
|
|
ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
|
|
|
|
|
2012-12-06 20:57:42 +00:00
|
|
|
list_del_init(&t->tqent_list);
|
|
|
|
task_free(tq, t);
|
|
|
|
}
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2015-06-08 21:36:27 +00:00
|
|
|
ASSERT0(tq->tq_nthreads);
|
|
|
|
ASSERT0(tq->tq_nalloc);
|
|
|
|
ASSERT0(tq->tq_nspawn);
|
2012-12-06 20:57:42 +00:00
|
|
|
ASSERT(list_empty(&tq->tq_thread_list));
|
|
|
|
ASSERT(list_empty(&tq->tq_active_list));
|
|
|
|
ASSERT(list_empty(&tq->tq_free_list));
|
|
|
|
ASSERT(list_empty(&tq->tq_pend_list));
|
|
|
|
ASSERT(list_empty(&tq->tq_prio_list));
|
2012-12-06 20:38:19 +00:00
|
|
|
ASSERT(list_empty(&tq->tq_delay_list));
|
2008-04-25 22:10:47 +00:00
|
|
|
|
2015-12-03 23:06:03 +00:00
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
2011-12-06 01:32:48 +00:00
|
|
|
|
2019-10-10 16:47:06 +00:00
|
|
|
kmem_strfree(tq->tq_name);
|
2015-06-08 21:36:27 +00:00
|
|
|
kmem_free(tq, sizeof (taskq_t));
|
2008-03-11 02:08:57 +00:00
|
|
|
}
|
2012-12-06 21:04:27 +00:00
|
|
|
EXPORT_SYMBOL(taskq_destroy);
|
2009-01-05 23:08:03 +00:00
|
|
|
|
2016-01-28 00:55:14 +00:00
|
|
|
static unsigned int spl_taskq_kick = 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* 2.6.36 API Change
|
|
|
|
* module_param_cb is introduced to take kernel_param_ops and
|
|
|
|
* module_param_call is marked as obsolete. Also set and get operations
|
|
|
|
* were changed to take a 'const struct kernel_param *'.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
#ifdef module_param_cb
|
|
|
|
param_set_taskq_kick(const char *val, const struct kernel_param *kp)
|
|
|
|
#else
|
|
|
|
param_set_taskq_kick(const char *val, struct kernel_param *kp)
|
|
|
|
#endif
|
|
|
|
{
|
|
|
|
int ret;
|
2019-12-14 00:07:48 +00:00
|
|
|
taskq_t *tq = NULL;
|
2016-01-28 00:55:14 +00:00
|
|
|
taskq_ent_t *t;
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
ret = param_set_uint(val, kp);
|
|
|
|
if (ret < 0 || !spl_taskq_kick)
|
|
|
|
return (ret);
|
|
|
|
/* reset value */
|
|
|
|
spl_taskq_kick = 0;
|
|
|
|
|
|
|
|
down_read(&tq_list_sem);
|
|
|
|
list_for_each_entry(tq, &tq_list, tq_taskqs) {
|
|
|
|
spin_lock_irqsave_nested(&tq->tq_lock, flags,
|
|
|
|
tq->tq_lock_class);
|
|
|
|
/* Check if the first pending is older than 5 seconds */
|
|
|
|
t = taskq_next_ent(tq);
|
|
|
|
if (t && time_after(jiffies, t->tqent_birth + 5*HZ)) {
|
|
|
|
(void) taskq_thread_spawn(tq);
|
|
|
|
printk(KERN_INFO "spl: Kicked taskq %s/%d\n",
|
|
|
|
tq->tq_name, tq->tq_instance);
|
|
|
|
}
|
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
|
|
|
}
|
|
|
|
up_read(&tq_list_sem);
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef module_param_cb
|
|
|
|
static const struct kernel_param_ops param_ops_taskq_kick = {
|
2018-02-07 19:49:38 +00:00
|
|
|
.set = param_set_taskq_kick,
|
|
|
|
.get = param_get_uint,
|
2016-01-28 00:55:14 +00:00
|
|
|
};
|
|
|
|
module_param_cb(spl_taskq_kick, ¶m_ops_taskq_kick, &spl_taskq_kick, 0644);
|
|
|
|
#else
|
|
|
|
module_param_call(spl_taskq_kick, param_set_taskq_kick, param_get_uint,
|
2018-02-07 19:49:38 +00:00
|
|
|
&spl_taskq_kick, 0644);
|
2016-01-28 00:55:14 +00:00
|
|
|
#endif
|
|
|
|
MODULE_PARM_DESC(spl_taskq_kick,
|
2018-02-07 19:49:38 +00:00
|
|
|
"Write nonzero to kick stuck taskqs to spawn more threads");
|
2016-01-28 00:55:14 +00:00
|
|
|
|
2020-12-10 22:09:23 +00:00
|
|
|
#ifdef HAVE_CPU_HOTPLUG
|
|
|
|
/*
|
|
|
|
* This callback will be called exactly once for each core that comes online,
|
|
|
|
* for each dynamic taskq. We attempt to expand taskqs that have
|
|
|
|
* TASKQ_THREADS_CPU_PCT set. We need to redo the percentage calculation every
|
|
|
|
* time, to correctly determine whether or not to add a thread.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
spl_taskq_expand(unsigned int cpu, struct hlist_node *node)
|
|
|
|
{
|
|
|
|
taskq_t *tq = list_entry(node, taskq_t, tq_hp_cb_node);
|
|
|
|
unsigned long flags;
|
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
ASSERT(tq);
|
|
|
|
spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
|
|
|
|
|
2021-11-02 16:23:48 +00:00
|
|
|
if (!(tq->tq_flags & TASKQ_ACTIVE)) {
|
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
|
|
|
return (err);
|
|
|
|
}
|
2020-12-10 22:09:23 +00:00
|
|
|
|
|
|
|
ASSERT(tq->tq_flags & TASKQ_THREADS_CPU_PCT);
|
|
|
|
int nthreads = MIN(tq->tq_cpu_pct, 100);
|
|
|
|
nthreads = MAX(((num_online_cpus() + 1) * nthreads) / 100, 1);
|
|
|
|
tq->tq_maxthreads = nthreads;
|
|
|
|
|
|
|
|
if (!((tq->tq_flags & TASKQ_DYNAMIC) && spl_taskq_thread_dynamic) &&
|
|
|
|
tq->tq_maxthreads > tq->tq_nthreads) {
|
2021-11-02 16:23:48 +00:00
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
2020-12-10 22:09:23 +00:00
|
|
|
taskq_thread_t *tqt = taskq_thread_create(tq);
|
|
|
|
if (tqt == NULL)
|
|
|
|
err = -1;
|
2021-11-02 16:23:48 +00:00
|
|
|
return (err);
|
2020-12-10 22:09:23 +00:00
|
|
|
}
|
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
|
|
|
return (err);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* While we don't support offlining CPUs, it is possible that CPUs will fail
|
|
|
|
* to online successfully. We do need to be able to handle this case
|
|
|
|
* gracefully.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
spl_taskq_prepare_down(unsigned int cpu, struct hlist_node *node)
|
|
|
|
{
|
|
|
|
taskq_t *tq = list_entry(node, taskq_t, tq_hp_cb_node);
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
ASSERT(tq);
|
|
|
|
spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
|
|
|
|
|
|
|
|
if (!(tq->tq_flags & TASKQ_ACTIVE))
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
ASSERT(tq->tq_flags & TASKQ_THREADS_CPU_PCT);
|
|
|
|
int nthreads = MIN(tq->tq_cpu_pct, 100);
|
|
|
|
nthreads = MAX(((num_online_cpus()) * nthreads) / 100, 1);
|
|
|
|
tq->tq_maxthreads = nthreads;
|
|
|
|
|
|
|
|
if (!((tq->tq_flags & TASKQ_DYNAMIC) && spl_taskq_thread_dynamic) &&
|
|
|
|
tq->tq_maxthreads < tq->tq_nthreads) {
|
|
|
|
ASSERT3U(tq->tq_maxthreads, ==, tq->tq_nthreads - 1);
|
|
|
|
taskq_thread_t *tqt = list_entry(tq->tq_thread_list.next,
|
|
|
|
taskq_thread_t, tqt_thread_list);
|
|
|
|
struct task_struct *thread = tqt->tqt_thread;
|
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
|
|
|
|
|
|
|
kthread_stop(thread);
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
out:
|
|
|
|
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2009-01-05 23:08:03 +00:00
|
|
|
int
|
|
|
|
spl_taskq_init(void)
|
|
|
|
{
|
2018-02-16 01:53:18 +00:00
|
|
|
init_rwsem(&tq_list_sem);
|
2015-12-02 22:52:46 +00:00
|
|
|
tsd_create(&taskq_tsd, NULL);
|
|
|
|
|
2020-12-10 22:09:23 +00:00
|
|
|
#ifdef HAVE_CPU_HOTPLUG
|
|
|
|
spl_taskq_cpuhp_state = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
|
|
|
|
"fs/spl_taskq:online", spl_taskq_expand, spl_taskq_prepare_down);
|
|
|
|
#endif
|
|
|
|
|
2015-06-24 16:53:47 +00:00
|
|
|
system_taskq = taskq_create("spl_system_taskq", MAX(boot_ncpus, 64),
|
2015-07-24 17:32:55 +00:00
|
|
|
maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
|
2009-01-05 23:08:03 +00:00
|
|
|
if (system_taskq == NULL)
|
Linux SPL module init: Handle memory allocation failures correctly
Upon inspection of our code, I noticed that we assume that
__alloc_percpu() cannot fail, and while it probably never has failed in
practice, technically, it can fail, so we should handle that.
Additionally, we incorrectly assume that `taskq_create()` in
spl_kmem_cache_init() cannot fail. The same remark applies to it.
Lastly, `spl-init()` failures should always return negative error
values, but in some places, we are returning positive 1, which is
incorrect. We change those values to their correct error codes.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13847
2022-09-08 17:28:20 +00:00
|
|
|
return (-ENOMEM);
|
2009-01-05 23:08:03 +00:00
|
|
|
|
2016-12-08 21:00:20 +00:00
|
|
|
system_delay_taskq = taskq_create("spl_delay_taskq", MAX(boot_ncpus, 4),
|
|
|
|
maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
|
|
|
|
if (system_delay_taskq == NULL) {
|
2020-12-10 22:09:23 +00:00
|
|
|
#ifdef HAVE_CPU_HOTPLUG
|
|
|
|
cpuhp_remove_multi_state(spl_taskq_cpuhp_state);
|
|
|
|
#endif
|
2016-12-08 21:00:20 +00:00
|
|
|
taskq_destroy(system_taskq);
|
Linux SPL module init: Handle memory allocation failures correctly
Upon inspection of our code, I noticed that we assume that
__alloc_percpu() cannot fail, and while it probably never has failed in
practice, technically, it can fail, so we should handle that.
Additionally, we incorrectly assume that `taskq_create()` in
spl_kmem_cache_init() cannot fail. The same remark applies to it.
Lastly, `spl-init()` failures should always return negative error
values, but in some places, we are returning positive 1, which is
incorrect. We change those values to their correct error codes.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13847
2022-09-08 17:28:20 +00:00
|
|
|
return (-ENOMEM);
|
2016-12-08 21:00:20 +00:00
|
|
|
}
|
|
|
|
|
2015-06-08 21:36:27 +00:00
|
|
|
dynamic_taskq = taskq_create("spl_dynamic_taskq", 1,
|
2015-07-24 17:32:55 +00:00
|
|
|
maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE);
|
2015-06-08 21:36:27 +00:00
|
|
|
if (dynamic_taskq == NULL) {
|
2020-12-10 22:09:23 +00:00
|
|
|
#ifdef HAVE_CPU_HOTPLUG
|
|
|
|
cpuhp_remove_multi_state(spl_taskq_cpuhp_state);
|
|
|
|
#endif
|
2015-06-08 21:36:27 +00:00
|
|
|
taskq_destroy(system_taskq);
|
2016-12-08 21:00:20 +00:00
|
|
|
taskq_destroy(system_delay_taskq);
|
Linux SPL module init: Handle memory allocation failures correctly
Upon inspection of our code, I noticed that we assume that
__alloc_percpu() cannot fail, and while it probably never has failed in
practice, technically, it can fail, so we should handle that.
Additionally, we incorrectly assume that `taskq_create()` in
spl_kmem_cache_init() cannot fail. The same remark applies to it.
Lastly, `spl-init()` failures should always return negative error
values, but in some places, we are returning positive 1, which is
incorrect. We change those values to their correct error codes.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13847
2022-09-08 17:28:20 +00:00
|
|
|
return (-ENOMEM);
|
2015-06-08 21:36:27 +00:00
|
|
|
}
|
|
|
|
|
2015-12-12 00:15:50 +00:00
|
|
|
/*
|
|
|
|
* This is used to annotate tq_lock, so
|
|
|
|
* taskq_dispatch -> taskq_thread_spawn -> taskq_dispatch
|
2015-10-13 23:56:51 +00:00
|
|
|
* does not trigger a lockdep warning re: possible recursive locking
|
|
|
|
*/
|
|
|
|
dynamic_taskq->tq_lock_class = TQ_LOCK_DYNAMIC;
|
|
|
|
|
2014-11-05 22:30:35 +00:00
|
|
|
return (0);
|
2009-01-05 23:08:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
spl_taskq_fini(void)
|
|
|
|
{
|
2015-06-08 21:36:27 +00:00
|
|
|
taskq_destroy(dynamic_taskq);
|
|
|
|
dynamic_taskq = NULL;
|
|
|
|
|
2016-12-08 21:00:20 +00:00
|
|
|
taskq_destroy(system_delay_taskq);
|
|
|
|
system_delay_taskq = NULL;
|
|
|
|
|
2009-01-05 23:08:03 +00:00
|
|
|
taskq_destroy(system_taskq);
|
2015-06-08 21:36:27 +00:00
|
|
|
system_taskq = NULL;
|
2015-12-02 22:52:46 +00:00
|
|
|
|
|
|
|
tsd_destroy(&taskq_tsd);
|
2020-12-10 22:09:23 +00:00
|
|
|
|
|
|
|
#ifdef HAVE_CPU_HOTPLUG
|
|
|
|
cpuhp_remove_multi_state(spl_taskq_cpuhp_state);
|
|
|
|
spl_taskq_cpuhp_state = 0;
|
|
|
|
#endif
|
2009-01-05 23:08:03 +00:00
|
|
|
}
|