From fd01167ffd9b5d4634bab503e0584e6756bb8ef8 Mon Sep 17 00:00:00 2001 From: LOLi Date: Fri, 30 Mar 2018 21:10:01 +0200 Subject: [PATCH] Fix hung z_zvol tasks during 'zfs receive' During a receive operation zvol_create_minors_impl() can wait needlessly for the prefetch thread because both share the same tasks queue. This results in hung tasks: <3>INFO: task z_zvol:5541 blocked for more than 120 seconds. <3> Tainted: P O 3.16.0-4-amd64 <3>"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. The first z_zvol:5541 (zvol_task_cb) is waiting for the long running traverse_prefetch_thread:260 root@linux:~# cat /proc/spl/taskq taskq act nthr spwn maxt pri mina spl_system_taskq/0 1 2 0 64 100 1 active: [260]traverse_prefetch_thread [zfs](0xffff88003347ae40) wait: 5541 spl_delay_taskq/0 0 1 0 4 100 1 delay: spa_deadman [zfs](0xffff880039924000) z_zvol/1 1 1 0 1 120 1 active: [5541]zvol_task_cb [zfs](0xffff88001fde6400) pend: zvol_task_cb [zfs](0xffff88001fde6800) This change adds a dedicated, per-pool, prefetch taskq to prevent the traverse code from monopolizing the global (and limited) system_taskq by inappropriately scheduling long running tasks on it. Reviewed-by: Albert Lee Signed-off-by: Brian Behlendorf Signed-off-by: loli10K Closes #6330 Closes #6890 Closes #7343 --- include/sys/spa_impl.h | 1 + module/zfs/dmu_traverse.c | 3 ++- module/zfs/spa.c | 13 +++++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 73ad1c60c5..b1e78c1d59 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -275,6 +275,7 @@ struct spa { spa_stats_t spa_stats; /* assorted spa statistics */ hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */ taskq_t *spa_zvol_taskq; /* Taskq for minor management */ + taskq_t *spa_prefetch_taskq; /* Taskq for prefetch threads */ uint64_t spa_multihost; /* multihost aware (mmp) */ mmp_thread_t spa_mmp; /* multihost mmp thread */ diff --git a/module/zfs/dmu_traverse.c b/module/zfs/dmu_traverse.c index b494bef358..f63903ef64 100644 --- a/module/zfs/dmu_traverse.c +++ b/module/zfs/dmu_traverse.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -623,7 +624,7 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp, } if (!(flags & TRAVERSE_PREFETCH_DATA) || - taskq_dispatch(system_taskq, traverse_prefetch_thread, + taskq_dispatch(spa->spa_prefetch_taskq, traverse_prefetch_thread, td, TQ_NOQUEUE) == TASKQID_INVALID) pd->pd_exited = B_TRUE; diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 561f4d04bf..1add7ad246 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -1182,6 +1182,14 @@ spa_activate(spa_t *spa, int mode) spa->spa_zvol_taskq = taskq_create("z_zvol", 1, defclsyspri, 1, INT_MAX, 0); + /* + * Taskq dedicated to prefetcher threads: this is used to prevent the + * pool traverse code from monopolizing the global (and limited) + * system_taskq by inappropriately scheduling long running tasks on it. + */ + spa->spa_prefetch_taskq = taskq_create("z_prefetch", boot_ncpus, + defclsyspri, 1, INT_MAX, TASKQ_DYNAMIC); + /* * The taskq to upgrade datasets in this pool. Currently used by * feature SPA_FEATURE_USEROBJ_ACCOUNTING. @@ -1211,6 +1219,11 @@ spa_deactivate(spa_t *spa) spa->spa_zvol_taskq = NULL; } + if (spa->spa_prefetch_taskq) { + taskq_destroy(spa->spa_prefetch_taskq); + spa->spa_prefetch_taskq = NULL; + } + if (spa->spa_upgrade_taskq) { taskq_destroy(spa->spa_upgrade_taskq); spa->spa_upgrade_taskq = NULL;