Merge 0a9a614d2b
into 308f7c2f14
This commit is contained in:
commit
0a8f745386
|
@ -20,6 +20,10 @@
|
||||||
* You should have received a copy of the GNU General Public License along
|
* You should have received a copy of the GNU General Public License along
|
||||||
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
|
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024, Klara, Inc.
|
||||||
|
* Copyright (c) 2024, Syneto
|
||||||
|
*/
|
||||||
|
|
||||||
#ifndef _SPL_KSTAT_H
|
#ifndef _SPL_KSTAT_H
|
||||||
#define _SPL_KSTAT_H
|
#define _SPL_KSTAT_H
|
||||||
|
@ -89,6 +93,8 @@ typedef struct kstat_module {
|
||||||
struct list_head ksm_module_list; /* module linkage */
|
struct list_head ksm_module_list; /* module linkage */
|
||||||
struct list_head ksm_kstat_list; /* list of kstat entries */
|
struct list_head ksm_kstat_list; /* list of kstat entries */
|
||||||
struct proc_dir_entry *ksm_proc; /* proc entry */
|
struct proc_dir_entry *ksm_proc; /* proc entry */
|
||||||
|
struct kstat_module *ksm_parent; /* parent module in hierarchy */
|
||||||
|
uint_t ksm_nchildren; /* number of child modules */
|
||||||
} kstat_module_t;
|
} kstat_module_t;
|
||||||
|
|
||||||
typedef struct kstat_raw_ops {
|
typedef struct kstat_raw_ops {
|
||||||
|
|
|
@ -22,7 +22,8 @@
|
||||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright (c) 2011, 2020 by Delphix. All rights reserved.
|
* Copyright (c) 2011, 2020 by Delphix. All rights reserved.
|
||||||
* Copyright (c) 2017, Intel Corporation.
|
* Copyright (c) 2017, Intel Corporation.
|
||||||
* Copyright (c) 2023, Klara Inc.
|
* Copyright (c) 2023, 2024, Klara, Inc.
|
||||||
|
* Copyright (c) 2024, Syneto
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef _SYS_VDEV_IMPL_H
|
#ifndef _SYS_VDEV_IMPL_H
|
||||||
|
@ -41,6 +42,7 @@
|
||||||
#include <sys/vdev_rebuild.h>
|
#include <sys/vdev_rebuild.h>
|
||||||
#include <sys/vdev_removal.h>
|
#include <sys/vdev_removal.h>
|
||||||
#include <sys/zfs_ratelimit.h>
|
#include <sys/zfs_ratelimit.h>
|
||||||
|
#include <sys/wmsum.h>
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
|
@ -138,6 +140,25 @@ typedef union vdev_queue_class {
|
||||||
avl_tree_t vqc_tree;
|
avl_tree_t vqc_tree;
|
||||||
} vdev_queue_class_t;
|
} vdev_queue_class_t;
|
||||||
|
|
||||||
|
typedef struct vdev_queue_sums {
|
||||||
|
/* gauges (inc/dec counters, current value) */
|
||||||
|
wmsum_t vqs_io_queued;
|
||||||
|
wmsum_t vqs_io_class_queued[ZIO_PRIORITY_NUM_QUEUEABLE];
|
||||||
|
wmsum_t vqs_io_active;
|
||||||
|
wmsum_t vqs_io_class_active[ZIO_PRIORITY_NUM_QUEUEABLE];
|
||||||
|
|
||||||
|
/* counters (inc only, since queue creation ) */
|
||||||
|
wmsum_t vqs_io_enqueued_total;
|
||||||
|
wmsum_t vqs_io_class_enqueued_total[ZIO_PRIORITY_NUM_QUEUEABLE];
|
||||||
|
wmsum_t vqs_io_dequeued_total;
|
||||||
|
wmsum_t vqs_io_class_dequeued_total[ZIO_PRIORITY_NUM_QUEUEABLE];
|
||||||
|
wmsum_t vqs_io_aggregated_total;
|
||||||
|
wmsum_t vqs_io_aggregated_data_total;
|
||||||
|
wmsum_t vqs_io_aggregated_read_gap_total;
|
||||||
|
wmsum_t vqs_io_aggregated_write_gap_total;
|
||||||
|
wmsum_t vqs_io_aggregated_shrunk_total;
|
||||||
|
} vdev_queue_sums_t;
|
||||||
|
|
||||||
struct vdev_queue {
|
struct vdev_queue {
|
||||||
vdev_t *vq_vdev;
|
vdev_t *vq_vdev;
|
||||||
vdev_queue_class_t vq_class[ZIO_PRIORITY_NUM_QUEUEABLE];
|
vdev_queue_class_t vq_class[ZIO_PRIORITY_NUM_QUEUEABLE];
|
||||||
|
@ -155,6 +176,8 @@ struct vdev_queue {
|
||||||
hrtime_t vq_io_delta_ts;
|
hrtime_t vq_io_delta_ts;
|
||||||
zio_t vq_io_search; /* used as local for stack reduction */
|
zio_t vq_io_search; /* used as local for stack reduction */
|
||||||
kmutex_t vq_lock;
|
kmutex_t vq_lock;
|
||||||
|
vdev_queue_sums_t vq_sums;
|
||||||
|
kstat_t *vq_ksp;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef enum vdev_alloc_bias {
|
typedef enum vdev_alloc_bias {
|
||||||
|
|
|
@ -27,6 +27,10 @@
|
||||||
* [1] https://illumos.org/man/1M/kstat
|
* [1] https://illumos.org/man/1M/kstat
|
||||||
* [2] https://illumos.org/man/9f/kstat_create
|
* [2] https://illumos.org/man/9f/kstat_create
|
||||||
*/
|
*/
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024, Klara, Inc.
|
||||||
|
* Copyright (c) 2024, Syneto
|
||||||
|
*/
|
||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/param.h>
|
#include <sys/param.h>
|
||||||
|
@ -287,7 +291,7 @@ __kstat_create(const char *module, int instance, const char *name,
|
||||||
char buf[KSTAT_STRLEN];
|
char buf[KSTAT_STRLEN];
|
||||||
struct sysctl_oid *root;
|
struct sysctl_oid *root;
|
||||||
kstat_t *ksp;
|
kstat_t *ksp;
|
||||||
char *pool;
|
char *p, *frag;
|
||||||
|
|
||||||
KASSERT(instance == 0, ("instance=%d", instance));
|
KASSERT(instance == 0, ("instance=%d", instance));
|
||||||
if ((ks_type == KSTAT_TYPE_INTR) || (ks_type == KSTAT_TYPE_IO))
|
if ((ks_type == KSTAT_TYPE_INTR) || (ks_type == KSTAT_TYPE_IO))
|
||||||
|
@ -345,74 +349,54 @@ __kstat_create(const char *module, int instance, const char *name,
|
||||||
else
|
else
|
||||||
ksp->ks_data = kmem_zalloc(ksp->ks_data_size, KM_SLEEP);
|
ksp->ks_data = kmem_zalloc(ksp->ks_data_size, KM_SLEEP);
|
||||||
|
|
||||||
/*
|
sysctl_ctx_init(&ksp->ks_sysctl_ctx);
|
||||||
* Some kstats use a module name like "zfs/poolname" to distinguish a
|
|
||||||
* set of kstats belonging to a specific pool. Split on '/' to add an
|
|
||||||
* extra node for the pool name if needed.
|
|
||||||
*/
|
|
||||||
(void) strlcpy(buf, module, KSTAT_STRLEN);
|
(void) strlcpy(buf, module, KSTAT_STRLEN);
|
||||||
module = buf;
|
|
||||||
pool = strchr(module, '/');
|
|
||||||
if (pool != NULL)
|
|
||||||
*pool++ = '\0';
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Create sysctl tree for those statistics:
|
* Walk over the module name, splitting on '/', and create the
|
||||||
*
|
* intermediate nodes.
|
||||||
* kstat.<module>[.<pool>].<class>.<name>
|
|
||||||
*/
|
*/
|
||||||
sysctl_ctx_init(&ksp->ks_sysctl_ctx);
|
root = NULL;
|
||||||
root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx,
|
p = buf;
|
||||||
SYSCTL_STATIC_CHILDREN(_kstat), OID_AUTO, module, CTLFLAG_RW, 0,
|
while ((frag = strsep(&p, "/")) != NULL) {
|
||||||
"");
|
root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx, root ?
|
||||||
if (root == NULL) {
|
SYSCTL_CHILDREN(root) : SYSCTL_STATIC_CHILDREN(_kstat),
|
||||||
printf("%s: Cannot create kstat.%s tree!\n", __func__, module);
|
OID_AUTO, frag, CTLFLAG_RW, 0, "");
|
||||||
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
|
|
||||||
free(ksp, M_KSTAT);
|
|
||||||
return (NULL);
|
|
||||||
}
|
|
||||||
if (pool != NULL) {
|
|
||||||
root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx,
|
|
||||||
SYSCTL_CHILDREN(root), OID_AUTO, pool, CTLFLAG_RW, 0, "");
|
|
||||||
if (root == NULL) {
|
if (root == NULL) {
|
||||||
printf("%s: Cannot create kstat.%s.%s tree!\n",
|
printf("%s: Cannot create kstat.%s tree!\n",
|
||||||
__func__, module, pool);
|
__func__, buf);
|
||||||
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
|
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
|
||||||
free(ksp, M_KSTAT);
|
free(ksp, M_KSTAT);
|
||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
|
if (p != NULL && p > frag)
|
||||||
|
p[-1] = '.';
|
||||||
}
|
}
|
||||||
|
|
||||||
root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx, SYSCTL_CHILDREN(root),
|
root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx, SYSCTL_CHILDREN(root),
|
||||||
OID_AUTO, class, CTLFLAG_RW, 0, "");
|
OID_AUTO, class, CTLFLAG_RW, 0, "");
|
||||||
if (root == NULL) {
|
if (root == NULL) {
|
||||||
if (pool != NULL)
|
printf("%s: Cannot create kstat.%s.%s tree!\n",
|
||||||
printf("%s: Cannot create kstat.%s.%s.%s tree!\n",
|
__func__, buf, class);
|
||||||
__func__, module, pool, class);
|
|
||||||
else
|
|
||||||
printf("%s: Cannot create kstat.%s.%s tree!\n",
|
|
||||||
__func__, module, class);
|
|
||||||
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
|
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
|
||||||
free(ksp, M_KSTAT);
|
free(ksp, M_KSTAT);
|
||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ksp->ks_type == KSTAT_TYPE_NAMED) {
|
if (ksp->ks_type == KSTAT_TYPE_NAMED) {
|
||||||
root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx,
|
root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx,
|
||||||
SYSCTL_CHILDREN(root),
|
SYSCTL_CHILDREN(root),
|
||||||
OID_AUTO, name, CTLFLAG_RW, 0, "");
|
OID_AUTO, name, CTLFLAG_RW, 0, "");
|
||||||
if (root == NULL) {
|
if (root == NULL) {
|
||||||
if (pool != NULL)
|
printf("%s: Cannot create kstat.%s.%s.%s tree!\n",
|
||||||
printf("%s: Cannot create kstat.%s.%s.%s.%s "
|
__func__, buf, class, name);
|
||||||
"tree!\n", __func__, module, pool, class,
|
|
||||||
name);
|
|
||||||
else
|
|
||||||
printf("%s: Cannot create kstat.%s.%s.%s "
|
|
||||||
"tree!\n", __func__, module, class, name);
|
|
||||||
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
|
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
|
||||||
free(ksp, M_KSTAT);
|
free(ksp, M_KSTAT);
|
||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ksp->ks_sysctl_root = root;
|
ksp->ks_sysctl_root = root;
|
||||||
|
|
||||||
return (ksp);
|
return (ksp);
|
||||||
|
@ -436,7 +420,26 @@ kstat_install_named(kstat_t *ksp)
|
||||||
if (ksent->data_type != 0) {
|
if (ksent->data_type != 0) {
|
||||||
typelast = ksent->data_type;
|
typelast = ksent->data_type;
|
||||||
namelast = ksent->name;
|
namelast = ksent->name;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If a sysctl with this name already exists on
|
||||||
|
* this root, first remove it by deleting it from its
|
||||||
|
* old context, and then destroying it.
|
||||||
|
*/
|
||||||
|
struct sysctl_oid *oid = NULL;
|
||||||
|
SYSCTL_FOREACH(oid,
|
||||||
|
SYSCTL_CHILDREN(ksp->ks_sysctl_root)) {
|
||||||
|
if (strcmp(oid->oid_name, namelast) == 0) {
|
||||||
|
kstat_t *oldksp =
|
||||||
|
(kstat_t *)oid->oid_arg1;
|
||||||
|
sysctl_ctx_entry_del(
|
||||||
|
&oldksp->ks_sysctl_ctx, oid);
|
||||||
|
sysctl_remove_oid(oid, 1, 0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (typelast) {
|
switch (typelast) {
|
||||||
case KSTAT_DATA_CHAR:
|
case KSTAT_DATA_CHAR:
|
||||||
/* Not Implemented */
|
/* Not Implemented */
|
||||||
|
|
|
@ -26,6 +26,10 @@
|
||||||
* [1] https://illumos.org/man/1M/kstat
|
* [1] https://illumos.org/man/1M/kstat
|
||||||
* [2] https://illumos.org/man/9f/kstat_create
|
* [2] https://illumos.org/man/9f/kstat_create
|
||||||
*/
|
*/
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024, Klara, Inc.
|
||||||
|
* Copyright (c) 2024, Syneto
|
||||||
|
*/
|
||||||
|
|
||||||
#include <linux/seq_file.h>
|
#include <linux/seq_file.h>
|
||||||
#include <sys/kstat.h>
|
#include <sys/kstat.h>
|
||||||
|
@ -379,33 +383,72 @@ kstat_find_module(char *name)
|
||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
static kstat_module_t *
|
|
||||||
kstat_create_module(char *name)
|
|
||||||
{
|
|
||||||
kstat_module_t *module;
|
|
||||||
struct proc_dir_entry *pde;
|
|
||||||
|
|
||||||
pde = proc_mkdir(name, proc_spl_kstat);
|
|
||||||
if (pde == NULL)
|
|
||||||
return (NULL);
|
|
||||||
|
|
||||||
module = kmem_alloc(sizeof (kstat_module_t), KM_SLEEP);
|
|
||||||
module->ksm_proc = pde;
|
|
||||||
strlcpy(module->ksm_name, name, KSTAT_STRLEN);
|
|
||||||
INIT_LIST_HEAD(&module->ksm_kstat_list);
|
|
||||||
list_add_tail(&module->ksm_module_list, &kstat_module_list);
|
|
||||||
|
|
||||||
return (module);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
kstat_delete_module(kstat_module_t *module)
|
kstat_delete_module(kstat_module_t *module)
|
||||||
{
|
{
|
||||||
ASSERT(list_empty(&module->ksm_kstat_list));
|
ASSERT(list_empty(&module->ksm_kstat_list));
|
||||||
remove_proc_entry(module->ksm_name, proc_spl_kstat);
|
ASSERT0(module->ksm_nchildren);
|
||||||
|
|
||||||
|
kstat_module_t *parent = module->ksm_parent;
|
||||||
|
|
||||||
|
char *p = module->ksm_name, *frag;
|
||||||
|
while (p != NULL && (frag = strsep(&p, "/"))) {}
|
||||||
|
|
||||||
|
remove_proc_entry(frag, parent ? parent->ksm_proc : proc_spl_kstat);
|
||||||
list_del(&module->ksm_module_list);
|
list_del(&module->ksm_module_list);
|
||||||
kmem_free(module, sizeof (kstat_module_t));
|
kmem_free(module, sizeof (kstat_module_t));
|
||||||
|
|
||||||
|
if (parent) {
|
||||||
|
parent->ksm_nchildren--;
|
||||||
|
if (parent->ksm_nchildren == 0 &&
|
||||||
|
list_empty(&parent->ksm_kstat_list))
|
||||||
|
kstat_delete_module(parent);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static kstat_module_t *
|
||||||
|
kstat_create_module(char *name)
|
||||||
|
{
|
||||||
|
char buf[KSTAT_STRLEN];
|
||||||
|
kstat_module_t *module, *parent;
|
||||||
|
|
||||||
|
(void) strlcpy(buf, name, KSTAT_STRLEN);
|
||||||
|
|
||||||
|
parent = NULL;
|
||||||
|
char *p = buf, *frag;
|
||||||
|
while ((frag = strsep(&p, "/")) != NULL) {
|
||||||
|
module = kstat_find_module(buf);
|
||||||
|
if (module == NULL) {
|
||||||
|
struct proc_dir_entry *pde = proc_mkdir(frag,
|
||||||
|
parent ? parent->ksm_proc : proc_spl_kstat);
|
||||||
|
if (pde == NULL) {
|
||||||
|
cmn_err(CE_WARN, "kstat_create('%s'): "
|
||||||
|
"module dir create failed", buf);
|
||||||
|
if (parent)
|
||||||
|
kstat_delete_module(parent);
|
||||||
|
return (NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
module = kmem_alloc(sizeof (kstat_module_t), KM_SLEEP);
|
||||||
|
module->ksm_proc = pde;
|
||||||
|
strlcpy(module->ksm_name, buf, KSTAT_STRLEN);
|
||||||
|
INIT_LIST_HEAD(&module->ksm_kstat_list);
|
||||||
|
list_add_tail(&module->ksm_module_list,
|
||||||
|
&kstat_module_list);
|
||||||
|
|
||||||
|
if (parent != NULL) {
|
||||||
|
module->ksm_parent = parent;
|
||||||
|
parent->ksm_nchildren++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
parent = module;
|
||||||
|
if (p != NULL && p > frag)
|
||||||
|
p[-1] = '/';
|
||||||
|
}
|
||||||
|
|
||||||
|
return (module);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
@ -624,12 +667,20 @@ kstat_proc_entry_install(kstat_proc_entry_t *kpep, mode_t mode,
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Only one entry by this name per-module, on failure the module
|
* We can only have one entry of this name per module. If one already
|
||||||
* shouldn't be deleted because we know it has at least one entry.
|
* exists, replace it by first removing the proc entry, then removing
|
||||||
|
* it from the list. The kstat itself lives on; it just can't be
|
||||||
|
* inspected through the filesystem.
|
||||||
*/
|
*/
|
||||||
list_for_each_entry(tmp, &module->ksm_kstat_list, kpe_list) {
|
list_for_each_entry(tmp, &module->ksm_kstat_list, kpe_list) {
|
||||||
if (strncmp(tmp->kpe_name, kpep->kpe_name, KSTAT_STRLEN) == 0)
|
if (tmp->kpe_proc != NULL &&
|
||||||
goto out;
|
strncmp(tmp->kpe_name, kpep->kpe_name, KSTAT_STRLEN) == 0) {
|
||||||
|
ASSERT3P(tmp->kpe_owner, ==, module);
|
||||||
|
remove_proc_entry(tmp->kpe_name, module->ksm_proc);
|
||||||
|
tmp->kpe_proc = NULL;
|
||||||
|
list_del_init(&tmp->kpe_list);
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
list_add_tail(&kpep->kpe_list, &module->ksm_kstat_list);
|
list_add_tail(&kpep->kpe_list, &module->ksm_kstat_list);
|
||||||
|
|
|
@ -25,6 +25,8 @@
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
|
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
|
||||||
|
* Copyright (c) 2024, Klara, Inc.
|
||||||
|
* Copyright (c) 2024, Syneto
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <sys/zfs_context.h>
|
#include <sys/zfs_context.h>
|
||||||
|
@ -228,6 +230,81 @@ uint_t zfs_vdev_queue_depth_pct = 300;
|
||||||
*/
|
*/
|
||||||
uint_t zfs_vdev_def_queue_depth = 32;
|
uint_t zfs_vdev_def_queue_depth = 32;
|
||||||
|
|
||||||
|
typedef struct vdev_queue_kstats {
|
||||||
|
kstat_named_t vqks_io_queued;
|
||||||
|
kstat_named_t vqks_io_class_queued[ZIO_PRIORITY_NUM_QUEUEABLE];
|
||||||
|
kstat_named_t vqks_io_active;
|
||||||
|
kstat_named_t vqks_io_class_active[ZIO_PRIORITY_NUM_QUEUEABLE];
|
||||||
|
kstat_named_t vqks_io_enqueued_total;
|
||||||
|
kstat_named_t vqks_io_class_enqueued_total[ZIO_PRIORITY_NUM_QUEUEABLE];
|
||||||
|
kstat_named_t vqks_io_dequeued_total;
|
||||||
|
kstat_named_t vqks_io_class_dequeued_total[ZIO_PRIORITY_NUM_QUEUEABLE];
|
||||||
|
kstat_named_t vqks_io_aggregated_total;
|
||||||
|
kstat_named_t vqks_io_aggregated_data_total;
|
||||||
|
kstat_named_t vqks_io_aggregated_read_gap_total;
|
||||||
|
kstat_named_t vqks_io_aggregated_write_gap_total;
|
||||||
|
kstat_named_t vqks_io_aggregated_shrunk_total;
|
||||||
|
} vdev_queue_kstats_t;
|
||||||
|
|
||||||
|
static vdev_queue_kstats_t vdev_queue_kstats_template = {
|
||||||
|
{ "io_queued", KSTAT_DATA_UINT64 },
|
||||||
|
{
|
||||||
|
{ "io_syncread_queued", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_syncwrite_queued", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_asyncread_queued", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_asyncwrite_queued", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_scrub_queued", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_removal_queued", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_initializing_queued", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_trim_queued", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_rebuild_queued", KSTAT_DATA_UINT64 },
|
||||||
|
},
|
||||||
|
{ "io_active", KSTAT_DATA_UINT64 },
|
||||||
|
{
|
||||||
|
{ "io_syncread_active", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_syncwrite_active", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_asyncread_active", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_asyncwrite_active", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_scrub_active", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_removal_active", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_initializing_active", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_trim_active", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_rebuild_active", KSTAT_DATA_UINT64 },
|
||||||
|
},
|
||||||
|
{ "io_enqueued_total", KSTAT_DATA_UINT64 },
|
||||||
|
{
|
||||||
|
{ "io_syncread_enqueued_total", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_syncwrite_enqueued_total", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_asyncread_enqueued_total", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_asyncwrite_enqueued_total", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_scrub_enqueued_total", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_removal_enqueued_total", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_initializing_enqueued_total", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_trim_enqueued_total", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_rebuild_enqueued_total", KSTAT_DATA_UINT64 },
|
||||||
|
},
|
||||||
|
{ "io_dequeued_total", KSTAT_DATA_UINT64 },
|
||||||
|
{
|
||||||
|
{ "io_syncread_dequeued_total", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_syncwrite_dequeued_total", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_asyncread_dequeued_total", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_asyncwrite_dequeued_total", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_scrub_dequeued_total", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_removal_dequeued_total", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_initializing_dequeued_total", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_trim_dequeued_total", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_rebuild_dequeued_total", KSTAT_DATA_UINT64 },
|
||||||
|
},
|
||||||
|
{ "io_aggregated_total", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_aggregated_data_total", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_aggregated_read_gap_total", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_aggregated_write_gap_total", KSTAT_DATA_UINT64 },
|
||||||
|
{ "io_aggregated_shrunk_total", KSTAT_DATA_UINT64 },
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Add one to (VQSTAT_INC) or subtract one from (VQSTAT_DEC) the vq_sums
 * counter named vqs_<stat> on the queue that vq points to. stat may carry
 * an array subscript, e.g. VQSTAT_INC(vq, io_class_queued[p]). The vq
 * argument is parenthesised so that any pointer expression can be passed.
 */
#define	VQSTAT_INC(vq, stat)	wmsum_add(&(vq)->vq_sums.vqs_##stat, 1)
#define	VQSTAT_DEC(vq, stat)	wmsum_add(&(vq)->vq_sums.vqs_##stat, -1)
|
||||||
|
|
||||||
static int
|
static int
|
||||||
vdev_queue_offset_compare(const void *x1, const void *x2)
|
vdev_queue_offset_compare(const void *x1, const void *x2)
|
||||||
{
|
{
|
||||||
|
@ -279,6 +356,10 @@ vdev_queue_class_add(vdev_queue_t *vq, zio_t *zio)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
avl_add(&vq->vq_class[p].vqc_tree, zio);
|
avl_add(&vq->vq_class[p].vqc_tree, zio);
|
||||||
|
VQSTAT_INC(vq, io_queued);
|
||||||
|
VQSTAT_INC(vq, io_class_queued[p]);
|
||||||
|
VQSTAT_INC(vq, io_enqueued_total);
|
||||||
|
VQSTAT_INC(vq, io_class_enqueued_total[p]);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -297,6 +378,10 @@ vdev_queue_class_remove(vdev_queue_t *vq, zio_t *zio)
|
||||||
empty = avl_is_empty(tree);
|
empty = avl_is_empty(tree);
|
||||||
}
|
}
|
||||||
vq->vq_cqueued &= ~(empty << p);
|
vq->vq_cqueued &= ~(empty << p);
|
||||||
|
VQSTAT_DEC(vq, io_queued);
|
||||||
|
VQSTAT_DEC(vq, io_class_queued[p]);
|
||||||
|
VQSTAT_INC(vq, io_dequeued_total);
|
||||||
|
VQSTAT_INC(vq, io_class_dequeued_total[p]);
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint_t
|
static uint_t
|
||||||
|
@ -472,6 +557,129 @@ found:
|
||||||
return (p);
|
return (p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
vdev_queue_sums_init(vdev_queue_t *vq)
|
||||||
|
{
|
||||||
|
vdev_queue_sums_t *vqs = &vq->vq_sums;
|
||||||
|
wmsum_init(&vqs->vqs_io_queued, 0);
|
||||||
|
wmsum_init(&vqs->vqs_io_active, 0);
|
||||||
|
wmsum_init(&vqs->vqs_io_enqueued_total, 0);
|
||||||
|
wmsum_init(&vqs->vqs_io_dequeued_total, 0);
|
||||||
|
wmsum_init(&vqs->vqs_io_aggregated_total, 0);
|
||||||
|
wmsum_init(&vqs->vqs_io_aggregated_data_total, 0);
|
||||||
|
wmsum_init(&vqs->vqs_io_aggregated_read_gap_total, 0);
|
||||||
|
wmsum_init(&vqs->vqs_io_aggregated_write_gap_total, 0);
|
||||||
|
wmsum_init(&vqs->vqs_io_aggregated_shrunk_total, 0);
|
||||||
|
for (int i = 0; i < ZIO_PRIORITY_NUM_QUEUEABLE; i++) {
|
||||||
|
wmsum_init(&vqs->vqs_io_class_queued[i], 0);
|
||||||
|
wmsum_init(&vqs->vqs_io_class_active[i], 0);
|
||||||
|
wmsum_init(&vqs->vqs_io_class_enqueued_total[i], 0);
|
||||||
|
wmsum_init(&vqs->vqs_io_class_dequeued_total[i], 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
vdev_queue_sums_fini(vdev_queue_t *vq)
|
||||||
|
{
|
||||||
|
vdev_queue_sums_t *vqs = &vq->vq_sums;
|
||||||
|
wmsum_fini(&vqs->vqs_io_queued);
|
||||||
|
wmsum_fini(&vqs->vqs_io_active);
|
||||||
|
wmsum_fini(&vqs->vqs_io_enqueued_total);
|
||||||
|
wmsum_fini(&vqs->vqs_io_dequeued_total);
|
||||||
|
wmsum_fini(&vqs->vqs_io_aggregated_total);
|
||||||
|
wmsum_fini(&vqs->vqs_io_aggregated_data_total);
|
||||||
|
wmsum_fini(&vqs->vqs_io_aggregated_read_gap_total);
|
||||||
|
wmsum_fini(&vqs->vqs_io_aggregated_write_gap_total);
|
||||||
|
wmsum_fini(&vqs->vqs_io_aggregated_shrunk_total);
|
||||||
|
for (int i = 0; i < ZIO_PRIORITY_NUM_QUEUEABLE; i++) {
|
||||||
|
wmsum_fini(&vqs->vqs_io_class_queued[i]);
|
||||||
|
wmsum_fini(&vqs->vqs_io_class_active[i]);
|
||||||
|
wmsum_fini(&vqs->vqs_io_class_enqueued_total[i]);
|
||||||
|
wmsum_fini(&vqs->vqs_io_class_dequeued_total[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
vdev_queue_kstats_update(kstat_t *ksp, int rw)
|
||||||
|
{
|
||||||
|
if (rw == KSTAT_WRITE)
|
||||||
|
return (EACCES);
|
||||||
|
|
||||||
|
vdev_queue_t *vq = ksp->ks_private;
|
||||||
|
vdev_queue_kstats_t *vqks = ksp->ks_data;
|
||||||
|
vdev_queue_sums_t *vqs = &vq->vq_sums;
|
||||||
|
|
||||||
|
vqks->vqks_io_queued.value.ui64 =
|
||||||
|
wmsum_value(&vqs->vqs_io_queued);
|
||||||
|
vqks->vqks_io_active.value.ui64 =
|
||||||
|
wmsum_value(&vqs->vqs_io_active);
|
||||||
|
vqks->vqks_io_enqueued_total.value.ui64 =
|
||||||
|
wmsum_value(&vqs->vqs_io_enqueued_total);
|
||||||
|
vqks->vqks_io_dequeued_total.value.ui64 =
|
||||||
|
wmsum_value(&vqs->vqs_io_dequeued_total);
|
||||||
|
vqks->vqks_io_aggregated_total.value.ui64 =
|
||||||
|
wmsum_value(&vqs->vqs_io_aggregated_total);
|
||||||
|
vqks->vqks_io_aggregated_data_total.value.ui64 =
|
||||||
|
wmsum_value(&vqs->vqs_io_aggregated_data_total);
|
||||||
|
vqks->vqks_io_aggregated_read_gap_total.value.ui64 =
|
||||||
|
wmsum_value(&vqs->vqs_io_aggregated_read_gap_total);
|
||||||
|
vqks->vqks_io_aggregated_write_gap_total.value.ui64 =
|
||||||
|
wmsum_value(&vqs->vqs_io_aggregated_write_gap_total);
|
||||||
|
vqks->vqks_io_aggregated_shrunk_total.value.ui64 =
|
||||||
|
wmsum_value(&vqs->vqs_io_aggregated_shrunk_total);
|
||||||
|
for (int i = 0; i < ZIO_PRIORITY_NUM_QUEUEABLE; i++) {
|
||||||
|
vqks->vqks_io_class_queued[i].value.ui64 =
|
||||||
|
wmsum_value(&vqs->vqs_io_class_queued[i]);
|
||||||
|
vqks->vqks_io_class_active[i].value.ui64 =
|
||||||
|
wmsum_value(&vqs->vqs_io_class_active[i]);
|
||||||
|
vqks->vqks_io_class_enqueued_total[i].value.ui64 =
|
||||||
|
wmsum_value(&vqs->vqs_io_class_enqueued_total[i]);
|
||||||
|
vqks->vqks_io_class_dequeued_total[i].value.ui64 =
|
||||||
|
wmsum_value(&vqs->vqs_io_class_dequeued_total[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
vdev_queue_kstats_init(vdev_queue_t *vq)
|
||||||
|
{
|
||||||
|
char *module =
|
||||||
|
kmem_asprintf("zfs/%s/vdev/%llu", spa_name(vq->vq_vdev->vdev_spa),
|
||||||
|
(u_longlong_t)vq->vq_vdev->vdev_guid);
|
||||||
|
|
||||||
|
kstat_t *ksp = kstat_create(module, 0, "queue", "misc",
|
||||||
|
KSTAT_TYPE_NAMED,
|
||||||
|
sizeof (vdev_queue_kstats_t) / sizeof (kstat_named_t),
|
||||||
|
KSTAT_FLAG_VIRTUAL);
|
||||||
|
|
||||||
|
kmem_strfree(module);
|
||||||
|
|
||||||
|
if (ksp == NULL)
|
||||||
|
return;
|
||||||
|
|
||||||
|
ksp->ks_private = vq;
|
||||||
|
ksp->ks_update = vdev_queue_kstats_update;
|
||||||
|
ksp->ks_data = kmem_alloc(sizeof (vdev_queue_kstats_t), KM_SLEEP);
|
||||||
|
memcpy(ksp->ks_data, &vdev_queue_kstats_template,
|
||||||
|
sizeof (vdev_queue_kstats_t));
|
||||||
|
kstat_install(ksp);
|
||||||
|
|
||||||
|
vq->vq_ksp = ksp;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
vdev_queue_kstats_fini(vdev_queue_t *vq)
|
||||||
|
{
|
||||||
|
if (vq->vq_ksp == NULL)
|
||||||
|
return;
|
||||||
|
|
||||||
|
kmem_free(vq->vq_ksp->ks_data, sizeof (vdev_queue_kstats_t));
|
||||||
|
kstat_delete(vq->vq_ksp);
|
||||||
|
|
||||||
|
vq->vq_ksp = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
vdev_queue_init(vdev_t *vd)
|
vdev_queue_init(vdev_t *vd)
|
||||||
{
|
{
|
||||||
|
@ -502,6 +710,19 @@ vdev_queue_init(vdev_t *vd)
|
||||||
list_create(&vq->vq_active_list, sizeof (struct zio),
|
list_create(&vq->vq_active_list, sizeof (struct zio),
|
||||||
offsetof(struct zio, io_queue_node.l));
|
offsetof(struct zio, io_queue_node.l));
|
||||||
mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL);
|
mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||||
|
|
||||||
|
vdev_queue_sums_init(vq);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* IO for interior vdevs and distributed spares never goes through the
|
||||||
|
* queue, so do not create kstat nodes for them.
|
||||||
|
* See zio_vdev_io_start().
|
||||||
|
*/
|
||||||
|
if (spa_load_state(vd->vdev_spa) != SPA_LOAD_TRYIMPORT &&
|
||||||
|
vd->vdev_ops->vdev_op_leaf &&
|
||||||
|
vd->vdev_ops != &vdev_draid_spare_ops) {
|
||||||
|
vdev_queue_kstats_init(vq);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -509,6 +730,9 @@ vdev_queue_fini(vdev_t *vd)
|
||||||
{
|
{
|
||||||
vdev_queue_t *vq = &vd->vdev_queue;
|
vdev_queue_t *vq = &vd->vdev_queue;
|
||||||
|
|
||||||
|
vdev_queue_kstats_fini(vq);
|
||||||
|
vdev_queue_sums_fini(vq);
|
||||||
|
|
||||||
for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
|
for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
|
||||||
if (vdev_queue_class_fifo(p))
|
if (vdev_queue_class_fifo(p))
|
||||||
list_destroy(&vq->vq_class[p].vqc_list);
|
list_destroy(&vq->vq_class[p].vqc_list);
|
||||||
|
@ -563,9 +787,12 @@ vdev_queue_pending_add(vdev_queue_t *vq, zio_t *zio)
|
||||||
{
|
{
|
||||||
ASSERT(MUTEX_HELD(&vq->vq_lock));
|
ASSERT(MUTEX_HELD(&vq->vq_lock));
|
||||||
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
|
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
|
||||||
vq->vq_cactive[zio->io_priority]++;
|
zio_priority_t p = zio->io_priority;
|
||||||
|
vq->vq_cactive[p]++;
|
||||||
vq->vq_active++;
|
vq->vq_active++;
|
||||||
if (vdev_queue_is_interactive(zio->io_priority)) {
|
VQSTAT_INC(vq, io_active);
|
||||||
|
VQSTAT_INC(vq, io_class_active[p]);
|
||||||
|
if (vdev_queue_is_interactive(p)) {
|
||||||
if (++vq->vq_ia_active == 1)
|
if (++vq->vq_ia_active == 1)
|
||||||
vq->vq_nia_credit = 1;
|
vq->vq_nia_credit = 1;
|
||||||
} else if (vq->vq_ia_active > 0) {
|
} else if (vq->vq_ia_active > 0) {
|
||||||
|
@ -580,9 +807,12 @@ vdev_queue_pending_remove(vdev_queue_t *vq, zio_t *zio)
|
||||||
{
|
{
|
||||||
ASSERT(MUTEX_HELD(&vq->vq_lock));
|
ASSERT(MUTEX_HELD(&vq->vq_lock));
|
||||||
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
|
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
|
||||||
vq->vq_cactive[zio->io_priority]--;
|
zio_priority_t p = zio->io_priority;
|
||||||
|
vq->vq_cactive[p]--;
|
||||||
vq->vq_active--;
|
vq->vq_active--;
|
||||||
if (vdev_queue_is_interactive(zio->io_priority)) {
|
VQSTAT_DEC(vq, io_active);
|
||||||
|
VQSTAT_DEC(vq, io_class_active[p]);
|
||||||
|
if (vdev_queue_is_interactive(p)) {
|
||||||
if (--vq->vq_ia_active == 0)
|
if (--vq->vq_ia_active == 0)
|
||||||
vq->vq_nia_credit = 0;
|
vq->vq_nia_credit = 0;
|
||||||
else
|
else
|
||||||
|
@ -777,6 +1007,8 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
|
||||||
flags | ZIO_FLAG_DONT_QUEUE, vdev_queue_agg_io_done, NULL);
|
flags | ZIO_FLAG_DONT_QUEUE, vdev_queue_agg_io_done, NULL);
|
||||||
aio->io_timestamp = first->io_timestamp;
|
aio->io_timestamp = first->io_timestamp;
|
||||||
|
|
||||||
|
VQSTAT_INC(vq, io_aggregated_total);
|
||||||
|
|
||||||
nio = first;
|
nio = first;
|
||||||
next_offset = first->io_offset;
|
next_offset = first->io_offset;
|
||||||
do {
|
do {
|
||||||
|
@ -785,6 +1017,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
|
||||||
ASSERT3P(dio, !=, NULL);
|
ASSERT3P(dio, !=, NULL);
|
||||||
zio_add_child(dio, aio);
|
zio_add_child(dio, aio);
|
||||||
vdev_queue_io_remove(vq, dio);
|
vdev_queue_io_remove(vq, dio);
|
||||||
|
VQSTAT_INC(vq, io_aggregated_data_total);
|
||||||
|
|
||||||
if (dio->io_offset != next_offset) {
|
if (dio->io_offset != next_offset) {
|
||||||
/* allocate a buffer for a read gap */
|
/* allocate a buffer for a read gap */
|
||||||
|
@ -793,6 +1026,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
|
||||||
abd = abd_alloc_for_io(
|
abd = abd_alloc_for_io(
|
||||||
dio->io_offset - next_offset, B_TRUE);
|
dio->io_offset - next_offset, B_TRUE);
|
||||||
abd_gang_add(aio->io_abd, abd, B_TRUE);
|
abd_gang_add(aio->io_abd, abd, B_TRUE);
|
||||||
|
VQSTAT_INC(vq, io_aggregated_read_gap_total);
|
||||||
}
|
}
|
||||||
if (dio->io_abd &&
|
if (dio->io_abd &&
|
||||||
(dio->io_size != abd_get_size(dio->io_abd))) {
|
(dio->io_size != abd_get_size(dio->io_abd))) {
|
||||||
|
@ -800,6 +1034,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
|
||||||
ASSERT3U(abd_get_size(dio->io_abd), >, dio->io_size);
|
ASSERT3U(abd_get_size(dio->io_abd), >, dio->io_size);
|
||||||
abd = abd_get_offset_size(dio->io_abd, 0, dio->io_size);
|
abd = abd_get_offset_size(dio->io_abd, 0, dio->io_size);
|
||||||
abd_gang_add(aio->io_abd, abd, B_TRUE);
|
abd_gang_add(aio->io_abd, abd, B_TRUE);
|
||||||
|
VQSTAT_INC(vq, io_aggregated_shrunk_total);
|
||||||
} else {
|
} else {
|
||||||
if (dio->io_flags & ZIO_FLAG_NODATA) {
|
if (dio->io_flags & ZIO_FLAG_NODATA) {
|
||||||
/* allocate a buffer for a write gap */
|
/* allocate a buffer for a write gap */
|
||||||
|
@ -807,6 +1042,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
|
||||||
ASSERT3P(dio->io_abd, ==, NULL);
|
ASSERT3P(dio->io_abd, ==, NULL);
|
||||||
abd_gang_add(aio->io_abd,
|
abd_gang_add(aio->io_abd,
|
||||||
abd_get_zeros(dio->io_size), B_TRUE);
|
abd_get_zeros(dio->io_size), B_TRUE);
|
||||||
|
VQSTAT_INC(vq, io_aggregated_write_gap_total);
|
||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
* We pass B_FALSE to abd_gang_add()
|
* We pass B_FALSE to abd_gang_add()
|
||||||
|
|
Loading…
Reference in New Issue