2010-05-17 22:18:00 +00:00
|
|
|
/*****************************************************************************\
|
|
|
|
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
|
|
|
|
* Copyright (C) 2007 The Regents of the University of California.
|
|
|
|
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
|
|
|
|
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
|
2008-05-26 04:38:26 +00:00
|
|
|
* UCRL-CODE-235197
|
|
|
|
*
|
2010-05-17 22:18:00 +00:00
|
|
|
* This file is part of the SPL, Solaris Porting Layer.
|
2013-03-05 01:26:55 +00:00
|
|
|
* For details, see <http://zfsonlinux.org/>.
|
2010-05-17 22:18:00 +00:00
|
|
|
*
|
|
|
|
* The SPL is free software; you can redistribute it and/or modify it
|
|
|
|
* under the terms of the GNU General Public License as published by the
|
|
|
|
* Free Software Foundation; either version 2 of the License, or (at your
|
|
|
|
* option) any later version.
|
2008-05-26 04:38:26 +00:00
|
|
|
*
|
2010-05-17 22:18:00 +00:00
|
|
|
* The SPL is distributed in the hope that it will be useful, but WITHOUT
|
2008-05-26 04:38:26 +00:00
|
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
|
|
* for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License along
|
2010-05-17 22:18:00 +00:00
|
|
|
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*****************************************************************************
|
|
|
|
* Solaris Porting Layer (SPL) Proc Implementation.
|
|
|
|
\*****************************************************************************/
|
2008-05-26 04:38:26 +00:00
|
|
|
|
2010-06-11 21:37:46 +00:00
|
|
|
#include <sys/systeminfo.h>
|
|
|
|
#include <sys/kstat.h>
|
2014-12-08 18:04:42 +00:00
|
|
|
#include <sys/kmem.h>
|
|
|
|
#include <sys/kmem_cache.h>
|
|
|
|
#include <sys/vmem.h>
|
2015-10-19 12:47:52 +00:00
|
|
|
#include <sys/taskq.h>
|
2014-12-08 18:04:42 +00:00
|
|
|
#include <linux/ctype.h>
|
2010-06-11 21:37:46 +00:00
|
|
|
#include <linux/kmod.h>
|
|
|
|
#include <linux/seq_file.h>
|
|
|
|
#include <linux/proc_compat.h>
|
2014-12-08 18:04:42 +00:00
|
|
|
#include <linux/uaccess.h>
|
2013-03-27 15:33:14 +00:00
|
|
|
#include <linux/version.h>
|
2008-04-18 23:39:58 +00:00
|
|
|
|
2013-03-27 15:33:14 +00:00
|
|
|
/*
 * Local mutable alias for struct ctl_table.  The proc handlers below take
 * a by-value copy of the caller's table and then modify that copy.
 * NOTE(review): __no_const presumably opts the type out of the
 * PaX/grsecurity CONSTIFY gcc plugin (>= 3.8 kernels), which would
 * otherwise make the struct read-only -- confirm against the plugin docs.
 */
#if defined(CONSTIFY_PLUGIN) && LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0)
typedef struct ctl_table __no_const spl_ctl_table;
#else
typedef struct ctl_table spl_ctl_table;
#endif
|
|
|
|
|
2008-04-18 23:39:58 +00:00
|
|
|
/* Range bounds handed to proc_doulongvec_minmax() through the dummy tables. */
static unsigned long table_min = 0;
static unsigned long table_max = ~0;

/* Handle for the registered kernel.spl sysctl tree (see spl_root below). */
static struct ctl_table_header *spl_header = NULL;

/* /proc/spl hierarchy created in spl_proc_init(), torn down in spl_proc_fini(). */
static struct proc_dir_entry *proc_spl = NULL;
static struct proc_dir_entry *proc_spl_kmem = NULL;
static struct proc_dir_entry *proc_spl_kmem_slab = NULL;
static struct proc_dir_entry *proc_spl_taskq_all = NULL;
static struct proc_dir_entry *proc_spl_taskq = NULL;
/* Deliberately non-static: other SPL code attaches entries under
 * /proc/spl/kstat -- presumably the kstat implementation; confirm. */
struct proc_dir_entry *proc_spl_kstat = NULL;
|
2008-04-18 23:39:58 +00:00
|
|
|
|
|
|
|
static int
|
|
|
|
proc_copyin_string(char *kbuffer, int kbuffer_size,
|
|
|
|
const char *ubuffer, int ubuffer_size)
|
|
|
|
{
|
|
|
|
int size;
|
|
|
|
|
|
|
|
if (ubuffer_size > kbuffer_size)
|
|
|
|
return -EOVERFLOW;
|
|
|
|
|
|
|
|
if (copy_from_user((void *)kbuffer, (void *)ubuffer, ubuffer_size))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
/* strip trailing whitespace */
|
|
|
|
size = strnlen(kbuffer, ubuffer_size);
|
|
|
|
while (size-- >= 0)
|
|
|
|
if (!isspace(kbuffer[size]))
|
|
|
|
break;
|
|
|
|
|
|
|
|
/* empty string */
|
|
|
|
if (size < 0)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
/* no space to terminate */
|
|
|
|
if (size == kbuffer_size)
|
|
|
|
return -EOVERFLOW;
|
|
|
|
|
|
|
|
kbuffer[size + 1] = 0;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
proc_copyout_string(char *ubuffer, int ubuffer_size,
|
|
|
|
const char *kbuffer, char *append)
|
|
|
|
{
|
|
|
|
/* NB if 'append' != NULL, it's a single character to append to the
|
|
|
|
* copied out string - usually "\n", for /proc entries and
|
|
|
|
* (i.e. a terminating zero byte) for sysctl entries
|
|
|
|
*/
|
|
|
|
int size = MIN(strlen(kbuffer), ubuffer_size);
|
|
|
|
|
|
|
|
if (copy_to_user(ubuffer, kbuffer, size))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
if (append != NULL && size < ubuffer_size) {
|
|
|
|
if (copy_to_user(ubuffer + size, append, 1))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
size++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return size;
|
|
|
|
}
|
|
|
|
|
2008-05-09 22:53:20 +00:00
|
|
|
#ifdef DEBUG_KMEM
|
2014-10-02 18:15:19 +00:00
|
|
|
/*
 * sysctl handler for the DEBUG_KMEM "kmem_used" entry.  The counter is
 * kept in an atomic, which proc_doulongvec_minmax() cannot read directly,
 * so the value is snapshotted into a local and exported through a
 * modified copy of the table.  Writes are silently consumed (the file is
 * effectively read-only; the position is just advanced).
 */
static int
proc_domemused(struct ctl_table *table, int write,
    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int rc = 0;
	unsigned long min = 0, max = ~0, val;
	/* Mutable copy of the caller's table; redirected at the local 'val'. */
	spl_ctl_table dummy = *table;

	dummy.data = &val;
	dummy.proc_handler = &proc_dointvec;
	dummy.extra1 = &min;
	dummy.extra2 = &max;

	if (write) {
		/* Writes are ignored; just consume the input. */
		*ppos += *lenp;
	} else {
		/* Snapshot the atomic counter for the read path. */
# ifdef HAVE_ATOMIC64_T
		val = atomic64_read((atomic64_t *)table->data);
# else
		val = atomic_read((atomic_t *)table->data);
# endif /* HAVE_ATOMIC64_T */
		rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos);
	}

	return (rc);
}
|
2015-07-20 19:18:56 +00:00
|
|
|
#endif /* DEBUG_KMEM */
|
2011-03-26 07:03:32 +00:00
|
|
|
|
2014-10-02 18:15:19 +00:00
|
|
|
/*
 * sysctl handler for the slab_{kmem,vmem}_{total,alloc,max} entries.
 * table->data is not a pointer here: it encodes a flag mask (a KMC_KMEM
 * or KMC_VMEM selector ORed with one of KMC_TOTAL/KMC_ALLOC/KMC_MAX).
 * The handler walks the global cache list, sums the requested statistic
 * over matching caches, and exports the sum through a modified table
 * copy.  Writes are silently consumed (read-only semantics).
 */
static int
proc_doslab(struct ctl_table *table, int write,
    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int rc = 0;
	unsigned long min = 0, max = ~0, val = 0, mask;
	spl_ctl_table dummy = *table;
	spl_kmem_cache_t *skc;

	dummy.data = &val;
	dummy.proc_handler = &proc_dointvec;
	dummy.extra1 = &min;
	dummy.extra2 = &max;

	if (write) {
		/* Writes are ignored; just consume the input. */
		*ppos += *lenp;
	} else {
		/* Hold the semaphore so caches cannot be added/removed
		 * while the list is walked. */
		down_read(&spl_kmem_cache_sem);
		mask = (unsigned long)table->data;

		list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {

			/* Only use slabs of the correct kmem/vmem type */
			if (!(skc->skc_flags & mask))
				continue;

			/* Sum the specified field for selected slabs */
			switch (mask & (KMC_TOTAL | KMC_ALLOC | KMC_MAX)) {
			case KMC_TOTAL:
				val += skc->skc_slab_size * skc->skc_slab_total;
				break;
			case KMC_ALLOC:
				val += skc->skc_obj_size * skc->skc_obj_alloc;
				break;
			case KMC_MAX:
				val += skc->skc_obj_size * skc->skc_obj_max;
				break;
			}
		}

		up_read(&spl_kmem_cache_sem);
		rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos);
	}

	return (rc);
}
|
2008-04-18 23:39:58 +00:00
|
|
|
|
2014-10-02 18:15:19 +00:00
|
|
|
/*
 * sysctl handler for kernel.spl.hostid.  The hostid is stored and shown
 * as bare hex (no "0x" prefix), so both directions are done by hand.
 */
static int
proc_dohostid(struct ctl_table *table, int write,
    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int len, rc = 0;
	char *end, str[32];

	if (write) {
		/* We can't use proc_doulongvec_minmax() in the write
		 * case here because hostid while a hex value has no
		 * leading 0x which confuses the helper function. */
		rc = proc_copyin_string(str, sizeof(str), buffer, *lenp);
		if (rc < 0)
			return (rc);

		spl_hostid = simple_strtoul(str, &end, 16);
		/* end == str means not a single hex digit was consumed. */
		if (str == end)
			return (-EINVAL);

	} else {
		/* Format the current hostid and copy out the requested
		 * slice, appending a newline for /proc readers. */
		len = snprintf(str, sizeof(str), "%lx", spl_hostid);
		if (*ppos >= len)
			rc = 0;
		else
			rc = proc_copyout_string(buffer,*lenp,str+*ppos,"\n");

		/* On success report and advance by the bytes copied. */
		if (rc >= 0) {
			*lenp = rc;
			*ppos += rc;
		}
	}

	return (rc);
}
|
|
|
|
|
2015-10-19 12:47:52 +00:00
|
|
|
/* Emit the column header line shared by /proc/spl/taskq{,-all}. */
static void
taskq_seq_show_headers(struct seq_file *f)
{
	seq_printf(f, "%-25s %5s %5s %5s %5s %5s %5s %12s %5s %10s\n",
	    "taskq", "act", "nthr", "spwn", "maxt", "pri", "mina",
	    "maxa", "cura", "flags");
}
|
|
|
|
|
|
|
|
/* indices into the lheads array below */
#define LHEAD_PEND 0	/* tq_pend_list */
#define LHEAD_PRIO 1	/* tq_prio_list */
#define LHEAD_DELAY 2	/* tq_delay_list */
#define LHEAD_WAIT 3	/* tq_wait_waitq.task_list */
#define LHEAD_ACTIVE 4	/* tq_active_list */
#define LHEAD_SIZE 5	/* number of tracked list heads */
|
|
|
|
|
|
|
|
/*
 * Render one taskq into the seq_file.  Shared by both /proc/spl/taskq
 * (allflag == B_FALSE, skips taskqs whose lists are all empty) and
 * /proc/spl/taskq-all (allflag == B_TRUE, shows every taskq).
 *
 * Locking: tq_lock is taken first (with the taskq's lockdep class), then
 * the waitq lock nested inside it.  The waitq lock is dropped as early as
 * possible -- immediately if the wait list is empty, otherwise only after
 * the wait list has been printed.  Unlock order is the reverse of lock
 * order on every path.
 */
static int
taskq_seq_show_impl(struct seq_file *f, void *p, boolean_t allflag)
{
	taskq_t *tq = p;
	taskq_thread_t *tqt;
	wait_queue_t *wq;
	struct task_struct *tsk;
	taskq_ent_t *tqe;
	char name[100];
	/* lheads[i] is NULLed below when the corresponding list is empty. */
	struct list_head *lheads[LHEAD_SIZE], *lh;
	static char *list_names[LHEAD_SIZE] =
	    {"pend", "prio", "delay", "wait", "active" };
	int i, j, have_lheads = 0;
	unsigned long wflags, flags;

	spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
	spin_lock_irqsave(&tq->tq_wait_waitq.lock, wflags);

	/* get the various lists and check whether they're empty */
	lheads[LHEAD_PEND] = &tq->tq_pend_list;
	lheads[LHEAD_PRIO] = &tq->tq_prio_list;
	lheads[LHEAD_DELAY] = &tq->tq_delay_list;
	lheads[LHEAD_WAIT] = &tq->tq_wait_waitq.task_list;
	lheads[LHEAD_ACTIVE] = &tq->tq_active_list;

	for (i = 0; i < LHEAD_SIZE; ++i) {
		if (list_empty(lheads[i]))
			lheads[i] = NULL;
		else
			++have_lheads;
	}

	/* early return in non-"all" mode if lists are all empty */
	if (!allflag && !have_lheads) {
		spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
		spin_unlock_irqrestore(&tq->tq_lock, flags);
		return (0);
	}

	/* unlock the waitq quickly */
	if (!lheads[LHEAD_WAIT])
		spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);

	/* show the base taskq contents */
	snprintf(name, sizeof(name), "%s/%d", tq->tq_name, tq->tq_instance);
	seq_printf(f, "%-25s ", name);
	seq_printf(f, "%5d %5d %5d %5d %5d %5d %12d %5d %10x\n",
	    tq->tq_nactive, tq->tq_nthreads, tq->tq_nspawn,
	    tq->tq_maxthreads, tq->tq_pri, tq->tq_minalloc, tq->tq_maxalloc,
	    tq->tq_nalloc, tq->tq_flags);

	/* show the active list: "[pid]func(arg)", two entries per line */
	if (lheads[LHEAD_ACTIVE]) {
		j = 0;
		list_for_each_entry(tqt, &tq->tq_active_list, tqt_active_list) {
			if (j == 0)
				seq_printf(f, "\t%s:", list_names[LHEAD_ACTIVE]);
			else if (j == 2) {
				seq_printf(f, "\n\t ");
				j = 0;
			}
			seq_printf(f, " [%d]%pf(%ps)",
			    tqt->tqt_thread->pid,
			    tqt->tqt_task->tqent_func,
			    tqt->tqt_task->tqent_arg);
			++j;
		}
		seq_printf(f, "\n");
	}

	/* pend, prio, delay and wait lists, in LHEAD_* index order */
	for (i = LHEAD_PEND; i <= LHEAD_WAIT; ++i)
		if (lheads[i]) {
			j = 0;
			list_for_each(lh, lheads[i]) {
				/* show the wait waitq list */
				if (i == LHEAD_WAIT) {
					wq = list_entry(lh, wait_queue_t, task_list);
					if (j == 0)
						seq_printf(f, "\t%s:",
						    list_names[i]);
					else if (j == 12) {
						seq_printf(f, "\n\t ");
						j = 0;
					}
					tsk = wq->private;
					seq_printf(f, " %d", tsk->pid);
				/* pend, prio and delay lists */
				} else {
					tqe = list_entry(lh, taskq_ent_t,
					    tqent_list);
					if (j == 0)
						seq_printf(f, "\t%s:",
						    list_names[i]);
					else if (j == 2) {
						seq_printf(f, "\n\t ");
						j = 0;
					}
					seq_printf(f, " %pf(%ps)",
					    tqe->tqent_func,
					    tqe->tqent_arg);
				}
				++j;
			}
			seq_printf(f, "\n");
		}
	/* the waitq lock was kept only while its list was being printed */
	if (lheads[LHEAD_WAIT])
		spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
	spin_unlock_irqrestore(&tq->tq_lock, flags);

	return (0);
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
taskq_all_seq_show(struct seq_file *f, void *p)
|
|
|
|
{
|
|
|
|
return (taskq_seq_show_impl(f, p, B_TRUE));
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
taskq_seq_show(struct seq_file *f, void *p)
|
|
|
|
{
|
|
|
|
return (taskq_seq_show_impl(f, p, B_FALSE));
|
|
|
|
}
|
|
|
|
|
|
|
|
static void *
|
|
|
|
taskq_seq_start(struct seq_file *f, loff_t *pos)
|
|
|
|
{
|
|
|
|
struct list_head *p;
|
|
|
|
loff_t n = *pos;
|
|
|
|
|
|
|
|
down_read(&tq_list_sem);
|
|
|
|
if (!n)
|
|
|
|
taskq_seq_show_headers(f);
|
|
|
|
|
|
|
|
p = tq_list.next;
|
|
|
|
while (n--) {
|
|
|
|
p = p->next;
|
|
|
|
if (p == &tq_list)
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
return (list_entry(p, taskq_t, tq_taskqs));
|
|
|
|
}
|
|
|
|
|
|
|
|
static void *
|
|
|
|
taskq_seq_next(struct seq_file *f, void *p, loff_t *pos)
|
|
|
|
{
|
|
|
|
taskq_t *tq = p;
|
|
|
|
|
|
|
|
++*pos;
|
|
|
|
return ((tq->tq_taskqs.next == &tq_list) ?
|
|
|
|
NULL : list_entry(tq->tq_taskqs.next, taskq_t, tq_taskqs));
|
|
|
|
}
|
|
|
|
|
2008-06-27 21:40:11 +00:00
|
|
|
/* Emit the two header rows for /proc/spl/kmem/slab in a single call. */
static void
slab_seq_show_headers(struct seq_file *f)
{
	seq_printf(f,
	    "--------------------- cache ----------"
	    "--------------------------------------------- "
	    "----- slab ------ "
	    "---- object ----- "
	    "--- emergency ---\n"
	    "name "
	    " flags size alloc slabsize objsize "
	    "total alloc max "
	    "total alloc max "
	    "dlock alloc max\n");
}
|
|
|
|
|
|
|
|
/*
 * seq_file show callback for /proc/spl/kmem/slab: print one row of
 * statistics for a single SPL slab cache, under its skc_lock so the
 * counters are a consistent snapshot.
 */
static int
slab_seq_show(struct seq_file *f, void *p)
{
	spl_kmem_cache_t *skc = p;

	ASSERT(skc->skc_magic == SKC_MAGIC);

	/*
	 * Backed by Linux slab see /proc/slabinfo.
	 */
	if (skc->skc_flags & KMC_SLAB)
		return (0);

	spin_lock(&skc->skc_lock);
	seq_printf(f, "%-36s ", skc->skc_name);
	/* flags, total/alloc byte sizes, per-slab geometry, slab counts,
	 * object counts, then the emergency-object statistics */
	seq_printf(f, "0x%05lx %9lu %9lu %8u %8u "
	    "%5lu %5lu %5lu %5lu %5lu %5lu %5lu %5lu %5lu\n",
	    (long unsigned)skc->skc_flags,
	    (long unsigned)(skc->skc_slab_size * skc->skc_slab_total),
	    (long unsigned)(skc->skc_obj_size * skc->skc_obj_alloc),
	    (unsigned)skc->skc_slab_size,
	    (unsigned)skc->skc_obj_size,
	    (long unsigned)skc->skc_slab_total,
	    (long unsigned)skc->skc_slab_alloc,
	    (long unsigned)skc->skc_slab_max,
	    (long unsigned)skc->skc_obj_total,
	    (long unsigned)skc->skc_obj_alloc,
	    (long unsigned)skc->skc_obj_max,
	    (long unsigned)skc->skc_obj_deadlock,
	    (long unsigned)skc->skc_obj_emergency,
	    (long unsigned)skc->skc_obj_emergency_max);

	spin_unlock(&skc->skc_lock);

	return 0;
}
|
|
|
|
|
|
|
|
static void *
|
|
|
|
slab_seq_start(struct seq_file *f, loff_t *pos)
|
|
|
|
{
|
|
|
|
struct list_head *p;
|
|
|
|
loff_t n = *pos;
|
|
|
|
|
|
|
|
down_read(&spl_kmem_cache_sem);
|
|
|
|
if (!n)
|
|
|
|
slab_seq_show_headers(f);
|
|
|
|
|
|
|
|
p = spl_kmem_cache_list.next;
|
|
|
|
while (n--) {
|
|
|
|
p = p->next;
|
|
|
|
if (p == &spl_kmem_cache_list)
|
2014-11-05 22:30:35 +00:00
|
|
|
return (NULL);
|
2008-06-27 21:40:11 +00:00
|
|
|
}
|
|
|
|
|
2014-11-05 22:30:35 +00:00
|
|
|
return (list_entry(p, spl_kmem_cache_t, skc_list));
|
2008-06-27 21:40:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void *
|
|
|
|
slab_seq_next(struct seq_file *f, void *p, loff_t *pos)
|
|
|
|
{
|
|
|
|
spl_kmem_cache_t *skc = p;
|
|
|
|
|
|
|
|
++*pos;
|
2014-11-05 22:30:35 +00:00
|
|
|
return ((skc->skc_list.next == &spl_kmem_cache_list) ?
|
2010-03-04 20:14:56 +00:00
|
|
|
NULL : list_entry(skc->skc_list.next,spl_kmem_cache_t,skc_list));
|
2008-06-27 21:40:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
slab_seq_stop(struct seq_file *f, void *v)
|
|
|
|
{
|
|
|
|
up_read(&spl_kmem_cache_sem);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct seq_operations slab_seq_ops = {
|
|
|
|
.show = slab_seq_show,
|
|
|
|
.start = slab_seq_start,
|
|
|
|
.next = slab_seq_next,
|
|
|
|
.stop = slab_seq_stop,
|
|
|
|
};
|
|
|
|
|
|
|
|
static int
|
|
|
|
proc_slab_open(struct inode *inode, struct file *filp)
|
|
|
|
{
|
|
|
|
return seq_open(filp, &slab_seq_ops);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct file_operations proc_slab_operations = {
|
|
|
|
.open = proc_slab_open,
|
|
|
|
.read = seq_read,
|
|
|
|
.llseek = seq_lseek,
|
|
|
|
.release = seq_release,
|
|
|
|
};
|
|
|
|
|
2015-10-19 12:47:52 +00:00
|
|
|
static void
|
|
|
|
taskq_seq_stop(struct seq_file *f, void *v)
|
|
|
|
{
|
|
|
|
up_read(&tq_list_sem);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct seq_operations taskq_all_seq_ops = {
|
|
|
|
.show = taskq_all_seq_show,
|
|
|
|
.start = taskq_seq_start,
|
|
|
|
.next = taskq_seq_next,
|
|
|
|
.stop = taskq_seq_stop,
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct seq_operations taskq_seq_ops = {
|
|
|
|
.show = taskq_seq_show,
|
|
|
|
.start = taskq_seq_start,
|
|
|
|
.next = taskq_seq_next,
|
|
|
|
.stop = taskq_seq_stop,
|
|
|
|
};
|
|
|
|
|
|
|
|
static int
|
|
|
|
proc_taskq_all_open(struct inode *inode, struct file *filp)
|
|
|
|
{
|
|
|
|
return seq_open(filp, &taskq_all_seq_ops);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
proc_taskq_open(struct inode *inode, struct file *filp)
|
|
|
|
{
|
|
|
|
return seq_open(filp, &taskq_seq_ops);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct file_operations proc_taskq_all_operations = {
|
|
|
|
.open = proc_taskq_all_open,
|
|
|
|
.read = seq_read,
|
|
|
|
.llseek = seq_lseek,
|
|
|
|
.release = seq_release,
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct file_operations proc_taskq_operations = {
|
|
|
|
.open = proc_taskq_open,
|
|
|
|
.read = seq_read,
|
|
|
|
.llseek = seq_lseek,
|
|
|
|
.release = seq_release,
|
|
|
|
};
|
|
|
|
|
2008-05-05 20:18:49 +00:00
|
|
|
/*
 * kernel.spl.kmem sysctl entries.  The slab_* entries abuse .data to
 * carry a KMC_* flag mask (decoded in proc_doslab()) rather than a
 * pointer; all entries are read-only (mode 0444).
 */
static struct ctl_table spl_kmem_table[] = {
#ifdef DEBUG_KMEM
	{
		/* current bytes allocated, tracked only with DEBUG_KMEM */
		.procname = "kmem_used",
		.data = &kmem_alloc_used,
# ifdef HAVE_ATOMIC64_T
		.maxlen = sizeof(atomic64_t),
# else
		.maxlen = sizeof(atomic_t),
# endif /* HAVE_ATOMIC64_T */
		.mode = 0444,
		.proc_handler = &proc_domemused,
	},
	{
		/* historical high-water mark of kmem_used */
		.procname = "kmem_max",
		.data = &kmem_alloc_max,
		.maxlen = sizeof(unsigned long),
		.extra1 = &table_min,
		.extra2 = &table_max,
		.mode = 0444,
		.proc_handler = &proc_doulongvec_minmax,
	},
#endif /* DEBUG_KMEM */
	{
		.procname = "slab_kmem_total",
		.data = (void *)(KMC_KMEM | KMC_TOTAL),
		.maxlen = sizeof(unsigned long),
		.extra1 = &table_min,
		.extra2 = &table_max,
		.mode = 0444,
		.proc_handler = &proc_doslab,
	},
	{
		.procname = "slab_kmem_alloc",
		.data = (void *)(KMC_KMEM | KMC_ALLOC),
		.maxlen = sizeof(unsigned long),
		.extra1 = &table_min,
		.extra2 = &table_max,
		.mode = 0444,
		.proc_handler = &proc_doslab,
	},
	{
		.procname = "slab_kmem_max",
		.data = (void *)(KMC_KMEM | KMC_MAX),
		.maxlen = sizeof(unsigned long),
		.extra1 = &table_min,
		.extra2 = &table_max,
		.mode = 0444,
		.proc_handler = &proc_doslab,
	},
	{
		.procname = "slab_vmem_total",
		.data = (void *)(KMC_VMEM | KMC_TOTAL),
		.maxlen = sizeof(unsigned long),
		.extra1 = &table_min,
		.extra2 = &table_max,
		.mode = 0444,
		.proc_handler = &proc_doslab,
	},
	{
		.procname = "slab_vmem_alloc",
		.data = (void *)(KMC_VMEM | KMC_ALLOC),
		.maxlen = sizeof(unsigned long),
		.extra1 = &table_min,
		.extra2 = &table_max,
		.mode = 0444,
		.proc_handler = &proc_doslab,
	},
	{
		.procname = "slab_vmem_max",
		.data = (void *)(KMC_VMEM | KMC_MAX),
		.maxlen = sizeof(unsigned long),
		.extra1 = &table_min,
		.extra2 = &table_max,
		.mode = 0444,
		.proc_handler = &proc_doslab,
	},
	{0},	/* sentinel */
};
|
2008-05-08 23:21:47 +00:00
|
|
|
|
|
|
|
/* kernel.spl.kstat: intentionally empty directory (sentinel only). */
static struct ctl_table spl_kstat_table[] = {
	{0},
};
|
2008-05-05 20:18:49 +00:00
|
|
|
|
|
|
|
/* Contents of the kernel.spl sysctl directory. */
static struct ctl_table spl_table[] = {
	/* NB No .strategy entries have been provided since
	 * sysctl(8) prefers to go via /proc for portability.
	 */
	{
		/* read-only SPL version string */
		.procname = "version",
		.data = spl_version,
		.maxlen = sizeof(spl_version),
		.mode = 0444,
		.proc_handler = &proc_dostring,
	},
	{
		/* read-write hex hostid, handled by proc_dohostid() */
		.procname = "hostid",
		.data = &spl_hostid,
		.maxlen = sizeof(unsigned long),
		.mode = 0644,
		.proc_handler = &proc_dohostid,
	},
	{
		.procname = "kmem",
		.mode = 0555,
		.child = spl_kmem_table,
	},
	{
		.procname = "kstat",
		.mode = 0555,
		.child = spl_kstat_table,
	},
	{ 0 },	/* sentinel */
};
|
|
|
|
|
2008-05-05 20:18:49 +00:00
|
|
|
/* Places the "spl" directory (spl_table) under its parent in spl_root. */
static struct ctl_table spl_dir[] = {
	{
		.procname = "spl",
		.mode = 0555,
		.child = spl_table,
	},
	{ 0 }
};
|
|
|
|
|
|
|
|
/* Root of the registered tree: anchors everything under "kernel". */
static struct ctl_table spl_root[] = {
	{
#ifdef HAVE_CTL_NAME
	/* legacy binary-sysctl id, only on kernels that still have it */
	.ctl_name = CTL_KERN,
#endif
	.procname = "kernel",
	.mode = 0555,
	.child = spl_dir,
	},
	{ 0 }
};
|
|
|
|
|
|
|
|
int
|
2011-11-11 17:03:31 +00:00
|
|
|
spl_proc_init(void)
|
2008-04-18 23:39:58 +00:00
|
|
|
{
|
2008-05-07 17:58:22 +00:00
|
|
|
int rc = 0;
|
2008-04-18 23:39:58 +00:00
|
|
|
|
2014-09-30 21:10:35 +00:00
|
|
|
spl_header = register_sysctl_table(spl_root);
|
2008-04-18 23:39:58 +00:00
|
|
|
if (spl_header == NULL)
|
2014-11-05 22:30:35 +00:00
|
|
|
return (-EUNATCH);
|
2008-05-05 20:18:49 +00:00
|
|
|
|
2008-06-04 06:00:46 +00:00
|
|
|
proc_spl = proc_mkdir("spl", NULL);
|
2014-11-05 22:30:35 +00:00
|
|
|
if (proc_spl == NULL) {
|
|
|
|
rc = -EUNATCH;
|
|
|
|
goto out;
|
|
|
|
}
|
2008-05-07 17:58:22 +00:00
|
|
|
|
2015-10-19 12:47:52 +00:00
|
|
|
proc_spl_taskq_all = proc_create_data("taskq-all", 0444,
|
|
|
|
proc_spl, &proc_taskq_all_operations, NULL);
|
|
|
|
if (proc_spl_taskq_all == NULL) {
|
|
|
|
rc = -EUNATCH;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
proc_spl_taskq = proc_create_data("taskq", 0444,
|
|
|
|
proc_spl, &proc_taskq_operations, NULL);
|
|
|
|
if (proc_spl_taskq == NULL) {
|
|
|
|
rc = -EUNATCH;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2008-06-04 06:00:46 +00:00
|
|
|
proc_spl_kmem = proc_mkdir("kmem", proc_spl);
|
2014-11-05 22:30:35 +00:00
|
|
|
if (proc_spl_kmem == NULL) {
|
|
|
|
rc = -EUNATCH;
|
|
|
|
goto out;
|
|
|
|
}
|
2008-06-27 21:40:11 +00:00
|
|
|
|
2013-07-03 20:34:52 +00:00
|
|
|
proc_spl_kmem_slab = proc_create_data("slab", 0444,
|
|
|
|
proc_spl_kmem, &proc_slab_operations, NULL);
|
2014-11-05 22:30:35 +00:00
|
|
|
if (proc_spl_kmem_slab == NULL) {
|
|
|
|
rc = -EUNATCH;
|
|
|
|
goto out;
|
|
|
|
}
|
2008-06-27 21:40:11 +00:00
|
|
|
|
2008-06-04 06:00:46 +00:00
|
|
|
proc_spl_kstat = proc_mkdir("kstat", proc_spl);
|
2014-11-05 22:30:35 +00:00
|
|
|
if (proc_spl_kstat == NULL) {
|
|
|
|
rc = -EUNATCH;
|
|
|
|
goto out;
|
|
|
|
}
|
2008-05-07 17:58:22 +00:00
|
|
|
out:
|
2008-06-04 06:00:46 +00:00
|
|
|
if (rc) {
|
|
|
|
remove_proc_entry("kstat", proc_spl);
|
2008-06-27 21:40:11 +00:00
|
|
|
remove_proc_entry("slab", proc_spl_kmem);
|
2008-06-04 06:00:46 +00:00
|
|
|
remove_proc_entry("kmem", proc_spl);
|
2015-10-19 12:47:52 +00:00
|
|
|
remove_proc_entry("taskq-all", proc_spl);
|
|
|
|
remove_proc_entry("taskq", proc_spl);
|
2008-06-04 06:09:16 +00:00
|
|
|
remove_proc_entry("spl", NULL);
|
2014-09-30 21:10:35 +00:00
|
|
|
unregister_sysctl_table(spl_header);
|
2008-06-04 06:00:46 +00:00
|
|
|
}
|
|
|
|
|
2014-11-05 22:30:35 +00:00
|
|
|
return (rc);
|
2008-04-18 23:39:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Tear down everything created in spl_proc_init(): the /proc/spl
 * hierarchy (children before parents) and then the sysctl tree.
 */
void
spl_proc_fini(void)
{
	remove_proc_entry("kstat", proc_spl);
	remove_proc_entry("slab", proc_spl_kmem);
	remove_proc_entry("kmem", proc_spl);
	remove_proc_entry("taskq-all", proc_spl);
	remove_proc_entry("taskq", proc_spl);
	remove_proc_entry("spl", NULL);

	/* spl_proc_init() must have succeeded for the module to load. */
	ASSERT(spl_header != NULL);
	unregister_sysctl_table(spl_header);
}
|