diff --git a/ChangeLog b/ChangeLog index 0106bbd5e1..a65d6b15d5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2008-04-26 Brian Behlendorf + + * include/sys/mutex.h : Implemented a close approximation + of adaptive mutexes. These changes however required me to + export a new symbol from the kernel proper 'task_curr()' + which means we are now dependant on a patched kernel. + 2008-04-24 Brian Behlendorf * : Tag spl-0.2.1 diff --git a/include/sys/debug.h b/include/sys/debug.h index 64aa1808f6..39585ba196 100644 --- a/include/sys/debug.h +++ b/include/sys/debug.h @@ -310,16 +310,19 @@ do { \ return RETURN__ret; \ } while (0) -#define ENTRY \ +#define __ENTRY(subsys) \ do { \ - CDEBUG(D_TRACE, "Process entered\n"); \ + __CDEBUG(NULL, subsys, D_TRACE, "Process entered\n"); \ } while (0) -#define EXIT \ +#define __EXIT(subsys) \ do { \ - CDEBUG(D_TRACE, "Process leaving\n"); \ + __CDEBUG(NULL, subsys, D_TRACE, "Process leaving\n"); \ } while(0) +#define ENTRY __ENTRY(DEBUG_SUBSYSTEM) +#define EXIT __EXIT(DEBUG_SUBSYSTEM) + extern int spl_debug_vmsg(spl_debug_limit_state_t *cdls, int subsys, int mask, const char *file, const char *fn, const int line, const char *format1, va_list args, const char *format2, ...); diff --git a/include/sys/kmem.h b/include/sys/kmem.h index 73965c58b6..cc56ddd368 100644 --- a/include/sys/kmem.h +++ b/include/sys/kmem.h @@ -54,9 +54,9 @@ extern int kmem_warning_flag; if (unlikely(atomic64_read(&kmem_alloc_used)>kmem_alloc_max)) \ kmem_alloc_max = atomic64_read(&kmem_alloc_used); \ \ - __CDEBUG_LIMIT(S_KMEM, D_INFO, "kmem_alloc(%d, 0x%x)'d " \ + __CDEBUG_LIMIT(S_KMEM, D_INFO, "kmem_alloc(%d, 0x%x) = %p " \ "(%ld/%ld)\n", (int)(size), (int)(flags), \ - atomic64_read(&kmem_alloc_used), \ + _ptr_, atomic64_read(&kmem_alloc_used), \ kmem_alloc_max); \ } \ \ @@ -70,8 +70,8 @@ extern int kmem_warning_flag; ({ \ ASSERT((ptr) || (size > 0)); \ atomic64_sub((size), &kmem_alloc_used); \ - __CDEBUG_LIMIT(S_KMEM, D_INFO, "kmem_free(%d)'d (%ld/%ld)\n", \ - (int)(size), atomic64_read(&kmem_alloc_used), \ + __CDEBUG_LIMIT(S_KMEM, D_INFO, "kmem_free(%p, %d) (%ld/%ld)\n", \ + (ptr), (int)(size), atomic64_read(&kmem_alloc_used), \ kmem_alloc_max); \ memset(ptr, 0x5a, (size)); /* Poison */ \ kfree(ptr); \ @@ -99,9 +99,9 @@ extern int kmem_warning_flag; if (unlikely(atomic64_read(&vmem_alloc_used)>vmem_alloc_max)) \ vmem_alloc_max = atomic64_read(&vmem_alloc_used); \ \ - __CDEBUG_LIMIT(S_KMEM, D_INFO, "vmem_alloc(%d, 0x%x)'d " \ + __CDEBUG_LIMIT(S_KMEM, D_INFO, "vmem_alloc(%d, 0x%x) = %p " \ "(%ld/%ld)\n", (int)(size), (int)(flags), \ - atomic64_read(&vmem_alloc_used), \ + _ptr_, atomic64_read(&vmem_alloc_used), \ vmem_alloc_max); \ } \ \ @@ -116,8 +116,8 @@ extern int kmem_warning_flag; ({ \ ASSERT((ptr) || (size > 0)); \ atomic64_sub((size), &vmem_alloc_used); \ - __CDEBUG_LIMIT(S_KMEM, D_INFO, "vmem_free(%d)'d (%ld/%ld)\n", \ - (int)(size), atomic64_read(&vmem_alloc_used), \ + __CDEBUG_LIMIT(S_KMEM, D_INFO, "vmem_free(%p, %d) (%ld/%ld)\n", \ + (ptr), (int)(size), atomic64_read(&vmem_alloc_used), \ vmem_alloc_max); \ memset(ptr, 0x5a, (size)); /* Poison */ \ vfree(ptr); \ diff --git a/include/sys/mutex.h b/include/sys/mutex.h index 045842d728..fd787bb9d5 100644 --- a/include/sys/mutex.h +++ b/include/sys/mutex.h @@ -8,175 +8,86 @@ extern "C" { #include #include #include +#include -/* See the "Big Theory Statement" in solaris mutex.c. - * - * Spin mutexes apparently aren't needed by zfs so we assert - * if ibc is non-zero. 
- * - * Our impementation of adaptive mutexes aren't really adaptive. - * They go to sleep every time. - */ +//#define DEBUG_MUTEX +#undef DEBUG_MUTEX #define MUTEX_DEFAULT 0 -#define MUTEX_HELD(x) (mutex_owned(x)) +#define MUTEX_SPIN 1 +#define MUTEX_ADAPTIVE 2 + +#define MUTEX_ENTER_TOTAL 0 +#define MUTEX_ENTER_NOT_HELD 1 +#define MUTEX_ENTER_SPIN 2 +#define MUTEX_ENTER_SLEEP 3 +#define MUTEX_TRYENTER_TOTAL 4 +#define MUTEX_TRYENTER_NOT_HELD 5 +#define MUTEX_STATS_SIZE 6 #define KM_MAGIC 0x42424242 #define KM_POISON 0x84 typedef struct { - int km_magic; + int32_t km_magic; + int16_t km_type; + int16_t km_name_size; char *km_name; struct task_struct *km_owner; - struct semaphore km_sem; - spinlock_t km_lock; + struct semaphore *km_sem; +#ifdef DEBUG_MUTEX + int *km_stats; + struct list_head km_list; +#endif } kmutex_t; +extern int mutex_spin_max; + +#ifdef DEBUG_MUTEX +extern int mutex_stats[MUTEX_STATS_SIZE]; +extern struct mutex mutex_stats_lock; +extern struct list_head mutex_stats_list; +#define MUTEX_STAT_INC(stats, stat) ((stats)[stat]++) +#else +#define MUTEX_STAT_INC(stats, stat) +#endif + +int spl_mutex_init(void); +void spl_mutex_fini(void); + +extern void __spl_mutex_init(kmutex_t *mp, char *name, int type, void *ibc); +extern void __spl_mutex_destroy(kmutex_t *mp); +extern int __mutex_tryenter(kmutex_t *mp); +extern void __mutex_enter(kmutex_t *mp); +extern void __mutex_exit(kmutex_t *mp); +extern int __mutex_owned(kmutex_t *mp); +extern kthread_t *__spl_mutex_owner(kmutex_t *mp); + #undef mutex_init -static __inline__ void -mutex_init(kmutex_t *mp, char *name, int type, void *ibc) -{ - ENTRY; - ASSERT(mp); - ASSERT(ibc == NULL); /* XXX - Spin mutexes not needed */ - ASSERT(type == MUTEX_DEFAULT); /* XXX - Only default type supported */ - - mp->km_magic = KM_MAGIC; - spin_lock_init(&mp->km_lock); - sema_init(&mp->km_sem, 1); - mp->km_owner = NULL; - mp->km_name = NULL; - - if (name) { - mp->km_name = kmalloc(strlen(name) + 1, GFP_KERNEL); - if (mp->km_name) - strcpy(mp->km_name, name); - } - EXIT; -} - #undef mutex_destroy -static __inline__ void -mutex_destroy(kmutex_t *mp) -{ - ENTRY; - ASSERT(mp); - ASSERT(mp->km_magic == KM_MAGIC); - spin_lock(&mp->km_lock); - if (mp->km_name) - kfree(mp->km_name); +#define mutex_init(mp, name, type, ibc) \ +({ \ + __ENTRY(S_MUTEX); \ + if ((name) == NULL) \ + __spl_mutex_init(mp, #mp, type, ibc); \ + else \ + __spl_mutex_init(mp, name, type, ibc); \ + __EXIT(S_MUTEX); \ +}) +#define mutex_destroy(mp) \ +({ \ + __ENTRY(S_MUTEX); \ + __spl_mutex_destroy(mp); \ + __EXIT(S_MUTEX); \ +}) - memset(mp, KM_POISON, sizeof(*mp)); - spin_unlock(&mp->km_lock); - EXIT; -} - -static __inline__ void -mutex_enter(kmutex_t *mp) -{ - ENTRY; - ASSERT(mp); - ASSERT(mp->km_magic == KM_MAGIC); - spin_lock(&mp->km_lock); - - if (unlikely(in_atomic() && !current->exit_state)) { - spin_unlock(&mp->km_lock); - __CDEBUG_LIMIT(S_MUTEX, D_ERROR, - "May schedule while atomic: %s/0x%08x/%d\n", - current->comm, preempt_count(), current->pid); - SBUG(); - } - - spin_unlock(&mp->km_lock); - - down(&mp->km_sem); - - spin_lock(&mp->km_lock); - ASSERT(mp->km_owner == NULL); - mp->km_owner = current; - spin_unlock(&mp->km_lock); - EXIT; -} - -/* Return 1 if we acquired the mutex, else zero. 
*/ -static __inline__ int -mutex_tryenter(kmutex_t *mp) -{ - int rc; - ENTRY; - - ASSERT(mp); - ASSERT(mp->km_magic == KM_MAGIC); - spin_lock(&mp->km_lock); - - if (unlikely(in_atomic() && !current->exit_state)) { - spin_unlock(&mp->km_lock); - __CDEBUG_LIMIT(S_MUTEX, D_ERROR, - "May schedule while atomic: %s/0x%08x/%d\n", - current->comm, preempt_count(), current->pid); - SBUG(); - } - - spin_unlock(&mp->km_lock); - rc = down_trylock(&mp->km_sem); /* returns 0 if acquired */ - if (rc == 0) { - spin_lock(&mp->km_lock); - ASSERT(mp->km_owner == NULL); - mp->km_owner = current; - spin_unlock(&mp->km_lock); - RETURN(1); - } - - RETURN(0); -} - -static __inline__ void -mutex_exit(kmutex_t *mp) -{ - ENTRY; - ASSERT(mp); - ASSERT(mp->km_magic == KM_MAGIC); - spin_lock(&mp->km_lock); - - ASSERT(mp->km_owner == current); - mp->km_owner = NULL; - spin_unlock(&mp->km_lock); - up(&mp->km_sem); - EXIT; -} - -/* Return 1 if mutex is held by current process, else zero. */ -static __inline__ int -mutex_owned(kmutex_t *mp) -{ - int rc; - ENTRY; - - ASSERT(mp); - ASSERT(mp->km_magic == KM_MAGIC); - spin_lock(&mp->km_lock); - rc = (mp->km_owner == current); - spin_unlock(&mp->km_lock); - - RETURN(rc); -} - -/* Return owner if mutex is owned, else NULL. */ -static __inline__ kthread_t * -mutex_owner(kmutex_t *mp) -{ - kthread_t *thr; - ENTRY; - - ASSERT(mp); - ASSERT(mp->km_magic == KM_MAGIC); - spin_lock(&mp->km_lock); - thr = mp->km_owner; - spin_unlock(&mp->km_lock); - - RETURN(thr); -} +#define mutex_tryenter(mp) __mutex_tryenter(mp) +#define mutex_enter(mp) __mutex_enter(mp) +#define mutex_exit(mp) __mutex_exit(mp) +#define mutex_owned(mp) __mutex_owned(mp) +#define mutex_owner(mp) __spl_mutex_owner(mp) +#define MUTEX_HELD(mp) mutex_owned(mp) #ifdef __cplusplus } diff --git a/modules/spl/Makefile.in b/modules/spl/Makefile.in index ff283dfd6f..bd2a5f9f2d 100644 --- a/modules/spl/Makefile.in +++ b/modules/spl/Makefile.in @@ -22,6 +22,7 @@ spl-objs += spl-kobj.o spl-objs += spl-module.o spl-objs += spl-generic.o spl-objs += spl-atomic.o +spl-objs += spl-mutex.o splmodule := spl.ko splmoduledir := @kmoduledir@/kernel/lib/ diff --git a/modules/spl/spl-generic.c b/modules/spl/spl-generic.c index 1aadb990e6..99497dd512 100644 --- a/modules/spl/spl-generic.c +++ b/modules/spl/spl-generic.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -99,21 +100,26 @@ static int __init spl_init(void) if ((rc = kmem_init())) GOTO(out , rc); - if ((rc = vn_init())) - GOTO(out2, rc); + if ((rc = spl_mutex_init())) + GOTO(out2 , rc); - if ((rc = proc_init())) + if ((rc = vn_init())) GOTO(out3, rc); + if ((rc = proc_init())) + GOTO(out4, rc); + if ((rc = set_hostid())) - GOTO(out4, rc = -EADDRNOTAVAIL); + GOTO(out5, rc = -EADDRNOTAVAIL); printk("SPL: Loaded Solaris Porting Layer v%s\n", VERSION); RETURN(rc); -out4: +out5: proc_fini(); -out3: +out4: vn_fini(); +out3: + spl_mutex_fini(); out2: kmem_fini(); out: diff --git a/modules/spl/spl-mutex.c b/modules/spl/spl-mutex.c new file mode 100644 index 0000000000..06a8f316b6 --- /dev/null +++ b/modules/spl/spl-mutex.c @@ -0,0 +1,256 @@ +#include + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_MUTEX + +/* Mutex implementation based on those found in Solaris. This means + * they the MUTEX_DEFAULT type is an adaptive mutex. When calling + * mutex_enter() your process will spin waiting for the lock if it's + * likely the lock will be free'd shortly. 
If it looks like the + * lock will be held for a longer time we schedule and sleep waiting + * for it. This determination is made by checking if the holder of + * the lock is currently running on cpu or sleeping waiting to be + * scheduled. If the holder is currently running it's likely the + * lock will be shortly dropped. + * + * XXX: This is basically a rough implementation to see if this + * helps our performance. If it does a more careful implementation + * should be done, perhaps in assembly. + */ + +/* 0: Never spin when trying to aquire lock + * -1: Spin until aquired or holder yeilds without dropping lock + * 1-MAX_INT: Spin for N attempts before sleeping for lock + */ +int mutex_spin_max = 100; + +#ifdef DEBUG_MUTEX +int mutex_stats[MUTEX_STATS_SIZE] = { 0 }; +DEFINE_MUTEX(mutex_stats_lock); +LIST_HEAD(mutex_stats_list); +#endif + +void +__spl_mutex_init(kmutex_t *mp, char *name, int type, void *ibc) +{ + ASSERT(mp); + ASSERT(name); + ASSERT(ibc == NULL); + ASSERT(mp->km_magic != KM_MAGIC); /* Never double init */ + + mp->km_magic = KM_MAGIC; + mp->km_owner = NULL; + mp->km_name = NULL; + mp->km_name_size = strlen(name) + 1; + + switch (type) { + case MUTEX_DEFAULT: + mp->km_type = MUTEX_ADAPTIVE; + break; + case MUTEX_SPIN: + case MUTEX_ADAPTIVE: + mp->km_type = type; + break; + default: + SBUG(); + } + + /* Semaphore kmem_alloc'ed to keep struct size down (<64b) */ + mp->km_sem = kmem_alloc(sizeof(struct semaphore), KM_SLEEP); + if (mp->km_sem == NULL) + return; + + mp->km_name = kmem_alloc(mp->km_name_size, KM_SLEEP); + if (mp->km_name == NULL) { + kmem_free(mp->km_sem, sizeof(struct semaphore)); + return; + } + + sema_init(mp->km_sem, 1); + strcpy(mp->km_name, name); + +#ifdef DEBUG_MUTEX + mp->km_stats = kmem_zalloc(sizeof(int) * MUTEX_STATS_SIZE, KM_SLEEP); + if (mp->km_stats == NULL) { + kmem_free(mp->km_name, mp->km_name_size); + kmem_free(mp->km_sem, sizeof(struct semaphore)); + return; + } + + mutex_lock(&mutex_stats_lock); + list_add_tail(&mp->km_list, &mutex_stats_list); + mutex_unlock(&mutex_stats_lock); +#endif +} +EXPORT_SYMBOL(__spl_mutex_init); + +void +__spl_mutex_destroy(kmutex_t *mp) +{ + ASSERT(mp); + ASSERT(mp->km_magic == KM_MAGIC); + +#ifdef DEBUG_MUTEX + mutex_lock(&mutex_stats_lock); + list_del_init(&mp->km_list); + mutex_unlock(&mutex_stats_lock); + + kmem_free(mp->km_stats, sizeof(int) * MUTEX_STATS_SIZE); +#endif + kmem_free(mp->km_name, mp->km_name_size); + kmem_free(mp->km_sem, sizeof(struct semaphore)); + + memset(mp, KM_POISON, sizeof(*mp)); +} +EXPORT_SYMBOL(__spl_mutex_destroy); + +/* Return 1 if we acquired the mutex, else zero. 
*/ +int +__mutex_tryenter(kmutex_t *mp) +{ + int rc; + ENTRY; + + ASSERT(mp); + ASSERT(mp->km_magic == KM_MAGIC); + MUTEX_STAT_INC(mutex_stats, MUTEX_TRYENTER_TOTAL); + MUTEX_STAT_INC(mp->km_stats, MUTEX_TRYENTER_TOTAL); + + rc = down_trylock(mp->km_sem); + if (rc == 0) { + ASSERT(mp->km_owner == NULL); + mp->km_owner = current; + MUTEX_STAT_INC(mutex_stats, MUTEX_TRYENTER_NOT_HELD); + MUTEX_STAT_INC(mp->km_stats, MUTEX_TRYENTER_NOT_HELD); + } + + RETURN(!rc); +} +EXPORT_SYMBOL(__mutex_tryenter); + +static void +mutex_enter_adaptive(kmutex_t *mp) +{ + struct task_struct *owner; + int count = 0; + + /* Lock is not held so we expect to aquire the lock */ + if ((owner = mp->km_owner) == NULL) { + down(mp->km_sem); + MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_NOT_HELD); + MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_NOT_HELD); + } else { + /* The lock is held by a currently running task which + * we expect will drop the lock before leaving the + * head of the runqueue. So the ideal thing to do + * is spin until we aquire the lock and avoid a + * context switch. However it is also possible the + * task holding the lock yields the processor with + * out dropping lock. In which case, we know it's + * going to be a while so we stop spinning and go + * to sleep waiting for the lock to be available. + * This should strike the optimum balance between + * spinning and sleeping waiting for a lock. + */ + while (task_curr(owner) && (count <= mutex_spin_max)) { + if (down_trylock(mp->km_sem) == 0) { + MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SPIN); + MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SPIN); + GOTO(out, count); + } + count++; + } + + /* The lock is held by a sleeping task so it's going to + * cost us minimally one context switch. We might as + * well sleep and yield the processor to other tasks. + */ + down(mp->km_sem); + MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SLEEP); + MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SLEEP); + } +out: + MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_TOTAL); + MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_TOTAL); +} + +void +__mutex_enter(kmutex_t *mp) +{ + ENTRY; + ASSERT(mp); + ASSERT(mp->km_magic == KM_MAGIC); + + switch (mp->km_type) { + case MUTEX_SPIN: + while (down_trylock(mp->km_sem)); + MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SPIN); + MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SPIN); + break; + case MUTEX_ADAPTIVE: + mutex_enter_adaptive(mp); + break; + } + + ASSERT(mp->km_owner == NULL); + mp->km_owner = current; + + EXIT; +} +EXPORT_SYMBOL(__mutex_enter); + +void +__mutex_exit(kmutex_t *mp) +{ + ENTRY; + ASSERT(mp); + ASSERT(mp->km_magic == KM_MAGIC); + ASSERT(mp->km_owner == current); + mp->km_owner = NULL; + up(mp->km_sem); + EXIT; +} +EXPORT_SYMBOL(__mutex_exit); + +/* Return 1 if mutex is held by current process, else zero. */ +int +__mutex_owned(kmutex_t *mp) +{ + ENTRY; + ASSERT(mp); + ASSERT(mp->km_magic == KM_MAGIC); + RETURN(mp->km_owner == current); +} +EXPORT_SYMBOL(__mutex_owned); + +/* Return owner if mutex is owned, else NULL. 
*/ +kthread_t * +__spl_mutex_owner(kmutex_t *mp) +{ + ENTRY; + ASSERT(mp); + ASSERT(mp->km_magic == KM_MAGIC); + RETURN(mp->km_owner); +} +EXPORT_SYMBOL(__spl_mutex_owner); + +int +spl_mutex_init(void) +{ + ENTRY; + RETURN(0); +} + +void +spl_mutex_fini(void) +{ + ENTRY; +#ifdef DEBUG_MUTEX + ASSERT(list_empty(&mutex_stats_list)); +#endif + EXIT; +} + diff --git a/modules/spl/spl-proc.c b/modules/spl/spl-proc.c index 94dd937a16..64423c1868 100644 --- a/modules/spl/spl-proc.c +++ b/modules/spl/spl-proc.c @@ -3,8 +3,10 @@ #include #include #include +#include #include #include +#include #include #include "config.h" @@ -18,10 +20,17 @@ static struct ctl_table_header *spl_header = NULL; static unsigned long table_min = 0; static unsigned long table_max = ~0; -#define CTL_SPL 0x87 +#define CTL_SPL 0x87 +#define CTL_SPL_DEBUG 0x88 +#define CTL_SPL_MUTEX 0x89 +#define CTL_SPL_KMEM 0x90 + enum { CTL_VERSION = 1, /* Version */ - CTL_DEBUG_SUBSYS, /* Debug subsystem */ + CTL_HOSTID, /* Host id reported by /usr/bin/hostid */ + CTL_HW_SERIAL, /* Hardware serial number from hostid */ + + CTL_DEBUG_SUBSYS, /* Debug subsystem */ CTL_DEBUG_MASK, /* Debug mask */ CTL_DEBUG_PRINTK, /* Force all messages to console */ CTL_DEBUG_MB, /* Debug buffer size */ @@ -31,19 +40,23 @@ enum { CTL_DEBUG_PATH, /* Dump log location */ CTL_DEBUG_DUMP, /* Dump debug buffer to file */ CTL_DEBUG_FORCE_BUG, /* Hook to force a BUG */ - CTL_CONSOLE_RATELIMIT, /* Ratelimit console messages */ + CTL_DEBUG_STACK_SIZE, /* Max observed stack size */ + + CTL_CONSOLE_RATELIMIT, /* Ratelimit console messages */ CTL_CONSOLE_MAX_DELAY_CS, /* Max delay at which we skip messages */ CTL_CONSOLE_MIN_DELAY_CS, /* Init delay at which we skip messages */ CTL_CONSOLE_BACKOFF, /* Delay increase factor */ - CTL_STACK_SIZE, /* Max observed stack size */ + #ifdef DEBUG_KMEM CTL_KMEM_KMEMUSED, /* Crrently alloc'd kmem bytes */ CTL_KMEM_KMEMMAX, /* Max alloc'd by kmem bytes */ CTL_KMEM_VMEMUSED, /* Currently alloc'd vmem bytes */ CTL_KMEM_VMEMMAX, /* Max alloc'd by vmem bytes */ #endif - CTL_HOSTID, /* Host id reported by /usr/bin/hostid */ - CTL_HW_SERIAL, /* Hardware serial number from hostid */ + + CTL_MUTEX_STATS, /* Global mutex statistics */ + CTL_MUTEX_STATS_PER, /* Per mutex statistics */ + CTL_MUTEX_SPIN_MAX, /* Maximum mutex spin iterations */ }; static int @@ -368,21 +381,107 @@ proc_dohostid(struct ctl_table *table, int write, struct file *filp, RETURN(rc); } -static struct ctl_table spl_table[] = { - /* NB No .strategy entries have been provided since - * sysctl(8) prefers to go via /proc for portability. - */ - { - .ctl_name = CTL_VERSION, - .procname = "version", - .data = spl_version, - .maxlen = sizeof(spl_version), - .mode = 0444, - .proc_handler = &proc_dostring, - }, +#ifdef DEBUG_MUTEX +static void +mutex_seq_show_headers(struct seq_file *f) +{ + seq_printf(f, "%-36s %-4s %-16s\t" + "e_tot\te_nh\te_sp\te_sl\tte_tot\tte_nh\n", + "name", "type", "owner"); +} + +static int +mutex_seq_show(struct seq_file *f, void *p) +{ + kmutex_t *mp = p; + char t = 'X'; + int i; + + ASSERT(mp->km_magic == KM_MAGIC); + + switch (mp->km_type) { + case MUTEX_DEFAULT: t = 'D'; break; + case MUTEX_SPIN: t = 'S'; break; + case MUTEX_ADAPTIVE: t = 'A'; break; + default: + SBUG(); + } + seq_printf(f, "%-36s %c ", mp->km_name, t); + if (mp->km_owner) + seq_printf(f, "%p\t", mp->km_owner); + else + seq_printf(f, "%-16s\t", ""); + + for (i = 0; i < MUTEX_STATS_SIZE; i++) + seq_printf(f, "%d%c", mp->km_stats[i], + (i + 1 == MUTEX_STATS_SIZE) ? 
'\n' : '\t'); + + return 0; +} + +static void * +mutex_seq_start(struct seq_file *f, loff_t *pos) +{ + struct list_head *p; + loff_t n = *pos; + ENTRY; + + mutex_lock(&mutex_stats_lock); + if (!n) + mutex_seq_show_headers(f); + + p = mutex_stats_list.next; + while (n--) { + p = p->next; + if (p == &mutex_stats_list) + RETURN(NULL); + } + + RETURN(list_entry(p, kmutex_t, km_list)); +} + +static void * +mutex_seq_next(struct seq_file *f, void *p, loff_t *pos) +{ + kmutex_t *mp = p; + ENTRY; + + ++*pos; + RETURN((mp->km_list.next == &mutex_stats_list) ? + NULL : list_entry(mp->km_list.next, kmutex_t, km_list)); +} + +static void +mutex_seq_stop(struct seq_file *f, void *v) +{ + mutex_unlock(&mutex_stats_lock); +} + +static struct seq_operations mutex_seq_ops = { + .show = mutex_seq_show, + .start = mutex_seq_start, + .next = mutex_seq_next, + .stop = mutex_seq_stop, +}; + +static int +proc_mutex_open(struct inode *inode, struct file *filp) +{ + return seq_open(filp, &mutex_seq_ops); +} + +static struct file_operations proc_mutex_operations = { + .open = proc_mutex_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; +#endif /* DEBUG_MUTEX */ + +static struct ctl_table spl_debug_table[] = { { .ctl_name = CTL_DEBUG_SUBSYS, - .procname = "debug_subsystem", + .procname = "subsystem", .data = &spl_debug_subsys, .maxlen = sizeof(unsigned long), .mode = 0644, @@ -390,7 +489,7 @@ static struct ctl_table spl_table[] = { }, { .ctl_name = CTL_DEBUG_MASK, - .procname = "debug_mask", + .procname = "mask", .data = &spl_debug_mask, .maxlen = sizeof(unsigned long), .mode = 0644, @@ -398,7 +497,7 @@ static struct ctl_table spl_table[] = { }, { .ctl_name = CTL_DEBUG_PRINTK, - .procname = "debug_printk", + .procname = "printk", .data = &spl_debug_printk, .maxlen = sizeof(unsigned long), .mode = 0644, @@ -406,13 +505,13 @@ static struct ctl_table spl_table[] = { }, { .ctl_name = CTL_DEBUG_MB, - .procname = "debug_mb", + .procname = "mb", .mode = 0644, .proc_handler = &proc_debug_mb, }, { .ctl_name = CTL_DEBUG_BINARY, - .procname = "debug_binary", + .procname = "binary", .data = &spl_debug_binary, .maxlen = sizeof(int), .mode = 0644, @@ -436,7 +535,7 @@ static struct ctl_table spl_table[] = { }, { .ctl_name = CTL_DEBUG_PATH, - .procname = "debug_path", + .procname = "path", .data = spl_debug_file_path, .maxlen = sizeof(spl_debug_file_path), .mode = 0644, @@ -444,7 +543,7 @@ static struct ctl_table spl_table[] = { }, { .ctl_name = CTL_DEBUG_DUMP, - .procname = "debug_dump", + .procname = "dump", .mode = 0200, .proc_handler = &proc_dump_kernel, }, @@ -483,14 +582,40 @@ static struct ctl_table spl_table[] = { .proc_handler = &proc_console_backoff, }, { - .ctl_name = CTL_STACK_SIZE, + .ctl_name = CTL_DEBUG_STACK_SIZE, .procname = "stack_max", .data = &spl_debug_stack, .maxlen = sizeof(int), .mode = 0444, .proc_handler = &proc_dointvec, }, + {0}, +}; + +#ifdef DEBUG_MUTEX +static struct ctl_table spl_mutex_table[] = { + { + .ctl_name = CTL_MUTEX_STATS, + .procname = "stats", + .data = &mutex_stats, + .maxlen = sizeof(int) * MUTEX_STATS_SIZE, + .mode = 0444, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_MUTEX_SPIN_MAX, + .procname = "spin_max", + .data = &mutex_spin_max, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + {0}, +}; +#endif /* DEBUG_MUTEX */ + #ifdef DEBUG_KMEM +static struct ctl_table spl_kmem_table[] = { { .ctl_name = CTL_KMEM_KMEMUSED, .procname = "kmem_used", @@ -527,7 +652,22 @@ static struct ctl_table spl_table[] = { .mode = 
0444, .proc_handler = &proc_doulongvec_minmax, }, -#endif + {0}, +}; +#endif /* DEBUG_MUTEX */ + +static struct ctl_table spl_table[] = { + /* NB No .strategy entries have been provided since + * sysctl(8) prefers to go via /proc for portability. + */ + { + .ctl_name = CTL_VERSION, + .procname = "version", + .data = spl_version, + .maxlen = sizeof(spl_version), + .mode = 0444, + .proc_handler = &proc_dostring, + }, { .ctl_name = CTL_HOSTID, .procname = "hostid", @@ -544,10 +684,32 @@ static struct ctl_table spl_table[] = { .mode = 0444, .proc_handler = &proc_dostring, }, + { + .ctl_name = CTL_SPL_DEBUG, + .procname = "debug", + .mode = 0555, + .child = spl_debug_table, + }, +#ifdef DEBUG_MUTEX + { + .ctl_name = CTL_SPL_MUTEX, + .procname = "mutex", + .mode = 0555, + .child = spl_mutex_table, + }, +#endif +#ifdef DEBUG_KMEM + { + .ctl_name = CTL_SPL_KMEM, + .procname = "kmem", + .mode = 0555, + .child = spl_kmem_table, + }, +#endif { 0 }, }; -static struct ctl_table spl_dir_table[] = { +static struct ctl_table spl_dir[] = { { .ctl_name = CTL_SPL, .procname = "spl", @@ -563,9 +725,22 @@ proc_init(void) ENTRY; #ifdef CONFIG_SYSCTL - spl_header = register_sysctl_table(spl_dir_table, 0); + spl_header = register_sysctl_table(spl_dir, 0); if (spl_header == NULL) RETURN(-EUNATCH); + +#ifdef DEBUG_MUTEX + { + struct proc_dir_entry *entry = create_proc_entry("mutex_stats", + 0444, NULL); + if (entry) { + entry->proc_fops = &proc_mutex_operations; + } else { + unregister_sysctl_table(spl_header); + RETURN(-EUNATCH); + } + } +#endif /* DEBUG_MUTEX */ #endif RETURN(0); } @@ -577,6 +752,7 @@ proc_fini(void) #ifdef CONFIG_SYSCTL ASSERT(spl_header != NULL); + remove_proc_entry("mutex_stats", NULL); unregister_sysctl_table(spl_header); #endif EXIT; diff --git a/modules/spl/spl-taskq.c b/modules/spl/spl-taskq.c index ad9be695b8..70deb0aea6 100644 --- a/modules/spl/spl-taskq.c +++ b/modules/spl/spl-taskq.c @@ -106,7 +106,7 @@ task_done(taskq_t *tq, task_t *t) t->t_id = 0; t->t_func = NULL; t->t_arg = NULL; - list_add(&t->t_list, &tq->tq_free_list); + list_add_tail(&t->t_list, &tq->tq_free_list); } else { task_free(tq, t); } @@ -209,7 +209,7 @@ __taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags) spin_lock(&t->t_lock); - list_add(&t->t_list, &tq->tq_pend_list); + list_add_tail(&t->t_list, &tq->tq_pend_list); t->t_id = rc = tq->tq_next_id; tq->tq_next_id++; t->t_func = func; @@ -282,7 +282,7 @@ taskq_thread(void *args) if (!list_empty(&tq->tq_pend_list)) { t = list_entry(tq->tq_pend_list.next, task_t, t_list); list_del_init(&t->t_list); - list_add(&t->t_list, &tq->tq_work_list); + list_add_tail(&t->t_list, &tq->tq_work_list); tq->tq_nactive++; spin_unlock_irq(&tq->tq_lock);
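
Usage sketch (not part of the patch above): the snippet below simply exercises the kmutex_t API this change introduces in include/sys/mutex.h. The my_cache_t structure and my_cache_*() functions are hypothetical names used only for illustration; the mutex calls, types, and MUTEX_DEFAULT semantics are taken from the patch itself.

#include <sys/debug.h>
#include <sys/mutex.h>

/* 'my_cache_t' and the my_cache_*() functions are hypothetical; only the
 * kmutex_t calls come from the patched include/sys/mutex.h. */
typedef struct my_cache {
	kmutex_t	mc_lock;	/* protects mc_count */
	int		mc_count;
} my_cache_t;

static void
my_cache_init(my_cache_t *mc)
{
	/* An explicit name is passed here; passing NULL instead would make
	 * the mutex_init() macro stringify the lock expression via #mp.
	 * MUTEX_DEFAULT maps to an adaptive mutex in this patch. */
	mutex_init(&mc->mc_lock, "mc_lock", MUTEX_DEFAULT, NULL);
	mc->mc_count = 0;
}

static void
my_cache_add(my_cache_t *mc)
{
	/* Adaptive path: spins while the current owner is running on a
	 * cpu, otherwise sleeps on the underlying semaphore. */
	mutex_enter(&mc->mc_lock);
	ASSERT(mutex_owned(&mc->mc_lock));
	mc->mc_count++;
	mutex_exit(&mc->mc_lock);
}

static int
my_cache_add_try(my_cache_t *mc)
{
	/* mutex_tryenter() returns 1 if the lock was acquired, 0 if not. */
	if (!mutex_tryenter(&mc->mc_lock))
		return 0;
	mc->mc_count++;
	mutex_exit(&mc->mc_lock);
	return 1;
}

static void
my_cache_fini(my_cache_t *mc)
{
	ASSERT(!MUTEX_HELD(&mc->mc_lock));
	mutex_destroy(&mc->mc_lock);
}

When the module is built with DEBUG_MUTEX defined (the header #undef's it by default), each mutex also registers itself on mutex_stats_list, so the per-lock counters can be read from /proc/mutex_stats and the spin limit tuned through /proc/sys/spl/mutex/spin_max.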