/* * This file is part of the SPL: Solaris Porting Layer. * * Copyright (c) 2008 Lawrence Livermore National Security, LLC. * Produced at Lawrence Livermore National Laboratory * Written by: * Brian Behlendorf , * Herb Wartens , * Jim Garlick * UCRL-CODE-235197 * * This is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #include #ifdef DEBUG_SUBSYSTEM #undef DEBUG_SUBSYSTEM #endif #define DEBUG_SUBSYSTEM S_MUTEX /* Mutex implementation based on those found in Solaris. This means * that the MUTEX_DEFAULT type is an adaptive mutex. When calling * mutex_enter() your process will spin waiting for the lock if it's * likely the lock will be freed shortly. If it looks like the * lock will be held for a longer time we schedule and sleep waiting * for it. This determination is made by checking if the holder of * the lock is currently running on cpu or sleeping waiting to be * scheduled. If the holder is currently running it's likely the * lock will be shortly dropped. * * XXX: This is basically a rough implementation to see if this * helps our performance. If it does a more careful implementation * should be done, perhaps in assembly. 
*/ /* 0: Never spin when trying to aquire lock * -1: Spin until aquired or holder yeilds without dropping lock * 1-MAX_INT: Spin for N attempts before sleeping for lock */ int mutex_spin_max = 0; #ifdef DEBUG_MUTEX int mutex_stats[MUTEX_STATS_SIZE] = { 0 }; spinlock_t mutex_stats_lock; struct list_head mutex_stats_list; #endif void __spl_mutex_init(kmutex_t *mp, char *name, int type, void *ibc) { int flags = KM_SLEEP; ASSERT(mp); ASSERT(name); ASSERT(ibc == NULL); ASSERT(mp->km_magic != KM_MAGIC); /* Never double init */ mp->km_magic = KM_MAGIC; mp->km_owner = NULL; mp->km_name = NULL; mp->km_name_size = strlen(name) + 1; switch (type) { case MUTEX_DEFAULT: mp->km_type = MUTEX_ADAPTIVE; break; case MUTEX_SPIN: case MUTEX_ADAPTIVE: mp->km_type = type; break; default: SBUG(); } /* We may be called when there is a non-zero preempt_count or * interrupts are disabled is which case we must not sleep. */ if (current_thread_info()->preempt_count || irqs_disabled()) flags = KM_NOSLEEP; /* Semaphore kmem_alloc'ed to keep struct size down (<64b) */ mp->km_sem = kmem_alloc(sizeof(struct semaphore), flags); if (mp->km_sem == NULL) return; mp->km_name = kmem_alloc(mp->km_name_size, flags); if (mp->km_name == NULL) { kmem_free(mp->km_sem, sizeof(struct semaphore)); return; } sema_init(mp->km_sem, 1); strncpy(mp->km_name, name, mp->km_name_size); #ifdef DEBUG_MUTEX mp->km_stats = kmem_zalloc(sizeof(int) * MUTEX_STATS_SIZE, flags); if (mp->km_stats == NULL) { kmem_free(mp->km_name, mp->km_name_size); kmem_free(mp->km_sem, sizeof(struct semaphore)); return; } /* XXX - This appears to be a much more contended lock than I * would have expected. To run with this debugging enabled and * get reasonable performance we may need to be more clever and * do something like hash the mutex ptr on to one of several * lists to ease this single point of contention. 
*/ spin_lock(&mutex_stats_lock); list_add_tail(&mp->km_list, &mutex_stats_list); spin_unlock(&mutex_stats_lock); #endif } EXPORT_SYMBOL(__spl_mutex_init); void __spl_mutex_destroy(kmutex_t *mp) { ASSERT(mp); ASSERT(mp->km_magic == KM_MAGIC); #ifdef DEBUG_MUTEX spin_lock(&mutex_stats_lock); list_del_init(&mp->km_list); spin_unlock(&mutex_stats_lock); kmem_free(mp->km_stats, sizeof(int) * MUTEX_STATS_SIZE); #endif kmem_free(mp->km_name, mp->km_name_size); kmem_free(mp->km_sem, sizeof(struct semaphore)); memset(mp, KM_POISON, sizeof(*mp)); } EXPORT_SYMBOL(__spl_mutex_destroy); /* Return 1 if we acquired the mutex, else zero. */ int __mutex_tryenter(kmutex_t *mp) { int rc; ENTRY; ASSERT(mp); ASSERT(mp->km_magic == KM_MAGIC); MUTEX_STAT_INC(mutex_stats, MUTEX_TRYENTER_TOTAL); MUTEX_STAT_INC(mp->km_stats, MUTEX_TRYENTER_TOTAL); rc = down_trylock(mp->km_sem); if (rc == 0) { ASSERT(mp->km_owner == NULL); mp->km_owner = current; MUTEX_STAT_INC(mutex_stats, MUTEX_TRYENTER_NOT_HELD); MUTEX_STAT_INC(mp->km_stats, MUTEX_TRYENTER_NOT_HELD); } RETURN(!rc); } EXPORT_SYMBOL(__mutex_tryenter); static void mutex_enter_adaptive(kmutex_t *mp) { struct task_struct *owner; int count = 0; /* Lock is not held so we expect to aquire the lock */ if ((owner = mp->km_owner) == NULL) { down(mp->km_sem); MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_NOT_HELD); MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_NOT_HELD); } else { /* The lock is held by a currently running task which * we expect will drop the lock before leaving the * head of the runqueue. So the ideal thing to do * is spin until we aquire the lock and avoid a * context switch. However it is also possible the * task holding the lock yields the processor with * out dropping lock. In which case, we know it's * going to be a while so we stop spinning and go * to sleep waiting for the lock to be available. * This should strike the optimum balance between * spinning and sleeping waiting for a lock. 
*/ while (task_curr(owner) && (count <= mutex_spin_max)) { if (down_trylock(mp->km_sem) == 0) { MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SPIN); MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SPIN); GOTO(out, count); } count++; } /* The lock is held by a sleeping task so it's going to * cost us minimally one context switch. We might as * well sleep and yield the processor to other tasks. */ down(mp->km_sem); MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SLEEP); MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SLEEP); } out: MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_TOTAL); MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_TOTAL); } void __mutex_enter(kmutex_t *mp) { ENTRY; ASSERT(mp); ASSERT(mp->km_magic == KM_MAGIC); switch (mp->km_type) { case MUTEX_SPIN: while (down_trylock(mp->km_sem)); MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SPIN); MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SPIN); break; case MUTEX_ADAPTIVE: mutex_enter_adaptive(mp); break; } ASSERT(mp->km_owner == NULL); mp->km_owner = current; EXIT; } EXPORT_SYMBOL(__mutex_enter); void __mutex_exit(kmutex_t *mp) { ENTRY; ASSERT(mp); ASSERT(mp->km_magic == KM_MAGIC); ASSERT(mp->km_owner == current); mp->km_owner = NULL; up(mp->km_sem); EXIT; } EXPORT_SYMBOL(__mutex_exit); /* Return 1 if mutex is held by current process, else zero. */ int __mutex_owned(kmutex_t *mp) { ENTRY; ASSERT(mp); ASSERT(mp->km_magic == KM_MAGIC); RETURN(mp->km_owner == current); } EXPORT_SYMBOL(__mutex_owned); /* Return owner if mutex is owned, else NULL. 
*/ kthread_t * __spl_mutex_owner(kmutex_t *mp) { ENTRY; ASSERT(mp); ASSERT(mp->km_magic == KM_MAGIC); RETURN(mp->km_owner); } EXPORT_SYMBOL(__spl_mutex_owner); int spl_mutex_init(void) { ENTRY; #ifdef DEBUG_MUTEX spin_lock_init(&mutex_stats_lock); INIT_LIST_HEAD(&mutex_stats_list); #endif RETURN(0); } void spl_mutex_fini(void) { ENTRY; #ifdef DEBUG_MUTEX ASSERT(list_empty(&mutex_stats_list)); #endif EXIT; } module_param(mutex_spin_max, int, 0644); MODULE_PARM_DESC(mutex_spin_max, "Spin a maximum of N times to aquire lock");