zfs/module/spl/spl-tsd.c

694 lines
20 KiB
C

/*****************************************************************************\
* Copyright (C) 2010 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*****************************************************************************
* Solaris Porting Layer (SPL) Thread Specific Data Implementation.
*
* Thread specific data is implemented using a hash table; this avoids
* the need to add a member to the task structure and allows maximum
* portability between kernels. This implementation has been optimized
* to keep the tsd_set() and tsd_get() times as small as possible.
*
* The majority of the entries in the hash table are for specific tsd
* entries. These entries are hashed by the product of their key and
* pid because by design the key and pid are guaranteed to be unique.
* Their product also has the desirable property that it will be uniformly
* distributed over the hash bins providing neither the pid nor key is zero.
* Under Linux the zero pid belongs to the idle (swapper) task and thus won't
* be used, and this implementation is careful never to assign a zero key.
* By default the hash table is sized to 512 bins which is expected to
* be sufficient for light to moderate usage of thread specific data.
*
* The hash table contains two additional types of entries. The first
* type of entry is called a 'key' entry and it is added to the hash during
* tsd_create(). It is used to store the address of the destructor function
* and it is used as an anchor point. All tsd entries which use the same
* key will be linked to this entry. This is used during tsd_destroy() to
* quickly call the destructor function for all tsd associated with the key.
* The 'key' entry may be looked up with tsd_hash_search() by passing the
* key you wish to lookup and DTOR_PID constant as the pid.
*
* The second type of entry is called a 'pid' entry and it is added to the
* hash the first time a process sets a key. The 'pid' entry is also used
* as an anchor and all tsd for the process will be linked to it. This
* list is used during tsd_exit() to ensure all registered destructors
* are run for the process. The 'pid' entry may be looked up with
* tsd_hash_search() by passing the PID_KEY constant as the key, and
* the process pid. Note that tsd_exit() is called by thread_exit()
* so if you are using the Solaris thread API you should not need to call
* tsd_exit() directly.
*
\*****************************************************************************/
#include <sys/kmem.h>
#include <sys/thread.h>
#include <sys/tsd.h>
#include <linux/hash.h>
/*
 * One bucket of the hash table: a chain of entries plus the lock that
 * is held while that chain is walked or modified.
 */
typedef struct tsd_hash_bin {
/* taken around every traversal/insert/removal on hb_head */
spinlock_t hb_lock;
/* head of the chain of tsd_hash_entry_t, linked through he_list */
struct hlist_head hb_head;
} tsd_hash_bin_t;
/*
 * The hash table itself: a fixed array of bins plus the table-wide lock
 * and the cursor used when handing out new keys.
 */
typedef struct tsd_hash_table {
/* table-wide lock; held by the add/delete paths that touch several bins */
spinlock_t ht_lock;
/* log2 of the number of bins (the table has 1 << ht_bits bins) */
uint_t ht_bits;
/* most recently tried key value; advanced by tsd_hash_add_key() */
uint_t ht_key;
/* array of 1 << ht_bits bins */
tsd_hash_bin_t *ht_bins;
} tsd_hash_table_t;
/*
 * A single hash entry. The same structure is used for ordinary per-thread
 * values, for per-key anchors (he_pid == DTOR_PID) and for per-process
 * anchors (he_key == PID_KEY).
 */
typedef struct tsd_hash_entry {
/* tsd key, or PID_KEY for a process anchor */
uint_t he_key;
/* owning pid, or DTOR_PID for a key anchor */
pid_t he_pid;
/* destructor for he_value; may be NULL */
dtor_func_t he_dtor;
/* the stored thread specific value (NULL for anchors) */
void *he_value;
/* linkage on the owning bin's hb_head chain (or a private work list) */
struct hlist_node he_list;
/* links all entries sharing he_key to that key's anchor */
struct list_head he_key_list;
/* links all entries sharing he_pid to that process's anchor */
struct list_head he_pid_list;
} tsd_hash_entry_t;
/* The single table used by this file; set by spl_tsd_init(), cleared by spl_tsd_fini(). */
static tsd_hash_table_t *tsd_hash_table = NULL;
/*
 * tsd_hash_search - look up the entry for a key/pid pair
 * @table: hash table
 * @key: search key
 * @pid: search pid
 *
 * Only the bin selected by hashing key * pid is locked while its chain
 * is walked. Returns the matching entry, or NULL when the pair is not
 * present. The bin lock has been dropped by the time this returns.
 */
static tsd_hash_entry_t *
tsd_hash_search(tsd_hash_table_t *table, uint_t key, pid_t pid)
{
	tsd_hash_entry_t *found = NULL;
	tsd_hash_entry_t *candidate;
	struct hlist_node *pos;
	tsd_hash_bin_t *bucket;

	bucket = &table->ht_bins[hash_long((ulong_t)key * (ulong_t)pid,
	    table->ht_bits)];

	spin_lock(&bucket->hb_lock);
	hlist_for_each(pos, &bucket->hb_head) {
		candidate = hlist_entry(pos, tsd_hash_entry_t, he_list);
		if (candidate->he_key != key || candidate->he_pid != pid)
			continue;

		found = candidate;
		break;
	}
	spin_unlock(&bucket->hb_lock);

	return (found);
}
/*
 * tsd_hash_dtor - run destructors for, and free, every entry on a list
 * @work: private list of entries, linked through he_list
 *
 * The entries are expected to have been unlinked from the hash already.
 * Each one is popped from the front of @work, its destructor (if any) is
 * invoked on its value, and the entry is freed. Key anchors
 * (he_pid == DTOR_PID) only record the destructor and carry no value of
 * their own, so the destructor is not called for them.
 */
static void
tsd_hash_dtor(struct hlist_head *work)
{
	struct hlist_node *front;
	tsd_hash_entry_t *victim;

	while (!hlist_empty(work)) {
		front = work->first;
		victim = hlist_entry(front, tsd_hash_entry_t, he_list);
		hlist_del(front);

		if (victim->he_pid != DTOR_PID && victim->he_dtor)
			victim->he_dtor(victim->he_value);

		kmem_free(victim, sizeof (tsd_hash_entry_t));
	}
}
/*
 * tsd_hash_add - adds an entry to hash table
 * @table: hash table
 * @key: search key
 * @pid: search pid
 * @value: thread specific value to store
 *
 * The caller is responsible for ensuring the unique key/pid does not
 * already exist in the hash table. This is possible because all entries
 * are thread specific, thus a concurrent thread will never attempt to
 * add this key/pid. Because multiple bins must be checked to add
 * links to the dtor and pid entries the entire table is locked.
 *
 * Returns 0 on success, ENOMEM if the entry cannot be allocated, or
 * EINVAL if no destructor (key) entry exists for @key, i.e. the key was
 * never handed out by tsd_hash_add_key() or has since been destroyed.
 */
static int
tsd_hash_add(tsd_hash_table_t *table, uint_t key, pid_t pid, void *value)
{
	tsd_hash_entry_t *entry, *dtor_entry, *pid_entry;
	tsd_hash_bin_t *bin;
	ulong_t hash;

	ASSERT3P(tsd_hash_search(table, key, pid), ==, NULL);

	/* New entry allocate structure, set value, and add to hash */
	entry = kmem_alloc(sizeof (tsd_hash_entry_t), KM_PUSHPAGE);
	if (entry == NULL)
		return (ENOMEM);

	entry->he_key = key;
	entry->he_pid = pid;
	entry->he_value = value;
	INIT_HLIST_NODE(&entry->he_list);
	INIT_LIST_HEAD(&entry->he_key_list);
	INIT_LIST_HEAD(&entry->he_pid_list);

	spin_lock(&table->ht_lock);

	/*
	 * A destructor entry exists for every valid key. A caller passing
	 * a key that was never created must get an error back rather than
	 * a NULL dereference when assertions are compiled out.
	 */
	dtor_entry = tsd_hash_search(table, entry->he_key, DTOR_PID);
	if (dtor_entry == NULL) {
		spin_unlock(&table->ht_lock);
		kmem_free(entry, sizeof (tsd_hash_entry_t));
		return (EINVAL);
	}
	entry->he_dtor = dtor_entry->he_dtor;

	/* Process entry must exist for all valid processes */
	pid_entry = tsd_hash_search(table, PID_KEY, entry->he_pid);
	ASSERT3P(pid_entry, !=, NULL);

	hash = hash_long((ulong_t)key * (ulong_t)pid, table->ht_bits);
	bin = &table->ht_bins[hash];

	spin_lock(&bin->hb_lock);

	/* Add to the hash, key, and pid lists */
	hlist_add_head(&entry->he_list, &bin->hb_head);
	list_add(&entry->he_key_list, &dtor_entry->he_key_list);
	list_add(&entry->he_pid_list, &pid_entry->he_pid_list);

	spin_unlock(&bin->hb_lock);
	spin_unlock(&table->ht_lock);

	return (0);
}
/*
 * tsd_hash_add_key - adds a destructor entry to the hash table
 * @table: hash table
 * @keyp: on success, receives the newly assigned key
 * @dtor: key destructor
 *
 * For every unique key there is a single entry in the hash which is used
 * as anchor. All other thread specific entries for this key are linked
 * to this anchor via the 'he_key_list' list head. On success *keyp is
 * set to the key that was assigned.
 *
 * Returns 0 on success, ENOMEM if the anchor cannot be allocated, or
 * ENOENT when all TSD_KEYS_MAX keys are already in use. *keyp is left
 * untouched on failure.
 */
static int
tsd_hash_add_key(tsd_hash_table_t *table, uint_t *keyp, dtor_func_t dtor)
{
	tsd_hash_entry_t *tmp_entry, *entry;
	tsd_hash_bin_t *bin;
	ulong_t hash;
	int keys_checked = 0;

	ASSERT3P(table, !=, NULL);

	/* Allocate entry to be used as a destructor for this key */
	entry = kmem_alloc(sizeof (tsd_hash_entry_t), KM_PUSHPAGE);
	if (entry == NULL)
		return (ENOMEM);

	/* Determine next available key value */
	spin_lock(&table->ht_lock);
	do {
		/*
		 * Limited to TSD_KEYS_MAX concurrent unique keys. Wrap once
		 * the cursor reaches TSD_KEYS_MAX so that only keys in the
		 * range 1..TSD_KEYS_MAX are ever handed out; tsd_set() and
		 * tsd_get() reject anything larger.
		 */
		if (table->ht_key++ >= TSD_KEYS_MAX)
			table->ht_key = 1;

		/* Ensure failure when all TSD_KEYS_MAX keys are in use */
		if (keys_checked++ >= TSD_KEYS_MAX) {
			spin_unlock(&table->ht_lock);
			/* The anchor was never published; do not leak it */
			kmem_free(entry, sizeof (tsd_hash_entry_t));
			return (ENOENT);
		}

		tmp_entry = tsd_hash_search(table, table->ht_key, DTOR_PID);
	} while (tmp_entry);

	/* Add destructor entry in to hash table */
	entry->he_key = *keyp = table->ht_key;
	entry->he_pid = DTOR_PID;
	entry->he_dtor = dtor;
	entry->he_value = NULL;
	INIT_HLIST_NODE(&entry->he_list);
	INIT_LIST_HEAD(&entry->he_key_list);
	INIT_LIST_HEAD(&entry->he_pid_list);

	hash = hash_long((ulong_t)*keyp * (ulong_t)DTOR_PID, table->ht_bits);
	bin = &table->ht_bins[hash];

	spin_lock(&bin->hb_lock);
	hlist_add_head(&entry->he_list, &bin->hb_head);
	spin_unlock(&bin->hb_lock);

	spin_unlock(&table->ht_lock);

	return (0);
}
/*
 * tsd_hash_add_pid - adds a process entry to the hash table
 * @table: hash table
 * @pid: search pid
 *
 * For every process there is a single entry in the hash which is used
 * as an anchor. All other thread specific entries for this process are
 * linked to this anchor via the 'he_pid_list' list head.
 *
 * Returns 0 on success or ENOMEM if the anchor cannot be allocated.
 */
static int
tsd_hash_add_pid(tsd_hash_table_t *table, pid_t pid)
{
	tsd_hash_entry_t *anchor;
	tsd_hash_bin_t *bucket;

	/* Allocate entry to be used as the process reference */
	anchor = kmem_alloc(sizeof (tsd_hash_entry_t), KM_PUSHPAGE);
	if (anchor == NULL)
		return (ENOMEM);

	/* The anchor is still private, so fill it in before locking */
	anchor->he_key = PID_KEY;
	anchor->he_pid = pid;
	anchor->he_dtor = NULL;
	anchor->he_value = NULL;
	INIT_HLIST_NODE(&anchor->he_list);
	INIT_LIST_HEAD(&anchor->he_key_list);
	INIT_LIST_HEAD(&anchor->he_pid_list);

	spin_lock(&table->ht_lock);

	bucket = &table->ht_bins[hash_long((ulong_t)PID_KEY * (ulong_t)pid,
	    table->ht_bits)];

	spin_lock(&bucket->hb_lock);
	hlist_add_head(&anchor->he_list, &bucket->hb_head);
	spin_unlock(&bucket->hb_lock);

	spin_unlock(&table->ht_lock);

	return (0);
}
/*
 * tsd_hash_del - unlink an entry from the hash, key, and pid lists
 * @table: hash table (its ht_lock must be held by the caller)
 * @entry: entry to unlink
 *
 * Only unlinks; the entry is neither destroyed nor freed here. The key
 * and pid list heads are re-initialized so they read as empty afterwards.
 */
static void
tsd_hash_del(tsd_hash_table_t *table, tsd_hash_entry_t *entry)
{
	ASSERT(spin_is_locked(&table->ht_lock));

	/* Hash chain first, then the two anchor lists */
	hlist_del(&entry->he_list);
	list_del_init(&entry->he_pid_list);
	list_del_init(&entry->he_key_list);
}
/*
 * tsd_hash_table_init - allocate a hash table
 * @bits: log2 of the number of bins
 *
 * Creates a table with 2^bits bins. The table cannot be resized later
 * and must be released with tsd_hash_table_fini(). Returns the new table
 * or NULL if either allocation fails.
 */
static tsd_hash_table_t *
tsd_hash_table_init(uint_t bits)
{
	tsd_hash_table_t *table;
	int nbins = (1 << bits);
	int hash;

	table = kmem_zalloc(sizeof (tsd_hash_table_t), KM_SLEEP);
	if (table == NULL)
		return (NULL);

	table->ht_bins = kmem_zalloc(sizeof (tsd_hash_bin_t) * nbins, KM_SLEEP);
	if (table->ht_bins == NULL) {
		kmem_free(table, sizeof (tsd_hash_table_t));
		return (NULL);
	}

	/* Table-wide state; key numbering starts from 1, never 0 */
	spin_lock_init(&table->ht_lock);
	table->ht_bits = bits;
	table->ht_key = 1;

	/* Every bin gets its own lock and an empty chain */
	hash = 0;
	while (hash < nbins) {
		spin_lock_init(&table->ht_bins[hash].hb_lock);
		INIT_HLIST_HEAD(&table->ht_bins[hash].hb_head);
		hash++;
	}

	return (table);
}
/*
 * tsd_hash_table_fini - free a hash table
 * @table: hash table
 *
 * Releases a table created by tsd_hash_table_init(). Any entries still
 * present are moved to a private list under the table and bin locks,
 * then destroyed via tsd_hash_dtor() once the locks have been dropped,
 * before the bin array and the table itself are freed.
 */
static void
tsd_hash_table_fini(tsd_hash_table_t *table)
{
	HLIST_HEAD(leftovers);
	tsd_hash_bin_t *bucket;
	tsd_hash_entry_t *victim;
	int nbins, idx;

	ASSERT3P(table, !=, NULL);
	nbins = (1 << table->ht_bits);

	spin_lock(&table->ht_lock);
	for (idx = 0; idx < nbins; idx++) {
		bucket = &table->ht_bins[idx];

		spin_lock(&bucket->hb_lock);
		while (!hlist_empty(&bucket->hb_head)) {
			victim = hlist_entry(bucket->hb_head.first,
			    tsd_hash_entry_t, he_list);
			tsd_hash_del(table, victim);
			hlist_add_head(&victim->he_list, &leftovers);
		}
		spin_unlock(&bucket->hb_lock);
	}
	spin_unlock(&table->ht_lock);

	/* Destructors run without any table lock held */
	tsd_hash_dtor(&leftovers);

	kmem_free(table->ht_bins, sizeof (tsd_hash_bin_t) * nbins);
	kmem_free(table, sizeof (tsd_hash_table_t));
}
/*
 * tsd_remove_entry - remove a tsd entry for this thread
 * @entry: entry to remove
 *
 * Unlinks the thread specific data @entry from the hash and from its key
 * and pid lists. If that leaves the owning process anchor with an empty
 * pid list, the anchor is unlinked as well. Everything unlinked is then
 * handed to tsd_hash_dtor(), which runs the destructor (if any) and frees
 * the memory, after the table lock has been dropped.
 */
static void
tsd_remove_entry(tsd_hash_entry_t *entry)
{
HLIST_HEAD(work);
tsd_hash_table_t *table;
tsd_hash_entry_t *pid_entry;
tsd_hash_bin_t *pid_entry_bin, *entry_bin;
ulong_t hash;
table = tsd_hash_table;
ASSERT3P(table, !=, NULL);
ASSERT3P(entry, !=, NULL);
spin_lock(&table->ht_lock);
hash = hash_long((ulong_t)entry->he_key *
(ulong_t)entry->he_pid, table->ht_bits);
entry_bin = &table->ht_bins[hash];
/*
 * Save the possible pid_entry. This must be sampled before the entry
 * is unlinked: tsd_hash_del() re-initializes he_pid_list, after which
 * the neighbour pointer is gone. The neighbour may be another ordinary
 * entry rather than the anchor; that is checked below via he_key.
 */
pid_entry = list_entry(entry->he_pid_list.next, tsd_hash_entry_t,
he_pid_list);
/* remove entry and park it on the private work list */
spin_lock(&entry_bin->hb_lock);
tsd_hash_del(table, entry);
hlist_add_head(&entry->he_list, &work);
spin_unlock(&entry_bin->hb_lock);
/* if pid_entry is indeed pid_entry, then remove it if it's empty */
if (pid_entry->he_key == PID_KEY &&
list_empty(&pid_entry->he_pid_list)) {
hash = hash_long((ulong_t)pid_entry->he_key *
(ulong_t)pid_entry->he_pid, table->ht_bits);
pid_entry_bin = &table->ht_bins[hash];
spin_lock(&pid_entry_bin->hb_lock);
tsd_hash_del(table, pid_entry);
hlist_add_head(&pid_entry->he_list, &work);
spin_unlock(&pid_entry_bin->hb_lock);
}
spin_unlock(&table->ht_lock);
/* destructors and frees happen with no locks held */
tsd_hash_dtor(&work);
}
/*
 * tsd_set - set thread specific data
 * @key: lookup key
 * @value: value to set; NULL removes any existing entry
 *
 * Caller must prevent racing tsd_create() or tsd_destroy(); racing
 * tsd_get() or tsd_set() is not a concern because the data is thread
 * specific. Updating an existing value is the fast path. The first set
 * for a key is slower because it takes the table lock and may allocate
 * both a process anchor and the entry itself.
 *
 * Returns 0 on success, EINVAL for a key outside 1..TSD_KEYS_MAX, or the
 * error reported by tsd_hash_add_pid() / tsd_hash_add().
 */
int
tsd_set(uint_t key, void *value)
{
	tsd_hash_table_t *table = tsd_hash_table;
	pid_t pid = curthread->pid;
	tsd_hash_entry_t *entry;
	int error;

	ASSERT3P(table, !=, NULL);

	if (key == 0 || key > TSD_KEYS_MAX)
		return (EINVAL);

	/* Fast path: the entry already exists, just update it */
	entry = tsd_hash_search(table, key, pid);
	if (entry != NULL) {
		entry->he_value = value;

		/*
		 * A NULL value means "remove". The value has already been
		 * overwritten above, so the destructor invoked during
		 * removal is handed NULL rather than the previous value.
		 */
		if (value == NULL)
			tsd_remove_entry(entry);

		return (0);
	}

	/* Nothing stored and nothing to store: do not create an entry */
	if (value == NULL)
		return (0);

	/* First value for this process: make sure its anchor exists */
	if (tsd_hash_search(table, PID_KEY, pid) == NULL) {
		error = tsd_hash_add_pid(table, pid);
		if (error)
			return (error);
	}

	return (tsd_hash_add(table, key, pid, value));
}
EXPORT_SYMBOL(tsd_set);
/*
 * tsd_get - get thread specific data
 * @key: lookup key
 *
 * Caller must prevent racing tsd_create() or tsd_destroy(). The lookup
 * locks only the single hash bin it searches, never the whole table.
 *
 * Returns the stored value, or NULL if the key is outside
 * 1..TSD_KEYS_MAX or the calling thread has nothing stored for it.
 */
void *
tsd_get(uint_t key)
{
	tsd_hash_entry_t *hit;

	ASSERT3P(tsd_hash_table, !=, NULL);

	if (key == 0 || key > TSD_KEYS_MAX)
		return (NULL);

	hit = tsd_hash_search(tsd_hash_table, key, curthread->pid);

	return (hit != NULL ? hit->he_value : NULL);
}
EXPORT_SYMBOL(tsd_get);
/*
 * tsd_create - create thread specific data key
 * @keyp: lookup key address
 * @dtor: destructor called during tsd_destroy() or tsd_exit()
 *
 * *keyp must be 0 on entry; a non-zero value is taken to mean the key
 * is already in use and nothing is done. @dtor may be NULL, in which
 * case no cleanup of the stored data is performed by tsd_destroy() or
 * tsd_exit(). A failure to allocate the key is not reported; *keyp is
 * then left as 0.
 *
 * Caller must prevent racing tsd_set() or tsd_get(); this function is
 * safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
 */
void
tsd_create(uint_t *keyp, dtor_func_t dtor)
{
	ASSERT3P(keyp, !=, NULL);

	if (*keyp == 0)
		(void) tsd_hash_add_key(tsd_hash_table, keyp, dtor);
}
EXPORT_SYMBOL(tsd_create);
/*
 * tsd_destroy - destroy thread specific data
 * @keyp: lookup key address
 *
 * Destroys the data stored under this key by every thread, then the key
 * anchor itself, and finally resets *keyp to 0. If the key has no anchor
 * in the table nothing is done and *keyp is left unchanged.
 *
 * Caller must prevent racing tsd_set() or tsd_get(); this function is
 * safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
 */
void
tsd_destroy(uint_t *keyp)
{
	HLIST_HEAD(reap);
	tsd_hash_table_t *table = tsd_hash_table;
	tsd_hash_entry_t *dtor_entry, *entry;
	tsd_hash_bin_t *bucket;

	ASSERT3P(table, !=, NULL);

	spin_lock(&table->ht_lock);

	dtor_entry = tsd_hash_search(table, *keyp, DTOR_PID);
	if (dtor_entry == NULL) {
		spin_unlock(&table->ht_lock);
		return;
	}

	/*
	 * Every thread's entry for this key hangs off the DTOR_PID anchor.
	 * Peel them off one at a time, unlinking each from the hash and
	 * parking it on a private list to be destroyed once unlocked.
	 */
	while (!list_empty(&dtor_entry->he_key_list)) {
		entry = list_entry(dtor_entry->he_key_list.next,
		    tsd_hash_entry_t, he_key_list);
		ASSERT3U(dtor_entry->he_key, ==, entry->he_key);
		ASSERT3P(dtor_entry->he_dtor, ==, entry->he_dtor);

		bucket = &table->ht_bins[hash_long((ulong_t)entry->he_key *
		    (ulong_t)entry->he_pid, table->ht_bits)];

		spin_lock(&bucket->hb_lock);
		tsd_hash_del(table, entry);
		hlist_add_head(&entry->he_list, &reap);
		spin_unlock(&bucket->hb_lock);
	}

	/* Finally retire the anchor itself */
	bucket = &table->ht_bins[hash_long((ulong_t)dtor_entry->he_key *
	    (ulong_t)dtor_entry->he_pid, table->ht_bits)];

	spin_lock(&bucket->hb_lock);
	tsd_hash_del(table, dtor_entry);
	hlist_add_head(&dtor_entry->he_list, &reap);
	spin_unlock(&bucket->hb_lock);

	spin_unlock(&table->ht_lock);

	tsd_hash_dtor(&reap);
	*keyp = 0;
}
EXPORT_SYMBOL(tsd_destroy);
/*
* tsd_exit - destroys all thread specific data for this thread
*
* Destroys all the thread specific data for this thread.
*
* Caller must prevent racing tsd_set() or tsd_get(), this function is
* safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
*/
void
tsd_exit(void)
{
HLIST_HEAD(work);
tsd_hash_table_t *table;
tsd_hash_entry_t *pid_entry, *entry;
tsd_hash_bin_t *pid_entry_bin, *entry_bin;
ulong_t hash;
table = tsd_hash_table;
ASSERT3P(table, !=, NULL);
spin_lock(&table->ht_lock);
pid_entry = tsd_hash_search(table, PID_KEY, curthread->pid);
if (pid_entry == NULL) {
spin_unlock(&table->ht_lock);
return;
}
/*
* All keys associated with this pid must be linked off of the
* PID_KEY entry. They are removed from the hash table and
* linked in to a private working list to be destroyed.
*/
while (!list_empty(&pid_entry->he_pid_list)) {
entry = list_entry(pid_entry->he_pid_list.next,
tsd_hash_entry_t, he_pid_list);
ASSERT3U(pid_entry->he_pid, ==, entry->he_pid);
hash = hash_long((ulong_t)entry->he_key *
(ulong_t)entry->he_pid, table->ht_bits);
entry_bin = &table->ht_bins[hash];
spin_lock(&entry_bin->hb_lock);
tsd_hash_del(table, entry);
hlist_add_head(&entry->he_list, &work);
spin_unlock(&entry_bin->hb_lock);
}
hash = hash_long((ulong_t)pid_entry->he_key *
(ulong_t)pid_entry->he_pid, table->ht_bits);
pid_entry_bin = &table->ht_bins[hash];
spin_lock(&pid_entry_bin->hb_lock);
tsd_hash_del(table, pid_entry);
hlist_add_head(&pid_entry->he_list, &work);
spin_unlock(&pid_entry_bin->hb_lock);
spin_unlock(&table->ht_lock);
tsd_hash_dtor(&work);
}
EXPORT_SYMBOL(tsd_exit);
/*
 * spl_tsd_init - create the global tsd hash table
 *
 * Returns 0 on success or 1 if the table could not be allocated.
 */
int
spl_tsd_init(void)
{
	tsd_hash_table = tsd_hash_table_init(TSD_HASH_TABLE_BITS_DEFAULT);

	return (tsd_hash_table == NULL ? 1 : 0);
}
void
spl_tsd_fini(void)
{
tsd_hash_table_fini(tsd_hash_table);
tsd_hash_table = NULL;
}