From 49ad53c46fda1854a057f53bf972ba34402d19f3 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 14 Jun 2010 16:14:23 -0700 Subject: [PATCH 1/3] Update fix-taskq to never sleep at interrupt time Updated fix to detect if we are in an interrupt and only sleep if it is safe to do so. I guess it must be safe to sleep under Solaris; this must be handled in a sort interrupt handler there. --- module/zfs/zio.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 88d80af4e9..db9bb65fdd 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -1048,7 +1048,10 @@ zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline) { spa_t *spa = zio->io_spa; zio_type_t t = zio->io_type; - int flags = TQ_SLEEP | (cutinline ? TQ_FRONT : 0); + int flags; + + flags = (cutinline ? TQ_FRONT : 0); + flags |= ((q == ZIO_TASKQ_INTERRUPT) ? TQ_NOSLEEP : TQ_SLEEP); /* * If we're a config writer or a probe, the normal issue and From f9f342c5f2ce0dd3887da54b5ae87be142aea5ba Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 14 Jun 2010 16:20:44 -0700 Subject: [PATCH 2/3] Add fix-pthreads topic branch This is a portability change which removes the dependence on the Solaris thread library. All locations where Solaris thread API was used before have been replaced with equivalent Solaris kernel style thread calls. In user space the kernel style threading API is implemented in terms of the portable pthreads library. This includes all threads, mutexes, condition variables, reader/writer locks, and taskqs. 
--- .topdeps | 1 + .topmsg | 11 + cmd/ztest/ztest.c | 219 ++++++++-------- lib/libuutil/uu_misc.c | 7 +- lib/libzpool/include/sys/zfs_context.h | 114 +++++---- lib/libzpool/kernel.c | 336 +++++++++++++++++++------ lib/libzpool/taskq.c | 22 +- 7 files changed, 471 insertions(+), 239 deletions(-) create mode 100644 .topdeps create mode 100644 .topmsg diff --git a/.topdeps b/.topdeps new file mode 100644 index 0000000000..1f7391f92b --- /dev/null +++ b/.topdeps @@ -0,0 +1 @@ +master diff --git a/.topmsg b/.topmsg new file mode 100644 index 0000000000..4998f11235 --- /dev/null +++ b/.topmsg @@ -0,0 +1,11 @@ +From: Brian Behlendorf +Subject: [PATCH] fix pthreads + +This is a portability change which removes the dependence of the Solaris +thread library. All locations where Solaris thread API was used before +have been replaced with equivilant Solaris kernel style thread calls. +In user space the kernel style threading API is implemented in term of +the portable pthreads library. This includes all threads, mutexs, +condition variables, reader/writer locks, and taskqs. + +Signed-off-by: Brian Behlendorf diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index eed92ec72e..69086a34a2 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -57,6 +57,9 @@ * the transaction group number is less than the current, open txg. * If you add a new test, please do this if applicable. * + * (7) Threads are created with a reduced stack size, for sanity checking. + * Therefore, it's important not to allocate huge buffers on the stack. + * * When run with no arguments, ztest runs for about five minutes and * produces no output if successful. To get a little bit of information, * specify -V. To get more information, specify -VV, and so on. 
@@ -168,8 +171,8 @@ typedef enum { typedef struct rll { void *rll_writer; int rll_readers; - mutex_t rll_lock; - cond_t rll_cv; + kmutex_t rll_lock; + kcondvar_t rll_cv; } rll_t; typedef struct rl { @@ -206,7 +209,7 @@ typedef struct ztest_ds { uint64_t zd_seq; ztest_od_t *zd_od; /* debugging aid */ char zd_name[MAXNAMELEN]; - mutex_t zd_dirobj_lock; + kmutex_t zd_dirobj_lock; rll_t zd_object_lock[ZTEST_OBJECT_LOCKS]; rll_t zd_range_lock[ZTEST_RANGE_LOCKS]; } ztest_ds_t; @@ -299,8 +302,8 @@ ztest_info_t ztest_info[] = { * The callbacks are ordered by txg number. */ typedef struct ztest_cb_list { - mutex_t zcl_callbacks_lock; - list_t zcl_callbacks; + kmutex_t zcl_callbacks_lock; + list_t zcl_callbacks; } ztest_cb_list_t; /* @@ -319,8 +322,8 @@ typedef struct ztest_shared { uint64_t zs_vdev_aux; uint64_t zs_alloc; uint64_t zs_space; - mutex_t zs_vdev_lock; - rwlock_t zs_name_lock; + kmutex_t zs_vdev_lock; + krwlock_t zs_name_lock; ztest_info_t zs_info[ZTEST_FUNCS]; uint64_t zs_splits; uint64_t zs_mirrors; @@ -876,8 +879,8 @@ ztest_rll_init(rll_t *rll) { rll->rll_writer = NULL; rll->rll_readers = 0; - VERIFY(_mutex_init(&rll->rll_lock, USYNC_THREAD, NULL) == 0); - VERIFY(cond_init(&rll->rll_cv, USYNC_THREAD, NULL) == 0); + mutex_init(&rll->rll_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&rll->rll_cv, NULL, CV_DEFAULT, NULL); } static void @@ -885,32 +888,32 @@ ztest_rll_destroy(rll_t *rll) { ASSERT(rll->rll_writer == NULL); ASSERT(rll->rll_readers == 0); - VERIFY(_mutex_destroy(&rll->rll_lock) == 0); - VERIFY(cond_destroy(&rll->rll_cv) == 0); + mutex_destroy(&rll->rll_lock); + cv_destroy(&rll->rll_cv); } static void ztest_rll_lock(rll_t *rll, rl_type_t type) { - VERIFY(mutex_lock(&rll->rll_lock) == 0); + mutex_enter(&rll->rll_lock); if (type == RL_READER) { while (rll->rll_writer != NULL) - (void) cond_wait(&rll->rll_cv, &rll->rll_lock); + (void) cv_wait(&rll->rll_cv, &rll->rll_lock); rll->rll_readers++; } else { while (rll->rll_writer != NULL || rll->rll_readers) - 
(void) cond_wait(&rll->rll_cv, &rll->rll_lock); + (void) cv_wait(&rll->rll_cv, &rll->rll_lock); rll->rll_writer = curthread; } - VERIFY(mutex_unlock(&rll->rll_lock) == 0); + mutex_exit(&rll->rll_lock); } static void ztest_rll_unlock(rll_t *rll) { - VERIFY(mutex_lock(&rll->rll_lock) == 0); + mutex_enter(&rll->rll_lock); if (rll->rll_writer) { ASSERT(rll->rll_readers == 0); @@ -922,9 +925,9 @@ ztest_rll_unlock(rll_t *rll) } if (rll->rll_writer == NULL && rll->rll_readers == 0) - VERIFY(cond_broadcast(&rll->rll_cv) == 0); + cv_broadcast(&rll->rll_cv); - VERIFY(mutex_unlock(&rll->rll_lock) == 0); + mutex_exit(&rll->rll_lock); } static void @@ -980,7 +983,7 @@ ztest_zd_init(ztest_ds_t *zd, objset_t *os) zd->zd_seq = 0; dmu_objset_name(os, zd->zd_name); - VERIFY(_mutex_init(&zd->zd_dirobj_lock, USYNC_THREAD, NULL) == 0); + mutex_init(&zd->zd_dirobj_lock, NULL, MUTEX_DEFAULT, NULL); for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++) ztest_rll_init(&zd->zd_object_lock[l]); @@ -992,7 +995,7 @@ ztest_zd_init(ztest_ds_t *zd, objset_t *os) static void ztest_zd_fini(ztest_ds_t *zd) { - VERIFY(_mutex_destroy(&zd->zd_dirobj_lock) == 0); + mutex_destroy(&zd->zd_dirobj_lock); for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++) ztest_rll_destroy(&zd->zd_object_lock[l]); @@ -1729,7 +1732,7 @@ ztest_lookup(ztest_ds_t *zd, ztest_od_t *od, int count) int missing = 0; int error; - ASSERT(_mutex_held(&zd->zd_dirobj_lock)); + ASSERT(mutex_held(&zd->zd_dirobj_lock)); for (int i = 0; i < count; i++, od++) { od->od_object = 0; @@ -1769,7 +1772,7 @@ ztest_create(ztest_ds_t *zd, ztest_od_t *od, int count) { int missing = 0; - ASSERT(_mutex_held(&zd->zd_dirobj_lock)); + ASSERT(mutex_held(&zd->zd_dirobj_lock)); for (int i = 0; i < count; i++, od++) { if (missing) { @@ -1814,7 +1817,7 @@ ztest_remove(ztest_ds_t *zd, ztest_od_t *od, int count) int missing = 0; int error; - ASSERT(_mutex_held(&zd->zd_dirobj_lock)); + ASSERT(mutex_held(&zd->zd_dirobj_lock)); od += count - 1; @@ -2028,13 +2031,13 @@ 
ztest_object_init(ztest_ds_t *zd, ztest_od_t *od, size_t size, boolean_t remove) int count = size / sizeof (*od); int rv = 0; - VERIFY(mutex_lock(&zd->zd_dirobj_lock) == 0); + mutex_enter(&zd->zd_dirobj_lock); if ((ztest_lookup(zd, od, count) != 0 || remove) && (ztest_remove(zd, od, count) != 0 || ztest_create(zd, od, count) != 0)) rv = -1; zd->zd_od = od; - VERIFY(mutex_unlock(&zd->zd_dirobj_lock) == 0); + mutex_exit(&zd->zd_dirobj_lock); return (rv); } @@ -2090,7 +2093,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) * Attempt to create an existing pool. It shouldn't matter * what's in the nvroot; we should fail with EEXIST. */ - (void) rw_rdlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_READER); nvroot = make_vdev_root("/dev/bogus", NULL, 0, 0, 0, 0, 0, 1); VERIFY3U(EEXIST, ==, spa_create(zs->zs_pool, nvroot, NULL, NULL, NULL)); nvlist_free(nvroot); @@ -2098,7 +2101,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) VERIFY3U(EBUSY, ==, spa_destroy(zs->zs_pool)); spa_close(spa, FTAG); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); } static vdev_t * @@ -2151,7 +2154,7 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) nvlist_t *nvroot; int error; - VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0); + mutex_enter(&zs->zs_vdev_lock); leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * zopt_raidz; spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); @@ -2177,9 +2180,9 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) * dmu_objset_destroy() to fail with EBUSY thus * leaving the dataset in an inconsistent state. 
*/ - VERIFY(rw_wrlock(&ztest_shared->zs_name_lock) == 0); + rw_enter(&ztest_shared->zs_name_lock, RW_WRITER); error = spa_vdev_remove(spa, guid, B_FALSE); - VERIFY(rw_unlock(&ztest_shared->zs_name_lock) == 0); + rw_exit(&ztest_shared->zs_name_lock); if (error && error != EEXIST) fatal(0, "spa_vdev_remove() = %d", error); @@ -2201,7 +2204,7 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) fatal(0, "spa_vdev_add() = %d", error); } - VERIFY(mutex_unlock(&ztest_shared->zs_vdev_lock) == 0); + mutex_exit(&ztest_shared->zs_vdev_lock); } /* @@ -2227,7 +2230,7 @@ ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id) aux = ZPOOL_CONFIG_L2CACHE; } - VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0); + mutex_enter(&zs->zs_vdev_lock); spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); @@ -2283,7 +2286,7 @@ ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id) fatal(0, "spa_vdev_remove(%llu) = %d", guid, error); } - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); } /* @@ -2300,11 +2303,11 @@ ztest_split_pool(ztest_ds_t *zd, uint64_t id) uint_t c, children, schildren = 0, lastlogid = 0; int error = 0; - VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0); + mutex_enter(&zs->zs_vdev_lock); /* ensure we have a useable config; mirrors of raidz aren't supported */ if (zs->zs_mirrors < 3 || zopt_raidz > 1) { - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); return; } @@ -2363,9 +2366,9 @@ ztest_split_pool(ztest_ds_t *zd, uint64_t id) spa_config_exit(spa, SCL_VDEV, FTAG); - (void) rw_wrlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_WRITER); error = spa_vdev_split_mirror(spa, "splitp", config, NULL, B_FALSE); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); nvlist_free(config); @@ -2378,7 +2381,7 @@ ztest_split_pool(ztest_ds_t *zd, uint64_t id) ++zs->zs_splits; --zs->zs_mirrors; } - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); } @@ -2407,7 +2410,7 @@ 
ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) int oldvd_is_log; int error, expected_error; - VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0); + mutex_enter(&zs->zs_vdev_lock); leaves = MAX(zs->zs_mirrors, 1) * zopt_raidz; spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); @@ -2468,7 +2471,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) if (error != 0 && error != ENODEV && error != EBUSY && error != ENOTSUP) fatal(0, "detach (%s) returned %d", oldpath, error); - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); return; } @@ -2561,7 +2564,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) (longlong_t)newsize, replacing, error, expected_error); } - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); } /* @@ -2690,7 +2693,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) uint64_t top; uint64_t old_class_space, new_class_space, old_ms_count, new_ms_count; - VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0); + mutex_enter(&zs->zs_vdev_lock); spa_config_enter(spa, SCL_STATE, spa, RW_READER); top = ztest_random_vdev_top(spa, B_TRUE); @@ -2718,7 +2721,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) if (tvd->vdev_state != VDEV_STATE_HEALTHY || psize == 0 || psize >= 4 * zopt_vdev_size) { spa_config_exit(spa, SCL_STATE, spa); - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); return; } ASSERT(psize > 0); @@ -2743,7 +2746,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) "the vdev configuration changed.\n"); } spa_config_exit(spa, SCL_STATE, spa); - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); return; } @@ -2777,7 +2780,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) "intervening vdev offline or remove.\n"); } spa_config_exit(spa, SCL_STATE, spa); - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); return; } @@ -2805,7 +2808,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) } 
spa_config_exit(spa, SCL_STATE, spa); - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); } /* @@ -2912,7 +2915,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) char name[MAXNAMELEN]; zilog_t *zilog; - (void) rw_rdlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_READER); (void) snprintf(name, MAXNAMELEN, "%s/temp_%llu", zs->zs_pool, (u_longlong_t)id); @@ -2950,7 +2953,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) if (error) { if (error == ENOSPC) { ztest_record_enospc(FTAG); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); return; } fatal(0, "dmu_objset_create(%s) = %d", name, error); @@ -2999,7 +3002,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) dmu_objset_disown(os, FTAG); ztest_zd_fini(&zdtmp); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); } /* @@ -3010,10 +3013,10 @@ ztest_dmu_snapshot_create_destroy(ztest_ds_t *zd, uint64_t id) { ztest_shared_t *zs = ztest_shared; - (void) rw_rdlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_READER); (void) ztest_snapshot_destroy(zd->zd_name, id); (void) ztest_snapshot_create(zd->zd_name, id); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); } /* @@ -3069,7 +3072,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) char *osname = zd->zd_name; int error; - (void) rw_rdlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_READER); ztest_dsl_dataset_cleanup(osname, id); @@ -3149,7 +3152,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) out: ztest_dsl_dataset_cleanup(osname, id); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); } /* @@ -4070,9 +4073,9 @@ ztest_commit_callback(void *arg, int error) ASSERT3U(data->zcd_txg, !=, 0); /* Remove our callback from the list */ - (void) mutex_lock(&zcl.zcl_callbacks_lock); + (void) mutex_enter(&zcl.zcl_callbacks_lock); 
list_remove(&zcl.zcl_callbacks, data); - (void) mutex_unlock(&zcl.zcl_callbacks_lock); + (void) mutex_exit(&zcl.zcl_callbacks_lock); out: umem_free(data, sizeof (ztest_cb_data_t)); @@ -4088,6 +4091,7 @@ ztest_create_cb_data(objset_t *os, uint64_t txg) cb_data->zcd_txg = txg; cb_data->zcd_spa = dmu_objset_spa(os); + list_link_init(&cb_data->zcd_node); return (cb_data); } @@ -4174,7 +4178,7 @@ ztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id) dmu_write(os, od[0].od_object, 0, sizeof (uint64_t), &txg, tx); - (void) mutex_lock(&zcl.zcl_callbacks_lock); + (void) mutex_enter(&zcl.zcl_callbacks_lock); /* * Since commit callbacks don't have any ordering requirement and since @@ -4221,7 +4225,7 @@ ztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id) tmp_cb = cb_data[i]; } - (void) mutex_unlock(&zcl.zcl_callbacks_lock); + (void) mutex_exit(&zcl.zcl_callbacks_lock); dmu_tx_commit(tx); } @@ -4238,13 +4242,13 @@ ztest_dsl_prop_get_set(ztest_ds_t *zd, uint64_t id) }; ztest_shared_t *zs = ztest_shared; - (void) rw_rdlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_READER); for (int p = 0; p < sizeof (proplist) / sizeof (proplist[0]); p++) (void) ztest_dsl_prop_set_uint64(zd->zd_name, proplist[p], ztest_random_dsl_prop(proplist[p]), (int)ztest_random(2)); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); } /* ARGSUSED */ @@ -4254,7 +4258,7 @@ ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id) ztest_shared_t *zs = ztest_shared; nvlist_t *props = NULL; - (void) rw_rdlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_READER); (void) ztest_spa_prop_set_uint64(zs, ZPOOL_PROP_DEDUPDITTO, ZIO_DEDUPDITTO_MIN + ztest_random(ZIO_DEDUPDITTO_MIN)); @@ -4266,7 +4270,7 @@ ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id) nvlist_free(props); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); } /* @@ -4284,14 +4288,14 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) char tag[100]; char 
osname[MAXNAMELEN]; - (void) rw_rdlock(&ztest_shared->zs_name_lock); + (void) rw_enter(&ztest_shared->zs_name_lock, RW_READER); dmu_objset_name(os, osname); - (void) snprintf(snapname, 100, "sh1_%llu", id); + (void) snprintf(snapname, 100, "sh1_%llu", (u_longlong_t)id); (void) snprintf(fullname, 100, "%s@%s", osname, snapname); - (void) snprintf(clonename, 100, "%s/ch1_%llu", osname, id); - (void) snprintf(tag, 100, "%tag_%llu", id); + (void) snprintf(clonename, 100, "%s/ch1_%llu",osname,(u_longlong_t)id); + (void) snprintf(tag, 100, "tag_%llu", (u_longlong_t)id); /* * Clean up from any previous run. @@ -4378,7 +4382,7 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) VERIFY(dmu_objset_hold(fullname, FTAG, &origin) == ENOENT); out: - (void) rw_unlock(&ztest_shared->zs_name_lock); + (void) rw_exit(&ztest_shared->zs_name_lock); } /* @@ -4406,11 +4410,11 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) uint64_t guid0 = 0; boolean_t islog = B_FALSE; - VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0); + mutex_enter(&zs->zs_vdev_lock); maxfaults = MAXFAULTS(); leaves = MAX(zs->zs_mirrors, 1) * zopt_raidz; mirror_save = zs->zs_mirrors; - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); ASSERT(leaves >= 1); @@ -4504,12 +4508,13 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) * leaving the dataset in an inconsistent state. 
*/ if (islog) - (void) rw_wrlock(&ztest_shared->zs_name_lock); + (void) rw_enter(&ztest_shared->zs_name_lock, + RW_WRITER); VERIFY(vdev_offline(spa, guid0, flags) != EBUSY); if (islog) - (void) rw_unlock(&ztest_shared->zs_name_lock); + (void) rw_exit(&ztest_shared->zs_name_lock); } else { (void) vdev_online(spa, guid0, 0, NULL); } @@ -4536,9 +4541,9 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) if (offset >= fsize) continue; - VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0); + mutex_enter(&zs->zs_vdev_lock); if (mirror_save != zs->zs_mirrors) { - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); (void) close(fd); return; } @@ -4547,7 +4552,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) fatal(1, "can't inject bad word at 0x%llx in %s", offset, pathrand); - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); if (zopt_verbose >= 7) (void) printf("injected bad word into %s," @@ -4587,13 +4592,13 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id) * Take the name lock as writer to prevent anyone else from changing * the pool and dataset properies we need to maintain during this test. 
*/ - (void) rw_wrlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_WRITER); if (ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_DEDUP, checksum, B_FALSE) != 0 || ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_COPIES, 1, B_FALSE) != 0) { - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); return; } @@ -4607,7 +4612,7 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id) dmu_tx_hold_write(tx, object, 0, copies * blocksize); txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); if (txg == 0) { - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); return; } @@ -4651,7 +4656,7 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id) zio_buf_free(buf, psize); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); } /* @@ -4680,7 +4685,7 @@ ztest_spa_rename(ztest_ds_t *zd, uint64_t id) char *oldname, *newname; spa_t *spa; - (void) rw_wrlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_WRITER); oldname = zs->zs_pool; newname = umem_alloc(strlen(oldname) + 5, UMEM_NOFAIL); @@ -4720,7 +4725,7 @@ ztest_spa_rename(ztest_ds_t *zd, uint64_t id) umem_free(newname, strlen(newname) + 1); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); } /* @@ -4893,6 +4898,9 @@ ztest_resume_thread(void *arg) ztest_resume(spa); (void) poll(NULL, 0, 100); } + + thread_exit(); + return (NULL); } @@ -4908,6 +4916,7 @@ ztest_deadman_thread(void *arg) (void) poll(NULL, 0, (int)(1000 * delta)); fatal(0, "failed to complete within %d seconds of deadline", grace); + thread_exit(); return (NULL); } @@ -4969,6 +4978,8 @@ ztest_thread(void *arg) ztest_execute(zi, id); } + thread_exit(); + return (NULL); } @@ -5033,18 +5044,18 @@ ztest_dataset_open(ztest_shared_t *zs, int d) ztest_dataset_name(name, zs->zs_pool, d); - (void) rw_rdlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_READER); error = ztest_dataset_create(name); if (error == ENOSPC) { - (void) rw_unlock(&zs->zs_name_lock); + (void) 
rw_exit(&zs->zs_name_lock); ztest_record_enospc(FTAG); return (error); } ASSERT(error == 0 || error == EEXIST); VERIFY3U(dmu_objset_hold(name, zd, &os), ==, 0); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); ztest_zd_init(zd, os); @@ -5095,9 +5106,10 @@ ztest_dataset_close(ztest_shared_t *zs, int d) static void ztest_run(ztest_shared_t *zs) { - thread_t *tid; + kthread_t **tid; spa_t *spa; - thread_t resume_tid; + kthread_t *resume_thread; + uint64_t object; int error; ztest_exiting = B_FALSE; @@ -5105,8 +5117,8 @@ ztest_run(ztest_shared_t *zs) /* * Initialize parent/child shared state. */ - VERIFY(_mutex_init(&zs->zs_vdev_lock, USYNC_THREAD, NULL) == 0); - VERIFY(rwlock_init(&zs->zs_name_lock, USYNC_THREAD, NULL) == 0); + mutex_init(&zs->zs_vdev_lock, NULL, MUTEX_DEFAULT, NULL); + rw_init(&zs->zs_name_lock, NULL, RW_DEFAULT, NULL); zs->zs_thread_start = gethrtime(); zs->zs_thread_stop = zs->zs_thread_start + zopt_passtime * NANOSEC; @@ -5115,7 +5127,7 @@ ztest_run(ztest_shared_t *zs) if (ztest_random(100) < zopt_killrate) zs->zs_thread_kill -= ztest_random(zopt_passtime * NANOSEC); - (void) _mutex_init(&zcl.zcl_callbacks_lock, USYNC_THREAD, NULL); + mutex_init(&zcl.zcl_callbacks_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zcl.zcl_callbacks, sizeof (ztest_cb_data_t), offsetof(ztest_cb_data_t, zcd_node)); @@ -5142,14 +5154,14 @@ ztest_run(ztest_shared_t *zs) /* * Create a thread to periodically resume suspended I/O. */ - VERIFY(thr_create(0, 0, ztest_resume_thread, spa, THR_BOUND, - &resume_tid) == 0); + VERIFY3P((resume_thread = thread_create(NULL, 0, ztest_resume_thread, + spa, TS_RUN, NULL, 0, 0)), !=, NULL); /* * Create a deadman thread to abort() if we hang. 
*/ - VERIFY(thr_create(0, 0, ztest_deadman_thread, zs, THR_BOUND, - NULL) == 0); + VERIFY3P(thread_create(NULL, 0, ztest_deadman_thread, zs, + TS_RUN, NULL, 0, 0), !=, NULL); /* * Verify that we can safely inquire about about any object, @@ -5175,7 +5187,7 @@ ztest_run(ztest_shared_t *zs) } zs->zs_enospc_count = 0; - tid = umem_zalloc(zopt_threads * sizeof (thread_t), UMEM_NOFAIL); + tid = umem_zalloc(zopt_threads * sizeof (kthread_t *), UMEM_NOFAIL); if (zopt_verbose >= 4) (void) printf("starting main threads...\n"); @@ -5186,8 +5198,9 @@ ztest_run(ztest_shared_t *zs) for (int t = 0; t < zopt_threads; t++) { if (t < zopt_datasets && ztest_dataset_open(zs, t) != 0) return; - VERIFY(thr_create(0, 0, ztest_thread, (void *)(uintptr_t)t, - THR_BOUND, &tid[t]) == 0); + + VERIFY3P(tid[t] = thread_create(NULL, 0, ztest_thread, + (void *)(uintptr_t)t, TS_RUN, NULL, 0, 0), !=, NULL); } /* @@ -5195,7 +5208,7 @@ ztest_run(ztest_shared_t *zs) * so we don't close datasets while threads are still using them. */ for (int t = zopt_threads - 1; t >= 0; t--) { - VERIFY(thr_join(tid[t], NULL, NULL) == 0); + thread_join(tid[t]->t_tid); if (t < zopt_datasets) ztest_dataset_close(zs, t); } @@ -5205,18 +5218,18 @@ ztest_run(ztest_shared_t *zs) zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(spa)); zs->zs_space = metaslab_class_get_space(spa_normal_class(spa)); - umem_free(tid, zopt_threads * sizeof (thread_t)); + umem_free(tid, zopt_threads * sizeof (kthread_t *)); /* Kill the resume thread */ ztest_exiting = B_TRUE; - VERIFY(thr_join(resume_tid, NULL, NULL) == 0); + thread_join(resume_thread->t_tid); ztest_resume(spa); /* * Right before closing the pool, kick off a bunch of async I/O; * spa_close() should wait for it to complete. 
*/ - for (uint64_t object = 1; object < 50; object++) + for (object = 1; object < 50; object++) dmu_prefetch(spa->spa_meta_objset, object, 0, 1ULL << 20); spa_close(spa, FTAG); @@ -5314,10 +5327,10 @@ ztest_freeze(ztest_shared_t *zs) list_destroy(&zcl.zcl_callbacks); - (void) _mutex_destroy(&zcl.zcl_callbacks_lock); + (void) mutex_destroy(&zcl.zcl_callbacks_lock); - (void) rwlock_destroy(&zs->zs_name_lock); - (void) _mutex_destroy(&zs->zs_vdev_lock); + (void) rw_destroy(&zs->zs_name_lock); + (void) mutex_destroy(&zs->zs_vdev_lock); } void @@ -5372,8 +5385,8 @@ ztest_init(ztest_shared_t *zs) spa_t *spa; nvlist_t *nvroot, *props; - VERIFY(_mutex_init(&zs->zs_vdev_lock, USYNC_THREAD, NULL) == 0); - VERIFY(rwlock_init(&zs->zs_name_lock, USYNC_THREAD, NULL) == 0); + mutex_init(&zs->zs_vdev_lock, NULL, MUTEX_DEFAULT, NULL); + rw_init(&zs->zs_name_lock, NULL, RW_DEFAULT, NULL); kernel_init(FREAD | FWRITE); diff --git a/lib/libuutil/uu_misc.c b/lib/libuutil/uu_misc.c index 74ec177c11..fc57328c2b 100644 --- a/lib/libuutil/uu_misc.c +++ b/lib/libuutil/uu_misc.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #if !defined(TEXT_DOMAIN) @@ -70,11 +69,12 @@ static va_list uu_panic_args; static pthread_t uu_panic_thread; static uint32_t _uu_main_error; +static __thread int _uu_main_thread = 0; void uu_set_error(uint_t code) { - if (thr_main() != 0) { + if (_uu_main_thread) { _uu_main_error = code; return; } @@ -103,7 +103,7 @@ uu_set_error(uint_t code) uint32_t uu_error(void) { - if (thr_main() != 0) + if (_uu_main_thread) return (_uu_main_error); if (uu_error_key_setup < 0) /* can't happen? 
*/ @@ -251,5 +251,6 @@ uu_release_child(void) static void uu_init(void) { + _uu_main_thread = 1; (void) pthread_atfork(uu_lockup, uu_release, uu_release_child); } diff --git a/lib/libzpool/include/sys/zfs_context.h b/lib/libzpool/include/sys/zfs_context.h index 9a6d712e53..db93a89b8b 100644 --- a/lib/libzpool/include/sys/zfs_context.h +++ b/lib/libzpool/include/sys/zfs_context.h @@ -50,8 +50,7 @@ extern "C" { #include #include #include -#include -#include +#include #include #include #include @@ -91,6 +90,8 @@ extern "C" { #define CE_PANIC 3 /* panic */ #define CE_IGNORE 4 /* print nothing */ +extern int aok; + /* * ZFS debugging */ @@ -196,27 +197,45 @@ _NOTE(CONSTCOND) } while (0) /* * Threads */ -#define curthread ((void *)(uintptr_t)thr_self()) - -typedef struct kthread kthread_t; - -#define thread_create(stk, stksize, func, arg, len, pp, state, pri) \ - zk_thread_create(func, arg) -#define thread_exit() thr_exit(NULL) -#define thread_join(t) panic("libzpool cannot join threads") - -#define newproc(f, a, cid, pri, ctp, pid) (ENOSYS) +#define TS_RUN 0x00000002 +#define STACK_SIZE 8192 /* Linux x86 and amd64 */ /* in libzpool, p0 exists only to have its address taken */ -struct proc { +typedef struct proc { uintptr_t this_is_never_used_dont_dereference_it; -}; +} proc_t; extern struct proc p0; -#define PS_NONE -1 +typedef void (*thread_func_t)(void *); +typedef void (*thread_func_arg_t)(void *); +typedef pthread_t kt_did_t; -extern kthread_t *zk_thread_create(void (*func)(), void *arg); +typedef struct kthread { + kt_did_t t_tid; + thread_func_t t_func; + void * t_arg; +} kthread_t; + +/* XXX tsd_create()/tsd_destroy() missing */ +#define tsd_get(key) pthread_getspecific(key) +#define tsd_set(key, val) pthread_setspecific(key, val) +#define curthread zk_thread_current() +#define thread_exit zk_thread_exit +#define thread_create(stk, stksize, func, arg, len, pp, state, pri) \ + zk_thread_create(stk, stksize, (thread_func_t)func, arg, \ + len, NULL, state, pri) 
+#define thread_join(t) zk_thread_join(t) +#define newproc(f,a,cid,pri,ctp,pid) (ENOSYS) + +extern kthread_t *zk_thread_current(void); +extern void zk_thread_exit(void); +extern kthread_t *zk_thread_create(caddr_t stk, size_t stksize, + thread_func_t func, void *arg, size_t len, + proc_t *pp, int state, pri_t pri); +extern void zk_thread_join(kt_did_t tid); + +#define PS_NONE -1 #define issig(why) (FALSE) #define ISSIG(thr, why) (FALSE) @@ -224,53 +243,51 @@ extern kthread_t *zk_thread_create(void (*func)(), void *arg); /* * Mutexes */ +#define MTX_MAGIC 0x9522f51362a6e326ull +#define MTX_INIT ((void *)NULL) +#define MTX_DEST ((void *)-1UL) + typedef struct kmutex { void *m_owner; - boolean_t initialized; - mutex_t m_lock; + uint64_t m_magic; + pthread_mutex_t m_lock; } kmutex_t; -#define MUTEX_DEFAULT USYNC_THREAD -#undef MUTEX_HELD -#define MUTEX_HELD(m) _mutex_held(&(m)->m_lock) +#define MUTEX_DEFAULT 0 +#define MUTEX_HELD(m) ((m)->m_owner == curthread) -/* - * Argh -- we have to get cheesy here because the kernel and userland - * have different signatures for the same routine. 
- */ -extern int _mutex_init(mutex_t *mp, int type, void *arg); -extern int _mutex_destroy(mutex_t *mp); - -#define mutex_init(mp, b, c, d) zmutex_init((kmutex_t *)(mp)) -#define mutex_destroy(mp) zmutex_destroy((kmutex_t *)(mp)) - -extern void zmutex_init(kmutex_t *mp); -extern void zmutex_destroy(kmutex_t *mp); +extern void mutex_init(kmutex_t *mp, char *name, int type, void *cookie); +extern void mutex_destroy(kmutex_t *mp); extern void mutex_enter(kmutex_t *mp); extern void mutex_exit(kmutex_t *mp); extern int mutex_tryenter(kmutex_t *mp); extern void *mutex_owner(kmutex_t *mp); +extern int mutex_held(kmutex_t *mp); /* * RW locks */ +#define RW_MAGIC 0x4d31fb123648e78aull +#define RW_INIT ((void *)NULL) +#define RW_DEST ((void *)-1UL) + typedef struct krwlock { - void *rw_owner; - boolean_t initialized; - rwlock_t rw_lock; + void *rw_owner; + void *rw_wr_owner; + uint64_t rw_magic; + pthread_rwlock_t rw_lock; + uint_t rw_readers; } krwlock_t; typedef int krw_t; #define RW_READER 0 #define RW_WRITER 1 -#define RW_DEFAULT USYNC_THREAD +#define RW_DEFAULT RW_READER -#undef RW_READ_HELD -#define RW_READ_HELD(x) _rw_read_held(&(x)->rw_lock) - -#undef RW_WRITE_HELD -#define RW_WRITE_HELD(x) _rw_write_held(&(x)->rw_lock) +#define RW_READ_HELD(x) ((x)->rw_readers > 0) +#define RW_WRITE_HELD(x) ((x)->rw_wr_owner == curthread) +#define RW_LOCK_HELD(x) (RW_READ_HELD(x) || RW_WRITE_HELD(x)) extern void rw_init(krwlock_t *rwlp, char *name, int type, void *arg); extern void rw_destroy(krwlock_t *rwlp); @@ -288,9 +305,14 @@ extern gid_t *crgetgroups(cred_t *cr); /* * Condition variables */ -typedef cond_t kcondvar_t; +#define CV_MAGIC 0xd31ea9a83b1b30c4ull -#define CV_DEFAULT USYNC_THREAD +typedef struct kcondvar { + uint64_t cv_magic; + pthread_cond_t cv; +} kcondvar_t; + +#define CV_DEFAULT 0 extern void cv_init(kcondvar_t *cv, char *name, int type, void *arg); extern void cv_destroy(kcondvar_t *cv); @@ -355,7 +377,7 @@ extern taskq_t *taskq_create(const char *, int, pri_t, 
int, int, uint_t); extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t); extern void taskq_destroy(taskq_t *); extern void taskq_wait(taskq_t *); -extern int taskq_member(taskq_t *, void *); +extern int taskq_member(taskq_t *, kthread_t *); extern void system_taskq_init(void); extern void system_taskq_fini(void); @@ -482,7 +504,7 @@ extern void delay(clock_t ticks); #define minclsyspri 60 #define maxclsyspri 99 -#define CPU_SEQID (thr_self() & (max_ncpus - 1)) +#define CPU_SEQID (pthread_self() & (max_ncpus - 1)) #define kcred NULL #define CRED() NULL diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index 5284c12532..6f9e383a87 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -59,16 +60,150 @@ struct proc p0; * threads * ========================================================================= */ -/*ARGSUSED*/ -kthread_t * -zk_thread_create(void (*func)(), void *arg) + +pthread_cond_t kthread_cond = PTHREAD_COND_INITIALIZER; +pthread_mutex_t kthread_lock = PTHREAD_MUTEX_INITIALIZER; +pthread_key_t kthread_key; +int kthread_nr = 0; + +static void +thread_init(void) { - thread_t tid; + kthread_t *kt; - VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED, - &tid) == 0); + VERIFY3S(pthread_key_create(&kthread_key, NULL), ==, 0); - return ((void *)(uintptr_t)tid); + /* Create entry for primary kthread */ + kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL); + kt->t_tid = pthread_self(); + kt->t_func = NULL; + + VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0); + + /* Only the main thread should be running at the moment */ + ASSERT3S(kthread_nr, ==, 0); + kthread_nr = 1; +} + +static void +thread_fini(void) +{ + kthread_t *kt = curthread; + + ASSERT(pthread_equal(kt->t_tid, pthread_self())); + ASSERT3P(kt->t_func, ==, NULL); + + umem_free(kt, sizeof(kthread_t)); + + /* Wait for all threads to exit via thread_exit() */ + 
VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0); + + kthread_nr--; /* Main thread is exiting */ + + while (kthread_nr > 0) + VERIFY3S(pthread_cond_wait(&kthread_cond, &kthread_lock), ==, + 0); + + ASSERT3S(kthread_nr, ==, 0); + VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0); + + VERIFY3S(pthread_key_delete(kthread_key), ==, 0); +} + +kthread_t * +zk_thread_current(void) +{ + kthread_t *kt = pthread_getspecific(kthread_key); + + ASSERT3P(kt, !=, NULL); + + return kt; +} + +void * +zk_thread_helper(void *arg) +{ + kthread_t *kt = (kthread_t *) arg; + + VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0); + + VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0); + kthread_nr++; + VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0); + + kt->t_tid = pthread_self(); + ((thread_func_arg_t) kt->t_func)(kt->t_arg); + + /* Unreachable, thread must exit with thread_exit() */ + abort(); + + return NULL; +} + +kthread_t * +zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg, + size_t len, proc_t *pp, int state, pri_t pri) +{ + kthread_t *kt; + pthread_t tid; + pthread_attr_t attr; + size_t stack; + + /* + * Due to a race when getting/setting the thread ID, currently only + * detached threads are supported. + */ + ASSERT3S(state & ~TS_RUN, ==, 0); + + kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL); + kt->t_func = func; + kt->t_arg = arg; + + /* + * The Solaris kernel stack size in x86/x64 is 8K, so we reduce the + * default stack size in userspace, for sanity checking. + * + * PTHREAD_STACK_MIN is the stack required for a NULL procedure in + * userspace. + * + * XXX: Stack size for other architectures is not being taken into + * account. 
+ */ + stack = PTHREAD_STACK_MIN + MAX(stksize, STACK_SIZE); + + VERIFY3S(pthread_attr_init(&attr), ==, 0); + VERIFY3S(pthread_attr_setstacksize(&attr, stack), ==, 0); + VERIFY3S(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED), + ==, 0); + + VERIFY3S(pthread_create(&tid, &attr, &zk_thread_helper, kt), ==, 0); + + VERIFY3S(pthread_attr_destroy(&attr), ==, 0); + + return kt; +} + +void +zk_thread_exit(void) +{ + kthread_t *kt = curthread; + + ASSERT(pthread_equal(kt->t_tid, pthread_self())); + + umem_free(kt, sizeof(kthread_t)); + + pthread_mutex_lock(&kthread_lock); + kthread_nr--; + pthread_mutex_unlock(&kthread_lock); + + pthread_cond_broadcast(&kthread_cond); + pthread_exit(NULL); +} + +void +zk_thread_join(kt_did_t tid) +{ + pthread_join((pthread_t)tid, NULL); } /* @@ -99,42 +234,45 @@ kstat_delete(kstat_t *ksp) * mutexes * ========================================================================= */ + void -zmutex_init(kmutex_t *mp) +mutex_init(kmutex_t *mp, char *name, int type, void *cookie) { - mp->m_owner = NULL; - mp->initialized = B_TRUE; - (void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL); + ASSERT3S(type, ==, MUTEX_DEFAULT); + ASSERT3P(cookie, ==, NULL); + mp->m_owner = MTX_INIT; + mp->m_magic = MTX_MAGIC; + VERIFY3S(pthread_mutex_init(&mp->m_lock, NULL), ==, 0); } void -zmutex_destroy(kmutex_t *mp) +mutex_destroy(kmutex_t *mp) { - ASSERT(mp->initialized == B_TRUE); - ASSERT(mp->m_owner == NULL); - (void) _mutex_destroy(&(mp)->m_lock); - mp->m_owner = (void *)-1UL; - mp->initialized = B_FALSE; + ASSERT3U(mp->m_magic, ==, MTX_MAGIC); + ASSERT3P(mp->m_owner, ==, MTX_INIT); + VERIFY3S(pthread_mutex_destroy(&(mp)->m_lock), ==, 0); + mp->m_owner = MTX_DEST; + mp->m_magic = 0; } void mutex_enter(kmutex_t *mp) { - ASSERT(mp->initialized == B_TRUE); - ASSERT(mp->m_owner != (void *)-1UL); - ASSERT(mp->m_owner != curthread); - VERIFY(mutex_lock(&mp->m_lock) == 0); - ASSERT(mp->m_owner == NULL); + ASSERT3U(mp->m_magic, ==, MTX_MAGIC); + 
ASSERT3P(mp->m_owner, !=, MTX_DEST); + ASSERT3P(mp->m_owner, !=, curthread); + VERIFY3S(pthread_mutex_lock(&mp->m_lock), ==, 0); + ASSERT3P(mp->m_owner, ==, MTX_INIT); mp->m_owner = curthread; } int mutex_tryenter(kmutex_t *mp) { - ASSERT(mp->initialized == B_TRUE); - ASSERT(mp->m_owner != (void *)-1UL); - if (0 == mutex_trylock(&mp->m_lock)) { - ASSERT(mp->m_owner == NULL); + ASSERT3U(mp->m_magic, ==, MTX_MAGIC); + ASSERT3P(mp->m_owner, !=, MTX_DEST); + if (0 == pthread_mutex_trylock(&mp->m_lock)) { + ASSERT3P(mp->m_owner, ==, MTX_INIT); mp->m_owner = curthread; return (1); } else { @@ -145,53 +283,71 @@ mutex_tryenter(kmutex_t *mp) void mutex_exit(kmutex_t *mp) { - ASSERT(mp->initialized == B_TRUE); - ASSERT(mutex_owner(mp) == curthread); - mp->m_owner = NULL; - VERIFY(mutex_unlock(&mp->m_lock) == 0); + ASSERT3U(mp->m_magic, ==, MTX_MAGIC); + ASSERT3P(mutex_owner(mp), ==, curthread); + mp->m_owner = MTX_INIT; + VERIFY3S(pthread_mutex_unlock(&mp->m_lock), ==, 0); } void * mutex_owner(kmutex_t *mp) { - ASSERT(mp->initialized == B_TRUE); + ASSERT3U(mp->m_magic, ==, MTX_MAGIC); return (mp->m_owner); } +int +mutex_held(kmutex_t *mp) +{ + return (mp->m_owner == curthread); +} + /* * ========================================================================= * rwlocks * ========================================================================= */ -/*ARGSUSED*/ + void rw_init(krwlock_t *rwlp, char *name, int type, void *arg) { - rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL); - rwlp->rw_owner = NULL; - rwlp->initialized = B_TRUE; + ASSERT3S(type, ==, RW_DEFAULT); + ASSERT3P(arg, ==, NULL); + VERIFY3S(pthread_rwlock_init(&rwlp->rw_lock, NULL), ==, 0); + rwlp->rw_owner = RW_INIT; + rwlp->rw_wr_owner = RW_INIT; + rwlp->rw_readers = 0; + rwlp->rw_magic = RW_MAGIC; } void rw_destroy(krwlock_t *rwlp) { - rwlock_destroy(&rwlp->rw_lock); - rwlp->rw_owner = (void *)-1UL; - rwlp->initialized = B_FALSE; + ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC); + + 
VERIFY3S(pthread_rwlock_destroy(&rwlp->rw_lock), ==, 0); + rwlp->rw_magic = 0; } void rw_enter(krwlock_t *rwlp, krw_t rw) { - ASSERT(!RW_LOCK_HELD(rwlp)); - ASSERT(rwlp->initialized == B_TRUE); - ASSERT(rwlp->rw_owner != (void *)-1UL); - ASSERT(rwlp->rw_owner != curthread); + ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC); + ASSERT3P(rwlp->rw_owner, !=, curthread); + ASSERT3P(rwlp->rw_wr_owner, !=, curthread); - if (rw == RW_READER) - VERIFY(rw_rdlock(&rwlp->rw_lock) == 0); - else - VERIFY(rw_wrlock(&rwlp->rw_lock) == 0); + if (rw == RW_READER) { + VERIFY3S(pthread_rwlock_rdlock(&rwlp->rw_lock), ==, 0); + ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT); + + atomic_inc_uint(&rwlp->rw_readers); + } else { + VERIFY3S(pthread_rwlock_wrlock(&rwlp->rw_lock), ==, 0); + ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT); + ASSERT3U(rwlp->rw_readers, ==, 0); + + rwlp->rw_wr_owner = curthread; + } rwlp->rw_owner = curthread; } @@ -199,11 +355,16 @@ rw_enter(krwlock_t *rwlp, krw_t rw) void rw_exit(krwlock_t *rwlp) { - ASSERT(rwlp->initialized == B_TRUE); - ASSERT(rwlp->rw_owner != (void *)-1UL); + ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC); + ASSERT(RW_LOCK_HELD(rwlp)); - rwlp->rw_owner = NULL; - VERIFY(rw_unlock(&rwlp->rw_lock) == 0); + if (RW_READ_HELD(rwlp)) + atomic_dec_uint(&rwlp->rw_readers); + else + rwlp->rw_wr_owner = RW_INIT; + + rwlp->rw_owner = RW_INIT; + VERIFY3S(pthread_rwlock_unlock(&rwlp->rw_lock), ==, 0); } int @@ -211,28 +372,36 @@ rw_tryenter(krwlock_t *rwlp, krw_t rw) { int rv; - ASSERT(rwlp->initialized == B_TRUE); - ASSERT(rwlp->rw_owner != (void *)-1UL); + ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC); if (rw == RW_READER) - rv = rw_tryrdlock(&rwlp->rw_lock); + rv = pthread_rwlock_tryrdlock(&rwlp->rw_lock); else - rv = rw_trywrlock(&rwlp->rw_lock); + rv = pthread_rwlock_trywrlock(&rwlp->rw_lock); if (rv == 0) { + ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT); + + if (rw == RW_READER) + atomic_inc_uint(&rwlp->rw_readers); + else { + ASSERT3U(rwlp->rw_readers, ==, 0); + rwlp->rw_wr_owner = 
curthread; + } + rwlp->rw_owner = curthread; return (1); } + VERIFY3S(rv, ==, EBUSY); + return (0); } -/*ARGSUSED*/ int rw_tryupgrade(krwlock_t *rwlp) { - ASSERT(rwlp->initialized == B_TRUE); - ASSERT(rwlp->rw_owner != (void *)-1UL); + ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC); return (0); } @@ -242,26 +411,32 @@ rw_tryupgrade(krwlock_t *rwlp) * condition variables * ========================================================================= */ -/*ARGSUSED*/ + void cv_init(kcondvar_t *cv, char *name, int type, void *arg) { - VERIFY(cond_init(cv, type, NULL) == 0); + ASSERT3S(type, ==, CV_DEFAULT); + cv->cv_magic = CV_MAGIC; + VERIFY3S(pthread_cond_init(&cv->cv, NULL), ==, 0); } void cv_destroy(kcondvar_t *cv) { - VERIFY(cond_destroy(cv) == 0); + ASSERT3U(cv->cv_magic, ==, CV_MAGIC); + VERIFY3S(pthread_cond_destroy(&cv->cv), ==, 0); + cv->cv_magic = 0; } void cv_wait(kcondvar_t *cv, kmutex_t *mp) { - ASSERT(mutex_owner(mp) == curthread); - mp->m_owner = NULL; - int ret = cond_wait(cv, &mp->m_lock); - VERIFY(ret == 0 || ret == EINTR); + ASSERT3U(cv->cv_magic, ==, CV_MAGIC); + ASSERT3P(mutex_owner(mp), ==, curthread); + mp->m_owner = MTX_INIT; + int ret = pthread_cond_wait(&cv->cv, &mp->m_lock); + if (ret != 0) + VERIFY3S(ret, ==, EINTR); mp->m_owner = curthread; } @@ -269,29 +444,38 @@ clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) { int error; + struct timeval tv; timestruc_t ts; clock_t delta; + ASSERT3U(cv->cv_magic, ==, CV_MAGIC); + top: delta = abstime - ddi_get_lbolt(); if (delta <= 0) return (-1); - ts.tv_sec = delta / hz; - ts.tv_nsec = (delta % hz) * (NANOSEC / hz); + VERIFY(gettimeofday(&tv, NULL) == 0); - ASSERT(mutex_owner(mp) == curthread); - mp->m_owner = NULL; - error = cond_reltimedwait(cv, &mp->m_lock, &ts); + ts.tv_sec = tv.tv_sec + delta / hz; + ts.tv_nsec = tv.tv_usec * 1000 + (delta % hz) * (NANOSEC / hz); + if (ts.tv_nsec >= NANOSEC) { + ts.tv_sec++; + ts.tv_nsec -= NANOSEC; + } + + ASSERT3P(mutex_owner(mp), ==, curthread); + 
mp->m_owner = MTX_INIT; + error = pthread_cond_timedwait(&cv->cv, &mp->m_lock, &ts); mp->m_owner = curthread; - if (error == ETIME) + if (error == ETIMEDOUT) return (-1); if (error == EINTR) goto top; - ASSERT(error == 0); + VERIFY3S(error, ==, 0); return (1); } @@ -299,13 +483,15 @@ top: void cv_signal(kcondvar_t *cv) { - VERIFY(cond_signal(cv) == 0); + ASSERT3U(cv->cv_magic, ==, CV_MAGIC); + VERIFY3S(pthread_cond_signal(&cv->cv), ==, 0); } void cv_broadcast(kcondvar_t *cv) { - VERIFY(cond_broadcast(cv) == 0); + ASSERT3U(cv->cv_magic, ==, CV_MAGIC); + VERIFY3S(pthread_cond_broadcast(&cv->cv), ==, 0); } /* @@ -565,7 +751,7 @@ __dprintf(const char *file, const char *func, int line, const char *fmt, ...) if (dprintf_find_string("pid")) (void) printf("%d ", getpid()); if (dprintf_find_string("tid")) - (void) printf("%u ", thr_self()); + (void) printf("%u ", (uint_t) pthread_self()); if (dprintf_find_string("cpu")) (void) printf("%u ", getcpuid()); if (dprintf_find_string("time")) @@ -818,6 +1004,7 @@ kernel_init(int mode) VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1); VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1); + thread_init(); system_taskq_init(); spa_init(mode); @@ -829,6 +1016,7 @@ kernel_fini(void) spa_fini(); system_taskq_fini(); + thread_fini(); close(random_fd); close(urandom_fd); diff --git a/lib/libzpool/taskq.c b/lib/libzpool/taskq.c index 8db5d11c13..36c0ec7dfc 100644 --- a/lib/libzpool/taskq.c +++ b/lib/libzpool/taskq.c @@ -42,7 +42,7 @@ struct taskq { krwlock_t tq_threadlock; kcondvar_t tq_dispatch_cv; kcondvar_t tq_wait_cv; - thread_t *tq_threadlist; + kthread_t **tq_threadlist; int tq_flags; int tq_active; int tq_nthreads; @@ -154,7 +154,7 @@ taskq_wait(taskq_t *tq) mutex_exit(&tq->tq_lock); } -static void * +static void taskq_thread(void *arg) { taskq_t *tq = arg; @@ -183,7 +183,7 @@ taskq_thread(void *arg) tq->tq_nthreads--; cv_broadcast(&tq->tq_wait_cv); mutex_exit(&tq->tq_lock); - return (NULL); + thread_exit(); } 
/*ARGSUSED*/ @@ -219,7 +219,7 @@ taskq_create(const char *name, int nthreads, pri_t pri, tq->tq_maxalloc = maxalloc; tq->tq_task.task_next = &tq->tq_task; tq->tq_task.task_prev = &tq->tq_task; - tq->tq_threadlist = kmem_alloc(nthreads * sizeof (thread_t), KM_SLEEP); + tq->tq_threadlist = kmem_alloc(nthreads*sizeof(kthread_t *), KM_SLEEP); if (flags & TASKQ_PREPOPULATE) { mutex_enter(&tq->tq_lock); @@ -229,8 +229,8 @@ taskq_create(const char *name, int nthreads, pri_t pri, } for (t = 0; t < nthreads; t++) - (void) thr_create(0, 0, taskq_thread, - tq, THR_BOUND, &tq->tq_threadlist[t]); + VERIFY((tq->tq_threadlist[t] = thread_create(NULL, 0, + taskq_thread, tq, TS_RUN, NULL, 0, 0)) != NULL); return (tq); } @@ -238,7 +238,6 @@ taskq_create(const char *name, int nthreads, pri_t pri, void taskq_destroy(taskq_t *tq) { - int t; int nthreads = tq->tq_nthreads; taskq_wait(tq); @@ -259,10 +258,7 @@ taskq_destroy(taskq_t *tq) mutex_exit(&tq->tq_lock); - for (t = 0; t < nthreads; t++) - (void) thr_join(tq->tq_threadlist[t], NULL, NULL); - - kmem_free(tq->tq_threadlist, nthreads * sizeof (thread_t)); + kmem_free(tq->tq_threadlist, nthreads * sizeof (kthread_t *)); rw_destroy(&tq->tq_threadlock); mutex_destroy(&tq->tq_lock); @@ -274,7 +270,7 @@ taskq_destroy(taskq_t *tq) } int -taskq_member(taskq_t *tq, void *t) +taskq_member(taskq_t *tq, kthread_t *t) { int i; @@ -282,7 +278,7 @@ taskq_member(taskq_t *tq, void *t) return (1); for (i = 0; i < tq->tq_nthreads; i++) - if (tq->tq_threadlist[i] == (thread_t)(uintptr_t)t) + if (tq->tq_threadlist[i] == t) return (1); return (0); From 6574b7cad4e2381097e830bfb31057c2f15ffbde Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 14 Jun 2010 16:21:57 -0700 Subject: [PATCH 3/3] New TopGit dependency: fix-pthreads --- .topdeps | 1 + 1 file changed, 1 insertion(+) diff --git a/.topdeps b/.topdeps index f6d854c193..9f596a1feb 100644 --- a/.topdeps +++ b/.topdeps @@ -11,3 +11,4 @@ fix-strncat fix-deadcode fix-acl fix-error-handling 
+fix-pthreads