diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index 97211e01e7..83ca0df838 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -57,6 +57,9 @@ * the transaction group number is less than the current, open txg. * If you add a new test, please do this if applicable. * + * (7) Threads are created with a reduced stack size, for sanity checking. + * Therefore, it's important not to allocate huge buffers on the stack. + * * When run with no arguments, ztest runs for about five minutes and * produces no output if successful. To get a little bit of information, * specify -V. To get more information, specify -VV, and so on. @@ -168,8 +171,8 @@ typedef enum { typedef struct rll { void *rll_writer; int rll_readers; - mutex_t rll_lock; - cond_t rll_cv; + kmutex_t rll_lock; + kcondvar_t rll_cv; } rll_t; typedef struct rl { @@ -206,7 +209,7 @@ typedef struct ztest_ds { uint64_t zd_seq; ztest_od_t *zd_od; /* debugging aid */ char zd_name[MAXNAMELEN]; - mutex_t zd_dirobj_lock; + kmutex_t zd_dirobj_lock; rll_t zd_object_lock[ZTEST_OBJECT_LOCKS]; rll_t zd_range_lock[ZTEST_RANGE_LOCKS]; } ztest_ds_t; @@ -299,8 +302,8 @@ ztest_info_t ztest_info[] = { * The callbacks are ordered by txg number. */ typedef struct ztest_cb_list { - mutex_t zcl_callbacks_lock; - list_t zcl_callbacks; + kmutex_t zcl_callbacks_lock; + list_t zcl_callbacks; } ztest_cb_list_t; /* @@ -319,8 +322,8 @@ typedef struct ztest_shared { uint64_t zs_vdev_aux; uint64_t zs_alloc; uint64_t zs_space; - mutex_t zs_vdev_lock; - rwlock_t zs_name_lock; + kmutex_t zs_vdev_lock; + krwlock_t zs_name_lock; ztest_info_t zs_info[ZTEST_FUNCS]; uint64_t zs_splits; uint64_t zs_mirrors; @@ -876,8 +879,8 @@ ztest_rll_init(rll_t *rll) { rll->rll_writer = NULL; rll->rll_readers = 0; - VERIFY(_mutex_init(&rll->rll_lock, USYNC_THREAD, NULL) == 0); - VERIFY(cond_init(&rll->rll_cv, USYNC_THREAD, NULL) == 0); + mutex_init(&rll->rll_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&rll->rll_cv, NULL, CV_DEFAULT, NULL); } static void @@ -885,32 +888,32 @@ ztest_rll_destroy(rll_t *rll) { ASSERT(rll->rll_writer == NULL); ASSERT(rll->rll_readers == 0); - VERIFY(_mutex_destroy(&rll->rll_lock) == 0); - VERIFY(cond_destroy(&rll->rll_cv) == 0); + mutex_destroy(&rll->rll_lock); + cv_destroy(&rll->rll_cv); } static void ztest_rll_lock(rll_t *rll, rl_type_t type) { - VERIFY(mutex_lock(&rll->rll_lock) == 0); + mutex_enter(&rll->rll_lock); if (type == RL_READER) { while (rll->rll_writer != NULL) - (void) cond_wait(&rll->rll_cv, &rll->rll_lock); + (void) cv_wait(&rll->rll_cv, &rll->rll_lock); rll->rll_readers++; } else { while (rll->rll_writer != NULL || rll->rll_readers) - (void) cond_wait(&rll->rll_cv, &rll->rll_lock); + (void) cv_wait(&rll->rll_cv, &rll->rll_lock); rll->rll_writer = curthread; } - VERIFY(mutex_unlock(&rll->rll_lock) == 0); + mutex_exit(&rll->rll_lock); } static void ztest_rll_unlock(rll_t *rll) { - VERIFY(mutex_lock(&rll->rll_lock) == 0); + mutex_enter(&rll->rll_lock); if (rll->rll_writer) { ASSERT(rll->rll_readers == 0); @@ -922,9 +925,9 @@ ztest_rll_unlock(rll_t *rll) } if (rll->rll_writer == NULL && rll->rll_readers == 0) - VERIFY(cond_broadcast(&rll->rll_cv) == 0); + cv_broadcast(&rll->rll_cv); - VERIFY(mutex_unlock(&rll->rll_lock) == 0); + mutex_exit(&rll->rll_lock); } static void @@ -981,7 +984,7 @@ ztest_zd_init(ztest_ds_t *zd, objset_t *os) dmu_objset_name(os, zd->zd_name); int l; - VERIFY(_mutex_init(&zd->zd_dirobj_lock, USYNC_THREAD, NULL) == 0); + mutex_init(&zd->zd_dirobj_lock, NULL, MUTEX_DEFAULT, NULL); for (l = 0; l < ZTEST_OBJECT_LOCKS; l++) ztest_rll_init(&zd->zd_object_lock[l]); @@ -995,7 +998,7 @@ ztest_zd_fini(ztest_ds_t *zd) { int l; - VERIFY(_mutex_destroy(&zd->zd_dirobj_lock) == 0); + mutex_destroy(&zd->zd_dirobj_lock); for (l = 0; l < ZTEST_OBJECT_LOCKS; l++) ztest_rll_destroy(&zd->zd_object_lock[l]); @@ -1733,7 +1736,7 @@ ztest_lookup(ztest_ds_t *zd, ztest_od_t *od, int count) int error; int i; - ASSERT(_mutex_held(&zd->zd_dirobj_lock)); + ASSERT(mutex_held(&zd->zd_dirobj_lock)); for (i = 0; i < count; i++, od++) { od->od_object = 0; @@ -1774,7 +1777,7 @@ ztest_create(ztest_ds_t *zd, ztest_od_t *od, int count) int missing = 0; int i; - ASSERT(_mutex_held(&zd->zd_dirobj_lock)); + ASSERT(mutex_held(&zd->zd_dirobj_lock)); for (i = 0; i < count; i++, od++) { if (missing) { @@ -1820,7 +1823,7 @@ ztest_remove(ztest_ds_t *zd, ztest_od_t *od, int count) int error; int i; - ASSERT(_mutex_held(&zd->zd_dirobj_lock)); + ASSERT(mutex_held(&zd->zd_dirobj_lock)); od += count - 1; @@ -2036,13 +2039,13 @@ ztest_object_init(ztest_ds_t *zd, ztest_od_t *od, size_t size, boolean_t remove) int count = size / sizeof (*od); int rv = 0; - VERIFY(mutex_lock(&zd->zd_dirobj_lock) == 0); + mutex_enter(&zd->zd_dirobj_lock); if ((ztest_lookup(zd, od, count) != 0 || remove) && (ztest_remove(zd, od, count) != 0 || ztest_create(zd, od, count) != 0)) rv = -1; zd->zd_od = od; - VERIFY(mutex_unlock(&zd->zd_dirobj_lock) == 0); + mutex_exit(&zd->zd_dirobj_lock); return (rv); } @@ -2098,7 +2101,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) * Attempt to create an existing pool. It shouldn't matter * what's in the nvroot; we should fail with EEXIST. */ - (void) rw_rdlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_READER); nvroot = make_vdev_root("/dev/bogus", NULL, 0, 0, 0, 0, 0, 1); VERIFY3U(EEXIST, ==, spa_create(zs->zs_pool, nvroot, NULL, NULL, NULL)); nvlist_free(nvroot); @@ -2106,7 +2109,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) VERIFY3U(EBUSY, ==, spa_destroy(zs->zs_pool)); spa_close(spa, FTAG); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); } static vdev_t * @@ -2160,7 +2163,7 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) nvlist_t *nvroot; int error; - VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0); + mutex_enter(&zs->zs_vdev_lock); leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * zopt_raidz; spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); @@ -2186,9 +2189,9 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) * dmu_objset_destroy() to fail with EBUSY thus * leaving the dataset in an inconsistent state. */ - VERIFY(rw_wrlock(&ztest_shared->zs_name_lock) == 0); + rw_enter(&ztest_shared->zs_name_lock, RW_WRITER); error = spa_vdev_remove(spa, guid, B_FALSE); - VERIFY(rw_unlock(&ztest_shared->zs_name_lock) == 0); + rw_exit(&ztest_shared->zs_name_lock); if (error && error != EEXIST) fatal(0, "spa_vdev_remove() = %d", error); @@ -2210,7 +2213,7 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) fatal(0, "spa_vdev_add() = %d", error); } - VERIFY(mutex_unlock(&ztest_shared->zs_vdev_lock) == 0); + mutex_exit(&ztest_shared->zs_vdev_lock); } /* @@ -2236,7 +2239,7 @@ ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id) aux = ZPOOL_CONFIG_L2CACHE; } - VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0); + mutex_enter(&zs->zs_vdev_lock); spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); @@ -2292,7 +2295,7 @@ ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id) fatal(0, "spa_vdev_remove(%llu) = %d", guid, error); } - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); } /* @@ -2309,11 +2312,11 @@ ztest_split_pool(ztest_ds_t *zd, uint64_t id) uint_t c, children, schildren = 0, lastlogid = 0; int error = 0; - VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0); + mutex_enter(&zs->zs_vdev_lock); /* ensure we have a useable config; mirrors of raidz aren't supported */ if (zs->zs_mirrors < 3 || zopt_raidz > 1) { - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); return; } @@ -2372,9 +2375,9 @@ ztest_split_pool(ztest_ds_t *zd, uint64_t id) spa_config_exit(spa, SCL_VDEV, FTAG); - (void) rw_wrlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_WRITER); error = spa_vdev_split_mirror(spa, "splitp", config, NULL, B_FALSE); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); nvlist_free(config); @@ -2387,7 +2390,7 @@ ztest_split_pool(ztest_ds_t *zd, uint64_t id) ++zs->zs_splits; --zs->zs_mirrors; } - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); } @@ -2416,7 +2419,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) int oldvd_is_log; int error, expected_error; - VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0); + mutex_enter(&zs->zs_vdev_lock); leaves = MAX(zs->zs_mirrors, 1) * zopt_raidz; spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); @@ -2477,7 +2480,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) if (error != 0 && error != ENODEV && error != EBUSY && error != ENOTSUP) fatal(0, "detach (%s) returned %d", oldpath, error); - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); return; } @@ -2570,7 +2573,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) (longlong_t)newsize, replacing, error, expected_error); } - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); } /* @@ -2701,7 +2704,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) uint64_t top; uint64_t old_class_space, new_class_space, old_ms_count, new_ms_count; - VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0); + mutex_enter(&zs->zs_vdev_lock); spa_config_enter(spa, SCL_STATE, spa, RW_READER); top = ztest_random_vdev_top(spa, B_TRUE); @@ -2729,7 +2732,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) if (tvd->vdev_state != VDEV_STATE_HEALTHY || psize == 0 || psize >= 4 * zopt_vdev_size) { spa_config_exit(spa, SCL_STATE, spa); - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); return; } ASSERT(psize > 0); @@ -2754,7 +2757,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) "the vdev configuration changed.\n"); } spa_config_exit(spa, SCL_STATE, spa); - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); return; } @@ -2788,7 +2791,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) "intervening vdev offline or remove.\n"); } spa_config_exit(spa, SCL_STATE, spa); - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); return; } @@ -2816,7 +2819,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) } spa_config_exit(spa, SCL_STATE, spa); - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); } /* @@ -2924,7 +2927,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) zilog_t *zilog; int i; - (void) rw_rdlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_READER); (void) snprintf(name, MAXNAMELEN, "%s/temp_%llu", zs->zs_pool, (u_longlong_t)id); @@ -2962,7 +2965,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) if (error) { if (error == ENOSPC) { ztest_record_enospc(FTAG); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); return; } fatal(0, "dmu_objset_create(%s) = %d", name, error); @@ -3011,7 +3014,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) dmu_objset_disown(os, FTAG); ztest_zd_fini(&zdtmp); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); } /* @@ -3022,10 +3025,10 @@ ztest_dmu_snapshot_create_destroy(ztest_ds_t *zd, uint64_t id) { ztest_shared_t *zs = ztest_shared; - (void) rw_rdlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_READER); (void) ztest_snapshot_destroy(zd->zd_name, id); (void) ztest_snapshot_create(zd->zd_name, id); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); } /* @@ -3086,7 +3089,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) char *osname = zd->zd_name; int error; - (void) rw_rdlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_READER); ztest_dsl_dataset_cleanup(osname, id); @@ -3171,7 +3174,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) out: ztest_dsl_dataset_cleanup(osname, id); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); } /* @@ -4094,9 +4097,9 @@ ztest_commit_callback(void *arg, int error) ASSERT3U(data->zcd_txg, !=, 0); /* Remove our callback from the list */ - (void) mutex_lock(&zcl.zcl_callbacks_lock); + (void) mutex_enter(&zcl.zcl_callbacks_lock); list_remove(&zcl.zcl_callbacks, data); - (void) mutex_unlock(&zcl.zcl_callbacks_lock); + (void) mutex_exit(&zcl.zcl_callbacks_lock); out: umem_free(data, sizeof (ztest_cb_data_t)); @@ -4112,6 +4115,7 @@ ztest_create_cb_data(objset_t *os, uint64_t txg) cb_data->zcd_txg = txg; cb_data->zcd_spa = dmu_objset_spa(os); + list_link_init(&cb_data->zcd_node); return (cb_data); } @@ -4198,7 +4202,7 @@ ztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id) dmu_write(os, od[0].od_object, 0, sizeof (uint64_t), &txg, tx); - (void) mutex_lock(&zcl.zcl_callbacks_lock); + (void) mutex_enter(&zcl.zcl_callbacks_lock); /* * Since commit callbacks don't have any ordering requirement and since @@ -4245,7 +4249,7 @@ ztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id) tmp_cb = cb_data[i]; } - (void) mutex_unlock(&zcl.zcl_callbacks_lock); + (void) mutex_exit(&zcl.zcl_callbacks_lock); dmu_tx_commit(tx); } @@ -4263,13 +4267,13 @@ ztest_dsl_prop_get_set(ztest_ds_t *zd, uint64_t id) ztest_shared_t *zs = ztest_shared; int p; - (void) rw_rdlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_READER); for (p = 0; p < sizeof (proplist) / sizeof (proplist[0]); p++) (void) ztest_dsl_prop_set_uint64(zd->zd_name, proplist[p], ztest_random_dsl_prop(proplist[p]), (int)ztest_random(2)); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); } /* ARGSUSED */ @@ -4279,7 +4283,7 @@ ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id) ztest_shared_t *zs = ztest_shared; nvlist_t *props = NULL; - (void) rw_rdlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_READER); (void) ztest_spa_prop_set_uint64(zs, ZPOOL_PROP_DEDUPDITTO, ZIO_DEDUPDITTO_MIN + ztest_random(ZIO_DEDUPDITTO_MIN)); @@ -4291,7 +4295,7 @@ ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id) nvlist_free(props); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); } /* @@ -4309,14 +4313,14 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) char tag[100]; char osname[MAXNAMELEN]; - (void) rw_rdlock(&ztest_shared->zs_name_lock); + (void) rw_enter(&ztest_shared->zs_name_lock, RW_READER); dmu_objset_name(os, osname); - (void) snprintf(snapname, 100, "sh1_%llu", id); + (void) snprintf(snapname, 100, "sh1_%llu", (u_longlong_t)id); (void) snprintf(fullname, 100, "%s@%s", osname, snapname); - (void) snprintf(clonename, 100, "%s/ch1_%llu", osname, id); - (void) snprintf(tag, 100, "%tag_%llu", id); + (void) snprintf(clonename, 100, "%s/ch1_%llu",osname,(u_longlong_t)id); + (void) snprintf(tag, 100, "tag_%llu", (u_longlong_t)id); /* * Clean up from any previous run. @@ -4403,7 +4407,7 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) VERIFY(dmu_objset_hold(fullname, FTAG, &origin) == ENOENT); out: - (void) rw_unlock(&ztest_shared->zs_name_lock); + (void) rw_exit(&ztest_shared->zs_name_lock); } /* @@ -4431,11 +4435,11 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) uint64_t guid0 = 0; boolean_t islog = B_FALSE; - VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0); + mutex_enter(&zs->zs_vdev_lock); maxfaults = MAXFAULTS(); leaves = MAX(zs->zs_mirrors, 1) * zopt_raidz; mirror_save = zs->zs_mirrors; - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); ASSERT(leaves >= 1); @@ -4529,12 +4533,13 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) * leaving the dataset in an inconsistent state. */ if (islog) - (void) rw_wrlock(&ztest_shared->zs_name_lock); + (void) rw_enter(&ztest_shared->zs_name_lock, + RW_WRITER); VERIFY(vdev_offline(spa, guid0, flags) != EBUSY); if (islog) - (void) rw_unlock(&ztest_shared->zs_name_lock); + (void) rw_exit(&ztest_shared->zs_name_lock); } else { (void) vdev_online(spa, guid0, 0, NULL); } @@ -4561,9 +4566,9 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) if (offset >= fsize) continue; - VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0); + mutex_enter(&zs->zs_vdev_lock); if (mirror_save != zs->zs_mirrors) { - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); (void) close(fd); return; } @@ -4572,7 +4577,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) fatal(1, "can't inject bad word at 0x%llx in %s", offset, pathrand); - VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0); + mutex_exit(&zs->zs_vdev_lock); if (zopt_verbose >= 7) (void) printf("injected bad word into %s," @@ -4613,13 +4618,13 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id) * Take the name lock as writer to prevent anyone else from changing * the pool and dataset properies we need to maintain during this test. */ - (void) rw_wrlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_WRITER); if (ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_DEDUP, checksum, B_FALSE) != 0 || ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_COPIES, 1, B_FALSE) != 0) { - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); return; } @@ -4633,7 +4638,7 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id) dmu_tx_hold_write(tx, object, 0, copies * blocksize); txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); if (txg == 0) { - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); return; } @@ -4677,7 +4682,7 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id) zio_buf_free(buf, psize); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); } /* @@ -4706,7 +4711,7 @@ ztest_spa_rename(ztest_ds_t *zd, uint64_t id) char *oldname, *newname; spa_t *spa; - (void) rw_wrlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_WRITER); oldname = zs->zs_pool; newname = umem_alloc(strlen(oldname) + 5, UMEM_NOFAIL); @@ -4746,7 +4751,7 @@ ztest_spa_rename(ztest_ds_t *zd, uint64_t id) umem_free(newname, strlen(newname) + 1); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); } /* @@ -4919,6 +4924,9 @@ ztest_resume_thread(void *arg) ztest_resume(spa); (void) poll(NULL, 0, 100); } + + thread_exit(); + return (NULL); } @@ -4934,6 +4942,7 @@ ztest_deadman_thread(void *arg) (void) poll(NULL, 0, (int)(1000 * delta)); fatal(0, "failed to complete within %d seconds of deadline", grace); + thread_exit(); return (NULL); } @@ -4996,6 +5005,8 @@ ztest_thread(void *arg) ztest_execute(zi, id); } + thread_exit(); + return (NULL); } @@ -5061,18 +5072,18 @@ ztest_dataset_open(ztest_shared_t *zs, int d) ztest_dataset_name(name, zs->zs_pool, d); - (void) rw_rdlock(&zs->zs_name_lock); + (void) rw_enter(&zs->zs_name_lock, RW_READER); error = ztest_dataset_create(name); if (error == ENOSPC) { - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); ztest_record_enospc(FTAG); return (error); } ASSERT(error == 0 || error == EEXIST); VERIFY3U(dmu_objset_hold(name, zd, &os), ==, 0); - (void) rw_unlock(&zs->zs_name_lock); + (void) rw_exit(&zs->zs_name_lock); ztest_zd_init(zd, os); @@ -5123,9 +5134,10 @@ ztest_dataset_close(ztest_shared_t *zs, int d) static void ztest_run(ztest_shared_t *zs) { - thread_t *tid; + kthread_t **tid; spa_t *spa; - thread_t resume_tid; + kthread_t *resume_thread; + uint64_t object; int error; int t, d; @@ -5134,8 +5146,8 @@ ztest_run(ztest_shared_t *zs) /* * Initialize parent/child shared state. */ - VERIFY(_mutex_init(&zs->zs_vdev_lock, USYNC_THREAD, NULL) == 0); - VERIFY(rwlock_init(&zs->zs_name_lock, USYNC_THREAD, NULL) == 0); + mutex_init(&zs->zs_vdev_lock, NULL, MUTEX_DEFAULT, NULL); + rw_init(&zs->zs_name_lock, NULL, RW_DEFAULT, NULL); zs->zs_thread_start = gethrtime(); zs->zs_thread_stop = zs->zs_thread_start + zopt_passtime * NANOSEC; @@ -5144,7 +5156,7 @@ ztest_run(ztest_shared_t *zs) if (ztest_random(100) < zopt_killrate) zs->zs_thread_kill -= ztest_random(zopt_passtime * NANOSEC); - (void) _mutex_init(&zcl.zcl_callbacks_lock, USYNC_THREAD, NULL); + mutex_init(&zcl.zcl_callbacks_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zcl.zcl_callbacks, sizeof (ztest_cb_data_t), offsetof(ztest_cb_data_t, zcd_node)); @@ -5171,14 +5183,14 @@ ztest_run(ztest_shared_t *zs) /* * Create a thread to periodically resume suspended I/O. */ - VERIFY(thr_create(0, 0, ztest_resume_thread, spa, THR_BOUND, - &resume_tid) == 0); + VERIFY3P((resume_thread = thread_create(NULL, 0, ztest_resume_thread, + spa, TS_RUN, NULL, 0, 0)), !=, NULL); /* * Create a deadman thread to abort() if we hang. */ - VERIFY(thr_create(0, 0, ztest_deadman_thread, zs, THR_BOUND, - NULL) == 0); + VERIFY3P(thread_create(NULL, 0, ztest_deadman_thread, zs, + TS_RUN, NULL, 0, 0), !=, NULL); /* * Verify that we can safely inquire about about any object, @@ -5204,7 +5216,7 @@ ztest_run(ztest_shared_t *zs) } zs->zs_enospc_count = 0; - tid = umem_zalloc(zopt_threads * sizeof (thread_t), UMEM_NOFAIL); + tid = umem_zalloc(zopt_threads * sizeof (kthread_t *), UMEM_NOFAIL); if (zopt_verbose >= 4) (void) printf("starting main threads...\n"); @@ -5215,8 +5227,9 @@ ztest_run(ztest_shared_t *zs) for (t = 0; t < zopt_threads; t++) { if (t < zopt_datasets && ztest_dataset_open(zs, t) != 0) return; - VERIFY(thr_create(0, 0, ztest_thread, (void *)(uintptr_t)t, - THR_BOUND, &tid[t]) == 0); + + VERIFY3P(tid[t] = thread_create(NULL, 0, ztest_thread, + (void *)(uintptr_t)t, TS_RUN, NULL, 0, 0), !=, NULL); } /* @@ -5224,7 +5237,7 @@ ztest_run(ztest_shared_t *zs) * so we don't close datasets while threads are still using them. */ for (t = zopt_threads - 1; t >= 0; t--) { - VERIFY(thr_join(tid[t], NULL, NULL) == 0); + thread_join(tid[t]->t_tid); if (t < zopt_datasets) ztest_dataset_close(zs, t); } @@ -5234,18 +5247,18 @@ ztest_run(ztest_shared_t *zs) zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(spa)); zs->zs_space = metaslab_class_get_space(spa_normal_class(spa)); - umem_free(tid, zopt_threads * sizeof (thread_t)); + umem_free(tid, zopt_threads * sizeof (kthread_t *)); /* Kill the resume thread */ ztest_exiting = B_TRUE; - VERIFY(thr_join(resume_tid, NULL, NULL) == 0); + thread_join(resume_thread->t_tid); ztest_resume(spa); /* * Right before closing the pool, kick off a bunch of async I/O; * spa_close() should wait for it to complete. */ - for (uint64_t object = 1; object < 50; object++) + for (object = 1; object < 50; object++) dmu_prefetch(spa->spa_meta_objset, object, 0, 1ULL << 20); spa_close(spa, FTAG); @@ -5343,10 +5356,10 @@ ztest_freeze(ztest_shared_t *zs) list_destroy(&zcl.zcl_callbacks); - (void) _mutex_destroy(&zcl.zcl_callbacks_lock); + (void) mutex_destroy(&zcl.zcl_callbacks_lock); - (void) rwlock_destroy(&zs->zs_name_lock); - (void) _mutex_destroy(&zs->zs_vdev_lock); + (void) rw_destroy(&zs->zs_name_lock); + (void) mutex_destroy(&zs->zs_vdev_lock); } void @@ -5401,8 +5414,8 @@ ztest_init(ztest_shared_t *zs) spa_t *spa; nvlist_t *nvroot, *props; - VERIFY(_mutex_init(&zs->zs_vdev_lock, USYNC_THREAD, NULL) == 0); - VERIFY(rwlock_init(&zs->zs_name_lock, USYNC_THREAD, NULL) == 0); + mutex_init(&zs->zs_vdev_lock, NULL, MUTEX_DEFAULT, NULL); + rw_init(&zs->zs_name_lock, NULL, RW_DEFAULT, NULL); kernel_init(FREAD | FWRITE); diff --git a/lib/libuutil/uu_misc.c b/lib/libuutil/uu_misc.c index a4ae4a1e72..3bd5c31192 100644 --- a/lib/libuutil/uu_misc.c +++ b/lib/libuutil/uu_misc.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #if !defined(TEXT_DOMAIN) @@ -70,11 +69,12 @@ static va_list uu_panic_args; static pthread_t uu_panic_thread; static uint32_t _uu_main_error; +static __thread int _uu_main_thread = 0; void uu_set_error(uint_t code) { - if (thr_main() != 0) { + if (_uu_main_thread) { _uu_main_error = code; return; } @@ -103,7 +103,7 @@ uu_set_error(uint_t code) uint32_t uu_error(void) { - if (thr_main() != 0) + if (_uu_main_thread) return (_uu_main_error); if (uu_error_key_setup < 0) /* can't happen? */ @@ -261,5 +261,6 @@ uu_init(void) __attribute__((constructor)); static void uu_init(void) { + _uu_main_thread = 1; (void) pthread_atfork(uu_lockup, uu_release, uu_release_child); } diff --git a/lib/libzpool/include/sys/zfs_context.h b/lib/libzpool/include/sys/zfs_context.h index 8c16ec1ef7..109ad925d7 100644 --- a/lib/libzpool/include/sys/zfs_context.h +++ b/lib/libzpool/include/sys/zfs_context.h @@ -50,8 +50,7 @@ extern "C" { #include #include #include -#include -#include +#include #include #include #include @@ -97,6 +96,8 @@ extern "C" { #define CE_PANIC 3 /* panic */ #define CE_IGNORE 4 /* print nothing */ +extern int aok; + /* * ZFS debugging */ @@ -202,27 +203,45 @@ _NOTE(CONSTCOND) } while (0) /* * Threads */ -#define curthread ((void *)(uintptr_t)thr_self()) - -typedef struct kthread kthread_t; - -#define thread_create(stk, stksize, func, arg, len, pp, state, pri) \ - zk_thread_create(func, arg) -#define thread_exit() thr_exit(NULL) -#define thread_join(t) panic("libzpool cannot join threads") - -#define newproc(f, a, cid, pri, ctp, pid) (ENOSYS) +#define TS_RUN 0x00000002 +#define STACK_SIZE 8192 /* Linux x86 and amd64 */ /* in libzpool, p0 exists only to have its address taken */ -struct proc { +typedef struct proc { uintptr_t this_is_never_used_dont_dereference_it; -}; +} proc_t; extern struct proc p0; -#define PS_NONE -1 +typedef void (*thread_func_t)(void *); +typedef void (*thread_func_arg_t)(void *); +typedef pthread_t kt_did_t; -extern kthread_t *zk_thread_create(void (*func)(), void *arg); +typedef struct kthread { + kt_did_t t_tid; + thread_func_t t_func; + void * t_arg; +} kthread_t; + +/* XXX tsd_create()/tsd_destroy() missing */ +#define tsd_get(key) pthread_getspecific(key) +#define tsd_set(key, val) pthread_setspecific(key, val) +#define curthread zk_thread_current() +#define thread_exit zk_thread_exit +#define thread_create(stk, stksize, func, arg, len, pp, state, pri) \ + zk_thread_create(stk, stksize, (thread_func_t)func, arg, \ + len, NULL, state, pri) +#define thread_join(t) zk_thread_join(t) +#define newproc(f,a,cid,pri,ctp,pid) (ENOSYS) + +extern kthread_t *zk_thread_current(void); +extern void zk_thread_exit(void); +extern kthread_t *zk_thread_create(caddr_t stk, size_t stksize, + thread_func_t func, void *arg, size_t len, + proc_t *pp, int state, pri_t pri); +extern void zk_thread_join(kt_did_t tid); + +#define PS_NONE -1 #define issig(why) (FALSE) #define ISSIG(thr, why) (FALSE) @@ -230,53 +249,51 @@ extern kthread_t *zk_thread_create(void (*func)(), void *arg); /* * Mutexes */ +#define MTX_MAGIC 0x9522f51362a6e326ull +#define MTX_INIT ((void *)NULL) +#define MTX_DEST ((void *)-1UL) + typedef struct kmutex { void *m_owner; - boolean_t initialized; - mutex_t m_lock; + uint64_t m_magic; + pthread_mutex_t m_lock; } kmutex_t; -#define MUTEX_DEFAULT USYNC_THREAD -#undef MUTEX_HELD -#define MUTEX_HELD(m) _mutex_held(&(m)->m_lock) +#define MUTEX_DEFAULT 0 +#define MUTEX_HELD(m) ((m)->m_owner == curthread) -/* - * Argh -- we have to get cheesy here because the kernel and userland - * have different signatures for the same routine. - */ -extern int _mutex_init(mutex_t *mp, int type, void *arg); -extern int _mutex_destroy(mutex_t *mp); - -#define mutex_init(mp, b, c, d) zmutex_init((kmutex_t *)(mp)) -#define mutex_destroy(mp) zmutex_destroy((kmutex_t *)(mp)) - -extern void zmutex_init(kmutex_t *mp); -extern void zmutex_destroy(kmutex_t *mp); +extern void mutex_init(kmutex_t *mp, char *name, int type, void *cookie); +extern void mutex_destroy(kmutex_t *mp); extern void mutex_enter(kmutex_t *mp); extern void mutex_exit(kmutex_t *mp); extern int mutex_tryenter(kmutex_t *mp); extern void *mutex_owner(kmutex_t *mp); +extern int mutex_held(kmutex_t *mp); /* * RW locks */ +#define RW_MAGIC 0x4d31fb123648e78aull +#define RW_INIT ((void *)NULL) +#define RW_DEST ((void *)-1UL) + typedef struct krwlock { - void *rw_owner; - boolean_t initialized; - rwlock_t rw_lock; + void *rw_owner; + void *rw_wr_owner; + uint64_t rw_magic; + pthread_rwlock_t rw_lock; + uint_t rw_readers; } krwlock_t; typedef int krw_t; #define RW_READER 0 #define RW_WRITER 1 -#define RW_DEFAULT USYNC_THREAD +#define RW_DEFAULT RW_READER -#undef RW_READ_HELD -#define RW_READ_HELD(x) _rw_read_held(&(x)->rw_lock) - -#undef RW_WRITE_HELD -#define RW_WRITE_HELD(x) _rw_write_held(&(x)->rw_lock) +#define RW_READ_HELD(x) ((x)->rw_readers > 0) +#define RW_WRITE_HELD(x) ((x)->rw_wr_owner == curthread) +#define RW_LOCK_HELD(x) (RW_READ_HELD(x) || RW_WRITE_HELD(x)) extern void rw_init(krwlock_t *rwlp, char *name, int type, void *arg); extern void rw_destroy(krwlock_t *rwlp); @@ -294,9 +311,14 @@ extern gid_t *crgetgroups(cred_t *cr); /* * Condition variables */ -typedef cond_t kcondvar_t; +#define CV_MAGIC 0xd31ea9a83b1b30c4ull -#define CV_DEFAULT USYNC_THREAD +typedef struct kcondvar { + uint64_t cv_magic; + pthread_cond_t cv; +} kcondvar_t; + +#define CV_DEFAULT 0 extern void cv_init(kcondvar_t *cv, char *name, int type, void *arg); extern void cv_destroy(kcondvar_t *cv); @@ -361,7 +383,7 @@ extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t); extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t); extern void taskq_destroy(taskq_t *); extern void taskq_wait(taskq_t *); -extern int taskq_member(taskq_t *, void *); +extern int taskq_member(taskq_t *, kthread_t *); extern void system_taskq_init(void); extern void system_taskq_fini(void); @@ -488,7 +510,7 @@ extern void delay(clock_t ticks); #define minclsyspri 60 #define maxclsyspri 99 -#define CPU_SEQID (thr_self() & (max_ncpus - 1)) +#define CPU_SEQID (pthread_self() & (max_ncpus - 1)) #define kcred NULL #define CRED() NULL diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index c0d529e873..4897b672a7 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -58,16 +59,150 @@ struct proc p0; * threads * ========================================================================= */ -/*ARGSUSED*/ -kthread_t * -zk_thread_create(void (*func)(), void *arg) + +pthread_cond_t kthread_cond = PTHREAD_COND_INITIALIZER; +pthread_mutex_t kthread_lock = PTHREAD_MUTEX_INITIALIZER; +pthread_key_t kthread_key; +int kthread_nr = 0; + +static void +thread_init(void) { - thread_t tid; + kthread_t *kt; - VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED, - &tid) == 0); + VERIFY3S(pthread_key_create(&kthread_key, NULL), ==, 0); - return ((void *)(uintptr_t)tid); + /* Create entry for primary kthread */ + kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL); + kt->t_tid = pthread_self(); + kt->t_func = NULL; + + VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0); + + /* Only the main thread should be running at the moment */ + ASSERT3S(kthread_nr, ==, 0); + kthread_nr = 1; +} + +static void +thread_fini(void) +{ + kthread_t *kt = curthread; + + ASSERT(pthread_equal(kt->t_tid, pthread_self())); + ASSERT3P(kt->t_func, ==, NULL); + + umem_free(kt, sizeof(kthread_t)); + + /* Wait for all threads to exit via thread_exit() */ + VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0); + + kthread_nr--; /* Main thread is exiting */ + + while (kthread_nr > 0) + VERIFY3S(pthread_cond_wait(&kthread_cond, &kthread_lock), ==, + 0); + + ASSERT3S(kthread_nr, ==, 0); + VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0); + + VERIFY3S(pthread_key_delete(kthread_key), ==, 0); +} + +kthread_t * +zk_thread_current(void) +{ + kthread_t *kt = pthread_getspecific(kthread_key); + + ASSERT3P(kt, !=, NULL); + + return kt; +} + +void * +zk_thread_helper(void *arg) +{ + kthread_t *kt = (kthread_t *) arg; + + VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0); + + VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0); + kthread_nr++; + VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0); + + kt->t_tid = pthread_self(); + ((thread_func_arg_t) kt->t_func)(kt->t_arg); + + /* Unreachable, thread must exit with thread_exit() */ + abort(); + + return NULL; +} + +kthread_t * +zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg, + size_t len, proc_t *pp, int state, pri_t pri) +{ + kthread_t *kt; + pthread_t tid; + pthread_attr_t attr; + size_t stack; + + /* + * Due to a race when getting/setting the thread ID, currently only + * detached threads are supported. + */ + ASSERT3S(state & ~TS_RUN, ==, 0); + + kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL); + kt->t_func = func; + kt->t_arg = arg; + + /* + * The Solaris kernel stack size in x86/x64 is 8K, so we reduce the + * default stack size in userspace, for sanity checking. + * + * PTHREAD_STACK_MIN is the stack required for a NULL procedure in + * userspace. + * + * XXX: Stack size for other architectures is not being taken into + * account. + */ + stack = PTHREAD_STACK_MIN + MAX(stksize, STACK_SIZE); + + VERIFY3S(pthread_attr_init(&attr), ==, 0); + VERIFY3S(pthread_attr_setstacksize(&attr, stack), ==, 0); + VERIFY3S(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED), + ==, 0); + + VERIFY3S(pthread_create(&tid, &attr, &zk_thread_helper, kt), ==, 0); + + VERIFY3S(pthread_attr_destroy(&attr), ==, 0); + + return kt; +} + +void +zk_thread_exit(void) +{ + kthread_t *kt = curthread; + + ASSERT(pthread_equal(kt->t_tid, pthread_self())); + + umem_free(kt, sizeof(kthread_t)); + + pthread_mutex_lock(&kthread_lock); + kthread_nr--; + pthread_mutex_unlock(&kthread_lock); + + pthread_cond_broadcast(&kthread_cond); + pthread_exit(NULL); +} + +void +zk_thread_join(kt_did_t tid) +{ + pthread_join((pthread_t)tid, NULL); } /* @@ -98,42 +233,45 @@ kstat_delete(kstat_t *ksp) * mutexes * ========================================================================= */ + void -zmutex_init(kmutex_t *mp) +mutex_init(kmutex_t *mp, char *name, int type, void *cookie) { - mp->m_owner = NULL; - mp->initialized = B_TRUE; - (void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL); + ASSERT3S(type, ==, MUTEX_DEFAULT); + ASSERT3P(cookie, ==, NULL); + mp->m_owner = MTX_INIT; + mp->m_magic = MTX_MAGIC; + VERIFY3S(pthread_mutex_init(&mp->m_lock, NULL), ==, 0); } void -zmutex_destroy(kmutex_t *mp) +mutex_destroy(kmutex_t *mp) { - ASSERT(mp->initialized == B_TRUE); - ASSERT(mp->m_owner == NULL); - (void) _mutex_destroy(&(mp)->m_lock); - mp->m_owner = (void *)-1UL; - mp->initialized = B_FALSE; + ASSERT3U(mp->m_magic, ==, MTX_MAGIC); + ASSERT3P(mp->m_owner, ==, MTX_INIT); + VERIFY3S(pthread_mutex_destroy(&(mp)->m_lock), ==, 0); + mp->m_owner = MTX_DEST; + mp->m_magic = 0; } void mutex_enter(kmutex_t *mp) { - ASSERT(mp->initialized == B_TRUE); - ASSERT(mp->m_owner != (void *)-1UL); - ASSERT(mp->m_owner != curthread); - VERIFY(mutex_lock(&mp->m_lock) == 0); - ASSERT(mp->m_owner == NULL); + ASSERT3U(mp->m_magic, ==, MTX_MAGIC); + ASSERT3P(mp->m_owner, !=, MTX_DEST); + ASSERT3P(mp->m_owner, !=, curthread); + VERIFY3S(pthread_mutex_lock(&mp->m_lock), ==, 0); + ASSERT3P(mp->m_owner, ==, MTX_INIT); mp->m_owner = curthread; } int mutex_tryenter(kmutex_t *mp) { - ASSERT(mp->initialized == B_TRUE); - ASSERT(mp->m_owner != (void *)-1UL); - if (0 == mutex_trylock(&mp->m_lock)) { - ASSERT(mp->m_owner == NULL); + ASSERT3U(mp->m_magic, ==, MTX_MAGIC); + ASSERT3P(mp->m_owner, !=, MTX_DEST); + if (0 == pthread_mutex_trylock(&mp->m_lock)) { + ASSERT3P(mp->m_owner, ==, MTX_INIT); mp->m_owner = curthread; return (1); } else { @@ -144,53 +282,71 @@ mutex_tryenter(kmutex_t *mp) void mutex_exit(kmutex_t *mp) { - ASSERT(mp->initialized == B_TRUE); - ASSERT(mutex_owner(mp) == curthread); - mp->m_owner = NULL; - VERIFY(mutex_unlock(&mp->m_lock) == 0); + ASSERT3U(mp->m_magic, ==, MTX_MAGIC); + ASSERT3P(mutex_owner(mp), ==, curthread); + mp->m_owner = MTX_INIT; + VERIFY3S(pthread_mutex_unlock(&mp->m_lock), ==, 0); } void * mutex_owner(kmutex_t *mp) { - ASSERT(mp->initialized == B_TRUE); + ASSERT3U(mp->m_magic, ==, MTX_MAGIC); return (mp->m_owner); } +int +mutex_held(kmutex_t *mp) +{ + return (mp->m_owner == curthread); +} + /* * ========================================================================= * rwlocks * ========================================================================= */ -/*ARGSUSED*/ + void rw_init(krwlock_t *rwlp, char *name, int type, void *arg) { - rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL); - rwlp->rw_owner = NULL; - rwlp->initialized = B_TRUE; + ASSERT3S(type, ==, RW_DEFAULT); + ASSERT3P(arg, ==, NULL); + VERIFY3S(pthread_rwlock_init(&rwlp->rw_lock, NULL), ==, 0); + rwlp->rw_owner = RW_INIT; + rwlp->rw_wr_owner = RW_INIT; + rwlp->rw_readers = 0; + rwlp->rw_magic = RW_MAGIC; } void rw_destroy(krwlock_t *rwlp) { - rwlock_destroy(&rwlp->rw_lock); - rwlp->rw_owner = (void *)-1UL; - rwlp->initialized = B_FALSE; + ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC); + + VERIFY3S(pthread_rwlock_destroy(&rwlp->rw_lock), ==, 0); + rwlp->rw_magic = 0; } void rw_enter(krwlock_t *rwlp, krw_t rw) { - ASSERT(!RW_LOCK_HELD(rwlp)); - ASSERT(rwlp->initialized == B_TRUE); - ASSERT(rwlp->rw_owner != (void *)-1UL); - ASSERT(rwlp->rw_owner != curthread); + ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC); + ASSERT3P(rwlp->rw_owner, !=, curthread); + ASSERT3P(rwlp->rw_wr_owner, !=, curthread); - if (rw == RW_READER) - VERIFY(rw_rdlock(&rwlp->rw_lock) == 0); - else - VERIFY(rw_wrlock(&rwlp->rw_lock) == 0); + if (rw == RW_READER) { + VERIFY3S(pthread_rwlock_rdlock(&rwlp->rw_lock), ==, 0); + ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT); + + atomic_inc_uint(&rwlp->rw_readers); + } else { + VERIFY3S(pthread_rwlock_wrlock(&rwlp->rw_lock), ==, 0); + ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT); + ASSERT3U(rwlp->rw_readers, ==, 0); + + rwlp->rw_wr_owner = curthread; + } rwlp->rw_owner = curthread; } @@ -198,11 +354,16 @@ rw_enter(krwlock_t *rwlp, krw_t rw) void rw_exit(krwlock_t *rwlp) { - ASSERT(rwlp->initialized == B_TRUE); - ASSERT(rwlp->rw_owner != (void *)-1UL); + ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC); + ASSERT(RW_LOCK_HELD(rwlp)); - rwlp->rw_owner = NULL; - VERIFY(rw_unlock(&rwlp->rw_lock) == 0); + if (RW_READ_HELD(rwlp)) + atomic_dec_uint(&rwlp->rw_readers); + else + rwlp->rw_wr_owner = RW_INIT; + + rwlp->rw_owner = RW_INIT; + VERIFY3S(pthread_rwlock_unlock(&rwlp->rw_lock), ==, 0); } int @@ -210,28 +371,36 @@ rw_tryenter(krwlock_t *rwlp, krw_t rw) { int rv; - ASSERT(rwlp->initialized == B_TRUE); - ASSERT(rwlp->rw_owner != (void *)-1UL); + ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC); if (rw == RW_READER) - rv = rw_tryrdlock(&rwlp->rw_lock); + rv = pthread_rwlock_tryrdlock(&rwlp->rw_lock); else - rv = rw_trywrlock(&rwlp->rw_lock); + rv = pthread_rwlock_trywrlock(&rwlp->rw_lock); if (rv == 0) { + ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT); + + if (rw == RW_READER) + atomic_inc_uint(&rwlp->rw_readers); + else { + ASSERT3U(rwlp->rw_readers, ==, 0); + rwlp->rw_wr_owner = curthread; + } + rwlp->rw_owner = curthread; return (1); } + VERIFY3S(rv, ==, EBUSY); + return (0); } -/*ARGSUSED*/ int rw_tryupgrade(krwlock_t *rwlp) { - ASSERT(rwlp->initialized == B_TRUE); - ASSERT(rwlp->rw_owner != (void *)-1UL); + ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC); return (0); } @@ -241,26 +410,32 @@ rw_tryupgrade(krwlock_t *rwlp) * condition variables * ========================================================================= */ -/*ARGSUSED*/ + void cv_init(kcondvar_t *cv, char *name, int type, void *arg) { - VERIFY(cond_init(cv, type, NULL) == 0); + ASSERT3S(type, ==, CV_DEFAULT); + cv->cv_magic = CV_MAGIC; + VERIFY3S(pthread_cond_init(&cv->cv, NULL), ==, 0); } void cv_destroy(kcondvar_t *cv) { - VERIFY(cond_destroy(cv) == 0); + ASSERT3U(cv->cv_magic, ==, CV_MAGIC); + VERIFY3S(pthread_cond_destroy(&cv->cv), ==, 0); + cv->cv_magic = 0; } void cv_wait(kcondvar_t *cv, kmutex_t *mp) { - ASSERT(mutex_owner(mp) == curthread); - mp->m_owner = NULL; - int ret = cond_wait(cv, &mp->m_lock); - VERIFY(ret == 0 || ret == EINTR); + ASSERT3U(cv->cv_magic, ==, CV_MAGIC); + ASSERT3P(mutex_owner(mp), ==, curthread); + mp->m_owner = MTX_INIT; + int ret = pthread_cond_wait(&cv->cv, &mp->m_lock); + if (ret != 0) + VERIFY3S(ret, ==, EINTR); mp->m_owner = curthread; } @@ -268,29 +443,38 @@ clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) { int error; + struct timeval tv; timestruc_t ts; clock_t delta; + ASSERT3U(cv->cv_magic, ==, CV_MAGIC); + top: delta = abstime - ddi_get_lbolt(); if (delta <= 0) return (-1); - ts.tv_sec = delta / hz; - ts.tv_nsec = (delta % hz) * (NANOSEC / hz); + VERIFY(gettimeofday(&tv, NULL) == 0); - ASSERT(mutex_owner(mp) == curthread); - mp->m_owner = NULL; - error = cond_reltimedwait(cv, &mp->m_lock, &ts); + ts.tv_sec = tv.tv_sec + delta / hz; + ts.tv_nsec = tv.tv_usec * 1000 + (delta % hz) * (NANOSEC / hz); + if (ts.tv_nsec >= NANOSEC) { + ts.tv_sec++; + ts.tv_nsec -= NANOSEC; + } + + ASSERT3P(mutex_owner(mp), ==, curthread); + mp->m_owner = MTX_INIT; + error = pthread_cond_timedwait(&cv->cv, &mp->m_lock, &ts); mp->m_owner = curthread; - if (error == ETIME) + if (error == ETIMEDOUT) return (-1); if (error == EINTR) goto top; - ASSERT(error == 0); + VERIFY3S(error, ==, 0); return (1); } @@ -298,13 +482,15 @@ top: void cv_signal(kcondvar_t *cv) { - VERIFY(cond_signal(cv) == 0); + ASSERT3U(cv->cv_magic, ==, CV_MAGIC); + VERIFY3S(pthread_cond_signal(&cv->cv), ==, 0); } void cv_broadcast(kcondvar_t *cv) { - VERIFY(cond_broadcast(cv) == 0); + ASSERT3U(cv->cv_magic, ==, CV_MAGIC); + VERIFY3S(pthread_cond_broadcast(&cv->cv), ==, 0); } /* @@ -572,7 +758,7 @@ __dprintf(const char *file, const char *func, int line, const char *fmt, ...) if (dprintf_find_string("pid")) (void) printf("%d ", getpid()); if (dprintf_find_string("tid")) - (void) printf("%u ", thr_self()); + (void) printf("%u ", (uint_t) pthread_self()); if (dprintf_find_string("cpu")) (void) printf("%u ", getcpuid()); if (dprintf_find_string("time")) @@ -825,6 +1011,7 @@ kernel_init(int mode) VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1); VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1); + thread_init(); system_taskq_init(); spa_init(mode); @@ -836,6 +1023,7 @@ kernel_fini(void) spa_fini(); system_taskq_fini(); + thread_fini(); close(random_fd); close(urandom_fd); diff --git a/lib/libzpool/taskq.c b/lib/libzpool/taskq.c index 8db5d11c13..36c0ec7dfc 100644 --- a/lib/libzpool/taskq.c +++ b/lib/libzpool/taskq.c @@ -42,7 +42,7 @@ struct taskq { krwlock_t tq_threadlock; kcondvar_t tq_dispatch_cv; kcondvar_t tq_wait_cv; - thread_t *tq_threadlist; + kthread_t **tq_threadlist; int tq_flags; int tq_active; int tq_nthreads; @@ -154,7 +154,7 @@ taskq_wait(taskq_t *tq) mutex_exit(&tq->tq_lock); } -static void * +static void taskq_thread(void *arg) { taskq_t *tq = arg; @@ -183,7 +183,7 @@ taskq_thread(void *arg) tq->tq_nthreads--; cv_broadcast(&tq->tq_wait_cv); mutex_exit(&tq->tq_lock); - return (NULL); + thread_exit(); } /*ARGSUSED*/ @@ -219,7 +219,7 @@ taskq_create(const char *name, int nthreads, pri_t pri, tq->tq_maxalloc = maxalloc; tq->tq_task.task_next = &tq->tq_task; tq->tq_task.task_prev = &tq->tq_task; - tq->tq_threadlist = kmem_alloc(nthreads * sizeof (thread_t), KM_SLEEP); + tq->tq_threadlist = kmem_alloc(nthreads*sizeof(kthread_t *), KM_SLEEP); if (flags & TASKQ_PREPOPULATE) { mutex_enter(&tq->tq_lock); @@ -229,8 +229,8 @@ taskq_create(const char *name, int nthreads, pri_t pri, } for (t = 0; t < nthreads; t++) - (void) thr_create(0, 0, taskq_thread, - tq, THR_BOUND, &tq->tq_threadlist[t]); + VERIFY((tq->tq_threadlist[t] = thread_create(NULL, 0, + taskq_thread, tq, TS_RUN, NULL, 0, 0)) != NULL); return (tq); } @@ -238,7 +238,6 @@ taskq_create(const char *name, int nthreads, pri_t pri, void taskq_destroy(taskq_t *tq) { - int t; int nthreads = tq->tq_nthreads; taskq_wait(tq); @@ -259,10 +258,7 @@ taskq_destroy(taskq_t *tq) mutex_exit(&tq->tq_lock); - for (t = 0; t < nthreads; t++) - (void) thr_join(tq->tq_threadlist[t], NULL, NULL); - - kmem_free(tq->tq_threadlist, nthreads * sizeof (thread_t)); + kmem_free(tq->tq_threadlist, nthreads * sizeof (kthread_t *)); rw_destroy(&tq->tq_threadlock); mutex_destroy(&tq->tq_lock); @@ -274,7 +270,7 @@ taskq_destroy(taskq_t *tq) } int -taskq_member(taskq_t *tq, void *t) +taskq_member(taskq_t *tq, kthread_t *t) { int i; @@ -282,7 +278,7 @@ taskq_member(taskq_t *tq, void *t) return (1); for (i = 0; i < tq->tq_nthreads; i++) - if (tq->tq_threadlist[i] == (thread_t)(uintptr_t)t) + if (tq->tq_threadlist[i] == t) return (1); return (0); diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 6e923c3a1e..5ebdf4ca6c 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -1052,7 +1052,10 @@ zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline) { spa_t *spa = zio->io_spa; zio_type_t t = zio->io_type; - int flags = TQ_SLEEP | (cutinline ? TQ_FRONT : 0); + int flags; + + flags = (cutinline ? TQ_FRONT : 0); + flags |= ((q == ZIO_TASKQ_INTERRUPT) ? TQ_NOSLEEP : TQ_SLEEP); /* * If we're a config writer or a probe, the normal issue and