From 7809eb8b65bec8e8ea09b2ea645d320e0b3a1710 Mon Sep 17 00:00:00 2001 From: Richard Yao Date: Wed, 30 Apr 2014 12:51:28 -0400 Subject: [PATCH] ztest: Switch to LWP rwlock interface ztest is intended to subject the ZFS code in userland to stress that it should be able to withstand. Any failures that occur when running it are failures that likely would occur inside the kernel. However, being in userland, it is much easier to debug them. In practice, this prevents a large number of problems from reaching production code. A design decision was made by the original authors of ztest to make a distinction between userland locking primitives and kernel locking primitives. The ztest code itself calls userland locking primitives while the kernel code being run in userland will call emulated kernel locking primitives that wrap the userland locking primitives. When ztest was first ported to Linux, a decision was made to use the emulated kernel interfaces everywhere. In effect, the userland rw_rdlock()/rw_wrlock() became the kernel rw_enter() and the userland rw_unlock() became the kernel rw_exit(). This caused a regression because of an assertion in rw_enter() to catch recursive locking. That is permitted in userland, but not in the kernel. Consequently, the ztest code itself does recursive read locking. The use of the emulated kernel interfaces consequently caused the following failure: ztest: ../../lib/libzpool/kernel.c:384: Assertion `rwlp->rw_owner != zk_thread_current() (0x1c87150 != 0x1c87150)' failed. That occurs because ztest_dmu_objset_create_destroy() will take a read lock and call ztest_dmu_object_alloc_free(). That will call ztest_io(), which will take a read lock only when asked to do ZTEST_IO_REWRITE. This triggered the assertion. The pthreads rwlock interface was based on the LWP rwlock interface implemented in Illumos libc. 
Luckily enough, the subset used by ztest is almost identical, so we can solve this problem by switching to the LWP thread rwlock interface in ztest. This eliminates a point of divergence with Illumos and should make code sharing slightly easier. Signed-off-by: Richard Yao Signed-off-by: Brian Behlendorf Closes #1970 --- cmd/ztest/ztest.c | 101 +++++++++++++++++++++++----------------------- 1 file changed, 50 insertions(+), 51 deletions(-) diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index d392a625e9..05c5dbe2c8 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -275,7 +275,7 @@ typedef struct ztest_od { typedef struct ztest_ds { ztest_shared_ds_t *zd_shared; objset_t *zd_os; - krwlock_t zd_zilog_lock; + rwlock_t zd_zilog_lock; zilog_t *zd_zilog; ztest_od_t *zd_od; /* debugging aid */ char zd_name[MAXNAMELEN]; @@ -429,7 +429,7 @@ static kmutex_t ztest_vdev_lock; * this lock as writer. Grabbing the lock as reader will ensure that the * namespace does not change while the lock is held. 
*/ -static krwlock_t ztest_name_lock; +static rwlock_t ztest_name_lock; static boolean_t ztest_dump_core = B_TRUE; static boolean_t ztest_exiting; @@ -1205,7 +1205,7 @@ ztest_zd_init(ztest_ds_t *zd, ztest_shared_ds_t *szd, objset_t *os) if (zd->zd_shared != NULL) zd->zd_shared->zd_seq = 0; - rw_init(&zd->zd_zilog_lock, NULL, RW_DEFAULT, NULL); + VERIFY(rwlock_init(&zd->zd_zilog_lock, USYNC_THREAD, NULL) == 0); mutex_init(&zd->zd_dirobj_lock, NULL, MUTEX_DEFAULT, NULL); for (l = 0; l < ZTEST_OBJECT_LOCKS; l++) @@ -1221,7 +1221,7 @@ ztest_zd_fini(ztest_ds_t *zd) int l; mutex_destroy(&zd->zd_dirobj_lock); - rw_destroy(&zd->zd_zilog_lock); + (void) rwlock_destroy(&zd->zd_zilog_lock); for (l = 0; l < ZTEST_OBJECT_LOCKS; l++) ztest_rll_destroy(&zd->zd_object_lock[l]); @@ -2203,7 +2203,7 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset) if (ztest_random(2) == 0) io_type = ZTEST_IO_WRITE_TAG; - (void) rw_enter(&zd->zd_zilog_lock, RW_READER); + (void) rw_rdlock(&zd->zd_zilog_lock); switch (io_type) { @@ -2242,7 +2242,7 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset) break; case ZTEST_IO_REWRITE: - (void) rw_enter(&ztest_name_lock, RW_READER); + (void) rw_rdlock(&ztest_name_lock); err = ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_CHECKSUM, spa_dedup_checksum(ztest_spa), B_FALSE); @@ -2252,7 +2252,7 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset) ztest_random_dsl_prop(ZFS_PROP_COMPRESSION), B_FALSE); VERIFY(err == 0 || err == ENOSPC); - (void) rw_exit(&ztest_name_lock); + (void) rw_unlock(&ztest_name_lock); VERIFY0(dmu_read(zd->zd_os, object, offset, blocksize, data, DMU_READ_NO_PREFETCH)); @@ -2261,7 +2261,7 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset) break; } - (void) rw_exit(&zd->zd_zilog_lock); + (void) rw_unlock(&zd->zd_zilog_lock); umem_free(data, blocksize); } @@ -2317,7 +2317,7 @@ ztest_zil_commit(ztest_ds_t *zd, uint64_t id) { zilog_t *zilog = zd->zd_zilog; - (void) rw_enter(&zd->zd_zilog_lock, RW_READER); 
+ (void) rw_rdlock(&zd->zd_zilog_lock); zil_commit(zilog, ztest_random(ZTEST_OBJECTS)); @@ -2332,7 +2332,7 @@ ztest_zil_commit(ztest_ds_t *zd, uint64_t id) zd->zd_shared->zd_seq = zilog->zl_commit_lr_seq; mutex_exit(&zilog->zl_lock); - (void) rw_exit(&zd->zd_zilog_lock); + (void) rw_unlock(&zd->zd_zilog_lock); } /* @@ -2352,7 +2352,7 @@ ztest_zil_remount(ztest_ds_t *zd, uint64_t id) * zd_zilog_lock to block any I/O. */ mutex_enter(&zd->zd_dirobj_lock); - (void) rw_enter(&zd->zd_zilog_lock, RW_WRITER); + (void) rw_wrlock(&zd->zd_zilog_lock); /* zfs_sb_teardown() */ zil_close(zd->zd_zilog); @@ -2361,7 +2361,7 @@ ztest_zil_remount(ztest_ds_t *zd, uint64_t id) VERIFY(zil_open(os, ztest_get_data) == zd->zd_zilog); zil_replay(os, zd, ztest_replay_vector); - (void) rw_exit(&zd->zd_zilog_lock); + (void) rw_unlock(&zd->zd_zilog_lock); mutex_exit(&zd->zd_dirobj_lock); } @@ -2397,7 +2397,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) * Attempt to create an existing pool. It shouldn't matter * what's in the nvroot; we should fail with EEXIST. */ - (void) rw_enter(&ztest_name_lock, RW_READER); + (void) rw_rdlock(&ztest_name_lock); nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1); VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL)); nvlist_free(nvroot); @@ -2405,7 +2405,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool)); spa_close(spa, FTAG); - (void) rw_exit(&ztest_name_lock); + (void) rw_unlock(&ztest_name_lock); } /* ARGSUSED */ @@ -2557,9 +2557,9 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) * dsl_destroy_head() to fail with EBUSY thus * leaving the dataset in an inconsistent state. 
*/ - rw_enter(&ztest_name_lock, RW_WRITER); + rw_wrlock(&ztest_name_lock); error = spa_vdev_remove(spa, guid, B_FALSE); - rw_exit(&ztest_name_lock); + rw_unlock(&ztest_name_lock); if (error && error != EEXIST) fatal(0, "spa_vdev_remove() = %d", error); @@ -2750,9 +2750,9 @@ ztest_split_pool(ztest_ds_t *zd, uint64_t id) spa_config_exit(spa, SCL_VDEV, FTAG); - (void) rw_enter(&ztest_name_lock, RW_WRITER); + (void) rw_wrlock(&ztest_name_lock); error = spa_vdev_split_mirror(spa, "splitp", config, NULL, B_FALSE); - (void) rw_exit(&ztest_name_lock); + (void) rw_unlock(&ztest_name_lock); nvlist_free(config); @@ -3314,7 +3314,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) zdtmp = umem_alloc(sizeof (ztest_ds_t), UMEM_NOFAIL); name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); - (void) rw_enter(&ztest_name_lock, RW_READER); + (void) rw_rdlock(&ztest_name_lock); (void) snprintf(name, MAXNAMELEN, "%s/temp_%llu", ztest_opts.zo_pool, (u_longlong_t)id); @@ -3400,7 +3400,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) dmu_objset_disown(os, FTAG); ztest_zd_fini(zdtmp); out: - (void) rw_exit(&ztest_name_lock); + (void) rw_unlock(&ztest_name_lock); umem_free(name, MAXNAMELEN); umem_free(zdtmp, sizeof (ztest_ds_t)); @@ -3412,10 +3412,10 @@ out: void ztest_dmu_snapshot_create_destroy(ztest_ds_t *zd, uint64_t id) { - (void) rw_enter(&ztest_name_lock, RW_READER); + (void) rw_rdlock(&ztest_name_lock); (void) ztest_snapshot_destroy(zd->zd_name, id); (void) ztest_snapshot_create(zd->zd_name, id); - (void) rw_exit(&ztest_name_lock); + (void) rw_unlock(&ztest_name_lock); } /* @@ -3492,7 +3492,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) clone2name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); snap3name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); - (void) rw_enter(&ztest_name_lock, RW_READER); + (void) rw_rdlock(&ztest_name_lock); ztest_dsl_dataset_cleanup(osname, id); @@ -3564,7 +3564,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) out: 
ztest_dsl_dataset_cleanup(osname, id); - (void) rw_exit(&ztest_name_lock); + (void) rw_unlock(&ztest_name_lock); umem_free(snap1name, MAXNAMELEN); umem_free(clone1name, MAXNAMELEN); @@ -4725,13 +4725,13 @@ ztest_dsl_prop_get_set(ztest_ds_t *zd, uint64_t id) }; int p; - (void) rw_enter(&ztest_name_lock, RW_READER); + (void) rw_rdlock(&ztest_name_lock); for (p = 0; p < sizeof (proplist) / sizeof (proplist[0]); p++) (void) ztest_dsl_prop_set_uint64(zd->zd_name, proplist[p], ztest_random_dsl_prop(proplist[p]), (int)ztest_random(2)); - (void) rw_exit(&ztest_name_lock); + (void) rw_unlock(&ztest_name_lock); } /* ARGSUSED */ @@ -4740,7 +4740,7 @@ ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id) { nvlist_t *props = NULL; - (void) rw_enter(&ztest_name_lock, RW_READER); + (void) rw_rdlock(&ztest_name_lock); (void) ztest_spa_prop_set_uint64(ZPOOL_PROP_DEDUPDITTO, ZIO_DEDUPDITTO_MIN + ztest_random(ZIO_DEDUPDITTO_MIN)); @@ -4752,7 +4752,7 @@ ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id) nvlist_free(props); - (void) rw_exit(&ztest_name_lock); + (void) rw_unlock(&ztest_name_lock); } static int @@ -4787,7 +4787,7 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) char osname[MAXNAMELEN]; nvlist_t *holds; - (void) rw_enter(&ztest_name_lock, RW_READER); + (void) rw_rdlock(&ztest_name_lock); dmu_objset_name(os, osname); @@ -4888,7 +4888,7 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) VERIFY3U(dmu_objset_hold(fullname, FTAG, &origin), ==, ENOENT); out: - (void) rw_exit(&ztest_name_lock); + (void) rw_unlock(&ztest_name_lock); } /* @@ -4933,7 +4933,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) * they are in progress (i.e. spa_change_guid). Those * operations will have grabbed the name lock as writer. */ - (void) rw_enter(&ztest_name_lock, RW_READER); + (void) rw_rdlock(&ztest_name_lock); /* * We need SCL_STATE here because we're going to look at vd0->vdev_tsd. 
@@ -5002,7 +5002,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) if (sav->sav_count == 0) { spa_config_exit(spa, SCL_STATE, FTAG); - (void) rw_exit(&ztest_name_lock); + (void) rw_unlock(&ztest_name_lock); goto out; } vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)]; @@ -5016,7 +5016,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) } spa_config_exit(spa, SCL_STATE, FTAG); - (void) rw_exit(&ztest_name_lock); + (void) rw_unlock(&ztest_name_lock); /* * If we can tolerate two or more faults, or we're dealing @@ -5036,13 +5036,12 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) * leaving the dataset in an inconsistent state. */ if (islog) - (void) rw_enter(&ztest_name_lock, - RW_WRITER); + (void) rw_wrlock(&ztest_name_lock); VERIFY(vdev_offline(spa, guid0, flags) != EBUSY); if (islog) - (void) rw_exit(&ztest_name_lock); + (void) rw_unlock(&ztest_name_lock); } else { /* * Ideally we would like to be able to randomly @@ -5138,13 +5137,13 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id) * Take the name lock as writer to prevent anyone else from changing * the pool and dataset properies we need to maintain during this test. 
*/ - (void) rw_enter(&ztest_name_lock, RW_WRITER); + (void) rw_wrlock(&ztest_name_lock); if (ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_DEDUP, checksum, B_FALSE) != 0 || ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_COPIES, 1, B_FALSE) != 0) { - (void) rw_exit(&ztest_name_lock); + (void) rw_unlock(&ztest_name_lock); umem_free(od, sizeof (ztest_od_t)); return; } @@ -5159,7 +5158,7 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id) dmu_tx_hold_write(tx, object, 0, copies * blocksize); txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); if (txg == 0) { - (void) rw_exit(&ztest_name_lock); + (void) rw_unlock(&ztest_name_lock); umem_free(od, sizeof (ztest_od_t)); return; } @@ -5208,7 +5207,7 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id) zio_buf_free(buf, psize); - (void) rw_exit(&ztest_name_lock); + (void) rw_unlock(&ztest_name_lock); umem_free(od, sizeof (ztest_od_t)); } @@ -5240,9 +5239,9 @@ ztest_reguid(ztest_ds_t *zd, uint64_t id) orig = spa_guid(spa); load = spa_load_guid(spa); - (void) rw_enter(&ztest_name_lock, RW_WRITER); + (void) rw_wrlock(&ztest_name_lock); error = spa_change_guid(spa); - (void) rw_exit(&ztest_name_lock); + (void) rw_unlock(&ztest_name_lock); if (error != 0) return; @@ -5266,7 +5265,7 @@ ztest_spa_rename(ztest_ds_t *zd, uint64_t id) char *oldname, *newname; spa_t *spa; - (void) rw_enter(&ztest_name_lock, RW_WRITER); + (void) rw_wrlock(&ztest_name_lock); oldname = ztest_opts.zo_pool; newname = umem_alloc(strlen(oldname) + 5, UMEM_NOFAIL); @@ -5306,7 +5305,7 @@ ztest_spa_rename(ztest_ds_t *zd, uint64_t id) umem_free(newname, strlen(newname) + 1); - (void) rw_exit(&ztest_name_lock); + (void) rw_unlock(&ztest_name_lock); } /* @@ -5636,18 +5635,18 @@ ztest_dataset_open(int d) ztest_dataset_name(name, ztest_opts.zo_pool, d); - (void) rw_enter(&ztest_name_lock, RW_READER); + (void) rw_rdlock(&ztest_name_lock); error = ztest_dataset_create(name); if (error == ENOSPC) { - (void) rw_exit(&ztest_name_lock); + (void) rw_unlock(&ztest_name_lock); 
ztest_record_enospc(FTAG); return (error); } ASSERT(error == 0 || error == EEXIST); VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, zd, &os)); - (void) rw_exit(&ztest_name_lock); + (void) rw_unlock(&ztest_name_lock); ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os); @@ -5712,7 +5711,7 @@ ztest_run(ztest_shared_t *zs) * Initialize parent/child shared state. */ mutex_init(&ztest_vdev_lock, NULL, MUTEX_DEFAULT, NULL); - rw_init(&ztest_name_lock, NULL, RW_DEFAULT, NULL); + VERIFY(rwlock_init(&ztest_name_lock, USYNC_THREAD, NULL) == 0); zs->zs_thread_start = gethrtime(); zs->zs_thread_stop = @@ -5879,7 +5878,7 @@ ztest_run(ztest_shared_t *zs) list_destroy(&zcl.zcl_callbacks); mutex_destroy(&zcl.zcl_callbacks_lock); - rw_destroy(&ztest_name_lock); + (void) rwlock_destroy(&ztest_name_lock); mutex_destroy(&ztest_vdev_lock); } @@ -6012,7 +6011,7 @@ ztest_init(ztest_shared_t *zs) int i; mutex_init(&ztest_vdev_lock, NULL, MUTEX_DEFAULT, NULL); - rw_init(&ztest_name_lock, NULL, RW_DEFAULT, NULL); + VERIFY(rwlock_init(&ztest_name_lock, USYNC_THREAD, NULL) == 0); kernel_init(FREAD | FWRITE); @@ -6050,7 +6049,7 @@ ztest_init(ztest_shared_t *zs) ztest_run_zdb(ztest_opts.zo_pool); - rw_destroy(&ztest_name_lock); + (void) rwlock_destroy(&ztest_name_lock); mutex_destroy(&ztest_vdev_lock); }