ztest: Switch to LWP rwlock interface

ztest is intended to subject the ZFS code in userland to stress that it
should be able to withstand. Any failures that occur when running it are
failures that likely would occur inside the kernel. However, being in
userland, it is much easier to debug them. In practice, this prevents
a large number of problems from reaching production code.

A design decision was made by the original authors of ztest to make a
distinction between userland locking primitives and kernel locking
primitives. The ztest code itself calls userland locking primitives
while the kernel code being run in userland will call emulated kernel
locking primitives that wrap the userland locking primitives.

When ztest was first ported to Linux, a decision was made to use the
emulated kernel interfaces everywhere. In effect, the userland
rw_rdlock()/rw_wrlock() became the kernel rw_enter() and the userland
rw_unlock() became the kernel rw_exit(). This caused a regression
because of an assertion in rw_enter() that catches recursive locking.
Recursive locking is permitted in userland, but not in the kernel, and
the ztest code itself does recursive read locking. The use of the
emulated kernel interfaces therefore caused the following failure:

ztest: ../../lib/libzpool/kernel.c:384: Assertion `rwlp->rw_owner !=
zk_thread_current() (0x1c87150 != 0x1c87150)' failed.

That occurs because ztest_dmu_objset_create_destroy() will take a read
lock on ztest_name_lock and call ztest_dmu_object_alloc_free(). That will
call ztest_io(), which will take a read lock on ztest_name_lock again,
but only when asked to do ZTEST_IO_REWRITE. This recursive acquisition
triggered the assertion.

The pthreads rwlock interface was based on the LWP rwlock interface
implemented in Illumos libc. Luckily enough, the subset used by ztest is
almost identical, so we can solve this problem by switching to the LWP
thread rwlock interface in ztest. This eliminates a point of divergence
with Illumos and should make code sharing slightly easier.

Signed-off-by: Richard Yao <ryao@gentoo.org>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #1970
This commit is contained in:
Richard Yao 2014-04-30 12:51:28 -04:00 committed by Brian Behlendorf
parent 3af3df905f
commit 7809eb8b65
1 changed files with 50 additions and 51 deletions

View File

@ -275,7 +275,7 @@ typedef struct ztest_od {
typedef struct ztest_ds {
ztest_shared_ds_t *zd_shared;
objset_t *zd_os;
krwlock_t zd_zilog_lock;
rwlock_t zd_zilog_lock;
zilog_t *zd_zilog;
ztest_od_t *zd_od; /* debugging aid */
char zd_name[MAXNAMELEN];
@ -429,7 +429,7 @@ static kmutex_t ztest_vdev_lock;
* this lock as writer. Grabbing the lock as reader will ensure that the
* namespace does not change while the lock is held.
*/
static krwlock_t ztest_name_lock;
static rwlock_t ztest_name_lock;
static boolean_t ztest_dump_core = B_TRUE;
static boolean_t ztest_exiting;
@ -1205,7 +1205,7 @@ ztest_zd_init(ztest_ds_t *zd, ztest_shared_ds_t *szd, objset_t *os)
if (zd->zd_shared != NULL)
zd->zd_shared->zd_seq = 0;
rw_init(&zd->zd_zilog_lock, NULL, RW_DEFAULT, NULL);
VERIFY(rwlock_init(&zd->zd_zilog_lock, USYNC_THREAD, NULL) == 0);
mutex_init(&zd->zd_dirobj_lock, NULL, MUTEX_DEFAULT, NULL);
for (l = 0; l < ZTEST_OBJECT_LOCKS; l++)
@ -1221,7 +1221,7 @@ ztest_zd_fini(ztest_ds_t *zd)
int l;
mutex_destroy(&zd->zd_dirobj_lock);
rw_destroy(&zd->zd_zilog_lock);
(void) rwlock_destroy(&zd->zd_zilog_lock);
for (l = 0; l < ZTEST_OBJECT_LOCKS; l++)
ztest_rll_destroy(&zd->zd_object_lock[l]);
@ -2203,7 +2203,7 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset)
if (ztest_random(2) == 0)
io_type = ZTEST_IO_WRITE_TAG;
(void) rw_enter(&zd->zd_zilog_lock, RW_READER);
(void) rw_rdlock(&zd->zd_zilog_lock);
switch (io_type) {
@ -2242,7 +2242,7 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset)
break;
case ZTEST_IO_REWRITE:
(void) rw_enter(&ztest_name_lock, RW_READER);
(void) rw_rdlock(&ztest_name_lock);
err = ztest_dsl_prop_set_uint64(zd->zd_name,
ZFS_PROP_CHECKSUM, spa_dedup_checksum(ztest_spa),
B_FALSE);
@ -2252,7 +2252,7 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset)
ztest_random_dsl_prop(ZFS_PROP_COMPRESSION),
B_FALSE);
VERIFY(err == 0 || err == ENOSPC);
(void) rw_exit(&ztest_name_lock);
(void) rw_unlock(&ztest_name_lock);
VERIFY0(dmu_read(zd->zd_os, object, offset, blocksize, data,
DMU_READ_NO_PREFETCH));
@ -2261,7 +2261,7 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset)
break;
}
(void) rw_exit(&zd->zd_zilog_lock);
(void) rw_unlock(&zd->zd_zilog_lock);
umem_free(data, blocksize);
}
@ -2317,7 +2317,7 @@ ztest_zil_commit(ztest_ds_t *zd, uint64_t id)
{
zilog_t *zilog = zd->zd_zilog;
(void) rw_enter(&zd->zd_zilog_lock, RW_READER);
(void) rw_rdlock(&zd->zd_zilog_lock);
zil_commit(zilog, ztest_random(ZTEST_OBJECTS));
@ -2332,7 +2332,7 @@ ztest_zil_commit(ztest_ds_t *zd, uint64_t id)
zd->zd_shared->zd_seq = zilog->zl_commit_lr_seq;
mutex_exit(&zilog->zl_lock);
(void) rw_exit(&zd->zd_zilog_lock);
(void) rw_unlock(&zd->zd_zilog_lock);
}
/*
@ -2352,7 +2352,7 @@ ztest_zil_remount(ztest_ds_t *zd, uint64_t id)
* zd_zilog_lock to block any I/O.
*/
mutex_enter(&zd->zd_dirobj_lock);
(void) rw_enter(&zd->zd_zilog_lock, RW_WRITER);
(void) rw_wrlock(&zd->zd_zilog_lock);
/* zfs_sb_teardown() */
zil_close(zd->zd_zilog);
@ -2361,7 +2361,7 @@ ztest_zil_remount(ztest_ds_t *zd, uint64_t id)
VERIFY(zil_open(os, ztest_get_data) == zd->zd_zilog);
zil_replay(os, zd, ztest_replay_vector);
(void) rw_exit(&zd->zd_zilog_lock);
(void) rw_unlock(&zd->zd_zilog_lock);
mutex_exit(&zd->zd_dirobj_lock);
}
@ -2397,7 +2397,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
* Attempt to create an existing pool. It shouldn't matter
* what's in the nvroot; we should fail with EEXIST.
*/
(void) rw_enter(&ztest_name_lock, RW_READER);
(void) rw_rdlock(&ztest_name_lock);
nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL));
nvlist_free(nvroot);
@ -2405,7 +2405,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool));
spa_close(spa, FTAG);
(void) rw_exit(&ztest_name_lock);
(void) rw_unlock(&ztest_name_lock);
}
/* ARGSUSED */
@ -2557,9 +2557,9 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id)
* dsl_destroy_head() to fail with EBUSY thus
* leaving the dataset in an inconsistent state.
*/
rw_enter(&ztest_name_lock, RW_WRITER);
rw_wrlock(&ztest_name_lock);
error = spa_vdev_remove(spa, guid, B_FALSE);
rw_exit(&ztest_name_lock);
rw_unlock(&ztest_name_lock);
if (error && error != EEXIST)
fatal(0, "spa_vdev_remove() = %d", error);
@ -2750,9 +2750,9 @@ ztest_split_pool(ztest_ds_t *zd, uint64_t id)
spa_config_exit(spa, SCL_VDEV, FTAG);
(void) rw_enter(&ztest_name_lock, RW_WRITER);
(void) rw_wrlock(&ztest_name_lock);
error = spa_vdev_split_mirror(spa, "splitp", config, NULL, B_FALSE);
(void) rw_exit(&ztest_name_lock);
(void) rw_unlock(&ztest_name_lock);
nvlist_free(config);
@ -3314,7 +3314,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
zdtmp = umem_alloc(sizeof (ztest_ds_t), UMEM_NOFAIL);
name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL);
(void) rw_enter(&ztest_name_lock, RW_READER);
(void) rw_rdlock(&ztest_name_lock);
(void) snprintf(name, MAXNAMELEN, "%s/temp_%llu",
ztest_opts.zo_pool, (u_longlong_t)id);
@ -3400,7 +3400,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
dmu_objset_disown(os, FTAG);
ztest_zd_fini(zdtmp);
out:
(void) rw_exit(&ztest_name_lock);
(void) rw_unlock(&ztest_name_lock);
umem_free(name, MAXNAMELEN);
umem_free(zdtmp, sizeof (ztest_ds_t));
@ -3412,10 +3412,10 @@ out:
void
ztest_dmu_snapshot_create_destroy(ztest_ds_t *zd, uint64_t id)
{
(void) rw_enter(&ztest_name_lock, RW_READER);
(void) rw_rdlock(&ztest_name_lock);
(void) ztest_snapshot_destroy(zd->zd_name, id);
(void) ztest_snapshot_create(zd->zd_name, id);
(void) rw_exit(&ztest_name_lock);
(void) rw_unlock(&ztest_name_lock);
}
/*
@ -3492,7 +3492,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
clone2name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL);
snap3name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL);
(void) rw_enter(&ztest_name_lock, RW_READER);
(void) rw_rdlock(&ztest_name_lock);
ztest_dsl_dataset_cleanup(osname, id);
@ -3564,7 +3564,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
out:
ztest_dsl_dataset_cleanup(osname, id);
(void) rw_exit(&ztest_name_lock);
(void) rw_unlock(&ztest_name_lock);
umem_free(snap1name, MAXNAMELEN);
umem_free(clone1name, MAXNAMELEN);
@ -4725,13 +4725,13 @@ ztest_dsl_prop_get_set(ztest_ds_t *zd, uint64_t id)
};
int p;
(void) rw_enter(&ztest_name_lock, RW_READER);
(void) rw_rdlock(&ztest_name_lock);
for (p = 0; p < sizeof (proplist) / sizeof (proplist[0]); p++)
(void) ztest_dsl_prop_set_uint64(zd->zd_name, proplist[p],
ztest_random_dsl_prop(proplist[p]), (int)ztest_random(2));
(void) rw_exit(&ztest_name_lock);
(void) rw_unlock(&ztest_name_lock);
}
/* ARGSUSED */
@ -4740,7 +4740,7 @@ ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id)
{
nvlist_t *props = NULL;
(void) rw_enter(&ztest_name_lock, RW_READER);
(void) rw_rdlock(&ztest_name_lock);
(void) ztest_spa_prop_set_uint64(ZPOOL_PROP_DEDUPDITTO,
ZIO_DEDUPDITTO_MIN + ztest_random(ZIO_DEDUPDITTO_MIN));
@ -4752,7 +4752,7 @@ ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id)
nvlist_free(props);
(void) rw_exit(&ztest_name_lock);
(void) rw_unlock(&ztest_name_lock);
}
static int
@ -4787,7 +4787,7 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)
char osname[MAXNAMELEN];
nvlist_t *holds;
(void) rw_enter(&ztest_name_lock, RW_READER);
(void) rw_rdlock(&ztest_name_lock);
dmu_objset_name(os, osname);
@ -4888,7 +4888,7 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)
VERIFY3U(dmu_objset_hold(fullname, FTAG, &origin), ==, ENOENT);
out:
(void) rw_exit(&ztest_name_lock);
(void) rw_unlock(&ztest_name_lock);
}
/*
@ -4933,7 +4933,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
* they are in progress (i.e. spa_change_guid). Those
* operations will have grabbed the name lock as writer.
*/
(void) rw_enter(&ztest_name_lock, RW_READER);
(void) rw_rdlock(&ztest_name_lock);
/*
* We need SCL_STATE here because we're going to look at vd0->vdev_tsd.
@ -5002,7 +5002,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
if (sav->sav_count == 0) {
spa_config_exit(spa, SCL_STATE, FTAG);
(void) rw_exit(&ztest_name_lock);
(void) rw_unlock(&ztest_name_lock);
goto out;
}
vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)];
@ -5016,7 +5016,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
}
spa_config_exit(spa, SCL_STATE, FTAG);
(void) rw_exit(&ztest_name_lock);
(void) rw_unlock(&ztest_name_lock);
/*
* If we can tolerate two or more faults, or we're dealing
@ -5036,13 +5036,12 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
* leaving the dataset in an inconsistent state.
*/
if (islog)
(void) rw_enter(&ztest_name_lock,
RW_WRITER);
(void) rw_wrlock(&ztest_name_lock);
VERIFY(vdev_offline(spa, guid0, flags) != EBUSY);
if (islog)
(void) rw_exit(&ztest_name_lock);
(void) rw_unlock(&ztest_name_lock);
} else {
/*
* Ideally we would like to be able to randomly
@ -5138,13 +5137,13 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
* Take the name lock as writer to prevent anyone else from changing
* the pool and dataset properties we need to maintain during this test.
*/
(void) rw_enter(&ztest_name_lock, RW_WRITER);
(void) rw_wrlock(&ztest_name_lock);
if (ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_DEDUP, checksum,
B_FALSE) != 0 ||
ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_COPIES, 1,
B_FALSE) != 0) {
(void) rw_exit(&ztest_name_lock);
(void) rw_unlock(&ztest_name_lock);
umem_free(od, sizeof (ztest_od_t));
return;
}
@ -5159,7 +5158,7 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
dmu_tx_hold_write(tx, object, 0, copies * blocksize);
txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
if (txg == 0) {
(void) rw_exit(&ztest_name_lock);
(void) rw_unlock(&ztest_name_lock);
umem_free(od, sizeof (ztest_od_t));
return;
}
@ -5208,7 +5207,7 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
zio_buf_free(buf, psize);
(void) rw_exit(&ztest_name_lock);
(void) rw_unlock(&ztest_name_lock);
umem_free(od, sizeof (ztest_od_t));
}
@ -5240,9 +5239,9 @@ ztest_reguid(ztest_ds_t *zd, uint64_t id)
orig = spa_guid(spa);
load = spa_load_guid(spa);
(void) rw_enter(&ztest_name_lock, RW_WRITER);
(void) rw_wrlock(&ztest_name_lock);
error = spa_change_guid(spa);
(void) rw_exit(&ztest_name_lock);
(void) rw_unlock(&ztest_name_lock);
if (error != 0)
return;
@ -5266,7 +5265,7 @@ ztest_spa_rename(ztest_ds_t *zd, uint64_t id)
char *oldname, *newname;
spa_t *spa;
(void) rw_enter(&ztest_name_lock, RW_WRITER);
(void) rw_wrlock(&ztest_name_lock);
oldname = ztest_opts.zo_pool;
newname = umem_alloc(strlen(oldname) + 5, UMEM_NOFAIL);
@ -5306,7 +5305,7 @@ ztest_spa_rename(ztest_ds_t *zd, uint64_t id)
umem_free(newname, strlen(newname) + 1);
(void) rw_exit(&ztest_name_lock);
(void) rw_unlock(&ztest_name_lock);
}
/*
@ -5636,18 +5635,18 @@ ztest_dataset_open(int d)
ztest_dataset_name(name, ztest_opts.zo_pool, d);
(void) rw_enter(&ztest_name_lock, RW_READER);
(void) rw_rdlock(&ztest_name_lock);
error = ztest_dataset_create(name);
if (error == ENOSPC) {
(void) rw_exit(&ztest_name_lock);
(void) rw_unlock(&ztest_name_lock);
ztest_record_enospc(FTAG);
return (error);
}
ASSERT(error == 0 || error == EEXIST);
VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, zd, &os));
(void) rw_exit(&ztest_name_lock);
(void) rw_unlock(&ztest_name_lock);
ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os);
@ -5712,7 +5711,7 @@ ztest_run(ztest_shared_t *zs)
* Initialize parent/child shared state.
*/
mutex_init(&ztest_vdev_lock, NULL, MUTEX_DEFAULT, NULL);
rw_init(&ztest_name_lock, NULL, RW_DEFAULT, NULL);
VERIFY(rwlock_init(&ztest_name_lock, USYNC_THREAD, NULL) == 0);
zs->zs_thread_start = gethrtime();
zs->zs_thread_stop =
@ -5879,7 +5878,7 @@ ztest_run(ztest_shared_t *zs)
list_destroy(&zcl.zcl_callbacks);
mutex_destroy(&zcl.zcl_callbacks_lock);
rw_destroy(&ztest_name_lock);
(void) rwlock_destroy(&ztest_name_lock);
mutex_destroy(&ztest_vdev_lock);
}
@ -6012,7 +6011,7 @@ ztest_init(ztest_shared_t *zs)
int i;
mutex_init(&ztest_vdev_lock, NULL, MUTEX_DEFAULT, NULL);
rw_init(&ztest_name_lock, NULL, RW_DEFAULT, NULL);
VERIFY(rwlock_init(&ztest_name_lock, USYNC_THREAD, NULL) == 0);
kernel_init(FREAD | FWRITE);
@ -6050,7 +6049,7 @@ ztest_init(ztest_shared_t *zs)
ztest_run_zdb(ztest_opts.zo_pool);
rw_destroy(&ztest_name_lock);
(void) rwlock_destroy(&ztest_name_lock);
mutex_destroy(&ztest_vdev_lock);
}