Additional pthread related fixes for ztest
There are 3 fixes in this commit. First, update ztest_run() to store the thread id and not the address of the kthread_t. The kthread_t will be freed on thread exit, so its address is not safe to use. This is pretty close to how things were done in the original ztest code before I got there. Second, for extra paranoia update thread_exit() to return a special TS_MAGIC value via pthread_exit(). This value is then verified after pthread_join() to ensure the thread exited cleanly. This can be done cleanly because the kthread doesn't provide a return code mechanism we need to worry about. Third, replace the ztest deadman thread with a signal handler. We cannot use the previous approach because the correct behavior for pthreads is to wait for all threads to exit before terminating the process. Since the deadman thread won't call exit by design we end up hanging in kernel_exit(). To avoid this we just set up a SIGALRM signal handler and register a deadman alarm. IMHO this is simpler and cleaner anyway.
This commit is contained in:
parent
a2e73b7516
commit
6fe1e37a5e
|
@ -4904,21 +4904,12 @@ ztest_resume_thread(void *arg)
|
|||
return (NULL);
|
||||
}
|
||||
|
||||
static void *
|
||||
ztest_deadman_thread(void *arg)
|
||||
#define GRACE 300
|
||||
|
||||
static void
|
||||
ztest_deadman_alarm(int sig)
|
||||
{
|
||||
ztest_shared_t *zs = arg;
|
||||
int grace = 300;
|
||||
hrtime_t delta;
|
||||
|
||||
delta = (zs->zs_thread_stop - zs->zs_thread_start) / NANOSEC + grace;
|
||||
|
||||
(void) poll(NULL, 0, (int)(1000 * delta));
|
||||
|
||||
fatal(0, "failed to complete within %d seconds of deadline", grace);
|
||||
thread_exit();
|
||||
|
||||
return (NULL);
|
||||
fatal(0, "failed to complete within %d seconds of deadline", GRACE);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -5106,7 +5097,7 @@ ztest_dataset_close(ztest_shared_t *zs, int d)
|
|||
static void
|
||||
ztest_run(ztest_shared_t *zs)
|
||||
{
|
||||
kthread_t **tid;
|
||||
kt_did_t *tid;
|
||||
spa_t *spa;
|
||||
kthread_t *resume_thread;
|
||||
uint64_t object;
|
||||
|
@ -5158,10 +5149,10 @@ ztest_run(ztest_shared_t *zs)
|
|||
spa, TS_RUN, NULL, 0, 0)), !=, NULL);
|
||||
|
||||
/*
|
||||
* Create a deadman thread to abort() if we hang.
|
||||
* Set a deadman alarm to abort() if we hang.
|
||||
*/
|
||||
VERIFY3P(thread_create(NULL, 0, ztest_deadman_thread, zs,
|
||||
TS_RUN, NULL, 0, 0), !=, NULL);
|
||||
signal(SIGALRM, ztest_deadman_alarm);
|
||||
alarm((zs->zs_thread_stop - zs->zs_thread_start) / NANOSEC + GRACE);
|
||||
|
||||
/*
|
||||
* Verify that we can safely inquire about any object,
|
||||
|
@ -5187,7 +5178,7 @@ ztest_run(ztest_shared_t *zs)
|
|||
}
|
||||
zs->zs_enospc_count = 0;
|
||||
|
||||
tid = umem_zalloc(zopt_threads * sizeof (kthread_t *), UMEM_NOFAIL);
|
||||
tid = umem_zalloc(zopt_threads * sizeof (kt_did_t), UMEM_NOFAIL);
|
||||
|
||||
if (zopt_verbose >= 4)
|
||||
(void) printf("starting main threads...\n");
|
||||
|
@ -5196,11 +5187,14 @@ ztest_run(ztest_shared_t *zs)
|
|||
* Kick off all the tests that run in parallel.
|
||||
*/
|
||||
for (int t = 0; t < zopt_threads; t++) {
|
||||
kthread_t *thread;
|
||||
|
||||
if (t < zopt_datasets && ztest_dataset_open(zs, t) != 0)
|
||||
return;
|
||||
|
||||
VERIFY3P(tid[t] = thread_create(NULL, 0, ztest_thread,
|
||||
VERIFY3P(thread = thread_create(NULL, 0, ztest_thread,
|
||||
(void *)(uintptr_t)t, TS_RUN, NULL, 0, 0), !=, NULL);
|
||||
tid[t] = thread->t_tid;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -5208,7 +5202,7 @@ ztest_run(ztest_shared_t *zs)
|
|||
* so we don't close datasets while threads are still using them.
|
||||
*/
|
||||
for (int t = zopt_threads - 1; t >= 0; t--) {
|
||||
thread_join(tid[t]->t_tid);
|
||||
thread_join(tid[t]);
|
||||
if (t < zopt_datasets)
|
||||
ztest_dataset_close(zs, t);
|
||||
}
|
||||
|
@ -5218,7 +5212,7 @@ ztest_run(ztest_shared_t *zs)
|
|||
zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
|
||||
zs->zs_space = metaslab_class_get_space(spa_normal_class(spa));
|
||||
|
||||
umem_free(tid, zopt_threads * sizeof (kthread_t *));
|
||||
umem_free(tid, zopt_threads * sizeof (kt_did_t));
|
||||
|
||||
/* Kill the resume thread */
|
||||
ztest_exiting = B_TRUE;
|
||||
|
|
|
@ -197,7 +197,8 @@ _NOTE(CONSTCOND) } while (0)
|
|||
/*
|
||||
* Threads
|
||||
*/
|
||||
#define TS_RUN 0x00000002
|
||||
#define TS_MAGIC 0x72f158ab4261e538ull
|
||||
#define TS_RUN 0x00000002
|
||||
#ifdef _linux_
|
||||
#define STACK_SIZE 8192 /* Linux x86 and amd64 */
|
||||
#else
|
||||
|
@ -221,7 +222,6 @@ typedef struct kthread {
|
|||
void * t_arg;
|
||||
} kthread_t;
|
||||
|
||||
/* XXX tsd_create()/tsd_destroy() missing */
|
||||
#define tsd_get(key) pthread_getspecific(key)
|
||||
#define tsd_set(key, val) pthread_setspecific(key, val)
|
||||
#define curthread zk_thread_current()
|
||||
|
|
|
@ -192,13 +192,16 @@ zk_thread_exit(void)
|
|||
pthread_mutex_unlock(&kthread_lock);
|
||||
|
||||
pthread_cond_broadcast(&kthread_cond);
|
||||
pthread_exit(NULL);
|
||||
pthread_exit((void *)TS_MAGIC);
|
||||
}
|
||||
|
||||
void
|
||||
zk_thread_join(kt_did_t tid)
|
||||
{
|
||||
pthread_join((pthread_t)tid, NULL);
|
||||
void *ret;
|
||||
|
||||
pthread_join((pthread_t)tid, &ret);
|
||||
VERIFY3P(ret, ==, (void *)TS_MAGIC);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
Loading…
Reference in New Issue