From 6fe1e37a5e9399eb24947e7b37ca7f3e276d8466 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 28 Jun 2010 23:45:08 -0700 Subject: [PATCH 1/2] Additional pthread related fixes for ztest There are 3 fixes in thie commit. First, update ztest_run() to store the thread id and not the address of the kthread_t. This will be freed on thread exit and is not safe to use. This is pretty close to how things were done in the original ztest code before I got there. Second, for extra paranoia update thread_exit() to return a special TS_MAGIC value via pthread_exit(). This value is then verified in pthread_join() to ensure the thread exited cleanly. This can be done cleanly because the kthread doesn't provide a return code mechanism we need to worry about. Third, replace the ztest deadman thread with a signal handler. We cannot use the previous approach because the correct behavior for pthreads is to wait for all threads to exit before terminating the process. Since the deadman thread won't call exit by design we end up hanging in kernel_exit(). To avoid this we just setup a SIGALRM signal handle and register a deadman alarm. IMHO this is simpler and cleaner anyway. --- cmd/ztest/ztest.c | 38 +++++++++++--------------- lib/libzpool/include/sys/zfs_context.h | 4 +-- lib/libzpool/kernel.c | 7 +++-- 3 files changed, 23 insertions(+), 26 deletions(-) diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index 69086a34a2..5706055216 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -4904,21 +4904,12 @@ ztest_resume_thread(void *arg) return (NULL); } -static void * -ztest_deadman_thread(void *arg) +#define GRACE 300 + +static void +ztest_deadman_alarm(int sig) { - ztest_shared_t *zs = arg; - int grace = 300; - hrtime_t delta; - - delta = (zs->zs_thread_stop - zs->zs_thread_start) / NANOSEC + grace; - - (void) poll(NULL, 0, (int)(1000 * delta)); - - fatal(0, "failed to complete within %d seconds of deadline", grace); - thread_exit(); - - return (NULL); + fatal(0, "failed to complete within %d seconds of deadline", GRACE); } static void @@ -5106,7 +5097,7 @@ ztest_dataset_close(ztest_shared_t *zs, int d) static void ztest_run(ztest_shared_t *zs) { - kthread_t **tid; + kt_did_t *tid; spa_t *spa; kthread_t *resume_thread; uint64_t object; @@ -5158,10 +5149,10 @@ ztest_run(ztest_shared_t *zs) spa, TS_RUN, NULL, 0, 0)), !=, NULL); /* - * Create a deadman thread to abort() if we hang. + * Set a deadman alarm to abort() if we hang. */ - VERIFY3P(thread_create(NULL, 0, ztest_deadman_thread, zs, - TS_RUN, NULL, 0, 0), !=, NULL); + signal(SIGALRM, ztest_deadman_alarm); + alarm((zs->zs_thread_stop - zs->zs_thread_start) / NANOSEC + GRACE); /* * Verify that we can safely inquire about about any object, @@ -5187,7 +5178,7 @@ ztest_run(ztest_shared_t *zs) } zs->zs_enospc_count = 0; - tid = umem_zalloc(zopt_threads * sizeof (kthread_t *), UMEM_NOFAIL); + tid = umem_zalloc(zopt_threads * sizeof (kt_did_t), UMEM_NOFAIL); if (zopt_verbose >= 4) (void) printf("starting main threads...\n"); @@ -5196,11 +5187,14 @@ ztest_run(ztest_shared_t *zs) * Kick off all the tests that run in parallel. */ for (int t = 0; t < zopt_threads; t++) { + kthread_t *thread; + if (t < zopt_datasets && ztest_dataset_open(zs, t) != 0) return; - VERIFY3P(tid[t] = thread_create(NULL, 0, ztest_thread, + VERIFY3P(thread = thread_create(NULL, 0, ztest_thread, (void *)(uintptr_t)t, TS_RUN, NULL, 0, 0), !=, NULL); + tid[t] = thread->t_tid; } /* @@ -5208,7 +5202,7 @@ ztest_run(ztest_shared_t *zs) * so we don't close datasets while threads are still using them. */ for (int t = zopt_threads - 1; t >= 0; t--) { - thread_join(tid[t]->t_tid); + thread_join(tid[t]); if (t < zopt_datasets) ztest_dataset_close(zs, t); } @@ -5218,7 +5212,7 @@ ztest_run(ztest_shared_t *zs) zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(spa)); zs->zs_space = metaslab_class_get_space(spa_normal_class(spa)); - umem_free(tid, zopt_threads * sizeof (kthread_t *)); + umem_free(tid, zopt_threads * sizeof (kt_did_t)); /* Kill the resume thread */ ztest_exiting = B_TRUE; diff --git a/lib/libzpool/include/sys/zfs_context.h b/lib/libzpool/include/sys/zfs_context.h index e62c75a188..b0112e6ec4 100644 --- a/lib/libzpool/include/sys/zfs_context.h +++ b/lib/libzpool/include/sys/zfs_context.h @@ -197,7 +197,8 @@ _NOTE(CONSTCOND) } while (0) /* * Threads */ -#define TS_RUN 0x00000002 +#define TS_MAGIC 0x72f158ab4261e538ull +#define TS_RUN 0x00000002 #ifdef _linux_ #define STACK_SIZE 8192 /* Linux x86 and amd64 */ #else @@ -221,7 +222,6 @@ typedef struct kthread { void * t_arg; } kthread_t; -/* XXX tsd_create()/tsd_destroy() missing */ #define tsd_get(key) pthread_getspecific(key) #define tsd_set(key, val) pthread_setspecific(key, val) #define curthread zk_thread_current() diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index a992bec9ab..2c9d32be82 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -192,13 +192,16 @@ zk_thread_exit(void) pthread_mutex_unlock(&kthread_lock); pthread_cond_broadcast(&kthread_cond); - pthread_exit(NULL); + pthread_exit((void *)TS_MAGIC); } void zk_thread_join(kt_did_t tid) { - pthread_join((pthread_t)tid, NULL); + void *ret; + + pthread_join((pthread_t)tid, &ret); + VERIFY3P(ret, ==, (void *)TS_MAGIC); } /* From 7334572416255825f45a71447d78de710597dc54 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Tue, 29 Jun 2010 10:11:32 -0700 Subject: [PATCH 2/2] Reduce stack usage by dsl_dataset_destroy() Move dsl_dataset_t local variable from the stack to the heap. This reduces the stack usage of this function from 2048 bytes to 176 bytes for x84_64 arches. --- .topdeps | 1 + .topmsg | 8 ++++++++ module/zfs/dsl_dataset.c | 22 +++++++++++++--------- 3 files changed, 22 insertions(+), 9 deletions(-) create mode 100644 .topdeps create mode 100644 .topmsg diff --git a/.topdeps b/.topdeps new file mode 100644 index 0000000000..1f7391f92b --- /dev/null +++ b/.topdeps @@ -0,0 +1 @@ +master diff --git a/.topmsg b/.topmsg new file mode 100644 index 0000000000..14624fabbc --- /dev/null +++ b/.topmsg @@ -0,0 +1,8 @@ +From: Brian Behlendorf +Subject: [PATCH] fix stack dsl_dataset_destroy + +Move dsl_dataset_t local variable from the stack to the heap. +This reduces the stack usage of this function from 2048 bytes +to 176 bytes for x84_64 arches. + +Signed-off-by: Brian Behlendorf diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index ddd83576c6..0f7bc0f81a 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -1033,7 +1033,7 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) dsl_dir_t *dd; uint64_t obj; struct dsl_ds_destroyarg dsda = { 0 }; - dsl_dataset_t dummy_ds = { 0 }; + dsl_dataset_t *dummy_ds; dsda.ds = ds; @@ -1053,8 +1053,9 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) } dd = ds->ds_dir; - dummy_ds.ds_dir = dd; - dummy_ds.ds_object = ds->ds_object; + dummy_ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); + dummy_ds->ds_dir = dd; + dummy_ds->ds_object = ds->ds_object; /* * Check for errors and mark this ds as inconsistent, in @@ -1063,11 +1064,11 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check, dsl_dataset_destroy_begin_sync, ds, NULL, 0); if (err) - goto out; + goto out_free; err = dmu_objset_from_ds(ds, &os); if (err) - goto out; + goto out_free; /* * remove the objects in open context, so that we won't @@ -1104,14 +1105,14 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) } if (err != ESRCH) - goto out; + goto out_free; rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd); rw_exit(&dd->dd_pool->dp_config_rwlock); if (err) - goto out; + goto out_free; /* * Blow away the dsl_dir + head dataset. @@ -1127,7 +1128,7 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) err = dsl_dataset_origin_rm_prep(&dsda, tag); if (err) { dsl_dir_close(dd, FTAG); - goto out; + goto out_free; } } @@ -1135,7 +1136,7 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) dsl_sync_task_create(dstg, dsl_dataset_destroy_check, dsl_dataset_destroy_sync, &dsda, tag, 0); dsl_sync_task_create(dstg, dsl_dir_destroy_check, - dsl_dir_destroy_sync, &dummy_ds, FTAG, 0); + dsl_dir_destroy_sync, dummy_ds, FTAG, 0); err = dsl_sync_task_group_wait(dstg); dsl_sync_task_group_destroy(dstg); @@ -1158,6 +1159,9 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) /* if it is successful, dsl_dir_destroy_sync will close the dd */ if (err) dsl_dir_close(dd, FTAG); + +out_free: + kmem_free(dummy_ds, sizeof (dsl_dataset_t)); out: dsl_dataset_disown(ds, tag); return (err);