From 0c11631b026b2e9dd40cefb72313970cc1eb389c Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 28 Jun 2010 10:00:12 -0700 Subject: [PATCH 1/7] Fix for fix-stack-dmu_objset_snapshot should be 'sn' not '&sn' I missed a instanse of removing the & operator when reducing the stack usage in this function. This unfortunately doesn't cause a compile warning but it is does cause ztest failures. Anyway, update the topic branch to correct this mistake. --- module/zfs/dmu_objset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index 32bb614c81..ba024584fa 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -913,7 +913,7 @@ dmu_objset_snapshot(char *fsname, char *snapname, if (dst->dst_err) dsl_dataset_name(ds, sn->failed); zil_resume(dmu_objset_zil(os)); - dmu_objset_rele(os, &sn); + dmu_objset_rele(os, sn); } if (err) From 6914386b8590fed6be580170481b1cf9016a59b6 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 28 Jun 2010 10:11:12 -0700 Subject: [PATCH 2/7] Never sleep under taskq_dispatch() There are cases where under Linux it is not safe to sleep in taskq_dispatch(). Rather than adding Linux specific code to detect these cases I opted to keep it simple and just never allow a sleep here. The impact of this should be minimal. --- module/zfs/zio.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/module/zfs/zio.c b/module/zfs/zio.c index db9bb65fdd..341f00306a 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -1048,10 +1048,7 @@ zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline) { spa_t *spa = zio->io_spa; zio_type_t t = zio->io_type; - int flags; - - flags = (cutinline ? TQ_FRONT : 0); - flags |= ((q == ZIO_TASKQ_INTERRUPT) ? TQ_NOSLEEP : TQ_SLEEP); + int flags = TQ_NOSLEEP | (cutinline ? TQ_FRONT : 0); /* * If we're a config writer or a probe, the normal issue and From a2e73b751657526c49af0e0598e656df0f877a1f Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Sun, 27 Jun 2010 15:06:49 -0700 Subject: [PATCH 3/7] Allow joinable threads There was previous discussion of a race with joinable threads but to be honest I can neither exactly remember the race, or recrease the issue. I believe it may have had to do with pthread_create() returning without having set kt->tid since this was done in the created thread. If that was the race then I've 'fixed' it by ensuring the thread id is set in the thread AND as the first pthread_create() argument. Why this wasn't done originally I'm not sure, with luck Ricardo remembers. Additionally, explicitly set a PAGESIZE guard frame at the end of the stack to aid in detecting stack overflow. And add some conditional logic to set STACK_SIZE correctly for Solaris. --- lib/libzpool/include/sys/zfs_context.h | 4 ++++ lib/libzpool/kernel.c | 25 ++++++++++--------------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/lib/libzpool/include/sys/zfs_context.h b/lib/libzpool/include/sys/zfs_context.h index db93a89b8b..e62c75a188 100644 --- a/lib/libzpool/include/sys/zfs_context.h +++ b/lib/libzpool/include/sys/zfs_context.h @@ -198,7 +198,11 @@ _NOTE(CONSTCOND) } while (0) * Threads */ #define TS_RUN 0x00000002 +#ifdef _linux_ #define STACK_SIZE 8192 /* Linux x86 and amd64 */ +#else +#define STACK_SIZE 24576 /* Solaris */ +#endif /* in libzpool, p0 exists only to have its address taken */ typedef struct proc { diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index 6f9e383a87..a992bec9ab 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -145,14 +145,9 @@ zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg, size_t len, proc_t *pp, int state, pri_t pri) { kthread_t *kt; - pthread_t tid; pthread_attr_t attr; size_t stack; - /* - * Due to a race when getting/setting the thread ID, currently only - * detached threads are supported. - */ ASSERT3S(state & ~TS_RUN, ==, 0); kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL); @@ -160,23 +155,23 @@ zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg, kt->t_arg = arg; /* - * The Solaris kernel stack size in x86/x64 is 8K, so we reduce the - * default stack size in userspace, for sanity checking. + * The Solaris kernel stack size is 24k for x86/x86_64. + * The Linux kernel stack size is 8k for x86/x86_64. * - * PTHREAD_STACK_MIN is the stack required for a NULL procedure in - * userspace. - * - * XXX: Stack size for other architectures is not being taken into - * account. + * We reduce the default stack size in userspace, to ensure + * we observe stack overruns in user space as well as in + * kernel space. PTHREAD_STACK_MIN is the minimum stack + * required for a NULL procedure in user space and is added + * in to the stack requirements. */ stack = PTHREAD_STACK_MIN + MAX(stksize, STACK_SIZE); VERIFY3S(pthread_attr_init(&attr), ==, 0); VERIFY3S(pthread_attr_setstacksize(&attr, stack), ==, 0); - VERIFY3S(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED), - ==, 0); + VERIFY3S(pthread_attr_setguardsize(&attr, PAGESIZE), ==, 0); - VERIFY3S(pthread_create(&tid, &attr, &zk_thread_helper, kt), ==, 0); + VERIFY3S(pthread_create(&kt->t_tid, &attr, &zk_thread_helper, kt), + ==, 0); VERIFY3S(pthread_attr_destroy(&attr), ==, 0); From d6ea5e8cefb4e72e5c8065909ef75371aa2da6f2 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 25 Jun 2010 23:04:21 -0700 Subject: [PATCH 4/7] Additional ZVOL cleanup in zvol_set_volsize() The following cleanup was missed in the first pass when the ZVOL implementation was updated. An extra instance of a zvol_state_t was removed from the stack and the error handling was simplified. --- module/zfs/zvol.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 333b3fb9d1..ffbb29251c 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -275,8 +275,6 @@ zvol_set_volsize(const char *name, uint64_t volsize) zvol_state_t *zv; dmu_object_info_t doi; objset_t *os = NULL; - zvol_state_t state = { 0 }; - uint64_t old_volsize = 0ULL; uint64_t readonly; int error; @@ -284,24 +282,24 @@ zvol_set_volsize(const char *name, uint64_t volsize) zv = zvol_find_by_name(name); if (zv == NULL) { - error = dmu_objset_hold(name, FTAG, &os); - if (error) - goto out; - zv = &state; + error = ENXIO; + goto out; } + error = dmu_objset_hold(name, FTAG, &os); + if (error) + goto out; + + if ((error = dmu_object_info(os, ZVOL_OBJ, &doi)) != 0 || + (error = zvol_check_volsize(volsize,doi.doi_data_block_size)) != 0) + goto out; + VERIFY(dsl_prop_get_integer(name, "readonly", &readonly, NULL) == 0); if (readonly) { error = EROFS; goto out; } - old_volsize = zv->zv_volsize; - - if ((error = dmu_object_info(os, ZVOL_OBJ, &doi)) != 0 || - (error = zvol_check_volsize(volsize,doi.doi_data_block_size)) != 0) - goto out; - if (get_disk_ro(zv->zv_disk) || (zv->zv_flags & ZVOL_RDONLY)) { error = EROFS; goto out; From 5b1f2041e5093b0a5da88e0aa4e0e89745b53a18 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Thu, 17 Jun 2010 12:34:46 -0700 Subject: [PATCH 5/7] Silence 3 additional large kmem warnings The following are 3 cases where move than 2 pages are allocated with a kmem_alloc()... but not a lot more. For now we just disable the warning with KM_NODEBUG and this can be revisted latter to see if it's worth shrinking the allocation or perhaps moving it to a slab. --- module/zfs/spa_config.c | 4 ++-- module/zfs/spa_misc.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c index cdeda3f93c..6fc4ad0a88 100644 --- a/module/zfs/spa_config.c +++ b/module/zfs/spa_config.c @@ -96,7 +96,7 @@ spa_config_load(void) if (kobj_get_filesize(file, &fsize) != 0) goto out; - buf = kmem_alloc(fsize, KM_SLEEP); + buf = kmem_alloc(fsize, KM_SLEEP | KM_NODEBUG); /* * Read the nvlist from the file. @@ -159,7 +159,7 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) */ VERIFY(nvlist_size(nvl, &buflen, NV_ENCODE_XDR) == 0); - buf = kmem_alloc(buflen, KM_SLEEP); + buf = kmem_alloc(buflen, KM_SLEEP | KM_NODEBUG); temp = kmem_zalloc(MAXPATHLEN, KM_SLEEP); VERIFY(nvlist_pack(nvl, &buf, &buflen, NV_ENCODE_XDR, diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 3999bf7032..660f25524b 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -437,7 +437,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) ASSERT(MUTEX_HELD(&spa_namespace_lock)); - spa = kmem_zalloc(sizeof (spa_t), KM_SLEEP); + spa = kmem_zalloc(sizeof (spa_t), KM_SLEEP | KM_NODEBUG); mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL); From 94f23a6856f0cf2157596129eeda65dff7820446 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 21 Jun 2010 21:26:25 -0700 Subject: [PATCH 6/7] Ensure NULL is not returned incorrectly after the first call. I'm surprised this was not caught long ago, but previous the code actually never did call the function twice so it was missed. Anyway, this fixes it. --- lib/libspl/getexecname.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/libspl/getexecname.c b/lib/libspl/getexecname.c index 43bf39ae33..c564eed055 100644 --- a/lib/libspl/getexecname.c +++ b/lib/libspl/getexecname.c @@ -48,6 +48,8 @@ getexecname(void) execname[rc] = '\0'; ptr = execname; } + } else { + ptr = execname; } pthread_mutex_unlock(&mtx); From 52bb0d8e75b0de049832b986fd06d6ca950fc4c5 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 21 Jun 2010 21:31:41 -0700 Subject: [PATCH 7/7] Ensure zio_bad_cksum_t is initialized This may not strictly be needed but it does keep gcc happy. We should keep our eye on this though if the extra bcopy significantly impacts performance. It may. --- module/zfs/vdev_raidz.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index 64c1564bfd..cbabb84020 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -1638,8 +1638,11 @@ raidz_checksum_verify(zio_t *zio) { zio_bad_cksum_t zbc; raidz_map_t *rm = zio->io_vsd; + int ret; - int ret = zio_checksum_error(zio, &zbc); + bzero(&zbc, sizeof (zio_bad_cksum_t)); + + ret = zio_checksum_error(zio, &zbc); if (ret != 0 && zbc.zbc_injected != 0) rm->rm_ecksuminjected = 1;