diff --git a/config/Rules.am b/config/Rules.am
index af20ddb396..20a21e972b 100644
--- a/config/Rules.am
+++ b/config/Rules.am
@@ -7,7 +7,8 @@ AM_CFLAGS += ${NO_BOOL_COMPARE}
 AM_CFLAGS += -fno-strict-aliasing
 AM_CPPFLAGS = -D_GNU_SOURCE -D__EXTENSIONS__ -D_REENTRANT
 AM_CPPFLAGS += -D_POSIX_PTHREAD_SEMANTICS -D_FILE_OFFSET_BITS=64
-AM_CPPFLAGS += -D_LARGEFILE64_SOURCE -DTEXT_DOMAIN=\"zfs-linux-user\"
+AM_CPPFLAGS += -D_LARGEFILE64_SOURCE -DHAVE_LARGE_STACKS=1
+AM_CPPFLAGS += -DTEXT_DOMAIN=\"zfs-linux-user\"
 AM_CPPFLAGS += -DLIBEXECDIR=\"$(libexecdir)\"
 AM_CPPFLAGS += -DRUNSTATEDIR=\"$(runstatedir)\"
 AM_CPPFLAGS += -DSBINDIR=\"$(sbindir)\"
diff --git a/config/kernel.m4 b/config/kernel.m4
index 30c9b84d22..ae4bed9a7b 100644
--- a/config/kernel.m4
+++ b/config/kernel.m4
@@ -473,9 +473,35 @@ AC_DEFUN([ZFS_AC_KERNEL_CONFIG], [
 	])
 	])
 
+	ZFS_AC_KERNEL_CONFIG_THREAD_SIZE
 	ZFS_AC_KERNEL_CONFIG_DEBUG_LOCK_ALLOC
 ])
 
+dnl #
+dnl # Check configured THREAD_SIZE
+dnl #
+dnl # The stack size will vary by architecture, but as of Linux 3.15 on x86_64
+dnl # the default thread stack size was increased to 16K from 8K.  Therefore,
+dnl # on newer kernels and some architectures stack usage optimizations can be
+dnl # conditionally applied to improve performance without negatively impacting
+dnl # stability.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_CONFIG_THREAD_SIZE], [
+	AC_MSG_CHECKING([whether kernel was built with 16K or larger stacks])
+	ZFS_LINUX_TRY_COMPILE([
+		#include <linux/module.h>
+	],[
+		#if (THREAD_SIZE < 16384)
+		#error "THREAD_SIZE is less than 16K"
+		#endif
+	],[
+		AC_MSG_RESULT([yes])
+		AC_DEFINE(HAVE_LARGE_STACKS, 1, [kernel has large stacks])
+	],[
+		AC_MSG_RESULT([no])
+	])
+])
+
 dnl #
 dnl # Check CONFIG_DEBUG_LOCK_ALLOC
 dnl #
@@ -585,7 +611,7 @@ dnl #
 dnl # ZFS_LINUX_CONFIG
 dnl #
 AC_DEFUN([ZFS_LINUX_CONFIG],
-	[AC_MSG_CHECKING([whether Linux was built with CONFIG_$1])
+	[AC_MSG_CHECKING([whether kernel was built with CONFIG_$1])
 	ZFS_LINUX_TRY_COMPILE([
 		#include <linux/module.h>
 	],[
diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
index 6a349c6600..940454977c 100644
--- a/module/zfs/dmu_send.c
+++ b/module/zfs/dmu_send.c
@@ -69,7 +69,7 @@ typedef struct dump_bytes_io {
 } dump_bytes_io_t;
 
 static void
-dump_bytes_strategy(void *arg)
+dump_bytes_cb(void *arg)
 {
 	dump_bytes_io_t *dbi = (dump_bytes_io_t *)arg;
 	dmu_sendarg_t *dsp = dbi->dbi_dsp;
@@ -96,6 +96,9 @@ dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
 	dbi.dbi_buf = buf;
 	dbi.dbi_len = len;
 
+#if defined(HAVE_LARGE_STACKS)
+	dump_bytes_cb(&dbi);
+#else
 	/*
 	 * The vn_rdwr() call is performed in a taskq to ensure that there is
 	 * always enough stack space to write safely to the target filesystem.
@@ -103,7 +106,8 @@ dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
 	 * them and they are used in vdev_file.c for a similar purpose.
 	 */
 	spa_taskq_dispatch_sync(dmu_objset_spa(dsp->dsa_os), ZIO_TYPE_FREE,
-	    ZIO_TASKQ_ISSUE, dump_bytes_strategy, &dbi, TQ_SLEEP);
+	    ZIO_TASKQ_ISSUE, dump_bytes_cb, &dbi, TQ_SLEEP);
+#endif /* HAVE_LARGE_STACKS */
 
 	return (dsp->dsa_err);
 }
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index 794439bbe7..2bc88c52c9 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -1415,6 +1415,31 @@ zio_execute(zio_t *zio)
 	spl_fstrans_unmark(cookie);
 }
 
+/*
+ * Used to determine if the current context's stack is large enough to
+ * allow zio_execute() to be called recursively.  A minimum stack size
+ * of 16K is required to avoid having to re-dispatch the zio.
+ */
+boolean_t
+zio_execute_stack_check(zio_t *zio)
+{
+#if !defined(HAVE_LARGE_STACKS)
+	dsl_pool_t *dp = spa_get_dsl(zio->io_spa);
+
+	/* Executing in txg_sync_thread() context. */
+	if (dp && curthread == dp->dp_tx.tx_sync_thread)
+		return (B_TRUE);
+
+	/* Pool initialization outside of zio_taskq context. */
+	if (dp && spa_is_initializing(dp->dp_spa) &&
+	    !zio_taskq_member(zio, ZIO_TASKQ_ISSUE) &&
+	    !zio_taskq_member(zio, ZIO_TASKQ_ISSUE_HIGH))
+		return (B_TRUE);
+#endif /* HAVE_LARGE_STACKS */
+
+	return (B_FALSE);
+}
+
 __attribute__((always_inline))
 static inline void
 __zio_execute(zio_t *zio)
@@ -1424,8 +1449,6 @@ __zio_execute(zio_t *zio)
 	while (zio->io_stage < ZIO_STAGE_DONE) {
 		enum zio_stage pipeline = zio->io_pipeline;
 		enum zio_stage stage = zio->io_stage;
-		dsl_pool_t *dp;
-		boolean_t cut;
 		int rv;
 
 		ASSERT(!MUTEX_HELD(&zio->io_lock));
@@ -1438,10 +1461,6 @@ __zio_execute(zio_t *zio)
 
 		ASSERT(stage <= ZIO_STAGE_DONE);
 
-		dp = spa_get_dsl(zio->io_spa);
-		cut = (stage == ZIO_STAGE_VDEV_IO_START) ?
-		    zio_requeue_io_start_cut_in_line : B_FALSE;
-
 		/*
 		 * If we are in interrupt context and this pipeline stage
 		 * will grab a config lock that is held across I/O,
@@ -1453,21 +1472,19 @@ __zio_execute(zio_t *zio)
 		 */
 		if ((stage & ZIO_BLOCKING_STAGES) && zio->io_vd == NULL &&
 		    zio_taskq_member(zio, ZIO_TASKQ_INTERRUPT)) {
+			boolean_t cut = (stage == ZIO_STAGE_VDEV_IO_START) ?
+			    zio_requeue_io_start_cut_in_line : B_FALSE;
 			zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, cut);
 			return;
 		}
 
 		/*
-		 * If we executing in the context of the tx_sync_thread,
-		 * or we are performing pool initialization outside of a
-		 * zio_taskq[ZIO_TASKQ_ISSUE|ZIO_TASKQ_ISSUE_HIGH] context.
-		 * Then issue the zio asynchronously to minimize stack usage
-		 * for these deep call paths.
+		 * If the current context doesn't have a large enough stack,
+		 * the zio must be issued asynchronously to prevent overflow.
 		 */
-		if ((dp && curthread == dp->dp_tx.tx_sync_thread) ||
-		    (dp && spa_is_initializing(dp->dp_spa) &&
-		    !zio_taskq_member(zio, ZIO_TASKQ_ISSUE) &&
-		    !zio_taskq_member(zio, ZIO_TASKQ_ISSUE_HIGH))) {
+		if (zio_execute_stack_check(zio)) {
+			boolean_t cut = (stage == ZIO_STAGE_VDEV_IO_START) ?
+			    zio_requeue_io_start_cut_in_line : B_FALSE;
 			zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, cut);
 			return;
 		}
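For reviewers who want to poke at the pattern outside the kernel tree, below is a minimal userland sketch of the compile-time dispatch this patch introduces in dump_bytes().  It is an approximation under stated assumptions, not the kernel code: dispatch_sync() is a hypothetical stand-in for spa_taskq_dispatch_sync(), and the real callback writes the buffer to the send stream with vn_rdwr() instead of printing.  Build once with -DHAVE_LARGE_STACKS=1 and once without to exercise both paths.

/*
 * Userland sketch of the HAVE_LARGE_STACKS dispatch pattern in
 * dump_bytes().  With large stacks the callback is simply called
 * inline; with small stacks it is handed to a dispatcher so the
 * deep write path starts from a fresh stack.
 */
#include <stdio.h>

typedef struct dump_bytes_io {
	const void *dbi_buf;
	int dbi_len;
} dump_bytes_io_t;

static void
dump_bytes_cb(void *arg)
{
	dump_bytes_io_t *dbi = arg;

	/* The real callback writes dbi_buf to the send stream here. */
	printf("writing %d bytes\n", dbi->dbi_len);
}

#if !defined(HAVE_LARGE_STACKS)
/*
 * Hypothetical synchronous dispatcher; the kernel's taskq runs the
 * function on a worker thread and waits for it to complete.
 */
static void
dispatch_sync(void (*func)(void *), void *arg)
{
	func(arg);
}
#endif

static void
dump_bytes(const void *buf, int len)
{
	dump_bytes_io_t dbi = { .dbi_buf = buf, .dbi_len = len };

#if defined(HAVE_LARGE_STACKS)
	dump_bytes_cb(&dbi);		/* Large stacks: call inline. */
#else
	dispatch_sync(dump_bytes_cb, &dbi); /* Small stacks: re-dispatch. */
#endif
}

int
main(void)
{
	char buf[16] = { 0 };

	dump_bytes(buf, sizeof (buf));
	return (0);
}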
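Two consequences of the patch worth noting.  When HAVE_LARGE_STACKS is defined, zio_execute_stack_check() compiles down to an unconditional B_FALSE, so __zio_execute() recurses in place instead of bouncing every deep zio through a taskq.  And config/Rules.am defines HAVE_LARGE_STACKS=1 unconditionally for userland builds, which makes sense given that user threads get multi-megabyte stacks by default, so libzpool consumers should always take the inline paths.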