From 009cc8e884a84aeebed612ab64c30f77eab38392 Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Tue, 18 Aug 2020 12:33:55 -0400 Subject: [PATCH] Make zc_nvlist_src_size limit tunable We limit the size of nvlists passed to the kernel so a user cannot make the kernel do an unreasonably large allocation. On FreeBSD this limit was 128 kiB, which turns out to be a bit too small when doing some operations involving a large number of datasets or snapshots, for example replication. Make this limit tunable, with a platform-specific auto default. Linux keeps its limit at KMALLOC_MAX_SIZE. FreeBSD uses 1/4 of the system limit on user wired memory, which allows it to scale depending on system configuration. Reviewed-by: Matt Macy Reviewed-by: Brian Behlendorf Signed-off-by: Ryan Moeller Issue #6572 Closes #10706 --- include/os/freebsd/spl/sys/ccompile.h | 2 -- include/sys/zfs_ioctl_impl.h | 2 ++ man/man5/zfs-module-parameters.5 | 17 +++++++++++++++++ module/os/freebsd/zfs/zfs_ioctl_os.c | 13 +++++++++++++ module/os/linux/zfs/zfs_ioctl_os.c | 9 +++++++++ module/zfs/zfs_ioctl.c | 12 ++++++++++-- 6 files changed, 51 insertions(+), 4 deletions(-) diff --git a/include/os/freebsd/spl/sys/ccompile.h b/include/os/freebsd/spl/sys/ccompile.h index c90813b040..7268bd1d73 100644 --- a/include/os/freebsd/spl/sys/ccompile.h +++ b/include/os/freebsd/spl/sys/ccompile.h @@ -162,8 +162,6 @@ extern "C" { #define O_RSYNC 0 #define O_DSYNC 0 -#define KMALLOC_MAX_SIZE MAXPHYS - #ifndef LOCORE #ifndef HAVE_RPC_TYPES typedef int bool_t; diff --git a/include/sys/zfs_ioctl_impl.h b/include/sys/zfs_ioctl_impl.h index 2da6dde877..787475cf39 100644 --- a/include/sys/zfs_ioctl_impl.h +++ b/include/sys/zfs_ioctl_impl.h @@ -25,6 +25,7 @@ extern kmutex_t zfsdev_state_lock; extern zfsdev_state_t *zfsdev_state_list; +extern unsigned long zfs_max_nvlist_src_size; typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *); typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *); @@ -80,6 +81,7 @@ void zfs_ioctl_register(const char *, zfs_ioc_t, zfs_ioc_func_t *, zfs_secpolicy_func_t *, zfs_ioc_namecheck_t, zfs_ioc_poolcheck_t, boolean_t, boolean_t, const zfs_ioc_key_t *, size_t); +uint64_t zfs_max_nvlist_src_size_os(void); void zfs_ioctl_init_os(void); boolean_t zfs_vfs_held(zfsvfs_t *); diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index c209acbe16..463e51acc8 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -1074,6 +1074,23 @@ pool import (only in read-only mode). Default value: \fB0\fR .RE +.sp +.ne 2 +.na +\fBzfs_max_nvlist_src_size\fR (ulong) +.ad +.RS 12n +Maximum size in bytes allowed to be passed as zc_nvlist_src_size for ioctls on +/dev/zfs. This prevents a user from causing the kernel to allocate an excessive +amount of memory. When the limit is exceeded, the ioctl fails with EINVAL and a +description of the error is sent to the zfs-dbgmsg log. This parameter should +not need to be touched under normal circumstances. On FreeBSD, the default is +based on the system limit on user wired memory. On Linux, the default is +\fBKMALLOC_MAX_SIZE\fR . +.sp +Default value: \fB0\fR (kernel decides) +.RE + .sp .ne 2 .na diff --git a/module/os/freebsd/zfs/zfs_ioctl_os.c b/module/os/freebsd/zfs/zfs_ioctl_os.c index 1f7cd791e5..0e0c16033b 100644 --- a/module/os/freebsd/zfs/zfs_ioctl_os.c +++ b/module/os/freebsd/zfs/zfs_ioctl_os.c @@ -35,9 +35,14 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include +#if __FreeBSD_version < 1201517 +#define vm_page_max_user_wired vm_page_max_wired +#endif + int zfs_vfs_ref(zfsvfs_t **zfvp) { @@ -133,6 +138,14 @@ zfs_ioc_nextboot(const char *unused, nvlist_t *innvl, nvlist_t *outnvl) return (error); } +uint64_t +zfs_max_nvlist_src_size_os(void) +{ + if (zfs_max_nvlist_src_size != 0) + return (zfs_max_nvlist_src_size); + + return (ptob(vm_page_max_user_wired) / 4); +} void zfs_ioctl_init_os(void) diff --git a/module/os/linux/zfs/zfs_ioctl_os.c b/module/os/linux/zfs/zfs_ioctl_os.c index 52c5ed8838..b88e0497d0 100644 --- a/module/os/linux/zfs/zfs_ioctl_os.c +++ b/module/os/linux/zfs/zfs_ioctl_os.c @@ -203,6 +203,15 @@ out: } +uint64_t +zfs_max_nvlist_src_size_os(void) +{ + if (zfs_max_nvlist_src_size != 0) + return (zfs_max_nvlist_src_size); + + return (KMALLOC_MAX_SIZE); +} + void zfs_ioctl_init_os(void) { diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index a58fd99ad5..463704c14e 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -225,8 +225,9 @@ zfsdev_state_t *zfsdev_state_list; /* * Limit maximum nvlist size. We don't want users passing in insane values * for zc->zc_nvlist_src_size, since we will need to allocate that much memory. + * Defaults to 0=auto which is handled by platform code. */ -#define MAX_NVLIST_SRC_SIZE KMALLOC_MAX_SIZE +unsigned long zfs_max_nvlist_src_size = 0; uint_t zfs_fsyncer_key; uint_t zfs_allow_log_key; @@ -7341,6 +7342,7 @@ zfsdev_ioctl_common(uint_t vecnum, zfs_cmd_t *zc, int flag) int error, cmd; const zfs_ioc_vec_t *vec; char *saved_poolname = NULL; + uint64_t max_nvlist_src_size; size_t saved_poolname_len = 0; nvlist_t *innvl = NULL; fstrans_cookie_t cookie; @@ -7360,7 +7362,8 @@ zfsdev_ioctl_common(uint_t vecnum, zfs_cmd_t *zc, int flag) return (SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL)); zc->zc_iflags = flag & FKIOCTL; - if (zc->zc_nvlist_src_size > MAX_NVLIST_SRC_SIZE) { + max_nvlist_src_size = zfs_max_nvlist_src_size_os(); + if (zc->zc_nvlist_src_size > max_nvlist_src_size) { /* * Make sure the user doesn't pass in an insane value for * zc_nvlist_src_size. We have to check, since we will end @@ -7593,3 +7596,8 @@ zfs_kmod_fini(void) tsd_destroy(&rrw_tsd_key); tsd_destroy(&zfs_allow_log_key); } + +/* BEGIN CSTYLED */ +ZFS_MODULE_PARAM(zfs, zfs_, max_nvlist_src_size, ULONG, ZMOD_RW, + "Maximum size in bytes allowed for src nvlist passed with ZFS ioctls"); +/* END CSTYLED */