/*
 * Copyright (c) 2020 iXsystems, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
|
|
|
|
|
|
|
|
#include <sys/cdefs.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
|
|
|
|
#include <sys/param.h>
|
2020-10-08 16:37:56 +00:00
|
|
|
#include <sys/buf.h>
|
|
|
|
#include <sys/cmn_err.h>
|
2020-04-14 18:36:28 +00:00
|
|
|
#include <sys/conf.h>
|
2020-10-08 16:37:56 +00:00
|
|
|
#include <sys/dmu.h>
|
|
|
|
#include <sys/dmu_impl.h>
|
|
|
|
#include <sys/dmu_objset.h>
|
|
|
|
#include <sys/dmu_send.h>
|
|
|
|
#include <sys/dmu_tx.h>
|
|
|
|
#include <sys/dsl_bookmark.h>
|
|
|
|
#include <sys/dsl_crypt.h>
|
|
|
|
#include <sys/dsl_dataset.h>
|
|
|
|
#include <sys/dsl_deleg.h>
|
|
|
|
#include <sys/dsl_destroy.h>
|
|
|
|
#include <sys/dsl_dir.h>
|
|
|
|
#include <sys/dsl_prop.h>
|
|
|
|
#include <sys/dsl_scan.h>
|
|
|
|
#include <sys/dsl_userhold.h>
|
|
|
|
#include <sys/errno.h>
|
2020-04-14 18:36:28 +00:00
|
|
|
#include <sys/eventhandler.h>
|
2020-10-08 16:37:56 +00:00
|
|
|
#include <sys/file.h>
|
|
|
|
#include <sys/fm/util.h>
|
|
|
|
#include <sys/fs/zfs.h>
|
2020-04-14 18:36:28 +00:00
|
|
|
#include <sys/kernel.h>
|
2020-10-08 16:37:56 +00:00
|
|
|
#include <sys/kmem.h>
|
2020-04-14 18:36:28 +00:00
|
|
|
#include <sys/lock.h>
|
|
|
|
#include <sys/malloc.h>
|
2020-10-08 16:37:56 +00:00
|
|
|
#include <sys/mount.h>
|
2020-04-14 18:36:28 +00:00
|
|
|
#include <sys/mutex.h>
|
2020-10-08 16:37:56 +00:00
|
|
|
#include <sys/nvpair.h>
|
|
|
|
#include <sys/policy.h>
|
2020-04-14 18:36:28 +00:00
|
|
|
#include <sys/proc.h>
|
2020-10-08 16:37:56 +00:00
|
|
|
#include <sys/sdt.h>
|
2020-04-14 18:36:28 +00:00
|
|
|
#include <sys/spa.h>
|
|
|
|
#include <sys/spa_impl.h>
|
2020-10-08 16:37:56 +00:00
|
|
|
#include <sys/stat.h>
|
2020-04-14 18:36:28 +00:00
|
|
|
#include <sys/sunddi.h>
|
2020-10-08 16:37:56 +00:00
|
|
|
#include <sys/systm.h>
|
2020-04-14 18:36:28 +00:00
|
|
|
#include <sys/taskqueue.h>
|
2020-10-08 16:37:56 +00:00
|
|
|
#include <sys/uio.h>
|
|
|
|
#include <sys/vdev.h>
|
2020-04-14 18:36:28 +00:00
|
|
|
#include <sys/vdev_removal.h>
|
2020-10-08 16:37:56 +00:00
|
|
|
#include <sys/zap.h>
|
|
|
|
#include <sys/zcp.h>
|
|
|
|
#include <sys/zfeature.h>
|
2020-09-30 20:19:49 +00:00
|
|
|
#include <sys/zfs_context.h>
|
2020-10-08 16:37:56 +00:00
|
|
|
#include <sys/zfs_ctldir.h>
|
|
|
|
#include <sys/zfs_dir.h>
|
|
|
|
#include <sys/zfs_ioctl.h>
|
2020-04-14 18:36:28 +00:00
|
|
|
#include <sys/zfs_ioctl_compat.h>
|
|
|
|
#include <sys/zfs_ioctl_impl.h>
|
2020-10-08 16:37:56 +00:00
|
|
|
#include <sys/zfs_onexit.h>
|
|
|
|
#include <sys/zfs_vfsops.h>
|
|
|
|
#include <sys/zfs_znode.h>
|
|
|
|
#include <sys/zio_checksum.h>
|
|
|
|
#include <sys/zone.h>
|
|
|
|
#include <sys/zvol.h>
|
2020-04-14 18:36:28 +00:00
|
|
|
|
2020-10-08 16:37:56 +00:00
|
|
|
#include "zfs_comutil.h"
|
|
|
|
#include "zfs_deleg.h"
|
2020-04-14 18:36:28 +00:00
|
|
|
#include "zfs_namecheck.h"
|
|
|
|
#include "zfs_prop.h"
|
|
|
|
|
|
|
|
SYSCTL_DECL(_vfs_zfs);
|
|
|
|
SYSCTL_DECL(_vfs_zfs_vdev);
|
|
|
|
|
2020-09-30 20:19:49 +00:00
|
|
|
extern uint_t rrw_tsd_key;
|
2020-07-16 04:32:50 +00:00
|
|
|
static int zfs_version_ioctl = ZFS_IOCVER_OZFS;
|
2020-04-14 18:36:28 +00:00
|
|
|
SYSCTL_DECL(_vfs_zfs_version);
|
|
|
|
SYSCTL_INT(_vfs_zfs_version, OID_AUTO, ioctl, CTLFLAG_RD, &zfs_version_ioctl,
|
|
|
|
0, "ZFS_IOCTL_VERSION");
|
|
|
|
|
|
|
|
static struct cdev *zfsdev;
|
|
|
|
|
|
|
|
static struct root_hold_token *zfs_root_token;
|
|
|
|
|
|
|
|
extern uint_t rrw_tsd_key;
|
|
|
|
extern uint_t zfs_allow_log_key;
|
|
|
|
extern uint_t zfs_geom_probe_vdev_key;
|
|
|
|
|
|
|
|
static int zfs__init(void);
|
|
|
|
static int zfs__fini(void);
|
|
|
|
static void zfs_shutdown(void *, int);
|
|
|
|
|
|
|
|
static eventhandler_tag zfs_shutdown_event_tag;
|
|
|
|
|
|
|
|
#define ZFS_MIN_KSTACK_PAGES 4
|
|
|
|
|
|
|
|
static int
|
|
|
|
zfsdev_ioctl(struct cdev *dev, ulong_t zcmd, caddr_t arg, int flag,
|
|
|
|
struct thread *td)
|
|
|
|
{
|
2020-07-16 04:32:50 +00:00
|
|
|
uint_t len;
|
|
|
|
int vecnum;
|
2020-04-14 18:36:28 +00:00
|
|
|
zfs_iocparm_t *zp;
|
|
|
|
zfs_cmd_t *zc;
|
2022-10-27 21:45:44 +00:00
|
|
|
#ifdef ZFS_LEGACY_SUPPORT
|
2020-04-14 18:36:28 +00:00
|
|
|
zfs_cmd_legacy_t *zcl;
|
2022-10-27 21:45:44 +00:00
|
|
|
#endif
|
2020-04-14 18:36:28 +00:00
|
|
|
int rc, error;
|
|
|
|
void *uaddr;
|
|
|
|
|
|
|
|
len = IOCPARM_LEN(zcmd);
|
|
|
|
vecnum = zcmd & 0xff;
|
|
|
|
zp = (void *)arg;
|
|
|
|
error = 0;
|
2022-10-27 21:45:44 +00:00
|
|
|
#ifdef ZFS_LEGACY_SUPPORT
|
2020-04-14 18:36:28 +00:00
|
|
|
zcl = NULL;
|
2022-10-27 21:45:44 +00:00
|
|
|
#endif
|
2020-04-14 18:36:28 +00:00
|
|
|
|
2022-12-14 01:35:07 +00:00
|
|
|
if (len != sizeof (zfs_iocparm_t))
|
2020-04-14 18:36:28 +00:00
|
|
|
return (EINVAL);
|
|
|
|
|
2023-10-09 20:27:18 +00:00
|
|
|
uaddr = (void *)(uintptr_t)zp->zfs_cmd;
|
Reduce need for contiguous memory for ioctls
We've had cases where we trigger an OOM despite having memory freely
available on the system. For example, here, we had about 21GB free:
kernel: Node 0 Normal: 2418758*4kB (UME) 1549533*8kB (UE) 0*16kB
0*32kB 0*64kB 0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB =
22071296kB
The problem being, all the memory is in 4K and 8K contiguous regions,
but the allocation request was for a 16K contiguous region:
kernel: SafeExecutors-4 invoked oom-killer:
gfp_mask=0x42dc0(GFP_KERNEL|__GFP_NOWARN|__GFP_COMP|__GFP_ZERO),
order=2, oom_score_adj=0
The offending allocation came from this call trace:
kernel: Call Trace:
kernel: dump_stack+0x57/0x7a
kernel: dump_header+0x4f/0x1e1
kernel: oom_kill_process.cold.33+0xb/0x10
kernel: out_of_memory+0x1ad/0x490
kernel: __alloc_pages_slowpath+0xd55/0xe40
kernel: __alloc_pages_nodemask+0x2df/0x330
kernel: kmalloc_large_node+0x42/0x90
kernel: __kmalloc_node+0x25a/0x320
kernel: ? spl_kmem_free_impl+0x21/0x30 [spl]
kernel: spl_kmem_alloc_impl+0xa5/0x100 [spl]
kernel: spl_kmem_zalloc+0x19/0x20 [spl]
kernel: zfsdev_ioctl+0x2b/0xe0 [zfs]
kernel: do_vfs_ioctl+0xa9/0x640
kernel: ? __audit_syscall_entry+0xdd/0x130
kernel: ksys_ioctl+0x67/0x90
kernel: __x64_sys_ioctl+0x1a/0x20
kernel: do_syscall_64+0x5e/0x200
kernel: entry_SYSCALL_64_after_hwframe+0x44/0xa9
kernel: RIP: 0033:0x7fdca3674317
The problem is, for each ioctl that ZFS makes, it has to allocate a
zfs_cmd_t structure, which is 13744 bytes in size (on my system):
sdb> sizeof zfs_cmd
(size_t)13744
This size, coupled with the fact that we currently allocate it with
kmem_zalloc, means we need a 16K contiguous region of memory to satisfy
the request.
The solution taken by this change, is to use "vmem" instead of "kmem" to
do the allocation, such that we don't necessarily need a contiguous 16K
memory region to satisfy the allocation.
Arguably, a better solution would be not to require such a large
allocation to begin with (e.g. reduce the size of the zfs_cmd_t
structure), but that'd be a much larger change than this "one liner".
Thus, I've opted for this approach for now; we can always circle back
and attempt to reduce the size of the structure in the future.
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Reviewed-by: Mark Maybee <mark.maybee@delphix.com>
Reviewed-by: Don Brady <don.brady@delphix.com>
Signed-off-by: Prakash Surya <prakash.surya@delphix.com>
Closes #14474
2023-02-14 00:35:59 +00:00
|
|
|
zc = vmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
|
2022-10-27 21:45:44 +00:00
|
|
|
#ifdef ZFS_LEGACY_SUPPORT
|
2020-04-14 18:36:28 +00:00
|
|
|
/*
|
|
|
|
* Remap ioctl code for legacy user binaries
|
|
|
|
*/
|
2020-07-16 04:32:50 +00:00
|
|
|
if (zp->zfs_ioctl_version == ZFS_IOCVER_LEGACY) {
|
|
|
|
vecnum = zfs_ioctl_legacy_to_ozfs(vecnum);
|
|
|
|
if (vecnum < 0) {
|
Reduce need for contiguous memory for ioctls
We've had cases where we trigger an OOM despite having memory freely
available on the system. For example, here, we had about 21GB free:
kernel: Node 0 Normal: 2418758*4kB (UME) 1549533*8kB (UE) 0*16kB
0*32kB 0*64kB 0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB =
22071296kB
The problem being, all the memory is in 4K and 8K contiguous regions,
but the allocation request was for a 16K contiguous region:
kernel: SafeExecutors-4 invoked oom-killer:
gfp_mask=0x42dc0(GFP_KERNEL|__GFP_NOWARN|__GFP_COMP|__GFP_ZERO),
order=2, oom_score_adj=0
The offending allocation came from this call trace:
kernel: Call Trace:
kernel: dump_stack+0x57/0x7a
kernel: dump_header+0x4f/0x1e1
kernel: oom_kill_process.cold.33+0xb/0x10
kernel: out_of_memory+0x1ad/0x490
kernel: __alloc_pages_slowpath+0xd55/0xe40
kernel: __alloc_pages_nodemask+0x2df/0x330
kernel: kmalloc_large_node+0x42/0x90
kernel: __kmalloc_node+0x25a/0x320
kernel: ? spl_kmem_free_impl+0x21/0x30 [spl]
kernel: spl_kmem_alloc_impl+0xa5/0x100 [spl]
kernel: spl_kmem_zalloc+0x19/0x20 [spl]
kernel: zfsdev_ioctl+0x2b/0xe0 [zfs]
kernel: do_vfs_ioctl+0xa9/0x640
kernel: ? __audit_syscall_entry+0xdd/0x130
kernel: ksys_ioctl+0x67/0x90
kernel: __x64_sys_ioctl+0x1a/0x20
kernel: do_syscall_64+0x5e/0x200
kernel: entry_SYSCALL_64_after_hwframe+0x44/0xa9
kernel: RIP: 0033:0x7fdca3674317
The problem is, for each ioctl that ZFS makes, it has to allocate a
zfs_cmd_t structure, which is 13744 bytes in size (on my system):
sdb> sizeof zfs_cmd
(size_t)13744
This size, coupled with the fact that we currently allocate it with
kmem_zalloc, means we need a 16K contiguous region of memory to satisfy
the request.
The solution taken by this change, is to use "vmem" instead of "kmem" to
do the allocation, such that we don't necessarily need a contiguous 16K
memory region to satisfy the allocation.
Arguably, a better solution would be not to require such a large
allocation to begin with (e.g. reduce the size of the zfs_cmd_t
structure), but that'd be a much larger change than this "one liner".
Thus, I've opted for this approach for now; we can always circle back
and attempt to reduce the size of the structure in the future.
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Reviewed-by: Mark Maybee <mark.maybee@delphix.com>
Reviewed-by: Don Brady <don.brady@delphix.com>
Signed-off-by: Prakash Surya <prakash.surya@delphix.com>
Closes #14474
2023-02-14 00:35:59 +00:00
|
|
|
vmem_free(zc, sizeof (zfs_cmd_t));
|
2020-04-14 18:36:28 +00:00
|
|
|
return (ENOTSUP);
|
|
|
|
}
|
Reduce need for contiguous memory for ioctls
We've had cases where we trigger an OOM despite having memory freely
available on the system. For example, here, we had about 21GB free:
kernel: Node 0 Normal: 2418758*4kB (UME) 1549533*8kB (UE) 0*16kB
0*32kB 0*64kB 0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB =
22071296kB
The problem being, all the memory is in 4K and 8K contiguous regions,
but the allocation request was for a 16K contiguous region:
kernel: SafeExecutors-4 invoked oom-killer:
gfp_mask=0x42dc0(GFP_KERNEL|__GFP_NOWARN|__GFP_COMP|__GFP_ZERO),
order=2, oom_score_adj=0
The offending allocation came from this call trace:
kernel: Call Trace:
kernel: dump_stack+0x57/0x7a
kernel: dump_header+0x4f/0x1e1
kernel: oom_kill_process.cold.33+0xb/0x10
kernel: out_of_memory+0x1ad/0x490
kernel: __alloc_pages_slowpath+0xd55/0xe40
kernel: __alloc_pages_nodemask+0x2df/0x330
kernel: kmalloc_large_node+0x42/0x90
kernel: __kmalloc_node+0x25a/0x320
kernel: ? spl_kmem_free_impl+0x21/0x30 [spl]
kernel: spl_kmem_alloc_impl+0xa5/0x100 [spl]
kernel: spl_kmem_zalloc+0x19/0x20 [spl]
kernel: zfsdev_ioctl+0x2b/0xe0 [zfs]
kernel: do_vfs_ioctl+0xa9/0x640
kernel: ? __audit_syscall_entry+0xdd/0x130
kernel: ksys_ioctl+0x67/0x90
kernel: __x64_sys_ioctl+0x1a/0x20
kernel: do_syscall_64+0x5e/0x200
kernel: entry_SYSCALL_64_after_hwframe+0x44/0xa9
kernel: RIP: 0033:0x7fdca3674317
The problem is, for each ioctl that ZFS makes, it has to allocate a
zfs_cmd_t structure, which is 13744 bytes in size (on my system):
sdb> sizeof zfs_cmd
(size_t)13744
This size, coupled with the fact that we currently allocate it with
kmem_zalloc, means we need a 16K contiguous region of memory to satisfy
the request.
The solution taken by this change, is to use "vmem" instead of "kmem" to
do the allocation, such that we don't necessarily need a contiguous 16K
memory region to satisfy the allocation.
Arguably, a better solution would be not to require such a large
allocation to begin with (e.g. reduce the size of the zfs_cmd_t
structure), but that'd be a much larger change than this "one liner".
Thus, I've opted for this approach for now; we can always circle back
and attempt to reduce the size of the structure in the future.
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Reviewed-by: Mark Maybee <mark.maybee@delphix.com>
Reviewed-by: Don Brady <don.brady@delphix.com>
Signed-off-by: Prakash Surya <prakash.surya@delphix.com>
Closes #14474
2023-02-14 00:35:59 +00:00
|
|
|
zcl = vmem_zalloc(sizeof (zfs_cmd_legacy_t), KM_SLEEP);
|
2020-04-14 18:36:28 +00:00
|
|
|
if (copyin(uaddr, zcl, sizeof (zfs_cmd_legacy_t))) {
|
|
|
|
error = SET_ERROR(EFAULT);
|
|
|
|
goto out;
|
|
|
|
}
|
2020-07-16 04:32:50 +00:00
|
|
|
zfs_cmd_legacy_to_ozfs(zcl, zc);
|
2022-10-27 21:45:44 +00:00
|
|
|
} else
|
|
|
|
#endif
|
|
|
|
if (copyin(uaddr, zc, sizeof (zfs_cmd_t))) {
|
2020-04-14 18:36:28 +00:00
|
|
|
error = SET_ERROR(EFAULT);
|
|
|
|
goto out;
|
|
|
|
}
|
2020-06-08 20:57:22 +00:00
|
|
|
error = zfsdev_ioctl_common(vecnum, zc, 0);
|
2022-10-27 21:45:44 +00:00
|
|
|
#ifdef ZFS_LEGACY_SUPPORT
|
2020-04-14 18:36:28 +00:00
|
|
|
if (zcl) {
|
2020-07-16 04:32:50 +00:00
|
|
|
zfs_cmd_ozfs_to_legacy(zc, zcl);
|
2020-04-14 18:36:28 +00:00
|
|
|
rc = copyout(zcl, uaddr, sizeof (*zcl));
|
2022-10-27 21:45:44 +00:00
|
|
|
} else
|
|
|
|
#endif
|
|
|
|
{
|
2020-04-14 18:36:28 +00:00
|
|
|
rc = copyout(zc, uaddr, sizeof (*zc));
|
|
|
|
}
|
|
|
|
if (error == 0 && rc != 0)
|
|
|
|
error = SET_ERROR(EFAULT);
|
|
|
|
out:
|
2022-10-27 21:45:44 +00:00
|
|
|
#ifdef ZFS_LEGACY_SUPPORT
|
2020-04-14 18:36:28 +00:00
|
|
|
if (zcl)
|
Reduce need for contiguous memory for ioctls
We've had cases where we trigger an OOM despite having memory freely
available on the system. For example, here, we had about 21GB free:
kernel: Node 0 Normal: 2418758*4kB (UME) 1549533*8kB (UE) 0*16kB
0*32kB 0*64kB 0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB =
22071296kB
The problem being, all the memory is in 4K and 8K contiguous regions,
but the allocation request was for a 16K contiguous region:
kernel: SafeExecutors-4 invoked oom-killer:
gfp_mask=0x42dc0(GFP_KERNEL|__GFP_NOWARN|__GFP_COMP|__GFP_ZERO),
order=2, oom_score_adj=0
The offending allocation came from this call trace:
kernel: Call Trace:
kernel: dump_stack+0x57/0x7a
kernel: dump_header+0x4f/0x1e1
kernel: oom_kill_process.cold.33+0xb/0x10
kernel: out_of_memory+0x1ad/0x490
kernel: __alloc_pages_slowpath+0xd55/0xe40
kernel: __alloc_pages_nodemask+0x2df/0x330
kernel: kmalloc_large_node+0x42/0x90
kernel: __kmalloc_node+0x25a/0x320
kernel: ? spl_kmem_free_impl+0x21/0x30 [spl]
kernel: spl_kmem_alloc_impl+0xa5/0x100 [spl]
kernel: spl_kmem_zalloc+0x19/0x20 [spl]
kernel: zfsdev_ioctl+0x2b/0xe0 [zfs]
kernel: do_vfs_ioctl+0xa9/0x640
kernel: ? __audit_syscall_entry+0xdd/0x130
kernel: ksys_ioctl+0x67/0x90
kernel: __x64_sys_ioctl+0x1a/0x20
kernel: do_syscall_64+0x5e/0x200
kernel: entry_SYSCALL_64_after_hwframe+0x44/0xa9
kernel: RIP: 0033:0x7fdca3674317
The problem is, for each ioctl that ZFS makes, it has to allocate a
zfs_cmd_t structure, which is 13744 bytes in size (on my system):
sdb> sizeof zfs_cmd
(size_t)13744
This size, coupled with the fact that we currently allocate it with
kmem_zalloc, means we need a 16K contiguous region of memory to satisfy
the request.
The solution taken by this change, is to use "vmem" instead of "kmem" to
do the allocation, such that we don't necessarily need a contiguous 16K
memory region to satisfy the allocation.
Arguably, a better solution would be not to require such a large
allocation to begin with (e.g. reduce the size of the zfs_cmd_t
structure), but that'd be a much larger change than this "one liner".
Thus, I've opted for this approach for now; we can always circle back
and attempt to reduce the size of the structure in the future.
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Reviewed-by: Mark Maybee <mark.maybee@delphix.com>
Reviewed-by: Don Brady <don.brady@delphix.com>
Signed-off-by: Prakash Surya <prakash.surya@delphix.com>
Closes #14474
2023-02-14 00:35:59 +00:00
|
|
|
vmem_free(zcl, sizeof (zfs_cmd_legacy_t));
|
2022-10-27 21:45:44 +00:00
|
|
|
#endif
|
Reduce need for contiguous memory for ioctls
We've had cases where we trigger an OOM despite having memory freely
available on the system. For example, here, we had about 21GB free:
kernel: Node 0 Normal: 2418758*4kB (UME) 1549533*8kB (UE) 0*16kB
0*32kB 0*64kB 0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB =
22071296kB
The problem being, all the memory is in 4K and 8K contiguous regions,
but the allocation request was for a 16K contiguous region:
kernel: SafeExecutors-4 invoked oom-killer:
gfp_mask=0x42dc0(GFP_KERNEL|__GFP_NOWARN|__GFP_COMP|__GFP_ZERO),
order=2, oom_score_adj=0
The offending allocation came from this call trace:
kernel: Call Trace:
kernel: dump_stack+0x57/0x7a
kernel: dump_header+0x4f/0x1e1
kernel: oom_kill_process.cold.33+0xb/0x10
kernel: out_of_memory+0x1ad/0x490
kernel: __alloc_pages_slowpath+0xd55/0xe40
kernel: __alloc_pages_nodemask+0x2df/0x330
kernel: kmalloc_large_node+0x42/0x90
kernel: __kmalloc_node+0x25a/0x320
kernel: ? spl_kmem_free_impl+0x21/0x30 [spl]
kernel: spl_kmem_alloc_impl+0xa5/0x100 [spl]
kernel: spl_kmem_zalloc+0x19/0x20 [spl]
kernel: zfsdev_ioctl+0x2b/0xe0 [zfs]
kernel: do_vfs_ioctl+0xa9/0x640
kernel: ? __audit_syscall_entry+0xdd/0x130
kernel: ksys_ioctl+0x67/0x90
kernel: __x64_sys_ioctl+0x1a/0x20
kernel: do_syscall_64+0x5e/0x200
kernel: entry_SYSCALL_64_after_hwframe+0x44/0xa9
kernel: RIP: 0033:0x7fdca3674317
The problem is, for each ioctl that ZFS makes, it has to allocate a
zfs_cmd_t structure, which is 13744 bytes in size (on my system):
sdb> sizeof zfs_cmd
(size_t)13744
This size, coupled with the fact that we currently allocate it with
kmem_zalloc, means we need a 16K contiguous region of memory to satisfy
the request.
The solution taken by this change, is to use "vmem" instead of "kmem" to
do the allocation, such that we don't necessarily need a contiguous 16K
memory region to satisfy the allocation.
Arguably, a better solution would be not to require such a large
allocation to begin with (e.g. reduce the size of the zfs_cmd_t
structure), but that'd be a much larger change than this "one liner".
Thus, I've opted for this approach for now; we can always circle back
and attempt to reduce the size of the structure in the future.
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Reviewed-by: Mark Maybee <mark.maybee@delphix.com>
Reviewed-by: Don Brady <don.brady@delphix.com>
Signed-off-by: Prakash Surya <prakash.surya@delphix.com>
Closes #14474
2023-02-14 00:35:59 +00:00
|
|
|
vmem_free(zc, sizeof (zfs_cmd_t));
|
2020-09-30 20:19:49 +00:00
|
|
|
MPASS(tsd_get(rrw_tsd_key) == NULL);
|
2020-04-14 18:36:28 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
zfsdev_close(void *data)
|
|
|
|
{
|
2021-03-16 13:04:58 +00:00
|
|
|
zfsdev_state_destroy(data);
|
2020-04-14 18:36:28 +00:00
|
|
|
}
|
|
|
|
|
2021-03-16 13:04:58 +00:00
|
|
|
void
|
|
|
|
zfsdev_private_set_state(void *priv __unused, zfsdev_state_t *zs)
|
2020-04-14 18:36:28 +00:00
|
|
|
{
|
|
|
|
devfs_set_cdevpriv(zs, zfsdev_close);
|
2021-03-16 13:04:58 +00:00
|
|
|
}
|
2020-04-14 18:36:28 +00:00
|
|
|
|
2021-03-16 13:04:58 +00:00
|
|
|
/*
 * Return the state for a private handle.  The handle is itself the
 * zfsdev_state_t pointer, so this is a plain pointer conversion.
 */
zfsdev_state_t *
zfsdev_private_get_state(void *priv)
{
	zfsdev_state_t *zs = priv;

	return (zs);
}
|
|
|
|
|
|
|
|
static int
|
2021-03-16 13:04:58 +00:00
|
|
|
zfsdev_open(struct cdev *devp __unused, int flag __unused, int mode __unused,
|
|
|
|
struct thread *td __unused)
|
2020-04-14 18:36:28 +00:00
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
mutex_enter(&zfsdev_state_lock);
|
2021-03-16 13:04:58 +00:00
|
|
|
error = zfsdev_state_init(NULL);
|
2020-04-14 18:36:28 +00:00
|
|
|
mutex_exit(&zfsdev_state_lock);
|
|
|
|
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct cdevsw zfs_cdevsw = {
|
|
|
|
.d_version = D_VERSION,
|
|
|
|
.d_open = zfsdev_open,
|
|
|
|
.d_ioctl = zfsdev_ioctl,
|
|
|
|
.d_name = ZFS_DRIVER
|
|
|
|
};
|
|
|
|
|
|
|
|
int
|
|
|
|
zfsdev_attach(void)
|
|
|
|
{
|
2022-09-08 17:40:18 +00:00
|
|
|
struct make_dev_args args;
|
|
|
|
|
|
|
|
make_dev_args_init(&args);
|
|
|
|
args.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
|
|
|
|
args.mda_devsw = &zfs_cdevsw;
|
|
|
|
args.mda_cr = NULL;
|
|
|
|
args.mda_uid = UID_ROOT;
|
|
|
|
args.mda_gid = GID_OPERATOR;
|
|
|
|
args.mda_mode = 0666;
|
|
|
|
return (make_dev_s(&args, &zfsdev, ZFS_DRIVER));
|
2020-04-14 18:36:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
zfsdev_detach(void)
|
|
|
|
{
|
|
|
|
if (zfsdev != NULL)
|
|
|
|
destroy_dev(zfsdev);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
zfs__init(void)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
#if KSTACK_PAGES < ZFS_MIN_KSTACK_PAGES
|
|
|
|
printf("ZFS NOTICE: KSTACK_PAGES is %d which could result in stack "
|
|
|
|
"overflow panic!\nPlease consider adding "
|
|
|
|
"'options KSTACK_PAGES=%d' to your kernel config\n", KSTACK_PAGES,
|
|
|
|
ZFS_MIN_KSTACK_PAGES);
|
|
|
|
#endif
|
|
|
|
zfs_root_token = root_mount_hold("ZFS");
|
|
|
|
if ((error = zfs_kmod_init()) != 0) {
|
|
|
|
printf("ZFS: Failed to Load ZFS Filesystem"
|
|
|
|
", rc = %d\n", error);
|
|
|
|
root_mount_rel(zfs_root_token);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
tsd_create(&zfs_geom_probe_vdev_key, NULL);
|
|
|
|
|
|
|
|
printf("ZFS storage pool version: features support ("
|
|
|
|
SPA_VERSION_STRING ")\n");
|
|
|
|
root_mount_rel(zfs_root_token);
|
|
|
|
ddi_sysevent_init();
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
zfs__fini(void)
|
|
|
|
{
|
|
|
|
if (zfs_busy() || zvol_busy() ||
|
|
|
|
zio_injection_enabled) {
|
|
|
|
return (EBUSY);
|
|
|
|
}
|
|
|
|
zfs_kmod_fini();
|
|
|
|
tsd_destroy(&zfs_geom_probe_vdev_key);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
zfs_shutdown(void *arg __unused, int howto __unused)
|
|
|
|
{
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ZFS fini routines can not properly work in a panic-ed system.
|
|
|
|
*/
|
|
|
|
if (panicstr == NULL)
|
|
|
|
zfs__fini();
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
zfs_modevent(module_t mod, int type, void *unused __unused)
|
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
case MOD_LOAD:
|
|
|
|
err = zfs__init();
|
|
|
|
if (err == 0)
|
|
|
|
zfs_shutdown_event_tag = EVENTHANDLER_REGISTER(
|
|
|
|
shutdown_post_sync, zfs_shutdown, NULL,
|
|
|
|
SHUTDOWN_PRI_FIRST);
|
|
|
|
return (err);
|
|
|
|
case MOD_UNLOAD:
|
|
|
|
err = zfs__fini();
|
|
|
|
if (err == 0 && zfs_shutdown_event_tag != NULL)
|
|
|
|
EVENTHANDLER_DEREGISTER(shutdown_post_sync,
|
|
|
|
zfs_shutdown_event_tag);
|
|
|
|
return (err);
|
|
|
|
case MOD_SHUTDOWN:
|
|
|
|
return (0);
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return (EOPNOTSUPP);
|
|
|
|
}
|
|
|
|
|
|
|
|
static moduledata_t zfs_mod = {
|
|
|
|
"zfsctrl",
|
|
|
|
zfs_modevent,
|
|
|
|
0
|
|
|
|
};
|
|
|
|
|
|
|
|
#ifdef _KERNEL
|
|
|
|
EVENTHANDLER_DEFINE(mountroot, spa_boot_init, NULL, 0);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
DECLARE_MODULE(zfsctrl, zfs_mod, SI_SUB_CLOCKS, SI_ORDER_ANY);
|
|
|
|
MODULE_VERSION(zfsctrl, 1);
|
2020-06-16 18:47:04 +00:00
|
|
|
#if __FreeBSD_version > 1300092
|
|
|
|
MODULE_DEPEND(zfsctrl, xdr, 1, 1, 1);
|
|
|
|
#else
|
2020-04-14 18:36:28 +00:00
|
|
|
MODULE_DEPEND(zfsctrl, krpc, 1, 1, 1);
|
2020-06-16 18:47:04 +00:00
|
|
|
#endif
|
2020-04-14 18:36:28 +00:00
|
|
|
MODULE_DEPEND(zfsctrl, acl_nfs4, 1, 1, 1);
|
|
|
|
MODULE_DEPEND(zfsctrl, crypto, 1, 1, 1);
|
2021-08-13 20:42:45 +00:00
|
|
|
MODULE_DEPEND(zfsctrl, zlib, 1, 1, 1);
|