2014-01-21 21:30:03 +00:00
|
|
|
/*
|
2020-10-09 03:10:13 +00:00
|
|
|
* This file is part of the ZFS Event Daemon (ZED).
|
|
|
|
*
|
2014-01-21 21:30:03 +00:00
|
|
|
* Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049).
|
|
|
|
* Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC.
|
2015-05-06 22:56:03 +00:00
|
|
|
* Refer to the ZoL git commit log for authoritative copyright attribution.
|
|
|
|
*
|
|
|
|
* The contents of this file are subject to the terms of the
|
|
|
|
* Common Development and Distribution License Version 1.0 (CDDL-1.0).
|
|
|
|
* You can obtain a copy of the license from the top-level file
|
|
|
|
* "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
|
|
|
|
* You may not use this file except in compliance with the license.
|
2014-01-21 21:30:03 +00:00
|
|
|
*/
|
|
|
|
|
zed: implement close_from() in terms of /proc/self/fd, if available
/dev/fd on Darwin
Consider the following strace output:
prlimit64(0, RLIMIT_NOFILE, NULL, {rlim_cur=1024, rlim_max=1024*1024}) = 0
Yes, that is well over a million file descriptors!
This reduces the ZED start-up time from "at least a second" to
"instantaneous", and, under strace, from "don't even try" to "usable"
by simple virtue of doing five syscalls instead of over a million;
in most cases the main loop does nothing
Recent Linuxes (5.8+) have close_range(2) for this, but that's an
overoptimisation (and libcs don't have wrappers for it yet)
This is also run by the ZEDLET pre-exec. Compare:
Finished "all-syslog.sh" eid=13 pid=6717 time=1.027100s exit=0
Finished "history_event-zfs-list-cacher.sh" eid=13 pid=6718 time=1.046923s exit=0
to
Finished "all-syslog.sh" eid=12 pid=4834 time=0.001836s exit=0
Finished "history_event-zfs-list-cacher.sh" eid=12 pid=4835 time=0.001346s exit=0
lol
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Closes #11834
2021-04-02 13:10:34 +00:00
|
|
|
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include "zed_file.h"
#include "zed_log.h"
|
|
|
|
|
|
|
|
/*
 * Fill [buf] with as many as [n] bytes taken from [fd].
 * A read interrupted by a signal (EINTR) is retried; end-of-file stops
 * the transfer early.
 * Return the count of bytes stored in [buf] (0 if EOF was hit before any
 * data arrived), or -1 if read() failed for a reason other than EINTR.
 */
ssize_t
zed_file_read_n(int fd, void *buf, size_t n)
{
	unsigned char *dst = buf;
	size_t total = 0;

	while (total < n) {
		ssize_t rv = read(fd, dst + total, n - total);

		if (rv > 0) {
			/* Partial progress; keep going for the remainder. */
			total += rv;
			continue;
		}
		if (rv == 0)
			break;		/* EOF */
		if (errno != EINTR)
			return (-1);
		/* Interrupted before any data was transferred: retry. */
	}
	return (total);
}
|
|
|
|
|
|
|
|
/*
 * Push all [n] bytes of [buf] out to [fd], issuing as many write() calls
 * as it takes and retrying any that are interrupted by a signal (EINTR).
 * Return [n] once everything has been written, or -1 if write() failed
 * for a reason other than EINTR.
 */
ssize_t
zed_file_write_n(int fd, void *buf, size_t n)
{
	const unsigned char *src = buf;
	size_t done = 0;

	while (done < n) {
		ssize_t rv = write(fd, src + done, n - done);

		if (rv >= 0) {
			/* Short writes are fine; advance past what was sent. */
			done += rv;
			continue;
		}
		if (errno != EINTR)
			return (-1);
		/* Interrupted before any data was transferred: retry. */
	}
	return (n);
}
|
|
|
|
|
|
|
|
/*
 * Try to take an exclusive (write) advisory lock covering the whole file
 * referred to by the open file descriptor [fd], without blocking.
 * Return 0 if the lock was acquired, 1 if fcntl() reported EACCES or
 * EAGAIN (a conflicting lock is held by another process), or -1 on any
 * other error (with errno set).  A negative [fd] yields -1 with EBADF.
 */
int
zed_file_lock(int fd)
{
	struct flock fl;

	if (fd < 0) {
		errno = EBADF;
		return (-1);
	}
	/* l_start = 0 with l_len = 0 describes the entire file. */
	fl.l_whence = SEEK_SET;
	fl.l_start = 0;
	fl.l_len = 0;
	fl.l_type = F_WRLCK;

	if (fcntl(fd, F_SETLK, &fl) >= 0)
		return (0);

	return ((errno == EACCES || errno == EAGAIN) ? 1 : -1);
}
|
|
|
|
|
|
|
|
/*
 * Drop any advisory lock this process holds on the whole file referred
 * to by the open file descriptor [fd].
 * Return 0 on success, or -1 on error (with errno set).  A negative [fd]
 * yields -1 with EBADF.
 */
int
zed_file_unlock(int fd)
{
	struct flock fl;

	if (fd < 0) {
		errno = EBADF;
		return (-1);
	}
	/* l_start = 0 with l_len = 0 describes the entire file. */
	fl.l_whence = SEEK_SET;
	fl.l_start = 0;
	fl.l_len = 0;
	fl.l_type = F_UNLCK;

	return ((fcntl(fd, F_SETLK, &fl) < 0) ? -1 : 0);
}
|
|
|
|
|
|
|
|
/*
 * Probe whether an exclusive advisory lock on the whole file referred to
 * by the open file descriptor [fd] could be acquired, without actually
 * acquiring it.
 * Return 0 if nothing would block the lock, the PID (>0) reported by
 * fcntl() for the process holding a conflicting lock, or -1 on error
 * (with errno set).  A negative [fd] yields -1 with EBADF.
 */
pid_t
zed_file_is_locked(int fd)
{
	struct flock fl;

	if (fd < 0) {
		errno = EBADF;
		return (-1);
	}
	/* Describe the lock we would like: write lock over the entire file. */
	fl.l_whence = SEEK_SET;
	fl.l_start = 0;
	fl.l_len = 0;
	fl.l_type = F_WRLCK;

	if (fcntl(fd, F_GETLK, &fl) < 0)
		return (-1);

	/* F_GETLK rewrites l_type to F_UNLCK when no conflict exists. */
	return ((fl.l_type == F_UNLCK) ? 0 : fl.l_pid);
}
|
|
|
|
|
zed: implement close_from() in terms of /proc/self/fd, if available
/dev/fd on Darwin
Consider the following strace output:
prlimit64(0, RLIMIT_NOFILE, NULL, {rlim_cur=1024, rlim_max=1024*1024}) = 0
Yes, that is well over a million file descriptors!
This reduces the ZED start-up time from "at least a second" to
"instantaneous", and, under strace, from "don't even try" to "usable"
by simple virtue of doing five syscalls instead of over a million;
in most cases the main loop does nothing
Recent Linuxes (5.8+) have close_range(2) for this, but that's an
overoptimisation (and libcs don't have wrappers for it yet)
This is also run by the ZEDLET pre-exec. Compare:
Finished "all-syslog.sh" eid=13 pid=6717 time=1.027100s exit=0
Finished "history_event-zfs-list-cacher.sh" eid=13 pid=6718 time=1.046923s exit=0
to
Finished "all-syslog.sh" eid=12 pid=4834 time=0.001836s exit=0
Finished "history_event-zfs-list-cacher.sh" eid=12 pid=4835 time=0.001346s exit=0
lol
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Closes #11834
2021-04-02 13:10:34 +00:00
|
|
|
|
|
|
|
#if __APPLE__
#define PROC_SELF_FD "/dev/fd"
#else /* Linux-compatible layout */
#define PROC_SELF_FD "/proc/self/fd"
#endif

/*
 * Close all open file descriptors greater than or equal to [lowfd].
 * Any errors encountered while closing file descriptors are ignored,
 * and the caller's errno is preserved across the call.
 *
 * When the per-process descriptor directory (PROC_SELF_FD) can be listed,
 * only descriptors up to and including the highest one found there are
 * closed.  This avoids iterating up to the descriptor limit, which can be
 * over a million and made start-up and every ZEDLET pre-exec slow.
 * Otherwise every descriptor below sysconf(_SC_OPEN_MAX) is closed; that
 * is the current (soft) limit, and descriptors above it cannot be in use
 * without first raising the limit.
 */
void
zed_file_close_from(int lowfd)
{
	int errno_bak = errno;
	int endfd = 0;		/* one past the highest fd to close */
	int fd;
	DIR *fddir;
	struct dirent *fdent;

	if ((fddir = opendir(PROC_SELF_FD)) != NULL) {
		int selffd = dirfd(fddir);

		while ((fdent = readdir(fddir)) != NULL) {
			char *end;
			long val = strtol(fdent->d_name, &end, 10);

			/* Skip ".", ".." and anything that is not a valid fd. */
			if (end == fdent->d_name || *end != '\0' ||
			    val < 0 || val >= INT_MAX)
				continue;

			fd = (int)val;
			/*
			 * The directory stream's own descriptor is released
			 * by closedir() below, so it must not raise the bound.
			 */
			if (fd != selffd && fd >= endfd)
				endfd = fd + 1;
		}
		(void) closedir(fddir);
	} else {
		long lim = sysconf(_SC_OPEN_MAX);

		/* sysconf() returns -1 when the limit is indeterminate. */
		if (lim > INT_MAX)
			lim = INT_MAX;
		if (lim > 0)
			endfd = (int)lim;
	}

	/* Negative descriptors can never be open; start no lower than 0. */
	for (fd = (lowfd < 0) ? 0 : lowfd; fd < endfd; fd++)
		(void) close(fd);

	errno = errno_bak;
}
|
|
|
|
|
|
|
|
/*
 * Turn on the FD_CLOEXEC flag for file descriptor [fd] so that it is
 * closed automatically when one of the exec functions succeeds.  The
 * descriptor's other flags are left as they were.
 * Return 0 on success, or -1 on error.  A negative [fd] yields -1 with
 * errno set to EBADF.
 *
 * FIXME: No longer needed?
 */
int
zed_file_close_on_exec(int fd)
{
	int fdflags;

	if (fd < 0) {
		errno = EBADF;
		return (-1);
	}
	/* Read-modify-write so existing descriptor flags survive. */
	if ((fdflags = fcntl(fd, F_GETFD)) == -1)
		return (-1);

	return ((fcntl(fd, F_SETFD, fdflags | FD_CLOEXEC) == -1) ? -1 : 0);
}
|