zfs/etc/systemd/system-generators/zfs-mount-generator.c

1090 lines
30 KiB
C

/*
* Copyright (c) 2017 Antonio Russo <antonio.e.russo@gmail.com>
* Copyright (c) 2020 InsanePrawn <insane.prawny@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <sys/resource.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <semaphore.h>
#include <stdbool.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <time.h>
#include <regex.h>
#include <search.h>
#include <dirent.h>
#include <string.h>
#include <stdlib.h>
#include <limits.h>
#include <errno.h>
#include <libzfs.h>
/*
 * Comparators for tsearch()/tfind() and qsort()/bsearch().
 *
 * Those interfaces call the comparator through an
 * int (*)(const void *, const void *) pointer. Casting a function of a
 * different type (such as strcmp() or a comparator taking const pid_t *)
 * to that pointer type and calling it is undefined behaviour, so both
 * comparators are declared with the exact expected signature and the
 * STRCMP / PID_T_CMP names simply refer to them.
 */
static int
str_cmp(const void *lhs, const void *rhs)
{
	return (strcmp((const char *)lhs, (const char *)rhs));
}
#define	STRCMP str_cmp

/*
 * Orders two pid_t values: negative if *lhs < *rhs, zero if equal,
 * positive if *lhs > *rhs.
 *
 * pid_t is a signed integer type, quoth sys_types.h(7posix):
 * > blksize_t, pid_t, and ssize_t shall be signed integer types.
 * so relational operators work directly; a three-way compare is used
 * instead of a subtraction, which could overflow for operands of
 * opposite sign.
 */
static int
pid_t_cmp(const void *lhs, const void *rhs)
{
	const pid_t l = *(const pid_t *)lhs;
	const pid_t r = *(const pid_t *)rhs;

	return ((l > r) - (l < r));
}
#define	PID_T_CMP pid_t_cmp
/*
 * Reports an out-of-memory condition on stderr, tagged with the PID and the
 * source line of the failing call site, then terminates the process with
 * _exit(1). Wrapped in do/while (0) so it can be used as a single statement.
 */
#define EXIT_ENOMEM() \
do { \
fprintf(stderr, PROGNAME "[%d]: " \
"not enough memory (L%d)!\n", getpid(), __LINE__); \
_exit(1); \
} while (0)
/* Program name, used as the prefix of log messages and in OUTPUT_HEADER */
#define PROGNAME "zfs-mount-generator"
/*
 * Directory holding the cache files that main() reads line by line.
 * SYSCONFDIR is not defined in this file; presumably the build system
 * supplies it.
 */
#define FSLIST SYSCONFDIR "/zfs/zfs-list.cache"
/*
 * Path of the zfs binary embedded in the generated key-load units.
 * SBINDIR is likewise expected to come from outside this file.
 */
#define ZFS SBINDIR "/zfs"
/* First lines of every unit file this generator writes */
#define OUTPUT_HEADER \
"# Automatically generated by " PROGNAME "\n" \
"\n"
/*
* Starts like the one in libzfs_util.c but also matches "//"
* and captures until the end, since we actually use it for path extraction.
* Capture group 1 is the URI scheme, capture group 2 is everything after
* "://"; line_worker() uses group 2 as the key file path.
*/
#define URI_REGEX_S "^\\([A-Za-z][A-Za-z0-9+.\\-]*\\):\\/\\/\\(.*\\)$"
/* URI_REGEX_S compiled once by main(), matched against keylocation */
static regex_t uri_regex;
/* argv[0] as received by main(); line_worker() overwrites it in place */
static char *argv0;
/* Output directory; main() replaces the default with argv[1] if given */
static const char *destdir = "/tmp";
/* O_PATH descriptor for destdir, base for all *at() calls */
static int destdir_fd = -1;
static void *known_pools = NULL; /* tsearch() of C strings */
/*
 * State shared between the parent and all forked workers; main() places it
 * in a MAP_SHARED anonymous mapping and initialises both semaphores as
 * process-shared.
 */
static struct {
/*
 * Starts at 0. Workers whose canmount is not "on" wait on it; main() posts
 * it once per such worker after every canmount=on worker has exited.
 */
sem_t noauto_not_on_sem;
/*
 * Starts at 1. canmount=on workers wait on it before looking at the
 * output file and the tracking array, and post it when done.
 */
sem_t noauto_names_sem;
/* Number of entries of noauto_names currently in use */
size_t noauto_names_len;
/* Number of entries of noauto_names that fit in the mapping */
size_t noauto_names_max;
/*
 * Unit file names created for canmount=noauto datasets. Entries are
 * written with strncpy(..., NAME_MAX) and compared with
 * strncmp(..., NAME_MAX), so a name of NAME_MAX bytes or more is stored
 * without a terminating NUL.
 */
char noauto_names[][NAME_MAX];
} *noauto_files;
/*
 * Builds a systemd unit name: prepend + escaped(input) + append.
 *
 * Escaping rules applied to input:
 *   - a leading '.' becomes "\x2e";
 *   - '/' becomes '-';
 *   - ASCII alphanumerics and ':', '_', '.' are copied verbatim;
 *   - every other byte becomes "\xNN" with NN its two-digit lowercase
 *     hexadecimal value.
 *
 * prepend and append are copied unescaped.
 *
 * Returns a malloc()ed NUL-terminated string owned by the caller.
 * Does not return on allocation failure (EXIT_ENOMEM()).
 */
static char *
systemd_escape(const char *input, const char *prepend, const char *append)
{
	static const char hexdigits[] = "0123456789abcdef";

	size_t len = strlen(input);
	size_t applen = strlen(append);
	size_t prelen = strlen(prepend);
	/* Worst case: every input byte expands to a 4-byte "\xNN" sequence */
	char *ret = malloc(4 * len + prelen + applen + 1);
	if (!ret)
		EXIT_ENOMEM();

	memcpy(ret, prepend, prelen);
	char *out = ret + prelen;

	const char *cur = input;
	if (*cur == '.') {
		memcpy(out, "\\x2e", 4);
		out += 4;
		++cur;
	}
	for (; *cur; ++cur) {
		if (*cur == '/')
			*(out++) = '-';
		else if (strchr(
		    "0123456789"
		    "abcdefghijklmnopqrstuvwxyz"
		    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		    ":_.", *cur))
			*(out++) = *cur;
		else {
			/*
			 * Go through unsigned char: where plain char is
			 * signed, bytes >= 0x80 would otherwise be
			 * sign-extended and formatted as "\xffffffNN",
			 * which is both the wrong escape and longer than
			 * the 4 bytes reserved for it.
			 */
			unsigned char byte = (unsigned char)*cur;
			*(out++) = '\\';
			*(out++) = 'x';
			*(out++) = hexdigits[byte >> 4];
			*(out++) = hexdigits[byte & 0x0f];
		}
	}

	/* applen + 1 also copies append's terminating NUL */
	memcpy(out, append, applen + 1);
	return (ret);
}
/*
 * Collapses every run of consecutive '/' characters in path into a single
 * '/', rewriting the string in place. Nothing else is touched: "." and ".."
 * components and a trailing slash are kept as they are.
 */
static void
simplify_path(char *path)
{
	const char *src = path;
	char *dst = path;

	while (*src != '\0') {
		char c = *src++;

		*dst++ = c;
		if (c == '/') {
			/* Swallow the remainder of this run of slashes */
			while (*src == '/')
				src++;
		}
	}
	*dst = '\0';
}
/*
 * Returns true if the string what ends with the string suff.
 * An empty suff matches any string.
 */
static bool
strendswith(const char *what, const char *suff)
{
	const size_t total = strlen(what);
	const size_t tail = strlen(suff);

	if (tail > total)
		return (false);

	/* Both sides hold exactly tail bytes before their terminator */
	return (memcmp(what + (total - tail), suff, tail) == 0);
}
/*
 * Builds a systemd unit name from a path: prepend + escaped path + append.
 *
 * Assumes an already-simplified path (see simplify_path()). The input is
 * left unchanged on return, but a trailing '/' is temporarily overwritten
 * while escaping, so the buffer must be writable.
 *
 * The root directory "/" is encoded as a single "-". The empty path is
 * treated the same way, which also keeps it from reaching the
 * trailing-slash check below, where it would index before the start of
 * the string.
 *
 * Returns a malloc()ed string owned by the caller, or NULL if the path
 * contains "." or ".." components and therefore cannot be escaped
 * unambiguously. Does not return on allocation failure.
 */
static char *
systemd_escape_path(char *input, const char *prepend, const char *append)
{
	if (input[0] == '\0' || strcmp(input, "/") == 0) {
		char *ret;
		if (asprintf(&ret, "%s-%s", prepend, append) == -1)
			EXIT_ENOMEM();
		return (ret);
	}

	/*
	 * path_is_normalized() (flattened for absolute paths here),
	 * required for proper escaping
	 */
	if (strstr(input, "/./") || strstr(input, "/../") ||
	    strendswith(input, "/.") || strendswith(input, "/.."))
		return (NULL);

	/* The leading slash is not part of the unit name */
	if (input[0] == '/')
		++input;

	/*
	 * input was neither "" nor "/", and slashes were collapsed by the
	 * caller, so at least one byte remains here.
	 */
	char *back = &input[strlen(input) - 1];
	bool deslash = *back == '/';
	if (deslash)
		*back = '\0';	/* hide the trailing slash while escaping */

	char *ret = systemd_escape(input, prepend, append);

	if (deslash)
		*back = '/';	/* restore the caller's string */
	return (ret);
}
/*
 * openat(2) followed by fdopen(3): opens pathname relative to dirfd with
 * the given open flags and creation mode, and wraps the descriptor in a
 * stdio stream opened with stream_mode.
 *
 * Returns the stream, or NULL with errno set if either step fails.
 * fdopen() does not close the descriptor when it fails, so that is done
 * here to avoid leaking it; errno from fdopen() is preserved.
 */
static FILE *
fopenat(int dirfd, const char *pathname, int flags,
    const char *stream_mode, mode_t mode)
{
	int fd = openat(dirfd, pathname, flags, mode);
	if (fd < 0)
		return (NULL);

	FILE *stream = fdopen(fd, stream_mode);
	if (stream == NULL) {
		int saved_errno = errno;
		(void) close(fd);
		errno = saved_errno;
	}
	return (stream);
}
/*
 * Processes one line of a cache file and writes the systemd units for the
 * dataset it describes into destdir. Runs in a forked child of main().
 *
 * line is split in place on tabs into, in order: dataset name, mountpoint,
 * canmount, atime, relatime, devices, exec, readonly, setuid, nbmand
 * (all mandatory), then encryptionroot, keylocation and the
 * org.openzfs.systemd:* properties requires, requires-mounts-for, before,
 * after, wanted-by, required-by, nofail and ignore (optional; a missing
 * field behaves like "-", or "none" for keylocation).
 *
 * cachefile is the name of the file the line came from; it is only used
 * for the SourcePath= entry of the generated units.
 *
 * Output:
 *   - if the dataset is its own encryption root, a zfs-load-key-*.service;
 *   - unless skipped (ignore=on, canmount=off, legacy/none mountpoint,
 *     or an existing/duplicate unit), a *.mount unit;
 *   - symlinks to the mount unit in <target>.wants/ and <target>.requires/.
 *
 * Returns 0 on success or when the dataset is deliberately skipped,
 * 1 on malformed input or when a unit file cannot be created.
 * Allocations are not released: the caller _exit()s with the result.
 */
static int
line_worker(char *line, const char *cachefile)
{
	char *toktmp;
	/* BEGIN CSTYLED */
	const char *dataset                     = strtok_r(line, "\t", &toktmp);
	      char *p_mountpoint                = strtok_r(NULL, "\t", &toktmp);
	const char *p_canmount                  = strtok_r(NULL, "\t", &toktmp);
	const char *p_atime                     = strtok_r(NULL, "\t", &toktmp);
	const char *p_relatime                  = strtok_r(NULL, "\t", &toktmp);
	const char *p_devices                   = strtok_r(NULL, "\t", &toktmp);
	const char *p_exec                      = strtok_r(NULL, "\t", &toktmp);
	const char *p_readonly                  = strtok_r(NULL, "\t", &toktmp);
	const char *p_setuid                    = strtok_r(NULL, "\t", &toktmp);
	const char *p_nbmand                    = strtok_r(NULL, "\t", &toktmp);
	const char *p_encroot                   = strtok_r(NULL, "\t", &toktmp) ?: "-";
	      char *p_keyloc                    = strtok_r(NULL, "\t", &toktmp) ?: strdupa("none");
	const char *p_systemd_requires          = strtok_r(NULL, "\t", &toktmp) ?: "-";
	const char *p_systemd_requiresmountsfor = strtok_r(NULL, "\t", &toktmp) ?: "-";
	const char *p_systemd_before            = strtok_r(NULL, "\t", &toktmp) ?: "-";
	const char *p_systemd_after             = strtok_r(NULL, "\t", &toktmp) ?: "-";
	      char *p_systemd_wantedby          = strtok_r(NULL, "\t", &toktmp) ?: strdupa("-");
	      char *p_systemd_requiredby        = strtok_r(NULL, "\t", &toktmp) ?: strdupa("-");
	const char *p_systemd_nofail            = strtok_r(NULL, "\t", &toktmp) ?: "-";
	const char *p_systemd_ignore            = strtok_r(NULL, "\t", &toktmp) ?: "-";
	/* END CSTYLED */

	/*
	 * strtok_r() yields NULL once the line runs out of fields, so a
	 * NULL p_nbmand means one of the ten mandatory fields is missing.
	 * A blank line leaves even dataset NULL; check before first use.
	 */
	if (dataset == NULL || p_nbmand == NULL) {
		fprintf(stderr, PROGNAME "[%d]: %s: not enough tokens!\n",
		    getpid(), dataset ? dataset : "(empty line)");
		return (1);
	}

	/* The pool is the dataset name up to the first '/' */
	const char *pool = dataset;
	if ((toktmp = strchr(pool, '/')) != NULL)
		pool = strndupa(pool, toktmp - pool);

	/*
	 * Overwrite the argv[0] text in place with the dataset name, never
	 * writing more than its original length (strncpy() zero-pads the
	 * rest). Presumably this is so the worker can be identified in
	 * process listings.
	 */
	strncpy(argv0, dataset, strlen(argv0));

	/* Minimal pre-requisites to mount a ZFS dataset */
	const char *after = "zfs-import.target";
	const char *wants = "zfs-import.target";
	const char *bindsto = NULL;
	char *wantedby = NULL;
	char *requiredby = NULL;
	bool noauto = false;
	bool wantedby_append = true;

	/*
	 * zfs-import.target is not needed if the pool is already imported.
	 * This avoids a dependency loop on root-on-ZFS systems:
	 * systemd-random-seed.service After (via RequiresMountsFor)
	 * var-lib.mount After
	 * zfs-import.target After
	 * zfs-import-{cache,scan}.service After
	 * cryptsetup.service After
	 * systemd-random-seed.service
	 */
	if (tfind(pool, &known_pools, STRCMP)) {
		after = "";
		wants = "";
	}

	/* "-" means the property is unset */
	if (strcmp(p_systemd_after, "-") == 0)
		p_systemd_after = NULL;
	if (strcmp(p_systemd_before, "-") == 0)
		p_systemd_before = NULL;
	if (strcmp(p_systemd_requires, "-") == 0)
		p_systemd_requires = NULL;
	if (strcmp(p_systemd_requiresmountsfor, "-") == 0)
		p_systemd_requiresmountsfor = NULL;

	if (strcmp(p_encroot, "-") != 0) {
		char *keyloadunit =
		    systemd_escape(p_encroot, "zfs-load-key-", ".service");

		/* Only the encryption root itself gets a key-load unit */
		if (strcmp(dataset, p_encroot) == 0) {
			const char *keymountdep = NULL;
			bool is_prompt = false;

			regmatch_t uri_matches[3];
			if (regexec(&uri_regex, p_keyloc,
			    sizeof (uri_matches) / sizeof (*uri_matches),
			    uri_matches, 0) == 0) {
				/* Group 2 is everything after "://" */
				p_keyloc[uri_matches[2].rm_eo] = '\0';
				const char *path =
				    &p_keyloc[uri_matches[2].rm_so];

				/*
				 * Assumes all URI keylocations need
				 * the mount for their path;
				 * http://, for example, wouldn't
				 * (but it'd need network-online.target et al.)
				 */
				keymountdep = path;
			} else {
				/* Anything that isn't a URI is prompted for */
				if (strcmp(p_keyloc, "prompt") != 0)
					fprintf(stderr, PROGNAME "[%d]: %s: "
					    "unknown non-URI keylocation=%s\n",
					    getpid(), dataset, p_keyloc);

				is_prompt = true;
			}

			/* Generate the key-load .service unit */
			FILE *keyloadunit_f = fopenat(destdir_fd, keyloadunit,
			    O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, "w",
			    0644);
			if (!keyloadunit_f) {
				fprintf(stderr, PROGNAME "[%d]: %s: "
				    "couldn't open %s under %s: %s\n",
				    getpid(), dataset, keyloadunit, destdir,
				    strerror(errno));
				return (1);
			}

			fprintf(keyloadunit_f,
			    OUTPUT_HEADER
			    "[Unit]\n"
			    "Description=Load ZFS key for %s\n"
			    "SourcePath=" FSLIST "/%s\n"
			    "Documentation=man:zfs-mount-generator(8)\n"
			    "DefaultDependencies=no\n"
			    "Wants=%s\n"
			    "After=%s\n",
			    dataset, cachefile, wants, after);

			if (p_systemd_requires)
				fprintf(keyloadunit_f,
				    "Requires=%s\n", p_systemd_requires);

			if (p_systemd_requiresmountsfor || keymountdep) {
				fprintf(keyloadunit_f, "RequiresMountsFor=");
				if (p_systemd_requiresmountsfor)
					fprintf(keyloadunit_f,
					    "%s ", p_systemd_requiresmountsfor);
				if (keymountdep)
					fprintf(keyloadunit_f,
					    "'%s'", keymountdep);
				fprintf(keyloadunit_f, "\n");
			}

			/* BEGIN CSTYLED */
			fprintf(keyloadunit_f,
			    "\n"
			    "[Service]\n"
			    "Type=oneshot\n"
			    "RemainAfterExit=yes\n"
			    "# This avoids a dependency loop involving systemd-journald.socket if this\n"
			    "# dataset is a parent of the root filesystem.\n"
			    "StandardOutput=null\n"
			    "StandardError=null\n"
			    "ExecStart=/bin/sh -c '"
			        "set -eu;"
			        "keystatus=\"$$(" ZFS " get -H -o value keystatus \"%s\")\";"
			        "[ \"$$keystatus\" = \"unavailable\" ] || exit 0;",
			    dataset);
			if (is_prompt)
				fprintf(keyloadunit_f,
				    "count=0;"
				    "while [ $$count -lt 3 ]; do "
				        "systemd-ask-password --id=\"zfs:%s\" \"Enter passphrase for %s:\" |"
				        "" ZFS " load-key \"%s\" && exit 0;"
				        "count=$$((count + 1));"
				    "done;"
				    "exit 1",
				    dataset, dataset, dataset);
			else
				fprintf(keyloadunit_f,
				    "" ZFS " load-key \"%s\"",
				    dataset);

			fprintf(keyloadunit_f,
			    "'\n"
			    "ExecStop=/bin/sh -c '"
			        "set -eu;"
			        "keystatus=\"$$(" ZFS " get -H -o value keystatus \"%s\")\";"
			        "[ \"$$keystatus\" = \"available\" ] || exit 0;"
			        "" ZFS " unload-key \"%s\""
			    "'\n",
			    dataset, dataset);
			/* END CSTYLED */

			(void) fclose(keyloadunit_f);
		}

		/* Update dependencies for the mount file to want this */
		bindsto = keyloadunit;
		if (after[0] == '\0')
			after = keyloadunit;
		else if (asprintf(&toktmp, "%s %s", after, keyloadunit) != -1)
			after = toktmp;
		else
			EXIT_ENOMEM();
	}

	/* Skip generation of the mount unit if org.openzfs.systemd:ignore=on */
	if (strcmp(p_systemd_ignore, "-") == 0 ||
	    strcmp(p_systemd_ignore, "off") == 0) {
		/* ok */
	} else if (strcmp(p_systemd_ignore, "on") == 0)
		return (0);
	else {
		fprintf(stderr, PROGNAME "[%d]: %s: "
		    "invalid org.openzfs.systemd:ignore=%s\n",
		    getpid(), dataset, p_systemd_ignore);
		return (1);
	}

	/* Check for canmount */
	if (strcmp(p_canmount, "on") == 0) {
		/* ok */
	} else if (strcmp(p_canmount, "noauto") == 0)
		noauto = true;
	else if (strcmp(p_canmount, "off") == 0)
		return (0);
	else {
		fprintf(stderr, PROGNAME "[%d]: %s: invalid canmount=%s\n",
		    getpid(), dataset, p_canmount);
		return (1);
	}

	/* Check for legacy and blank mountpoints */
	if (strcmp(p_mountpoint, "legacy") == 0 ||
	    strcmp(p_mountpoint, "none") == 0)
		return (0);
	else if (p_mountpoint[0] != '/') {
		fprintf(stderr, PROGNAME "[%d]: %s: invalid mountpoint=%s\n",
		    getpid(), dataset, p_mountpoint);
		return (1);
	}

	/* Escape the mountpoint per systemd policy */
	simplify_path(p_mountpoint);
	const char *mountfile = systemd_escape_path(p_mountpoint, "", ".mount");
	if (mountfile == NULL) {
		fprintf(stderr,
		    PROGNAME "[%d]: %s: abnormal simplified mountpoint: %s\n",
		    getpid(), dataset, p_mountpoint);
		return (1);
	}

	/*
	 * Parse options, cf. lib/libzfs/libzfs_mount.c:zfs_add_options
	 *
	 * The longest string achievable here is
	 * ",atime,strictatime,nodev,noexec,rw,nosuid,nomand".
	 *
	 * An invalid value is reported but not fatal: the corresponding
	 * option is simply left out.
	 */
	char opts[64] = "";

	/* atime */
	if (strcmp(p_atime, "on") == 0) {
		/* relatime */
		if (strcmp(p_relatime, "on") == 0)
			strcat(opts, ",atime,relatime");
		else if (strcmp(p_relatime, "off") == 0)
			strcat(opts, ",atime,strictatime");
		else
			fprintf(stderr,
			    PROGNAME "[%d]: %s: invalid relatime=%s\n",
			    getpid(), dataset, p_relatime);
	} else if (strcmp(p_atime, "off") == 0) {
		strcat(opts, ",noatime");
	} else
		fprintf(stderr, PROGNAME "[%d]: %s: invalid atime=%s\n",
		    getpid(), dataset, p_atime);

	/* devices */
	if (strcmp(p_devices, "on") == 0)
		strcat(opts, ",dev");
	else if (strcmp(p_devices, "off") == 0)
		strcat(opts, ",nodev");
	else
		fprintf(stderr, PROGNAME "[%d]: %s: invalid devices=%s\n",
		    getpid(), dataset, p_devices);

	/* exec */
	if (strcmp(p_exec, "on") == 0)
		strcat(opts, ",exec");
	else if (strcmp(p_exec, "off") == 0)
		strcat(opts, ",noexec");
	else
		fprintf(stderr, PROGNAME "[%d]: %s: invalid exec=%s\n",
		    getpid(), dataset, p_exec);

	/* readonly */
	if (strcmp(p_readonly, "on") == 0)
		strcat(opts, ",ro");
	else if (strcmp(p_readonly, "off") == 0)
		strcat(opts, ",rw");
	else
		fprintf(stderr, PROGNAME "[%d]: %s: invalid readonly=%s\n",
		    getpid(), dataset, p_readonly);

	/* setuid */
	if (strcmp(p_setuid, "on") == 0)
		strcat(opts, ",suid");
	else if (strcmp(p_setuid, "off") == 0)
		strcat(opts, ",nosuid");
	else
		fprintf(stderr, PROGNAME "[%d]: %s: invalid setuid=%s\n",
		    getpid(), dataset, p_setuid);

	/* nbmand */
	if (strcmp(p_nbmand, "on") == 0)
		strcat(opts, ",mand");
	else if (strcmp(p_nbmand, "off") == 0)
		strcat(opts, ",nomand");
	else
		fprintf(stderr, PROGNAME "[%d]: %s: invalid nbmand=%s\n",
		    getpid(), dataset, p_nbmand);

	/* An explicit wanted-by/required-by replaces the default target */
	if (strcmp(p_systemd_wantedby, "-") != 0) {
		noauto = true;

		if (strcmp(p_systemd_wantedby, "none") != 0)
			wantedby = p_systemd_wantedby;
	}

	if (strcmp(p_systemd_requiredby, "-") != 0) {
		noauto = true;

		if (strcmp(p_systemd_requiredby, "none") != 0)
			requiredby = p_systemd_requiredby;
	}

	/*
	 * For datasets with canmount=on, a dependency is created for
	 * local-fs.target by default. To avoid regressions, this dependency
	 * is reduced to "wants" rather than "requires" when nofail!=off.
	 * **THIS MAY CHANGE**
	 * noauto=on disables this behavior completely.
	 */
	if (!noauto) {
		if (strcmp(p_systemd_nofail, "off") == 0)
			requiredby = strdupa("local-fs.target");
		else {
			wantedby = strdupa("local-fs.target");
			wantedby_append = strcmp(p_systemd_nofail, "on") != 0;
		}
	}

	/*
	 * Handle existing files:
	 * 1. We never overwrite existing files, although we may delete
	 *    files if we're sure they were created by us. (see 5.)
	 * 2. We handle files differently based on canmount.
	 *    Units with canmount=on always have precedence over noauto.
	 *    This is enforced by the noauto_not_on_sem semaphore,
	 *    which is only unlocked when the last canmount=on process exits.
	 *    It is important to use p_canmount and not noauto here,
	 *    since we categorise by canmount while other properties,
	 *    e.g. org.openzfs.systemd:wanted-by, also modify noauto.
	 * 3. If no unit file exists for a noauto dataset, we create one.
	 *    Additionally, we use noauto_files to track the unit file names
	 *    (which are the systemd-escaped mountpoints) of all (exclusively)
	 *    noauto datasets that had a file created.
	 * 4. If the file to be created is found in the tracking array,
	 *    we do NOT create it.
	 * 5. If a file exists for a noauto dataset,
	 *    we check whether the file name is in the array.
	 *    If it is, we have multiple noauto datasets for the same
	 *    mountpoint. In such cases, we remove the file for safety.
	 *    We leave the file name in the tracking array to avoid
	 *    further noauto datasets creating a file for this path again.
	 */

	{
		sem_t *our_sem = (strcmp(p_canmount, "on") == 0) ?
		    &noauto_files->noauto_names_sem :
		    &noauto_files->noauto_not_on_sem;
		/* Retry if a signal interrupts the wait */
		while (sem_wait(our_sem) == -1 && errno == EINTR)
			;
	}

	struct stat stbuf;
	bool already_exists = fstatat(destdir_fd, mountfile, &stbuf, 0) == 0;
	bool is_known = false;

	for (size_t i = 0; i < noauto_files->noauto_names_len; ++i) {
		if (strncmp(
		    noauto_files->noauto_names[i], mountfile, NAME_MAX) == 0) {
			is_known = true;
			break;
		}
	}

	if (already_exists) {
		if (is_known) {
			/* If it's in $noauto_files, we must be noauto too */

			/* See 5 */
			errno = 0;
			(void) unlinkat(destdir_fd, mountfile, 0);
			int unlink_errno = errno;

			/* See 2 */
			/* Append the reason only if the removal failed */
			fprintf(stderr, PROGNAME "[%d]: %s: "
			    "removing duplicate noauto unit %s%s%s\n",
			    getpid(), dataset, mountfile,
			    unlink_errno ? " failed: " : "",
			    unlink_errno ? strerror(unlink_errno) : "");
		} else {
			/* Don't log for canmount=noauto */
			if (strcmp(p_canmount, "on") == 0)
				fprintf(stderr, PROGNAME "[%d]: %s: "
				    "%s already exists. Skipping.\n",
				    getpid(), dataset, mountfile);
		}

		/* File exists: skip current dataset */
		if (strcmp(p_canmount, "on") == 0)
			sem_post(&noauto_files->noauto_names_sem);
		return (0);
	} else {
		if (is_known) {
			/* See 4 */
			if (strcmp(p_canmount, "on") == 0)
				sem_post(&noauto_files->noauto_names_sem);
			return (0);
		} else if (strcmp(p_canmount, "noauto") == 0) {
			/* See 3 */
			if (noauto_files->noauto_names_len ==
			    noauto_files->noauto_names_max)
				fprintf(stderr, PROGNAME "[%d]: %s: "
				    "noauto dataset limit (%zu) reached! "
				    "Not tracking %s. Please report this to "
				    "https://github.com/openzfs/zfs\n",
				    getpid(), dataset,
				    noauto_files->noauto_names_max, mountfile);
			else {
				strncpy(noauto_files->noauto_names[
				    noauto_files->noauto_names_len],
				    mountfile, NAME_MAX);
				++noauto_files->noauto_names_len;
			}
		}
	}

	FILE *mountfile_f = fopenat(destdir_fd, mountfile,
	    O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, "w", 0644);
	/* The file now exists (or failed to), so the next worker may look */
	if (strcmp(p_canmount, "on") == 0)
		sem_post(&noauto_files->noauto_names_sem);
	if (!mountfile_f) {
		fprintf(stderr,
		    PROGNAME "[%d]: %s: couldn't open %s under %s: %s\n",
		    getpid(), dataset, mountfile, destdir, strerror(errno));
		return (1);
	}

	fprintf(mountfile_f,
	    OUTPUT_HEADER
	    "[Unit]\n"
	    "SourcePath=" FSLIST "/%s\n"
	    "Documentation=man:zfs-mount-generator(8)\n"
	    "\n"
	    "Before=",
	    cachefile);

	if (p_systemd_before)
		fprintf(mountfile_f, "%s ", p_systemd_before);
	fprintf(mountfile_f, "zfs-mount.service"); /* Ensures we don't race */
	if (requiredby)
		fprintf(mountfile_f, " %s", requiredby);
	if (wantedby && wantedby_append)
		fprintf(mountfile_f, " %s", wantedby);

	fprintf(mountfile_f,
	    "\n"
	    "After=");
	if (p_systemd_after)
		fprintf(mountfile_f, "%s ", p_systemd_after);
	fprintf(mountfile_f, "%s\n", after);

	fprintf(mountfile_f, "Wants=%s\n", wants);

	if (bindsto)
		fprintf(mountfile_f, "BindsTo=%s\n", bindsto);
	if (p_systemd_requires)
		fprintf(mountfile_f, "Requires=%s\n", p_systemd_requires);
	if (p_systemd_requiresmountsfor)
		fprintf(mountfile_f,
		    "RequiresMountsFor=%s\n", p_systemd_requiresmountsfor);

	fprintf(mountfile_f,
	    "\n"
	    "[Mount]\n"
	    "Where=%s\n"
	    "What=%s\n"
	    "Type=zfs\n"
	    "Options=defaults%s,zfsutil\n",
	    p_mountpoint, dataset, opts);

	(void) fclose(mountfile_f);

	if (!requiredby && !wantedby)
		return (0);

	/* Finally, create the appropriate dependencies */
	char *linktgt;
	if (asprintf(&linktgt, "../%s", mountfile) == -1)
		EXIT_ENOMEM();

	/* {directory suffix, space-separated unit list}; empty row ends it */
	char *dependencies[][2] = {
		{"wants", wantedby},
		{"requires", requiredby},
		{}
	};
	for (__typeof__(&*dependencies) dep = &*dependencies; **dep; ++dep) {
		if (!(*dep)[1])
			continue;

		for (char *reqby = strtok_r((*dep)[1], " ", &toktmp);
		    reqby;
		    reqby = strtok_r(NULL, " ", &toktmp)) {
			char *depdir;
			if (asprintf(&depdir, "%s.%s", reqby, (*dep)[0]) == -1)
				EXIT_ENOMEM();

			/* Failure (e.g. EEXIST) surfaces in openat() below */
			(void) mkdirat(destdir_fd, depdir, 0755);
			int depdir_fd = openat(destdir_fd, depdir,
			    O_PATH | O_DIRECTORY | O_CLOEXEC);
			if (depdir_fd < 0) {
				fprintf(stderr, PROGNAME "[%d]: %s: "
				    "couldn't open %s under %s: %s\n",
				    getpid(), dataset, depdir, destdir,
				    strerror(errno));
				free(depdir);
				continue;
			}

			if (symlinkat(linktgt, depdir_fd, mountfile) == -1)
				fprintf(stderr, PROGNAME "[%d]: %s: "
				    "couldn't symlink at "
				    "%s under %s under %s: %s\n",
				    getpid(), dataset, mountfile,
				    depdir, destdir, strerror(errno));

			(void) close(depdir_fd);
			free(depdir);
		}
	}

	return (0);
}
/*
 * zpool_iter() callback: records the pool's name in the known_pools tree
 * and closes the handle.
 *
 * Returns 0 on success, ENOMEM if the name could not be duplicated or
 * inserted. data is unused.
 */
static int
pool_enumerator(zpool_handle_t *pool, void *data __attribute__((unused)))
{
	int err = ENOMEM;

	/*
	 * Pools are guaranteed-unique by the kernel,
	 * no risk of leaking dupes here
	 */
	char *copy = strdup(zpool_get_name(pool));

	if (copy != NULL && tsearch(copy, &known_pools, STRCMP) != NULL)
		err = 0;	/* the tree now owns the copy */
	else
		free(copy);

	zpool_close(pool);
	return (err);
}
/*
 * Entry point of the generator.
 *
 * Accepts zero, one or three arguments; the first, if present, replaces
 * the default output directory, the others are ignored (systemd passes
 * three output directories to generators, see systemd.generator(7)).
 *
 * Every line of every file under FSLIST is handed to line_worker() in its
 * own forked child. Children for canmount=on datasets run first; the
 * others are held back on noauto_not_on_sem until all canmount=on
 * children have been reaped.
 *
 * With ZFS_DEBUG set in the environment (or "debug" on the kernel command
 * line) timing and resource statistics are printed.
 *
 * Exits with the bitwise OR of all children's exit statuses and
 * terminating signals, i.e. 0 only if every child succeeded. Exits 0
 * without doing anything if FSLIST cannot be opened.
 */
int
main(int argc, char **argv)
{
	struct timespec time_init = {};
	clock_gettime(CLOCK_MONOTONIC_RAW, &time_init);

	{
		/* Log to the kernel ring buffer if it can be opened */
		int kmfd = open("/dev/kmsg", O_WRONLY | O_CLOEXEC);
		if (kmfd >= 0) {
			(void) dup2(kmfd, STDERR_FILENO);
			(void) close(kmfd);
		}
	}

	uint8_t debug = 0;

	argv0 = argv[0];
	switch (argc) {
	case 1:
		/* Use default */
		break;
	case 2:
	case 4:
		destdir = argv[1];
		break;
	default:
		fprintf(stderr,
		    PROGNAME "[%d]: wrong argument count: %d\n",
		    getpid(), argc - 1);
		_exit(1);
	}

	{
		destdir_fd = open(destdir, O_PATH | O_DIRECTORY | O_CLOEXEC);
		if (destdir_fd < 0) {
			fprintf(stderr, PROGNAME "[%d]: "
			    "can't open destination directory %s: %s\n",
			    getpid(), destdir, strerror(errno));
			_exit(1);
		}
	}

	DIR *fslist_dir = opendir(FSLIST);
	if (!fslist_dir) {
		/* A missing cache directory is not worth a message */
		if (errno != ENOENT)
			fprintf(stderr,
			    PROGNAME "[%d]: couldn't open " FSLIST ": %s\n",
			    getpid(), strerror(errno));
		_exit(0);
	}

	{
		/* Best effort: known_pools just stays empty on failure */
		libzfs_handle_t *libzfs = libzfs_init();
		if (libzfs) {
			if (zpool_iter(libzfs, pool_enumerator, NULL) != 0)
				fprintf(stderr, PROGNAME "[%d]: "
				    "error listing pools, ignoring\n",
				    getpid());
			libzfs_fini(libzfs);
		} else
			fprintf(stderr, PROGNAME "[%d]: "
			    "couldn't start libzfs, ignoring\n",
			    getpid());
	}

	{
		int regerr = regcomp(&uri_regex, URI_REGEX_S, 0);
		if (regerr != 0) {
			fprintf(stderr,
			    PROGNAME "[%d]: invalid regex: %d\n",
			    getpid(), regerr);
			_exit(1);
		}
	}

	{
		/*
		 * We could just get a gigabyte here and Not Care,
		 * but if vm.overcommit_memory=2, then MAP_NORESERVE is ignored
		 * and we'd try (and likely fail) to rip it out of swap
		 */
		noauto_files = mmap(NULL, 4 * 1024 * 1024,
		    PROT_READ | PROT_WRITE,
		    MAP_SHARED | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
		if (noauto_files == MAP_FAILED) {
			fprintf(stderr,
			    PROGNAME "[%d]: couldn't allocate IPC region: %s\n",
			    getpid(), strerror(errno));
			_exit(1);
		}

		/* Process-shared: a gate starting closed, a lock starting free */
		sem_init(&noauto_files->noauto_not_on_sem, true, 0);
		sem_init(&noauto_files->noauto_names_sem, true, 1);
		noauto_files->noauto_names_len = 0;
		/* Works out to 16447ish, *well* enough */
		noauto_files->noauto_names_max =
		    (4 * 1024 * 1024 - sizeof (*noauto_files)) / NAME_MAX;
	}

	char *line = NULL;
	size_t linelen = 0;
	struct timespec time_start = {};
	{
		const char *dbgenv = getenv("ZFS_DEBUG");
		if (dbgenv)
			debug = atoi(dbgenv);
		else {
			FILE *cmdline = fopen("/proc/cmdline", "re");
			if (cmdline != NULL) {
				if (getline(&line, &linelen, cmdline) >= 0)
					debug = strstr(line, "debug") ? 2 : 0;
				(void) fclose(cmdline);
			}
		}

		/* Send the debug output wherever stderr goes */
		if (debug && !isatty(STDOUT_FILENO))
			dup2(STDERR_FILENO, STDOUT_FILENO);
	}

	size_t forked_canmount_on = 0;
	size_t forked_canmount_not_on = 0;
	size_t canmount_on_pids_len = 128;
	pid_t *canmount_on_pids =
	    malloc(canmount_on_pids_len * sizeof (*canmount_on_pids));
	/* Not fatal: the list is grown on demand below */
	if (canmount_on_pids == NULL)
		canmount_on_pids_len = 0;

	if (debug)
		clock_gettime(CLOCK_MONOTONIC_RAW, &time_start);

	ssize_t read;
	pid_t pid;
	struct dirent *cachent;
	while ((cachent = readdir(fslist_dir)) != NULL) {
		if (strcmp(cachent->d_name, ".") == 0 ||
		    strcmp(cachent->d_name, "..") == 0)
			continue;

		FILE *cachefile = fopenat(dirfd(fslist_dir), cachent->d_name,
		    O_RDONLY | O_CLOEXEC, "r", 0);
		if (!cachefile) {
			fprintf(stderr, PROGNAME "[%d]: "
			    "couldn't open %s under " FSLIST ": %s\n",
			    getpid(), cachent->d_name, strerror(errno));
			continue;
		}

		while ((read = getline(&line, &linelen, cachefile)) >= 0) {
			/*
			 * Strip the newline, but only if there is one:
			 * the last line of a file may lack it, and then
			 * the final byte belongs to the last field.
			 */
			if (read > 0 && line[read - 1] == '\n')
				line[read - 1] = '\0';

			switch (pid = fork()) {
			case -1:
				fprintf(stderr,
				    PROGNAME "[%d]: couldn't fork for %s: %s\n",
				    getpid(), line, strerror(errno));
				break;
			case 0: /* child */
				_exit(line_worker(line, cachent->d_name));
			default: { /* parent */
				/* Third tab-separated field is canmount */
				char *tmp;
				char *dset = strtok_r(line, "\t", &tmp);
				strtok_r(NULL, "\t", &tmp);
				char *canmount = strtok_r(NULL, "\t", &tmp);
				bool canmount_on =
				    canmount && strncmp(canmount, "on", 2) == 0;

				/* dset is NULL for a blank line */
				if (debug >= 2)
					printf(PROGNAME ": forked %d, "
					    "canmount_on=%d, dataset=%s\n",
					    (int)pid, canmount_on,
					    dset ? dset : "(null)");

				if (canmount_on &&
				    forked_canmount_on ==
				    canmount_on_pids_len) {
					size_t new_len =
					    (canmount_on_pids_len ?: 16) * 2;
					void *new_pidlist =
					    realloc(canmount_on_pids,
					    new_len *
					    sizeof (*canmount_on_pids));
					if (!new_pidlist) {
						fprintf(stderr,
						    PROGNAME "[%d]: "
						    "out of memory! "
						    "Mount ordering may be "
						    "affected.\n", getpid());
						/* Child runs on, untracked */
						continue;
					}

					canmount_on_pids = new_pidlist;
					canmount_on_pids_len = new_len;
				}

				if (canmount_on) {
					canmount_on_pids[forked_canmount_on] =
					    pid;
					++forked_canmount_on;
				} else
					++forked_canmount_not_on;
				break;
			}
			}
		}

		(void) fclose(cachefile);
	}
	free(line);

	if (forked_canmount_on == 0) {
		/* No canmount=on processes to finish, so don't deadlock here */
		for (size_t i = 0; i < forked_canmount_not_on; ++i)
			sem_post(&noauto_files->noauto_not_on_sem);
	} else {
		/* Likely a no-op, since we got these from a narrow fork loop */
		qsort(canmount_on_pids, forked_canmount_on,
		    sizeof (*canmount_on_pids), PID_T_CMP);
	}

	int status, ret = 0;
	struct rusage usage;
	/* forked_canmount_on counts down below; keep the array length */
	size_t forked_canmount_on_max = forked_canmount_on;
	while ((pid = wait4(-1, &status, 0, &usage)) != -1) {
		ret |= WEXITSTATUS(status) | WTERMSIG(status);

		if (forked_canmount_on != 0) {
			if (bsearch(&pid, canmount_on_pids,
			    forked_canmount_on_max, sizeof (*canmount_on_pids),
			    PID_T_CMP))
				--forked_canmount_on;

			if (forked_canmount_on == 0) {
				/*
				 * All canmount=on processes have finished,
				 * let all the lower-priority ones finish now
				 */
				for (size_t i = 0;
				    i < forked_canmount_not_on; ++i)
					sem_post(
					    &noauto_files->noauto_not_on_sem);
			}
		}

		if (debug >= 2)
			printf(PROGNAME ": %d done, user=%llu.%06us, "
			    "system=%llu.%06us, maxrss=%ldB, ex=0x%x\n",
			    (int)pid,
			    (unsigned long long) usage.ru_utime.tv_sec,
			    (unsigned int) usage.ru_utime.tv_usec,
			    (unsigned long long) usage.ru_stime.tv_sec,
			    (unsigned int) usage.ru_stime.tv_usec,
			    usage.ru_maxrss * 1024, status);
	}

	if (debug) {
		struct timespec time_end = {};
		clock_gettime(CLOCK_MONOTONIC_RAW, &time_end);

		getrusage(RUSAGE_SELF, &usage);
		printf(
		    "\n"
		    PROGNAME ": self    : "
		    "user=%llu.%06us, system=%llu.%06us, maxrss=%ldB\n",
		    (unsigned long long) usage.ru_utime.tv_sec,
		    (unsigned int) usage.ru_utime.tv_usec,
		    (unsigned long long) usage.ru_stime.tv_sec,
		    (unsigned int) usage.ru_stime.tv_usec,
		    usage.ru_maxrss * 1024);

		getrusage(RUSAGE_CHILDREN, &usage);
		printf(PROGNAME ": children: "
		    "user=%llu.%06us, system=%llu.%06us, maxrss=%ldB\n",
		    (unsigned long long) usage.ru_utime.tv_sec,
		    (unsigned int) usage.ru_utime.tv_usec,
		    (unsigned long long) usage.ru_stime.tv_sec,
		    (unsigned int) usage.ru_stime.tv_usec,
		    usage.ru_maxrss * 1024);

		/* time_end := time_end - time_start (time spent on workers) */
		if (time_start.tv_nsec > time_end.tv_nsec) {
			time_end.tv_nsec =
			    1000000000 + time_end.tv_nsec - time_start.tv_nsec;
			time_end.tv_sec -= 1;
		} else
			time_end.tv_nsec -= time_start.tv_nsec;
		time_end.tv_sec -= time_start.tv_sec;

		/* time_start := time_start - time_init (time spent on setup) */
		if (time_init.tv_nsec > time_start.tv_nsec) {
			time_start.tv_nsec =
			    1000000000 + time_start.tv_nsec - time_init.tv_nsec;
			time_start.tv_sec -= 1;
		} else
			time_start.tv_nsec -= time_init.tv_nsec;
		time_start.tv_sec -= time_init.tv_sec;

		/* time_init := sum of the two, carrying nanoseconds */
		time_init.tv_nsec = time_start.tv_nsec + time_end.tv_nsec;
		time_init.tv_sec =
		    time_start.tv_sec + time_end.tv_sec +
		    time_init.tv_nsec / 1000000000;
		time_init.tv_nsec %= 1000000000;

		printf(PROGNAME ": wall    : "
		    "total=%llu.%09llus = "
		    "init=%llu.%09llus + real=%llu.%09llus\n",
		    (unsigned long long) time_init.tv_sec,
		    (unsigned long long) time_init.tv_nsec,
		    (unsigned long long) time_start.tv_sec,
		    (unsigned long long) time_start.tv_nsec,
		    (unsigned long long) time_end.tv_sec,
		    (unsigned long long) time_end.tv_nsec);
	}

	_exit(ret);
}