zed: protect against wait4()/fork() races to the global PID table
This can be very easily triggered by adding a sleep(1) before the wait4() on a PID-starved system: the reaper thread would wait for a child before its entry appeared, letting old entries accumulate:

    Invoking "all-debug.sh" eid=3021 pid=391
    Finished "(null)" eid=0 pid=391 time=0.002432s exit=0
    Invoking "all-syslog.sh" eid=3021 pid=336
    Finished "(null)" eid=0 pid=336 time=0.002432s exit=0
    Invoking "history_event-zfs-list-cacher.sh" eid=3021 pid=347
    Invoking "all-debug.sh" eid=3022 pid=349
    Finished "history_event-zfs-list-cacher.sh" eid=3021 pid=347 time=0.001669s exit=0
    Finished "(null)" eid=0 pid=349 time=0.002404s exit=0
    Invoking "all-syslog.sh" eid=3022 pid=370
    Finished "(null)" eid=0 pid=370 time=0.002427s exit=0
    Invoking "history_event-zfs-list-cacher.sh" eid=3022 pid=391
    avl_find(tree, new_node, &where) == NULL
    ASSERT at ../../module/avl/avl.c:641:avl_add()
    Thread 1 "zed" received signal SIGABRT, Aborted.

By employing this wider lock, we atomise [wait, remove] and [fork, add]: slowing down the reaper thread now just causes some zombies to accumulate until it can get to them.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Don Brady <don.brady@delphix.com>
Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Closes #11963
Closes #11965
This commit is contained in:
parent
c074a7de13
commit
3bd6b0e05a
|
@ -142,8 +142,10 @@ _zed_exec_fork_child(uint64_t eid, const char *dir, const char *prog,
|
|||
prog, eid, strerror(ENAMETOOLONG));
|
||||
return;
|
||||
}
|
||||
(void) pthread_mutex_lock(&_launched_processes_lock);
|
||||
pid = fork();
|
||||
if (pid < 0) {
|
||||
(void) pthread_mutex_unlock(&_launched_processes_lock);
|
||||
zed_log_msg(LOG_WARNING,
|
||||
"Failed to fork \"%s\" for eid=%llu: %s",
|
||||
prog, eid, strerror(errno));
|
||||
|
@ -166,20 +168,19 @@ _zed_exec_fork_child(uint64_t eid, const char *dir, const char *prog,
|
|||
|
||||
/* parent process */
|
||||
|
||||
__atomic_sub_fetch(&_launched_processes_limit, 1, __ATOMIC_SEQ_CST);
|
||||
zed_log_msg(LOG_INFO, "Invoking \"%s\" eid=%llu pid=%d",
|
||||
prog, eid, pid);
|
||||
|
||||
node = calloc(1, sizeof (*node));
|
||||
if (node) {
|
||||
node->pid = pid;
|
||||
node->eid = eid;
|
||||
node->name = strdup(prog);
|
||||
|
||||
(void) pthread_mutex_lock(&_launched_processes_lock);
|
||||
avl_add(&_launched_processes, node);
|
||||
(void) pthread_mutex_unlock(&_launched_processes_lock);
|
||||
}
|
||||
(void) pthread_mutex_unlock(&_launched_processes_lock);
|
||||
|
||||
__atomic_sub_fetch(&_launched_processes_limit, 1, __ATOMIC_SEQ_CST);
|
||||
zed_log_msg(LOG_INFO, "Invoking \"%s\" eid=%llu pid=%d",
|
||||
prog, eid, pid);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
Loading…
Reference in New Issue