start: handle setting pdeath signal in new pidns

In the usual case the child runs in a separate pid namespace. So far we haven't been able to reliably set the pdeath signal. When we set the pdeath signal we need to verify that we haven't lost a race whereby we have been orphaned and though we have set a pdeath signal it won't help us since, well, the parent is dead. We were able to correctly handle this case when we were in the same pidns since getppid() will return a valid pid. When we are in a separate pidns 0 will be returned since the parent doesn't exist in our pidns. A while back, while Jann and I were discussing other things he came up with a nifty idea: simply pass an fd for the parent's status file and check the "State:" field. This is the implementation of that idea. Suggested-by: 's avatarJann Horn <jann@thejh.net> Signed-off-by: 's avatarChristian Brauner <christian.brauner@ubuntu.com>
parent 1af01b93
......@@ -703,6 +703,9 @@ void lxc_free_handler(struct lxc_handler *handler)
if (handler->conf->maincmd_fd >= 0)
lxc_abstract_unix_close(handler->conf->maincmd_fd);
if (handler->monitor_status_fd >= 0)
close(handler->monitor_status_fd);
if (handler->state_socket_pair[0] >= 0)
close(handler->state_socket_pair[0]);
......@@ -737,6 +740,7 @@ struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf,
handler->data_sock[0] = handler->data_sock[1] = -1;
handler->conf = conf;
handler->lxcpath = lxcpath;
handler->monitor_status_fd = -EBADF;
handler->pinfd = -1;
handler->pidfd = -EBADF;
handler->proc_pidfd = -EBADF;
......@@ -789,11 +793,17 @@ on_error:
int lxc_init(const char *name, struct lxc_handler *handler)
{
__do_close_prot_errno int status_fd = -EBADF;
int ret;
const char *loglevel;
struct lxc_conf *conf = handler->conf;
handler->monitor_pid = lxc_raw_getpid();
status_fd = open("/proc/self/status", O_RDONLY | O_CLOEXEC);
if (status_fd < 0) {
SYSERROR("Failed to open monitor status fd");
goto out_close_maincmd_fd;
}
lsm_init();
TRACE("Initialized LSM");
......@@ -917,6 +927,7 @@ int lxc_init(const char *name, struct lxc_handler *handler)
TRACE("Initialized cgroup driver");
INFO("Container \"%s\" is initialized", name);
handler->monitor_status_fd = move_fd(status_fd);
return 0;
out_delete_terminal:
......@@ -1112,6 +1123,7 @@ static int do_start(void *data)
struct lxc_handler *handler = data;
ATTR_UNUSED __do_close_prot_errno int data_sock0 = handler->data_sock[0],
data_sock1 = handler->data_sock[1];
__do_close_prot_errno int status_fd = -EBADF;
int ret;
uid_t new_uid;
gid_t new_gid;
......@@ -1122,13 +1134,18 @@ static int do_start(void *data)
lxc_sync_fini_parent(handler);
if (lxc_abstract_unix_recv_fds(handler->data_sock[1], &status_fd, 1, NULL, 0) < 0) {
ERROR("Failed to receive status file descriptor to child process");
goto out_warn_father;
}
/* This prctl must be before the synchro, so if the parent dies before
* we set the parent death signal, we will detect its death with the
* synchro right after, otherwise we have a window where the parent can
* exit before we set the pdeath signal leading to a unsupervized
* container.
*/
ret = lxc_set_death_signal(SIGKILL, handler->monitor_pid);
ret = lxc_set_death_signal(SIGKILL, handler->monitor_pid, status_fd);
if (ret < 0) {
SYSERROR("Failed to set PR_SET_PDEATHSIG to SIGKILL");
goto out_warn_father;
......@@ -1206,7 +1223,7 @@ static int do_start(void *data)
goto out_warn_father;
/* set{g,u}id() clears deathsignal */
ret = lxc_set_death_signal(SIGKILL, handler->monitor_pid);
ret = lxc_set_death_signal(SIGKILL, handler->monitor_pid, status_fd);
if (ret < 0) {
SYSERROR("Failed to set PR_SET_PDEATHSIG to SIGKILL");
goto out_warn_father;
......@@ -1231,12 +1248,12 @@ static int do_start(void *data)
*/
if (handler->daemonize && !handler->conf->autodev) {
char path[PATH_MAX];
ret = snprintf(path, sizeof(path), "%s/dev/null",
handler->conf->rootfs.mount);
if (ret < 0 || ret >= sizeof(path))
goto out_warn_father;
ret = access(path, F_OK);
if (ret != 0) {
devnull_fd = open_devnull();
......@@ -1453,6 +1470,16 @@ static int do_start(void *data)
goto out_warn_father;
}
if (handler->conf->monitor_signal_pdeath != SIGKILL) {
ret = lxc_set_death_signal(handler->conf->monitor_signal_pdeath,
handler->monitor_pid, status_fd);
if (ret < 0) {
SYSERROR("Failed to set PR_SET_PDEATHSIG to %d",
handler->conf->monitor_signal_pdeath);
goto out_warn_father;
}
}
/* After this call, we are in error because this ops should not return
* as it execs.
*/
......@@ -1768,6 +1795,12 @@ static int lxc_spawn(struct lxc_handler *handler)
lxc_sync_fini_child(handler);
if (lxc_abstract_unix_send_fds(handler->data_sock[0], &handler->monitor_status_fd, 1, NULL, 0) < 0) {
ERROR("Failed to send status file descriptor to child process");
goto out_delete_net;
}
close_prot_errno_disarm(handler->monitor_status_fd);
/* Map the container uids. The container became an invalid userid the
* moment it was cloned with CLONE_NEWUSER. This call doesn't change
* anything immediately, but allows the container to setuid(0) (0 being
......
......@@ -114,6 +114,8 @@ struct lxc_handler {
/* The monitor's pid. */
pid_t monitor_pid;
int monitor_status_fd;
/* Whether the child has already exited. */
bool init_died;
......
......@@ -1720,7 +1720,43 @@ uint64_t lxc_find_next_power2(uint64_t n)
return n;
}
int lxc_set_death_signal(int signal, pid_t parent)
static int process_dead(/* takes */ int status_fd)
{
__do_close_prot_errno int dupfd = -EBADF;
__do_free char *line = NULL;
__do_fclose FILE *f = NULL;
int ret = 0;
size_t n = 0;
dupfd = dup(status_fd);
if (dupfd < 0)
return -1;
if (fd_cloexec(dupfd, true) < 0)
return -1;
/* transfer ownership of fd */
f = fdopen(move_fd(dupfd), "re");
if (!f)
return -1;
ret = 0;
while (getline(&line, &n, f) != -1) {
char *state;
if (strncmp(line, "State:", 6))
continue;
state = lxc_trim_whitespace_in_place(line + 6);
/* only check whether process is dead or zombie for now */
if (*state == 'X' || *state == 'Z')
ret = 1;
}
return ret;
}
int lxc_set_death_signal(int signal, pid_t parent, int parent_status_fd)
{
int ret;
pid_t ppid;
......@@ -1728,12 +1764,16 @@ int lxc_set_death_signal(int signal, pid_t parent)
ret = prctl(PR_SET_PDEATHSIG, prctl_arg(signal), prctl_arg(0),
prctl_arg(0), prctl_arg(0));
/* If not in a PID namespace, check whether we have been orphaned. */
/* verify that we haven't been orphaned in the meantime */
ppid = (pid_t)syscall(SYS_getppid);
if (ppid && ppid != parent) {
ret = raise(SIGKILL);
if (ret < 0)
return -1;
if (ppid == 0) { /* parent outside our pidns */
if (parent_status_fd < 0)
return 0;
if (process_dead(parent_status_fd) == 1)
return raise(SIGKILL);
} else if (ppid != parent) {
return raise(SIGKILL);
}
if (ret < 0)
......
......@@ -255,7 +255,7 @@ static inline uint64_t lxc_getpagesize(void)
extern uint64_t lxc_find_next_power2(uint64_t n);
/* Set a signal the child process will receive after the parent has died. */
extern int lxc_set_death_signal(int signal, pid_t parent);
extern int lxc_set_death_signal(int signal, pid_t parent, int parent_status_fd);
extern int fd_cloexec(int fd, bool cloexec);
extern int recursive_destroy(char *dirname);
extern int lxc_setup_keyring(void);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment