Unverified Commit 90bb9fb2 by Stéphane Graber Committed by GitHub

Merge pull request #3848 from brauner/2021-05-21.fixes

start: rework fd synchronization
parents ddd51bd1 6bc4165d
......@@ -3077,8 +3077,11 @@ static bool unified_hierarchy_delegated(int dfd_base, char ***ret_files)
/*
 * Decide whether the legacy cgroup hierarchy referred to by @dfd_base can be
 * used.
 *
 * Returns true unless one of the write-access checks below fails with an
 * errno other than ENOENT.
 *
 * NOTE(review): this text looks like a diff whose +/- markers were stripped;
 * the "cgroup.procs" check may be the superseded form of the "." check that
 * follows it - confirm against the real file.
 */
static bool legacy_hierarchy_delegated(int dfd_base)
{
/*
 * Write-access check on "cgroup.procs" relative to dfd_base. A failure is
 * tolerated only when errno is ENOENT.
 * sysinfo_ret() presumably logs the message and yields its first argument -
 * confirm against its definition.
 */
if (faccessat(dfd_base, "cgroup.procs", W_OK, 0) && errno != ENOENT)
return sysinfo_ret(false, "The cgroup.procs file is not writable, skipping legacy hierarchy");
int ret;
/* Same kind of check, this time on the directory dfd_base itself. */
ret = faccessat(dfd_base, ".", W_OK, 0);
if (ret < 0 && errno != ENOENT)
return sysinfo_ret(false, "Legacy hierarchy not writable, skipping");
return true;
}
......
......@@ -1184,7 +1184,7 @@ on_error:
return -1;
}
int lxc_send_ttys_to_parent(struct lxc_handler *handler)
static int lxc_send_ttys_to_parent(struct lxc_handler *handler)
{
int ret = -1;
......@@ -1635,7 +1635,7 @@ static const struct id_map *find_mapped_nsid_entry(const struct lxc_conf *conf,
return retmap;
}
int lxc_setup_devpts_parent(struct lxc_handler *handler)
static int lxc_setup_devpts_parent(struct lxc_handler *handler)
{
int ret;
......@@ -1750,7 +1750,7 @@ static int lxc_setup_devpts_child(struct lxc_handler *handler)
return 0;
}
int lxc_send_devpts_to_parent(struct lxc_handler *handler)
static int lxc_send_devpts_to_parent(struct lxc_handler *handler)
{
int ret;
......@@ -2927,7 +2927,6 @@ out:
ret = lxc_abstract_unix_send_credential(handler->data_sock[0], NULL, 0);
if (ret < 0)
return syserror("Failed to inform child that we are done setting up mounts");
TRACE("AAAA");
return fret;
}
......@@ -4013,6 +4012,97 @@ int lxc_idmapped_mounts_parent(struct lxc_handler *handler)
}
}
/*
 * Receive the tty file descriptor pairs from the child.
 *
 * Reads conf->ttys.max pairs of file descriptors from handler->data_sock[1]
 * and stores them in a newly allocated conf->ttys.tty array.
 *
 * Returns 0 when no ttys are configured, -1 when the array cannot be
 * allocated, and otherwise the result of the last
 * lxc_abstract_unix_recv_two_fds() call (negative on failure).
 *
 * On a receive failure the array stays attached to conf->ttys, exactly as
 * before; releasing it is left to the caller's cleanup.
 */
static int lxc_recv_ttys_from_child(struct lxc_handler *handler)
{
	size_t i;
	struct lxc_terminal_info *tty;
	int ret = -1;
	int sock = handler->data_sock[1];
	struct lxc_conf *conf = handler->conf;
	struct lxc_tty_info *ttys = &conf->ttys;

	if (!ttys->max)
		return 0;

	/* calloc() fails cleanly if max * element size would overflow. */
	ttys->tty = calloc(ttys->max, sizeof(*ttys->tty));
	if (!ttys->tty)
		return -1;

	/*
	 * Mark every slot as "no descriptor" up front. If receiving fails
	 * part-way through, the remaining entries then hold -EBADF rather
	 * than indeterminate values that later code could mistake for open
	 * file descriptors.
	 */
	for (i = 0; i < ttys->max; i++) {
		tty = &ttys->tty[i];
		tty->busy = -1;
		tty->ptx = -EBADF;
		tty->pty = -EBADF;
	}

	for (i = 0; i < ttys->max; i++) {
		int ttyx = -EBADF, ttyy = -EBADF;

		ret = lxc_abstract_unix_recv_two_fds(sock, &ttyx, &ttyy);
		if (ret < 0)
			break;

		tty = &ttys->tty[i];
		tty->busy = -1;
		tty->ptx = ttyx;
		tty->pty = ttyy;
		TRACE("Received pty with ptx fd %d and pty fd %d from child", tty->ptx, tty->pty);
	}

	if (ret < 0)
		SYSERROR("Failed to receive %zu ttys from child", ttys->max);
	else
		TRACE("Received %zu ttys from child", ttys->max);

	return ret;
}
int lxc_sync_fds_parent(struct lxc_handler *handler)
{
int ret;
ret = lxc_seccomp_recv_notifier_fd(&handler->conf->seccomp, handler->data_sock[1]);
if (ret < 0)
return syserror_ret(ret, "Failed to receive seccomp notify fd from child");
ret = lxc_setup_devpts_parent(handler);
if (ret < 0)
return syserror_ret(ret, "Failed to receive devpts fd from child");
/* Read tty fds allocated by child. */
ret = lxc_recv_ttys_from_child(handler);
if (ret < 0)
return syserror_ret(ret, "Failed to receive tty info from child process");
if (handler->ns_clone_flags & CLONE_NEWNET) {
ret = lxc_network_recv_name_and_ifindex_from_child(handler);
if (ret < 0)
return syserror_ret(ret, "Failed to receive names and ifindices for network devices from child");
}
TRACE("Finished syncing file descriptors with child");
return 0;
}
/*
 * Child side of the file descriptor exchange with the parent.
 *
 * Sends, in this order: the seccomp notify fd (over handler->data_sock[0]),
 * the devpts fd, the tty fds and, when CLONE_NEWNET is set in
 * handler->ns_clone_flags, the network device names and ifindices. The order
 * matches what lxc_sync_fds_parent() receives.
 *
 * Returns 0 once everything has been sent. If a step fails, returns whatever
 * syserror_ret() yields for that step's negative return code.
 */
int lxc_sync_fds_child(struct lxc_handler *handler)
{
	int ret;

	ret = lxc_seccomp_send_notifier_fd(&handler->conf->seccomp, handler->data_sock[0]);
	if (ret < 0)
		return syserror_ret(ret, "Failed to send seccomp notify fd to parent");

	ret = lxc_send_devpts_to_parent(handler);
	if (ret < 0)
		/* This step sends the devpts fd; the old message wrongly said "seccomp devpts". */
		return syserror_ret(ret, "Failed to send devpts fd to parent");

	ret = lxc_send_ttys_to_parent(handler);
	if (ret < 0)
		return syserror_ret(ret, "Failed to send tty file descriptors to parent");

	/* Network details are only exchanged for a new network namespace. */
	if (handler->ns_clone_flags & CLONE_NEWNET) {
		ret = lxc_network_send_name_and_ifindex_to_parent(handler);
		if (ret < 0)
			return syserror_ret(ret, "Failed to send network device names and ifindices to parent");
	}

	TRACE("Finished syncing file descriptors with parent");
	return 0;
}
int lxc_setup(struct lxc_handler *handler)
{
int ret;
......@@ -4040,6 +4130,10 @@ int lxc_setup(struct lxc_handler *handler)
}
if (handler->ns_clone_flags & CLONE_NEWNET) {
ret = lxc_network_recv_from_parent(handler);
if (ret < 0)
return log_error(-1, "Failed to receive veth names from parent");
ret = lxc_setup_network_in_child_namespaces(lxc_conf,
&lxc_conf->network);
if (ret < 0)
......
......@@ -197,12 +197,12 @@ typedef enum lxc_mount_options_t {
__hidden extern const char *lxc_mount_options_info[LXC_MOUNT_MAX];
struct lxc_mount_options {
int create_dir : 1;
int create_file : 1;
int optional : 1;
int relative : 1;
int recursive : 1;
int bind : 1;
unsigned int create_dir : 1;
unsigned int create_file : 1;
unsigned int optional : 1;
unsigned int relative : 1;
unsigned int recursive : 1;
unsigned int bind : 1;
char userns_path[PATH_MAX];
unsigned long mnt_flags;
unsigned long prop_flags;
......@@ -517,8 +517,6 @@ __hidden extern int lxc_idmapped_mounts_parent(struct lxc_handler *handler);
__hidden extern int lxc_map_ids(struct lxc_list *idmap, pid_t pid);
__hidden extern int lxc_create_tty(const char *name, struct lxc_conf *conf);
__hidden extern void lxc_delete_tty(struct lxc_tty_info *ttys);
__hidden extern int lxc_send_ttys_to_parent(struct lxc_handler *handler);
__hidden extern int lxc_send_devpts_to_parent(struct lxc_handler *handler);
__hidden extern int lxc_clear_config_caps(struct lxc_conf *c);
__hidden extern int lxc_clear_config_keepcaps(struct lxc_conf *c);
__hidden extern int lxc_clear_cgroups(struct lxc_conf *c, const char *key, int version);
......@@ -582,7 +580,8 @@ static inline int chown_mapped_root(const char *path, const struct lxc_conf *con
return userns_exec_mapped_root(path, -EBADF, conf);
}
__hidden int lxc_setup_devpts_parent(struct lxc_handler *handler);
__hidden extern int lxc_sync_fds_parent(struct lxc_handler *handler);
__hidden extern int lxc_sync_fds_child(struct lxc_handler *handler);
static inline const char *get_rootfs_mnt(const struct lxc_rootfs *rootfs)
{
......
......@@ -1085,20 +1085,6 @@ static int do_start(void *data)
INFO("Unshared CLONE_NEWNET");
}
/* Tell the parent task it can begin to configure the container and wait
* for it to finish.
*/
if (!lxc_sync_barrier_parent(handler, START_SYNC_CONFIGURE))
goto out_error;
if (handler->ns_clone_flags & CLONE_NEWNET) {
ret = lxc_network_recv_from_parent(handler);
if (ret < 0) {
ERROR("Failed to receive veth names from parent");
goto out_warn_father;
}
}
/* If we are in a new user namespace, become root there to have
* privilege over our namespace.
*/
......@@ -1166,8 +1152,11 @@ static int do_start(void *data)
}
}
/* Ask father to setup cgroups and wait for him to finish. */
if (!lxc_sync_barrier_parent(handler, START_SYNC_CGROUP))
/*
* Tell the parent task it can begin to configure the container and wait
* for it to finish.
*/
if (!lxc_sync_wake_parent(handler, START_SYNC_CONFIGURE))
goto out_error;
/* Unshare cgroup namespace after we have setup our cgroups. If we do it
......@@ -1259,6 +1248,9 @@ static int do_start(void *data)
}
}
if (!lxc_sync_wait_parent(handler, START_SYNC_POST_CONFIGURE))
goto out_warn_father;
/* Setup the container, ip, names, utsname, ... */
ret = lxc_setup(handler);
if (ret < 0) {
......@@ -1330,32 +1322,12 @@ static int do_start(void *data)
if (!lxc_sync_barrier_parent(handler, START_SYNC_CGROUP_LIMITS))
goto out_warn_father;
ret = lxc_seccomp_send_notifier_fd(&handler->conf->seccomp, data_sock0);
ret = lxc_sync_fds_child(handler);
if (ret < 0) {
SYSERROR("Failed to send seccomp notify fd to parent");
SYSERROR("Failed to sync file descriptors with parent");
goto out_warn_father;
}
ret = lxc_send_devpts_to_parent(handler);
if (ret < 0) {
SYSERROR("Failed to send seccomp devpts fd to parent");
goto out_warn_father;
}
ret = lxc_send_ttys_to_parent(handler);
if (ret < 0) {
SYSERROR("Failed to send tty file descriptors to parent");
goto out_warn_father;
}
if (handler->ns_clone_flags & CLONE_NEWNET) {
ret = lxc_network_send_name_and_ifindex_to_parent(handler);
if (ret < 0) {
SYSERROR("Failed to send network device names and ifindices to parent");
goto out_warn_father;
}
}
if (!lxc_sync_wait_parent(handler, START_SYNC_READY_START))
goto out_warn_father;
......@@ -1464,44 +1436,6 @@ out_error:
return -1;
}
/*
 * Receive the tty file descriptor pairs from the child.
 *
 * Reads conf->ttys.max pairs of file descriptors from handler->data_sock[1]
 * and stores them in a newly allocated conf->ttys.tty array.
 *
 * Returns 0 when no ttys are configured, -1 when the array cannot be
 * allocated, and otherwise the result of the last
 * lxc_abstract_unix_recv_two_fds() call (negative on failure).
 *
 * On a receive failure the array stays attached to conf->ttys, exactly as
 * before; releasing it is left to the caller's cleanup.
 */
static int lxc_recv_ttys_from_child(struct lxc_handler *handler)
{
	size_t i;
	struct lxc_terminal_info *tty;
	int ret = -1;
	int sock = handler->data_sock[1];
	struct lxc_conf *conf = handler->conf;
	struct lxc_tty_info *ttys = &conf->ttys;

	if (!ttys->max)
		return 0;

	/* calloc() fails cleanly if max * element size would overflow. */
	ttys->tty = calloc(ttys->max, sizeof(*ttys->tty));
	if (!ttys->tty)
		return -1;

	/*
	 * Mark every slot as "no descriptor" up front. If receiving fails
	 * part-way through, the remaining entries then hold -EBADF rather
	 * than indeterminate values that later code could mistake for open
	 * file descriptors.
	 */
	for (i = 0; i < ttys->max; i++) {
		tty = &ttys->tty[i];
		tty->busy = -1;
		tty->ptx = -EBADF;
		tty->pty = -EBADF;
	}

	for (i = 0; i < ttys->max; i++) {
		int ttyx = -EBADF, ttyy = -EBADF;

		ret = lxc_abstract_unix_recv_two_fds(sock, &ttyx, &ttyy);
		if (ret < 0)
			break;

		tty = &ttys->tty[i];
		tty->busy = -1;
		tty->ptx = ttyx;
		tty->pty = ttyy;
		TRACE("Received pty with ptx fd %d and pty fd %d from child", tty->ptx, tty->pty);
	}

	if (ret < 0)
		SYSERROR("Failed to receive %zu ttys from child", ttys->max);
	else
		TRACE("Received %zu ttys from child", ttys->max);

	return ret;
}
int resolve_clone_flags(struct lxc_handler *handler)
{
int i;
......@@ -1809,12 +1743,6 @@ static int lxc_spawn(struct lxc_handler *handler)
}
}
if (!lxc_sync_wake_child(handler, START_SYNC_STARTUP))
goto out_delete_net;
if (!lxc_sync_wait_child(handler, START_SYNC_CONFIGURE))
goto out_delete_net;
if (!cgroup_ops->setup_limits_legacy(cgroup_ops, handler->conf, false)) {
ERROR("Failed to setup cgroup limits for container \"%s\"", name);
goto out_delete_net;
......@@ -1838,6 +1766,9 @@ static int lxc_spawn(struct lxc_handler *handler)
if (!cgroup_ops->chown(cgroup_ops, handler->conf))
goto out_delete_net;
if (!lxc_sync_barrier_child(handler, START_SYNC_STARTUP))
goto out_delete_net;
/* If not done yet, we're now ready to preserve the network namespace */
if (handler->nsfd[LXC_NS_NET] < 0) {
ret = lxc_try_preserve_namespace(handler, LXC_NS_NET, "net");
......@@ -1861,41 +1792,41 @@ static int lxc_spawn(struct lxc_handler *handler)
ERROR("Failed to create the network");
goto out_delete_net;
}
}
ret = lxc_network_send_to_child(handler);
if (!lxc_list_empty(&conf->procs)) {
ret = setup_proc_filesystem(&conf->procs, handler->pid);
if (ret < 0)
goto out_delete_net;
}
if (!lxc_list_empty(&conf->limits)) {
ret = setup_resource_limits(&conf->limits, handler->pid);
if (ret < 0) {
ERROR("Failed to send veth names to child");
ERROR("Failed to setup resource limits");
goto out_delete_net;
}
}
/* Tell the child to continue its initialization. */
if (!lxc_sync_wake_child(handler, START_SYNC_POST_CONFIGURE))
goto out_delete_net;
ret = lxc_rootfs_prepare_parent(handler);
if (ret) {
ERROR("Failed to prepare rootfs");
goto out_delete_net;
}
if (!lxc_list_empty(&conf->procs)) {
ret = setup_proc_filesystem(&conf->procs, handler->pid);
if (ret < 0)
goto out_delete_net;
}
/* Tell the child to continue its initialization. We'll get
* START_SYNC_CGROUP when it is ready for us to setup cgroups.
*/
if (!lxc_sync_barrier_child(handler, START_SYNC_POST_CONFIGURE))
goto out_delete_net;
if (!lxc_list_empty(&conf->limits)) {
ret = setup_resource_limits(&conf->limits, handler->pid);
if (handler->ns_clone_flags & CLONE_NEWNET) {
ret = lxc_network_send_to_child(handler);
if (ret < 0) {
ERROR("Failed to setup resource limits");
SYSERROR("Failed to send veth names to child");
goto out_delete_net;
}
}
if (!lxc_sync_barrier_child(handler, START_SYNC_CGROUP_UNSHARE))
if (!lxc_sync_wait_child(handler, START_SYNC_IDMAPPED_MOUNTS))
goto out_delete_net;
ret = lxc_idmapped_mounts_parent(handler);
......@@ -1924,6 +1855,19 @@ static int lxc_spawn(struct lxc_handler *handler)
}
TRACE("Set up cgroup2 device controller limits");
cgroup_ops->finalize(cgroup_ops);
TRACE("Finished setting up cgroups");
/* Run any host-side start hooks */
ret = run_lxc_hooks(name, "start-host", conf, NULL);
if (ret < 0) {
ERROR("Failed to run lxc.hook.start-host");
goto out_delete_net;
}
if (!lxc_sync_wake_child(handler, START_SYNC_FDS))
goto out_delete_net;
if (handler->ns_unshare_flags & CLONE_NEWCGROUP) {
/* Now we're ready to preserve the cgroup namespace */
ret = lxc_try_preserve_namespace(handler, LXC_NS_CGROUP, "cgroup");
......@@ -1935,9 +1879,6 @@ static int lxc_spawn(struct lxc_handler *handler)
}
}
cgroup_ops->finalize(cgroup_ops);
TRACE("Finished setting up cgroups");
if (handler->ns_unshare_flags & CLONE_NEWTIME) {
/* Now we're ready to preserve the time namespace */
ret = lxc_try_preserve_namespace(handler, LXC_NS_TIME, "time");
......@@ -1949,43 +1890,12 @@ static int lxc_spawn(struct lxc_handler *handler)
}
}
/* Run any host-side start hooks */
ret = run_lxc_hooks(name, "start-host", conf, NULL);
if (ret < 0) {
ERROR("Failed to run lxc.hook.start-host");
goto out_delete_net;
}
if (!lxc_sync_wake_child(handler, START_SYNC_FDS))
goto out_delete_net;
ret = lxc_seccomp_recv_notifier_fd(&handler->conf->seccomp, data_sock1);
if (ret < 0) {
SYSERROR("Failed to receive seccomp notify fd from child");
goto out_delete_net;
}
ret = lxc_setup_devpts_parent(handler);
ret = lxc_sync_fds_parent(handler);
if (ret < 0) {
SYSERROR("Failed to receive devpts fd from child");
SYSERROR("Failed to sync file descriptors with child");
goto out_delete_net;
}
/* Read tty fds allocated by child. */
ret = lxc_recv_ttys_from_child(handler);
if (ret < 0) {
ERROR("Failed to receive tty info from child process");
goto out_delete_net;
}
if (handler->ns_clone_flags & CLONE_NEWNET) {
ret = lxc_network_recv_name_and_ifindex_from_child(handler);
if (ret < 0) {
ERROR("Failed to receive names and ifindices for network devices from child");
goto out_delete_net;
}
}
/*
* Tell the child to complete its initialization and wait for it to
* exec or return an error. (The child will never return
......
......@@ -68,10 +68,6 @@ static inline const char *start_sync_to_string(int state)
return "configure";
case START_SYNC_POST_CONFIGURE:
return "post-configure";
case START_SYNC_CGROUP:
return "cgroup";
case START_SYNC_CGROUP_UNSHARE:
return "cgroup-unshare";
case START_SYNC_CGROUP_LIMITS:
return "cgroup-limits";
case START_SYNC_IDMAPPED_MOUNTS:
......
......@@ -17,14 +17,12 @@ enum /* start */ {
START_SYNC_STARTUP = 0,
START_SYNC_CONFIGURE = 1,
START_SYNC_POST_CONFIGURE = 2,
START_SYNC_CGROUP = 3,
START_SYNC_CGROUP_UNSHARE = 4,
START_SYNC_IDMAPPED_MOUNTS = 5,
START_SYNC_CGROUP_LIMITS = 6,
START_SYNC_FDS = 7,
START_SYNC_READY_START = 8,
START_SYNC_RESTART = 9,
START_SYNC_POST_RESTART = 10,
START_SYNC_IDMAPPED_MOUNTS = 3,
START_SYNC_CGROUP_LIMITS = 4,
START_SYNC_FDS = 5,
START_SYNC_READY_START = 6,
START_SYNC_RESTART = 7,
START_SYNC_POST_RESTART = 8,
};
enum /* attach */ {
......
......@@ -70,6 +70,11 @@ UNPRIV_LOG=$(mktemp --dry-run)
cleanup() {
cd /
if [ $DONE -eq 0 ]; then
cat "${UNPRIV_LOG}"
fi
rm -f "${UNPRIV_LOG}" || true
run_cmd lxc-stop -n c2 -k -l trace -o "${UNPRIV_LOG}" || true
run_cmd lxc-stop -n c1 -k -l trace -o "${UNPRIV_LOG}" || true
pkill -u $(id -u $TUSER) -9 || true
......@@ -82,12 +87,10 @@ cleanup() {
deluser $TUSER
if [ $DONE -eq 0 ]; then
cat "${UNPRIV_LOG}"
rm -f "${UNPRIV_LOG}" || true
echo "FAIL"
exit 1
fi
rm -f "${UNPRIV_LOG}" || true
echo "PASS"
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment