cgroups: handle older kernels (e.g. v4.9)

On older kernels the restrictions on moving processes between cgroups are different from those on newer kernels. Specifically, we're running into the following check: if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && !uid_eq(cred->euid, tcred->uid) && !uid_eq(cred->euid, tcred->suid)) ret = -EACCES; which dictates that in order to move a process into a cgroup one either needs to be global root (no restrictions apply) or the effective uid of the process trying to move the process and the {saved}uid of the process that is supposed to be moved need to be identical. The new attaching logic we introduced didn't fulfill this criterion because this check is not present on newer kernels. Closes https://github.com/lxc/lxd/issues/7104. Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
parent 6821739c
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include <sys/types.h> #include <sys/types.h>
#include <unistd.h> #include <unistd.h>
#include "af_unix.h"
#include "caps.h" #include "caps.h"
#include "cgroup.h" #include "cgroup.h"
#include "cgroup2_devices.h" #include "cgroup2_devices.h"
...@@ -2123,20 +2124,80 @@ static int cgroup_attach_leaf(const struct lxc_conf *conf, int unified_fd, pid_t ...@@ -2123,20 +2124,80 @@ static int cgroup_attach_leaf(const struct lxc_conf *conf, int unified_fd, pid_t
return log_error_errno(-1, errno, "Failed to attach to unified cgroup"); return log_error_errno(-1, errno, "Failed to attach to unified cgroup");
} }
/*
 * Create the ".lxc" leaf cgroup below @unified_fd, open its "cgroup.procs"
 * file for writing and send that file descriptor over the socket in @sk_fd.
 *
 * @conf:       container configuration (not used by this function)
 * @unified_fd: directory fd that ".lxc" is created and opened relative to
 * @sk_fd:      in/out: socket to send the fd over. Ownership of the
 *              descriptor moves into this function and *sk_fd is reset to
 *              -EBADF so the caller cannot close it a second time.
 *
 * Returns 0 on success and a negative value on failure.
 */
static int cgroup_attach_create_leaf(const struct lxc_conf *conf,
				     int unified_fd, int *sk_fd)
{
	/* NOTE(review): relies on __do_close closing both fds on scope exit. */
	__do_close int sk = *sk_fd, target_fd = -EBADF;
	ssize_t ret;

	/*
	 * The socket is now owned by the local variable; disarm the caller's
	 * copy like the wrappers do for the socket end they close themselves.
	 */
	*sk_fd = -EBADF;

	/* Create leaf cgroup. An already existing leaf is fine. */
	ret = mkdirat(unified_fd, ".lxc", 0755);
	if (ret < 0 && errno != EEXIST)
		return log_error_errno(-1, errno, "Failed to create leaf cgroup \".lxc\"");

	target_fd = openat(unified_fd, ".lxc/cgroup.procs", O_WRONLY | O_CLOEXEC | O_NOFOLLOW);
	if (target_fd < 0)
		return log_error_errno(-errno, errno, "Failed to open \".lxc/cgroup.procs\"");

	ret = lxc_abstract_unix_send_fds(sk, &target_fd, 1, NULL, 0);
	if (ret <= 0) {
		/*
		 * A return of 0 does not set errno, so "-errno" could be 0 and
		 * be mistaken for success. Always report a real error code.
		 */
		int saved_errno = (ret < 0 && errno) ? errno : EIO;

		return log_error_errno(-saved_errno, saved_errno, "Failed to send \".lxc/cgroup.procs\" fd %d", target_fd);
	}

	return log_debug(0, "Sent target cgroup fd %d", target_fd);
}
/*
 * Receive a file descriptor over the socket in @sk_fd and write @pid to it
 * in order to move that process into the target cgroup.
 *
 * @conf:  container configuration (not used by this function)
 * @sk_fd: in/out: socket to receive the fd from. Ownership of the descriptor
 *         moves into this function and *sk_fd is reset to -EBADF so the
 *         caller cannot close it a second time.
 * @pid:   process id that is written to the received fd
 *
 * Returns 0 on success and -1 on failure.
 */
static int cgroup_attach_move_into_leaf(const struct lxc_conf *conf,
					int *sk_fd, pid_t pid)
{
	/* NOTE(review): relies on __do_close closing both fds on scope exit. */
	__do_close int sk = *sk_fd, target_fd = -EBADF;
	char pidstr[INTTYPE_TO_STRLEN(int64_t) + 1];
	int pidstr_len;
	ssize_t ret;

	/*
	 * The socket is now owned by the local variable; disarm the caller's
	 * copy like the wrappers do for the socket end they close themselves.
	 */
	*sk_fd = -EBADF;

	ret = lxc_abstract_unix_recv_fds(sk, &target_fd, 1, NULL, 0);
	if (ret <= 0)
		return log_error_errno(-1, errno, "Failed to receive target cgroup fd");

	/* Bounded formatting; never write past the end of pidstr. */
	pidstr_len = snprintf(pidstr, sizeof(pidstr), INT64_FMT, (int64_t)pid);
	if (pidstr_len < 0 || (size_t)pidstr_len >= sizeof(pidstr))
		return log_error_errno(-1, EINVAL, "Failed to format pid for target cgroup");

	ret = lxc_write_nointr(target_fd, pidstr, (size_t)pidstr_len);
	if (ret < 0) {
		/*
		 * NOTE(review): EBUSY is tolerated exactly as before; errno is
		 * only consulted when the write itself failed.
		 */
		if (errno != EBUSY)
			return log_error_errno(-1, errno, "Failed to move process into target cgroup");
	} else if ((size_t)ret != (size_t)pidstr_len) {
		/* A short write leaves errno stale, so report a fixed error. */
		return log_error_errno(-1, EIO, "Failed to move process into target cgroup");
	}

	return log_debug(0, "Moved process into target cgroup");
}
struct userns_exec_unified_attach_data { struct userns_exec_unified_attach_data {
const struct lxc_conf *conf; const struct lxc_conf *conf;
int unified_fd; int unified_fd;
int sk_pair[2];
pid_t pid; pid_t pid;
}; };
static int cgroup_unified_attach_wrapper(void *data) static int cgroup_unified_attach_child_wrapper(void *data)
{
struct userns_exec_unified_attach_data *args = data;
if (!args->conf || args->unified_fd < 0 || args->pid <= 0 ||
args->sk_pair[0] < 0 || args->sk_pair[1] < 0)
return ret_errno(EINVAL);
close_prot_errno_disarm(args->sk_pair[0]);
return cgroup_attach_create_leaf(args->conf, args->unified_fd,
&args->sk_pair[1]);
}
static int cgroup_unified_attach_parent_wrapper(void *data)
{ {
struct userns_exec_unified_attach_data *args = data; struct userns_exec_unified_attach_data *args = data;
if (!args->conf || args->unified_fd < 0 || args->pid <= 0) if (!args->conf || args->unified_fd < 0 || args->pid <= 0 ||
args->sk_pair[0] < 0 || args->sk_pair[1] < 0)
return ret_errno(EINVAL); return ret_errno(EINVAL);
return cgroup_attach_leaf(args->conf, args->unified_fd, args->pid); close_prot_errno_disarm(args->sk_pair[1]);
return cgroup_attach_move_into_leaf(args->conf, &args->sk_pair[0],
args->pid);
} }
int cgroup_attach(const struct lxc_conf *conf, const char *name, int cgroup_attach(const struct lxc_conf *conf, const char *name,
...@@ -2159,7 +2220,15 @@ int cgroup_attach(const struct lxc_conf *conf, const char *name, ...@@ -2159,7 +2220,15 @@ int cgroup_attach(const struct lxc_conf *conf, const char *name,
.pid = pid, .pid = pid,
}; };
ret = userns_exec_minimal(conf, cgroup_unified_attach_wrapper, &args); ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, args.sk_pair);
if (ret < 0)
return -errno;
ret = userns_exec_minimal(conf,
cgroup_unified_attach_parent_wrapper,
&args,
cgroup_unified_attach_child_wrapper,
&args);
} else { } else {
ret = cgroup_attach_leaf(conf, unified_fd, pid); ret = cgroup_attach_leaf(conf, unified_fd, pid);
} }
...@@ -2213,7 +2282,15 @@ static int __cg_unified_attach(const struct hierarchy *h, ...@@ -2213,7 +2282,15 @@ static int __cg_unified_attach(const struct hierarchy *h,
.pid = pid, .pid = pid,
}; };
ret = userns_exec_minimal(conf, cgroup_unified_attach_wrapper, &args); ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, args.sk_pair);
if (ret < 0)
return -errno;
ret = userns_exec_minimal(conf,
cgroup_unified_attach_parent_wrapper,
&args,
cgroup_unified_attach_child_wrapper,
&args);
} else { } else {
ret = cgroup_attach_leaf(conf, unified_fd, pid); ret = cgroup_attach_leaf(conf, unified_fd, pid);
} }
......
...@@ -4126,7 +4126,9 @@ on_error: ...@@ -4126,7 +4126,9 @@ on_error:
return ret; return ret;
} }
int userns_exec_minimal(const struct lxc_conf *conf, int (*fn)(void *), void *data) int userns_exec_minimal(const struct lxc_conf *conf,
int (*fn_parent)(void *), void *fn_parent_data,
int (*fn_child)(void *), void *fn_child_data)
{ {
call_cleaner(lxc_free_idmap) struct lxc_list *idmap = NULL; call_cleaner(lxc_free_idmap) struct lxc_list *idmap = NULL;
uid_t resuid = LXC_INVALID_UID; uid_t resuid = LXC_INVALID_UID;
...@@ -4136,7 +4138,7 @@ int userns_exec_minimal(const struct lxc_conf *conf, int (*fn)(void *), void *da ...@@ -4136,7 +4138,7 @@ int userns_exec_minimal(const struct lxc_conf *conf, int (*fn)(void *), void *da
pid_t pid; pid_t pid;
int sock_fds[2]; int sock_fds[2];
if (!conf || !fn || !data) if (!conf || !fn_child)
return ret_errno(EINVAL); return ret_errno(EINVAL);
idmap = get_minimal_idmap(conf, &resuid, &resgid); idmap = get_minimal_idmap(conf, &resuid, &resgid);
...@@ -4189,7 +4191,7 @@ int userns_exec_minimal(const struct lxc_conf *conf, int (*fn)(void *), void *da ...@@ -4189,7 +4191,7 @@ int userns_exec_minimal(const struct lxc_conf *conf, int (*fn)(void *), void *da
_exit(EXIT_FAILURE); _exit(EXIT_FAILURE);
} }
ret = fn(data); ret = fn_child(fn_child_data);
if (ret) { if (ret) {
SYSERROR("Running function in new user namespace failed"); SYSERROR("Running function in new user namespace failed");
_exit(EXIT_FAILURE); _exit(EXIT_FAILURE);
...@@ -4232,6 +4234,11 @@ int userns_exec_minimal(const struct lxc_conf *conf, int (*fn)(void *), void *da ...@@ -4232,6 +4234,11 @@ int userns_exec_minimal(const struct lxc_conf *conf, int (*fn)(void *), void *da
goto on_error; goto on_error;
} }
if (fn_parent && fn_parent(fn_parent_data)) {
SYSERROR("Running parent function failed");
_exit(EXIT_FAILURE);
}
on_error: on_error:
close_prot_errno_disarm(sock_fds[0]); close_prot_errno_disarm(sock_fds[0]);
close_prot_errno_disarm(sock_fds[1]); close_prot_errno_disarm(sock_fds[1]);
......
...@@ -467,6 +467,8 @@ extern int setup_proc_filesystem(struct lxc_list *procs, pid_t pid); ...@@ -467,6 +467,8 @@ extern int setup_proc_filesystem(struct lxc_list *procs, pid_t pid);
extern int lxc_clear_procs(struct lxc_conf *c, const char *key); extern int lxc_clear_procs(struct lxc_conf *c, const char *key);
extern int lxc_clear_apparmor_raw(struct lxc_conf *c); extern int lxc_clear_apparmor_raw(struct lxc_conf *c);
extern int lxc_clear_namespace(struct lxc_conf *c); extern int lxc_clear_namespace(struct lxc_conf *c);
extern int userns_exec_minimal(const struct lxc_conf *conf, int (*fn)(void *), void *data); extern int userns_exec_minimal(const struct lxc_conf *conf,
int (*fn_parent)(void *), void *fn_parent_data,
int (*fn_child)(void *), void *fn_child_data);
#endif /* __LXC_CONF_H */ #endif /* __LXC_CONF_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment