Unverified Commit b6f48637 by Stéphane Graber Committed by GitHub

Merge pull request #3689 from brauner/2021-02-21/fixes

cgroups: introduce fd-only cgroup attach via LXC_CMD_GET_CGROUP_CTX
parents 79399658 3a6678c7
......@@ -112,22 +112,18 @@ int lxc_abstract_unix_connect(const char *path)
return move_fd(fd);
}
int lxc_abstract_unix_send_fds_iov(int fd, int *sendfds, int num_sendfds,
int lxc_abstract_unix_send_fds_iov(int fd, const int *sendfds, int num_sendfds,
struct iovec *iov, size_t iovlen)
{
__do_free char *cmsgbuf = NULL;
int ret;
struct msghdr msg;
struct msghdr msg = {};
struct cmsghdr *cmsg = NULL;
size_t cmsgbufsize = CMSG_SPACE(num_sendfds * sizeof(int));
memset(&msg, 0, sizeof(msg));
cmsgbuf = malloc(cmsgbufsize);
if (!cmsgbuf) {
errno = ENOMEM;
return -1;
}
if (!cmsgbuf)
return ret_errno(-ENOMEM);
msg.msg_control = cmsgbuf;
msg.msg_controllen = cmsgbufsize;
......@@ -151,13 +147,13 @@ int lxc_abstract_unix_send_fds_iov(int fd, int *sendfds, int num_sendfds,
return ret;
}
int lxc_abstract_unix_send_fds(int fd, int *sendfds, int num_sendfds,
int lxc_abstract_unix_send_fds(int fd, const int *sendfds, int num_sendfds,
void *data, size_t size)
{
char buf[1] = {0};
char buf[1] = {};
struct iovec iov = {
.iov_base = data ? data : buf,
.iov_len = data ? size : sizeof(buf),
.iov_base = data ? data : buf,
.iov_len = data ? size : sizeof(buf),
};
return lxc_abstract_unix_send_fds_iov(fd, sendfds, num_sendfds, &iov, 1);
}
......@@ -168,60 +164,174 @@ int lxc_unix_send_fds(int fd, int *sendfds, int num_sendfds, void *data,
return lxc_abstract_unix_send_fds(fd, sendfds, num_sendfds, data, size);
}
static int lxc_abstract_unix_recv_fds_iov(int fd, int *recvfds, int num_recvfds,
struct iovec *iov, size_t iovlen)
static ssize_t lxc_abstract_unix_recv_fds_iov(int fd,
struct unix_fds *ret_fds,
struct iovec *ret_iov,
size_t size_ret_iov)
{
__do_free char *cmsgbuf = NULL;
int ret;
struct msghdr msg;
ssize_t ret;
struct msghdr msg = {};
struct cmsghdr *cmsg = NULL;
size_t cmsgbufsize = CMSG_SPACE(sizeof(struct ucred)) +
CMSG_SPACE(num_recvfds * sizeof(int));
CMSG_SPACE(ret_fds->fd_count_max * sizeof(int));
memset(&msg, 0, sizeof(msg));
cmsgbuf = malloc(cmsgbufsize);
cmsgbuf = zalloc(cmsgbufsize);
if (!cmsgbuf)
return ret_errno(ENOMEM);
msg.msg_control = cmsgbuf;
msg.msg_controllen = cmsgbufsize;
msg.msg_control = cmsgbuf;
msg.msg_controllen = cmsgbufsize;
msg.msg_iov = iov;
msg.msg_iovlen = iovlen;
msg.msg_iov = ret_iov;
msg.msg_iovlen = size_ret_iov;
do {
ret = recvmsg(fd, &msg, MSG_CMSG_CLOEXEC);
} while (ret < 0 && errno == EINTR);
if (ret < 0 || ret == 0)
return ret;
again:
ret = recvmsg(fd, &msg, MSG_CMSG_CLOEXEC);
if (ret < 0) {
if (errno == EINTR)
goto again;
/*
* If SO_PASSCRED is set we will always get a ucred message.
*/
for (struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
if (cmsg->cmsg_type != SCM_RIGHTS)
continue;
memset(recvfds, -1, num_recvfds * sizeof(int));
if (cmsg &&
cmsg->cmsg_len == CMSG_LEN(num_recvfds * sizeof(int)) &&
cmsg->cmsg_level == SOL_SOCKET)
memcpy(recvfds, CMSG_DATA(cmsg), num_recvfds * sizeof(int));
break;
return syserrno(-errno, "Failed to receive response");
}
if (ret == 0)
return 0;
/* If SO_PASSCRED is set we will always get a ucred message. */
for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
__u32 idx;
/*
* This causes some compilers to complain about
* increased alignment requirements but I haven't found
* a better way to deal with this yet. Suggestions
* welcome!
*/
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
int *fds_raw = (int *)CMSG_DATA(cmsg);
#pragma GCC diagnostic pop
__u32 num_raw = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
/*
* We received an insane amount of file descriptors
* which exceeds the kernel limit we know about so
* close them and return an error.
*/
if (num_raw > KERNEL_SCM_MAX_FD) {
for (idx = 0; idx < num_raw; idx++)
close(fds_raw[idx]);
return syserrno_set(-EFBIG, "Received excessive number of file descriptors");
}
if (ret_fds->fd_count_max > num_raw) {
/*
* Make sure any excess entries in the fd array
* are set to -EBADF so our cleanup functions
* can safely be called.
*/
for (idx = num_raw; idx < ret_fds->fd_count_max; idx++)
ret_fds->fd[idx] = -EBADF;
WARN("Received fewer file descriptors than we expected %u != %u", ret_fds->fd_count_max, num_raw);
} else if (ret_fds->fd_count_max < num_raw) {
/* Make sure we close any excess fds we received. */
for (idx = ret_fds->fd_count_max; idx < num_raw; idx++)
close(fds_raw[idx]);
WARN("Received more file descriptors than we expected %u != %u", ret_fds->fd_count_max, num_raw);
/* Cap the number of received file descriptors. */
num_raw = ret_fds->fd_count_max;
}
memcpy(ret_fds->fd, CMSG_DATA(cmsg), num_raw * sizeof(int));
ret_fds->fd_count_ret = num_raw;
break;
}
}
return ret;
}
int lxc_abstract_unix_recv_fds(int fd, int *recvfds, int num_recvfds,
void *data, size_t size)
ssize_t lxc_abstract_unix_recv_fds(int fd, struct unix_fds *ret_fds,
void *ret_data, size_t size_ret_data)
{
char buf[1] = {0};
char buf[1] = {};
struct iovec iov = {
.iov_base = ret_data ? ret_data : buf,
.iov_len = ret_data ? size_ret_data : sizeof(buf),
};
ssize_t ret;
ret = lxc_abstract_unix_recv_fds_iov(fd, ret_fds, &iov, 1);
if (ret < 0)
return ret;
return ret;
}
ssize_t lxc_abstract_unix_recv_one_fd(int fd, int *ret_fd, void *ret_data,
size_t size_ret_data)
{
call_cleaner(put_unix_fds) struct unix_fds *fds = NULL;
char buf[1] = {};
struct iovec iov = {
.iov_base = data ? data : buf,
.iov_len = data ? size : sizeof(buf),
.iov_base = ret_data ? ret_data : buf,
.iov_len = ret_data ? size_ret_data : sizeof(buf),
};
ssize_t ret;
fds = &(struct unix_fds){
.fd_count_max = 1,
};
return lxc_abstract_unix_recv_fds_iov(fd, recvfds, num_recvfds, &iov, 1);
ret = lxc_abstract_unix_recv_fds_iov(fd, fds, &iov, 1);
if (ret < 0)
return ret;
if (ret == 0)
return ret_errno(ENODATA);
if (fds->fd_count_ret != fds->fd_count_max)
*ret_fd = -EBADF;
else
*ret_fd = move_fd(fds->fd[0]);
return ret;
}
/*
 * Receive a single message on @fd that is expected to carry exactly two file
 * descriptors.
 *
 * @fd:     socket to receive from
 * @ret_fd: array with room for two descriptors
 *
 * Returns a negative value if receiving failed and -ENODATA (via ret_errno())
 * if zero bytes were received. Otherwise returns 0; in that case both @ret_fd
 * slots hold the received descriptors when exactly two arrived, and both are
 * set to -EBADF when a different number arrived. Anything left in the local
 * unix_fds is handed to put_unix_fds() through the call_cleaner() annotation.
 */
ssize_t lxc_abstract_unix_recv_two_fds(int fd, int *ret_fd)
{
	call_cleaner(put_unix_fds) struct unix_fds *fds = &(struct unix_fds){
		.fd_count_max = 2,
	};
	char dummy[1] = {};
	struct iovec iov = {
		.iov_base	= dummy,
		.iov_len	= sizeof(dummy),
	};
	ssize_t nbytes;

	nbytes = lxc_abstract_unix_recv_fds_iov(fd, fds, &iov, 1);
	if (nbytes < 0)
		return nbytes;

	if (nbytes == 0)
		return ret_errno(ENODATA);

	if (fds->fd_count_ret == fds->fd_count_max) {
		/* Ownership of both descriptors moves to the caller. */
		ret_fd[0] = move_fd(fds->fd[0]);
		ret_fd[1] = move_fd(fds->fd[1]);
	} else {
		/* Unexpected count: report neither descriptor. */
		ret_fd[0] = -EBADF;
		ret_fd[1] = -EBADF;
	}

	return 0;
}
int lxc_abstract_unix_send_credential(int fd, void *data, size_t size)
......
......@@ -5,9 +5,24 @@
#include <stdio.h>
#include <sys/socket.h>
#include <stddef.h>
#include <sys/un.h>
#include "compiler.h"
#include "macro.h"
#include "memory_utils.h"
/*
* Technically 253 is the kernel limit but we want the struct to be a
* multiple of 8.
*/
#define KERNEL_SCM_MAX_FD 252
/*
 * Bookkeeping for file descriptors received over a unix socket via
 * SCM_RIGHTS.
 */
struct unix_fds {
/* Number of file descriptors the caller expects; set before receiving. */
__u32 fd_count_max;
/* Number of file descriptors stored in @fd by the receive helper. */
__u32 fd_count_ret;
/* Received file descriptors; only the first @fd_count_ret are cleaned up. */
__s32 fd[KERNEL_SCM_MAX_FD];
} __attribute__((aligned(8)));
/* does not enforce \0-termination */
__hidden extern int lxc_abstract_unix_open(const char *path, int type, int flags);
......@@ -15,14 +30,29 @@ __hidden extern void lxc_abstract_unix_close(int fd);
/* does not enforce \0-termination */
__hidden extern int lxc_abstract_unix_connect(const char *path);
__hidden extern int lxc_abstract_unix_send_fds(int fd, int *sendfds, int num_sendfds, void *data,
size_t size) __access_r(2, 3) __access_r(4, 5);
__hidden extern int lxc_abstract_unix_send_fds(int fd, const int *sendfds,
int num_sendfds, void *data,
size_t size) __access_r(2, 3)
__access_r(4, 5);
__hidden extern int lxc_abstract_unix_send_fds_iov(int fd, const int *sendfds,
int num_sendfds,
struct iovec *iov,
size_t iovlen)
__access_r(2, 3);
__hidden extern ssize_t lxc_abstract_unix_recv_fds(int fd,
struct unix_fds *ret_fds,
void *ret_data,
size_t size_ret_data)
__access_r(3, 4);
__hidden extern int lxc_abstract_unix_send_fds_iov(int fd, int *sendfds, int num_sendfds,
struct iovec *iov, size_t iovlen) __access_r(2, 3);
__hidden extern ssize_t lxc_abstract_unix_recv_one_fd(int fd, int *ret_fd,
void *ret_data,
size_t size_ret_data)
__access_r(3, 4);
__hidden extern int lxc_abstract_unix_recv_fds(int fd, int *recvfds, int num_recvfds, void *data,
size_t size) __access_r(2, 3) __access_r(4, 5);
__hidden extern ssize_t lxc_abstract_unix_recv_two_fds(int fd, int *ret_fd);
__hidden extern int lxc_unix_send_fds(int fd, int *sendfds, int num_sendfds, void *data, size_t size);
......@@ -37,4 +67,13 @@ __hidden extern int lxc_unix_connect(struct sockaddr_un *addr);
__hidden extern int lxc_unix_connect_type(struct sockaddr_un *addr, int type);
__hidden extern int lxc_socket_set_timeout(int fd, int rcv_timeout, int snd_timeout);
/*
 * Cleanup helper for struct unix_fds.
 *
 * Passes each of the first fd_count_ret descriptors to
 * close_prot_errno_disarm(). NULL and error pointers are ignored.
 */
static inline void put_unix_fds(struct unix_fds *fds)
{
	size_t pos = 0;

	if (IS_ERR_OR_NULL(fds))
		return;

	while (pos < fds->fd_count_ret) {
		close_prot_errno_disarm(fds->fd[pos]);
		pos++;
	}
}
define_cleanup_function(struct unix_fds *, put_unix_fds);
#endif /* __LXC_AF_UNIX_H */
......@@ -164,7 +164,7 @@ static inline bool sync_wake_fd(int fd, int fd_send)
static inline bool sync_wait_fd(int fd, int *fd_recv)
{
return lxc_abstract_unix_recv_fds(fd, fd_recv, 1, NULL, 0) > 0;
return lxc_abstract_unix_recv_one_fd(fd, fd_recv, NULL, 0) > 0;
}
static bool attach_lsm(lxc_attach_options_t *options)
......@@ -400,7 +400,6 @@ static int get_attach_context(struct attach_context *ctx,
ctx->init_pid = pidfd_get_pid(ctx->dfd_self_pid, ctx->init_pidfd);
else
ctx->init_pid = lxc_cmd_get_init_pid(container->name, container->config_path);
if (ctx->init_pid < 0)
return log_error(-1, "Failed to get init pid");
......@@ -488,16 +487,16 @@ static int same_nsfd(int dfd_pid1, int dfd_pid2, const char *ns_path)
ret = fstatat(dfd_pid1, ns_path, &ns_st1, 0);
if (ret)
return -1;
return -errno;
ret = fstatat(dfd_pid2, ns_path, &ns_st2, 0);
if (ret)
return -1;
return -errno;
/* processes are in the same namespace */
if ((ns_st1.st_dev == ns_st2.st_dev) &&
(ns_st1.st_ino == ns_st2.st_ino))
return -EINVAL;
return 1;
return 0;
}
......@@ -511,19 +510,23 @@ static int same_ns(int dfd_pid1, int dfd_pid2, const char *ns_path)
(PROTECT_LOOKUP_BENEATH_WITH_MAGICLINKS &
~(RESOLVE_NO_XDEV | RESOLVE_BENEATH)), 0);
if (ns_fd2 < 0) {
/* The kernel does not support this namespace. This is not an error. */
if (errno == ENOENT)
return -EINVAL;
return log_error_errno(-errno, errno, "Failed to open %d(%s)",
dfd_pid2, ns_path);
return -ENOENT;
return syserrno(-errno, "Failed to open %d(%s)", dfd_pid2, ns_path);
}
ret = same_nsfd(dfd_pid1, dfd_pid2, ns_path);
if (ret < 0)
return ret;
switch (ret) {
case -ENOENT:
__fallthrough;
case 1:
return ret_errno(ENOENT);
case 0:
/* processes are in different namespaces */
return move_fd(ns_fd2);
}
/* processes are in different namespaces */
return move_fd(ns_fd2);
return ret;
}
static int __prepare_namespaces_pidfd(struct attach_context *ctx)
......@@ -537,14 +540,19 @@ static int __prepare_namespaces_pidfd(struct attach_context *ctx)
ret = same_nsfd(ctx->dfd_self_pid,
ctx->dfd_init_pid,
ns_info[i].proc_path);
if (ret == -EINVAL)
switch (ret) {
case -ENOENT:
__fallthrough;
case 1:
ctx->ns_inherited &= ~ns_info[i].clone_flag;
else if (ret < 0)
return log_error_errno(-1, errno,
"Failed to determine whether %s namespace is shared",
ns_info[i].proc_name);
else
break;
case 0:
TRACE("Shared %s namespace needs attach", ns_info[i].proc_name);
break;
}
return syserrno(-errno, "Failed to determine whether %s namespace is shared",
ns_info[i].proc_name);
}
return 0;
......@@ -573,7 +581,7 @@ static int __prepare_namespaces_nsfd(struct attach_context *ctx,
if (ctx->ns_fd[i] >= 0)
continue;
if (ctx->ns_fd[i] == -EINVAL) {
if (ctx->ns_fd[i] == -ENOENT) {
ctx->ns_inherited &= ~ns_info[i].clone_flag;
continue;
}
......
......@@ -1305,6 +1305,9 @@ static int chown_cgroup_wrapper(void *data)
for (int i = 0; arg->hierarchies[i]; i++) {
int dirfd = arg->hierarchies[i]->dfd_con;
if (dirfd < 0)
return syserrno_set(-EBADF, "Invalid cgroup file descriptor");
(void)fchowmodat(dirfd, "", destuid, nsgid, 0775);
/*
......@@ -1361,7 +1364,7 @@ __cgfsng_ops static bool cgfsng_chown(struct cgroup_ops *ops,
return true;
}
__cgfsng_ops static void cgfsng_payload_finalize(struct cgroup_ops *ops)
__cgfsng_ops static void cgfsng_finalize(struct cgroup_ops *ops)
{
if (!ops)
return;
......@@ -1371,15 +1374,12 @@ __cgfsng_ops static void cgfsng_payload_finalize(struct cgroup_ops *ops)
for (int i = 0; ops->hierarchies[i]; i++) {
struct hierarchy *h = ops->hierarchies[i];
/*
* we don't keep the fds for non-unified hierarchies around
* mainly because we don't make use of them anymore after the
* core cgroup setup is done but also because there are quite a
* lot of them.
*/
if (!is_unified_hierarchy(h))
close_prot_errno_disarm(h->dfd_con);
/* Close all monitor cgroup file descriptors. */
close_prot_errno_disarm(h->dfd_mon);
}
/* Close the cgroup root file descriptor. */
close_prot_errno_disarm(ops->dfd_mnt);
/*
* The checking for freezer support should obviously be done at cgroup
......@@ -2183,8 +2183,8 @@ static int cgroup_attach_move_into_leaf(const struct lxc_conf *conf,
size_t pidstr_len;
ssize_t ret;
ret = lxc_abstract_unix_recv_fds(sk, target_fds, 2, NULL, 0);
if (ret <= 0)
ret = lxc_abstract_unix_recv_two_fds(sk, target_fds);
if (ret < 0)
return log_error_errno(-1, errno, "Failed to receive target cgroup fd");
target_fd0 = target_fds[0];
target_fd1 = target_fds[1];
......@@ -3322,7 +3322,7 @@ struct cgroup_ops *cgroup_ops_init(struct lxc_conf *conf)
cgfsng_ops->payload_delegate_controllers = cgfsng_payload_delegate_controllers;
cgfsng_ops->payload_create = cgfsng_payload_create;
cgfsng_ops->payload_enter = cgfsng_payload_enter;
cgfsng_ops->payload_finalize = cgfsng_payload_finalize;
cgfsng_ops->finalize = cgfsng_finalize;
cgfsng_ops->get_cgroup = cgfsng_get_cgroup;
cgfsng_ops->get = cgfsng_get;
cgfsng_ops->set = cgfsng_set;
......@@ -3345,23 +3345,14 @@ struct cgroup_ops *cgroup_ops_init(struct lxc_conf *conf)
return move_ptr(cgfsng_ops);
}
int cgroup_attach(const struct lxc_conf *conf, const char *name,
const char *lxcpath, pid_t pid)
static int __unified_attach_fd(const struct lxc_conf *conf, int fd_unified, pid_t pid)
{
__do_close int unified_fd = -EBADF;
int ret;
if (!conf || is_empty_string(name) || is_empty_string(lxcpath) || pid <= 0)
return ret_errno(EINVAL);
unified_fd = lxc_cmd_get_cgroup2_fd(name, lxcpath);
if (unified_fd < 0)
return ret_errno(ENOCGROUP2);
if (!lxc_list_empty(&conf->id_map)) {
struct userns_exec_unified_attach_data args = {
.conf = conf,
.unified_fd = unified_fd,
.unified_fd = fd_unified,
.pid = pid,
};
......@@ -3375,7 +3366,76 @@ int cgroup_attach(const struct lxc_conf *conf, const char *name,
cgroup_unified_attach_child_wrapper,
&args);
} else {
ret = cgroup_attach_leaf(conf, unified_fd, pid);
ret = cgroup_attach_leaf(conf, fd_unified, pid);
}
return ret;
}
/*
 * Attach @pid to the container's cgroups using only file descriptors.
 *
 * The cgroup context (one directory fd per hierarchy) is requested through
 * lxc_cmd_get_cgroup_ctx(). For each returned fd the pid is either attached
 * through __unified_attach_fd() (when unified_cgroup_fd() says the fd is a
 * unified-hierarchy fd) or written to "cgroup.procs" below that fd.
 *
 * Returns 0 on success, -ENOSYS if the context could not be retrieved, the
 * failing helper's return value if an attach fails, and -ENOENT if the
 * context contained no file descriptors. The context is released through
 * put_cgroup_ctx() via the call_cleaner() annotation.
 */
static int __cgroup_attach_many(const struct lxc_conf *conf, const char *name,
				const char *lxcpath, pid_t pid)
{
	call_cleaner(put_cgroup_ctx) struct cgroup_ctx *ctx = &(struct cgroup_ctx){};
	char pidstr[INTTYPE_TO_STRLEN(pid_t)];
	ssize_t pidstr_len;
	size_t nr_attached = 0;
	int ret;

	ret = lxc_cmd_get_cgroup_ctx(name, lxcpath, NULL, true,
				     sizeof(struct cgroup_ctx), ctx);
	if (ret < 0)
		return ret_errno(ENOSYS);

	pidstr_len = strnprintf(pidstr, sizeof(pidstr), "%d", pid);
	if (pidstr_len < 0)
		return pidstr_len;

	while (nr_attached < ctx->fd_len) {
		int dfd_con = ctx->fd[nr_attached];

		if (unified_cgroup_fd(dfd_con))
			ret = __unified_attach_fd(conf, dfd_con, pid);
		else
			ret = lxc_writeat(dfd_con, "cgroup.procs", pidstr, pidstr_len);

		/* Any non-zero result aborts the whole attach. */
		if (ret != 0)
			return syserrno(ret, "Failed to attach to cgroup fd %d", dfd_con);

		TRACE("Attached to cgroup fd %d", dfd_con);
		nr_attached++;
	}

	/* An empty context means there was nothing to attach to. */
	if (nr_attached == 0)
		return syserrno_set(-ENOENT, "Failed to attach to cgroups");

	TRACE("Attached to %s cgroup layout", cgroup_layout_name(ctx->cgroup_layout));
	return 0;
}
/*
 * Attach @pid to the container's unified (cgroup2) hierarchy.
 *
 * The cgroup2 directory fd is fetched through lxc_cmd_get_cgroup2_fd() and
 * passed to __unified_attach_fd(). The fd is declared __do_close.
 *
 * Returns -EINVAL on invalid arguments, -ENOCGROUP2 if no cgroup2 fd could
 * be retrieved, otherwise the result of __unified_attach_fd().
 */
static int __cgroup_attach_unified(const struct lxc_conf *conf, const char *name,
				   const char *lxcpath, pid_t pid)
{
	__do_close int dfd_unified = -EBADF;

	if (!conf)
		return ret_errno(EINVAL);

	if (is_empty_string(name) || is_empty_string(lxcpath))
		return ret_errno(EINVAL);

	if (pid <= 0)
		return ret_errno(EINVAL);

	dfd_unified = lxc_cmd_get_cgroup2_fd(name, lxcpath);
	if (dfd_unified >= 0)
		return __unified_attach_fd(conf, dfd_unified, pid);

	return ret_errno(ENOCGROUP2);
}
int cgroup_attach(const struct lxc_conf *conf, const char *name,
const char *lxcpath, pid_t pid)
{
int ret;
ret = __cgroup_attach_many(conf, name, lxcpath, pid);
if (ret < 0) {
if (ret != ENOSYS)
return ret;
ret = __cgroup_attach_unified(conf, name, lxcpath, pid);
}
return ret;
......
......@@ -5,9 +5,11 @@
#include <stdbool.h>
#include <stddef.h>
#include <linux/types.h>
#include <sys/types.h>
#include <linux/magic.h>
#include "af_unix.h"
#include "compiler.h"
#include "macro.h"
#include "memory_utils.h"
......@@ -33,6 +35,22 @@ typedef enum {
CGROUP_LAYOUT_UNIFIED = 2,
} cgroup_layout_t;
/*
 * Map a cgroup layout to a human-readable name for log messages.
 *
 * Returns "legacy", "hybrid" or "unified" for the corresponding layouts and
 * "unknown" for CGROUP_LAYOUT_UNKNOWN or any other value. The switch has no
 * default label, so every enumerator is listed explicitly.
 */
static inline const char *cgroup_layout_name(cgroup_layout_t layout)
{
	const char *name = "unknown";

	switch (layout) {
	case CGROUP_LAYOUT_LEGACY:
		name = "legacy";
		break;
	case CGROUP_LAYOUT_HYBRID:
		name = "hybrid";
		break;
	case CGROUP_LAYOUT_UNIFIED:
		name = "unified";
		break;
	case CGROUP_LAYOUT_UNKNOWN:
		break;
	}

	return name;
}
typedef enum {
LEGACY_HIERARCHY = CGROUP_SUPER_MAGIC,
UNIFIED_HIERARCHY = CGROUP2_SUPER_MAGIC,
......@@ -41,6 +59,17 @@ typedef enum {
#define DEVICES_CONTROLLER (1U << 0)
#define FREEZER_CONTROLLER (1U << 1)
/* That's plenty of hierarchies. */
#define CGROUP_CTX_MAX_FD 20
// BUILD_BUG_ON(CGROUP_CTX_MAX_FD > KERNEL_SCM_MAX_FD);
/*
 * Cgroup context exchanged between the command server and its clients for
 * LXC_CMD_GET_CGROUP_CTX. Filled in by prepare_cgroup_ctx().
 */
struct cgroup_ctx {
/* Cgroup layout of the container (copied from cgroup_ops->cgroup_layout). */
__s32 cgroup_layout;
/* Utilities of the unified hierarchy; only set when it has a usable fd. */
__u32 utilities;
/* Number of valid entries in @fd. */
__u32 fd_len;
/* One container cgroup directory fd (dfd_con) per hierarchy. */
__s32 fd[CGROUP_CTX_MAX_FD];
} __attribute__((aligned(8)));
/* A descriptor for a mounted hierarchy
*
* @controllers
......@@ -218,7 +247,7 @@ struct cgroup_ops {
struct lxc_handler *handler);
bool (*monitor_delegate_controllers)(struct cgroup_ops *ops);
bool (*payload_delegate_controllers)(struct cgroup_ops *ops);
void (*payload_finalize)(struct cgroup_ops *ops);
void (*finalize)(struct cgroup_ops *ops);
const char *(*get_limiting_cgroup)(struct cgroup_ops *ops, const char *controller);
};
......@@ -257,4 +286,36 @@ static inline int cgroup_unified_fd(const struct cgroup_ops *ops)
__first, __VA_ARGS__); \
})
/*
 * Cleanup helper for struct cgroup_ctx.
 *
 * Passes each of the first fd_len descriptors to close_prot_errno_disarm().
 * NULL and error pointers are ignored.
 *
 * Declared inline, like put_unix_fds(), because it is defined in a header:
 * translation units that include the header without using the helper would
 * otherwise get an unused-function warning.
 */
static inline void put_cgroup_ctx(struct cgroup_ctx *ctx)
{
	if (!IS_ERR_OR_NULL(ctx)) {
		for (__u32 idx = 0; idx < ctx->fd_len; idx++)
			close_prot_errno_disarm(ctx->fd[idx]);
	}
}
define_cleanup_function(struct cgroup_ctx *, put_cgroup_ctx);
/*
 * Fill @ctx with the container cgroup directory fds of all hierarchies
 * known to @ops.
 *
 * @ops: cgroup driver state; may be NULL or have no hierarchies
 * @ctx: context to fill; fd[], fd_len, cgroup_layout and (when the unified
 *       hierarchy has a usable fd) utilities are written
 *
 * The descriptors are only copied by value, not duplicated.
 *
 * Returns 0 on success, -ENOENT if there are no hierarchies and -E2BIG if
 * there are more hierarchies than CGROUP_CTX_MAX_FD.
 */
static inline int prepare_cgroup_ctx(struct cgroup_ops *ops,
				     struct cgroup_ctx *ctx)
{
	__u32 idx;

	/* Without a driver or a hierarchy list there is nothing to hand out. */
	if (!ops || !ops->hierarchies)
		return ret_errno(ENOENT);

	for (idx = 0; ops->hierarchies[idx]; idx++) {
		if (idx >= CGROUP_CTX_MAX_FD)
			return ret_errno(E2BIG);

		ctx->fd[idx] = ops->hierarchies[idx]->dfd_con;
	}

	if (idx == 0)
		return ret_errno(ENOENT);

	ctx->fd_len = idx;
	ctx->cgroup_layout = ops->cgroup_layout;
	if (ops->unified && ops->unified->dfd_con > 0)
		ctx->utilities = ops->unified->utilities;

	return 0;
}
#endif /* __LXC_CGROUP_H */
......@@ -88,6 +88,7 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd)
[LXC_CMD_GET_LIMITING_CGROUP2_FD] = "get_limiting_cgroup2_fd",
[LXC_CMD_GET_DEVPTS_FD] = "get_devpts_fd",
[LXC_CMD_GET_SECCOMP_NOTIFY_FD] = "get_seccomp_notify_fd",
[LXC_CMD_GET_CGROUP_CTX] = "get_cgroup_ctx",
};
if (cmd >= LXC_CMD_MAX)
......@@ -96,6 +97,19 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd)
return cmdname[cmd];
}
/*
 * Move the file descriptors received via SCM_RIGHTS in @fds into the cgroup
 * context @ctx that was received as response payload.
 *
 * The fd numbers in the payload are the sender's and are meaningless in
 * this process, so on success they are overwritten with the descriptors
 * that were actually received, and @fds is emptied (fd_count_ret = 0) so
 * its cleanup does not close them.
 *
 * On a count mismatch @ctx->fd_len is reset to 0. Otherwise a later
 * put_cgroup_ctx() on @ctx would treat the sender's numbers as local
 * descriptors and close unrelated fds, or read past fd[] if fd_len exceeds
 * CGROUP_CTX_MAX_FD.
 *
 * Returns 0 on success, -EINVAL if the counts do not match.
 */
static int __transfer_cgroup_ctx_fds(struct unix_fds *fds, struct cgroup_ctx *ctx)
{
	/* This shouldn't be able to happen but better safe than sorry. */
	if (ctx->fd_len != fds->fd_count_ret ||
	    fds->fd_count_ret > CGROUP_CTX_MAX_FD) {
		__u32 fd_len = ctx->fd_len;

		/* Nothing in ctx->fd[] is ours to close. */
		ctx->fd_len = 0;
		return syswarn_set(-EINVAL, "Unexpected number of file descriptors received %u != %u",
				   fd_len, fds->fd_count_ret);
	}

	memcpy(ctx->fd, fds->fd, ctx->fd_len * sizeof(__s32));
	fds->fd_count_ret = 0;
	return 0;
}
/*
* lxc_cmd_rsp_recv: Receive a response to a command
*
......@@ -115,16 +129,43 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd)
*/
static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd)
{
__do_close int fd_rsp = -EBADF;
int ret;
call_cleaner(put_unix_fds) struct unix_fds *fds = &(struct unix_fds){};
struct lxc_cmd_rsp *rsp = &cmd->rsp;
const char *reqstr = lxc_cmd_str(cmd->req.cmd);
int fret = 0;
int ret;
ret = lxc_abstract_unix_recv_fds(sock, &fd_rsp, 1, rsp, sizeof(*rsp));
switch (cmd->req.cmd) {
case LXC_CMD_GET_CGROUP2_FD:
__fallthrough;
case LXC_CMD_GET_LIMITING_CGROUP2_FD:
__fallthrough;
case LXC_CMD_GET_INIT_PIDFD:
__fallthrough;
case LXC_CMD_GET_SECCOMP_NOTIFY_FD:
__fallthrough;
case LXC_CMD_GET_DEVPTS_FD:
__fallthrough;
case LXC_CMD_CONSOLE:
fds->fd_count_max = 1;
break;
case LXC_CMD_GET_CGROUP_CTX:
fds->fd_count_max = CGROUP_CTX_MAX_FD;
break;
default:
fds->fd_count_max = 0;
break;
}
ret = lxc_abstract_unix_recv_fds(sock, fds, rsp, sizeof(*rsp));
if (ret < 0)
return log_warn_errno(-1,
errno, "Failed to receive response for command \"%s\"",
lxc_cmd_str(cmd->req.cmd));
TRACE("Command \"%s\" received response", lxc_cmd_str(cmd->req.cmd));
return syserrno(ret, "Failed to receive response for command \"%s\"", reqstr);
if (fds->fd_count_max == 0) {
TRACE("Command \"%s\" received response with %u file descriptors", reqstr, fds->fd_count_ret);
} else if (fds->fd_count_ret == 0) {
WARN("Command \"%s\" received response without expected file descriptors", reqstr);
fret = -EBADF;
}
if (cmd->req.cmd == LXC_CMD_CONSOLE) {
struct lxc_cmd_console_rsp_data *rspdata;
......@@ -137,66 +178,62 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd)
rspdata = malloc(sizeof(*rspdata));
if (!rspdata)
return log_warn_errno(-1,
ENOMEM, "Failed to receive response for command \"%s\"",
lxc_cmd_str(cmd->req.cmd));
return syserrno_set(-ENOMEM, "Failed to receive response for command \"%s\"", reqstr);
rspdata->ptxfd = move_fd(fd_rsp);
rspdata->ptxfd = move_fd(fds->fd[0]);
rspdata->ttynum = PTR_TO_INT(rsp->data);
rsp->data = rspdata;
}
if (cmd->req.cmd == LXC_CMD_GET_CGROUP2_FD ||
cmd->req.cmd == LXC_CMD_GET_LIMITING_CGROUP2_FD)
{
int cgroup2_fd = move_fd(fd_rsp);
rsp->data = INT_TO_PTR(cgroup2_fd);
}
if (cmd->req.cmd == LXC_CMD_GET_INIT_PIDFD) {
int init_pidfd = move_fd(fd_rsp);
rsp->data = INT_TO_PTR(init_pidfd);
}
if (cmd->req.cmd == LXC_CMD_GET_DEVPTS_FD) {
int devpts_fd = move_fd(fd_rsp);
rsp->data = INT_TO_PTR(devpts_fd);
}
if (cmd->req.cmd == LXC_CMD_GET_SECCOMP_NOTIFY_FD) {
int seccomp_notify_fd = move_fd(fd_rsp);
rsp->data = INT_TO_PTR(seccomp_notify_fd);
switch (cmd->req.cmd) {
case LXC_CMD_GET_CGROUP2_FD:
__fallthrough;
case LXC_CMD_GET_LIMITING_CGROUP2_FD:
__fallthrough;
case LXC_CMD_GET_INIT_PIDFD:
__fallthrough;
case LXC_CMD_GET_DEVPTS_FD:
__fallthrough;
case LXC_CMD_GET_SECCOMP_NOTIFY_FD:
rsp->data = INT_TO_PTR(move_fd(fds->fd[0]));
return log_debug(fret ?: ret, "Finished processing \"%s\"", reqstr);
case LXC_CMD_GET_CGROUP_CTX:
if (rsp->datalen > sizeof(struct cgroup_ctx))
return syserrno_set(-EINVAL, "Invalid response size from server for \"%s\"", reqstr);
/* Don't pointlessly allocate. */
rsp->data = (void *)cmd->req.data;
break;
default:
break;
}
if (rsp->datalen == 0)
return log_debug(ret,
"Response data length for command \"%s\" is 0",
lxc_cmd_str(cmd->req.cmd));
return log_debug(fret ?: ret, "Response data length for command \"%s\" is 0", reqstr);
if ((rsp->datalen > LXC_CMD_DATA_MAX) &&
(cmd->req.cmd != LXC_CMD_CONSOLE_LOG))
return log_error(-1, "Response data for command \"%s\" is too long: %d bytes > %d",
lxc_cmd_str(cmd->req.cmd), rsp->datalen,
LXC_CMD_DATA_MAX);
return syserrno_set(-E2BIG, "Response data for command \"%s\" is too long: %d bytes > %d",
reqstr, rsp->datalen, LXC_CMD_DATA_MAX);
if (cmd->req.cmd == LXC_CMD_CONSOLE_LOG) {
rsp->data = malloc(rsp->datalen + 1);
((char *)rsp->data)[rsp->datalen] = '\0';
} else {
if (cmd->req.cmd == LXC_CMD_CONSOLE_LOG)
rsp->data = zalloc(rsp->datalen + 1);
else if (cmd->req.cmd != LXC_CMD_GET_CGROUP_CTX)
rsp->data = malloc(rsp->datalen);
}
if (!rsp->data)
return log_error_errno(-1,
ENOMEM, "Failed to allocate response buffer for command \"%s\"",
lxc_cmd_str(cmd->req.cmd));
return syserrno_set(-ENOMEM, "Failed to allocate response buffer for command \"%s\"", reqstr);
ret = lxc_recv_nointr(sock, rsp->data, rsp->datalen, 0);
if (ret != rsp->datalen)
return log_error_errno(-1,
errno, "Failed to receive response data for command \"%s\"",
lxc_cmd_str(cmd->req.cmd));
return syserrno(-errno, "Failed to receive response data for command \"%s\"", reqstr);
return ret;
if (cmd->req.cmd == LXC_CMD_GET_CGROUP_CTX) {
ret = __transfer_cgroup_ctx_fds(fds, rsp->data);
if (ret < 0)
return syserrno(ret, "Failed to transfer file descriptors for \"%s\"", reqstr);
}
return fret ?: ret;
}
/*
......@@ -207,26 +244,84 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd)
*
* Returns 0 on success, < 0 on failure
*/
static int lxc_cmd_rsp_send(int fd, struct lxc_cmd_rsp *rsp)
static int __lxc_cmd_rsp_send(int fd, struct lxc_cmd_rsp *rsp)
{
ssize_t ret;
errno = EMSGSIZE;
ret = lxc_send_nointr(fd, rsp, sizeof(*rsp), MSG_NOSIGNAL);
if (ret < 0 || (size_t)ret != sizeof(*rsp))
return log_error_errno(-1, errno, "Failed to send command response %zd", ret);
return syserrno(-errno, "Failed to send command response %zd", ret);
if (!rsp->data || rsp->datalen <= 0)
return 0;
errno = EMSGSIZE;
ret = lxc_send_nointr(fd, rsp->data, rsp->datalen, MSG_NOSIGNAL);
if (ret < 0 || ret != (ssize_t)rsp->datalen)
return log_warn_errno(-1, errno, "Failed to send command response data %zd", ret);
return syswarn(-errno, "Failed to send command response %zd", ret);
return 0;
}
/*
 * Send @rsp to the client on @fd and ask the caller to reap the client fd.
 *
 * Returns the negative result of __lxc_cmd_rsp_send() on failure and
 * LXC_CMD_REAP_CLIENT_FD on success.
 */
static inline int lxc_cmd_rsp_send_reap(int fd, struct lxc_cmd_rsp *rsp)
{
	int err = __lxc_cmd_rsp_send(fd, rsp);

	return (err < 0) ? err : LXC_CMD_REAP_CLIENT_FD;
}
/*
 * Send @rsp to the client on @fd and keep the client fd open.
 *
 * Returns the negative result of __lxc_cmd_rsp_send() on failure and 0 on
 * success.
 */
static inline int lxc_cmd_rsp_send_keep(int fd, struct lxc_cmd_rsp *rsp)
{
	int err = __lxc_cmd_rsp_send(fd, rsp);

	return (err < 0) ? err : 0;
}
/*
 * Send the response header @rsp together with the single file descriptor
 * @fd_send to the client on @fd.
 *
 * Returns the negative result of lxc_abstract_unix_send_fds() on failure
 * and LXC_CMD_REAP_CLIENT_FD on success.
 */
static inline int rsp_one_fd(int fd, int fd_send, struct lxc_cmd_rsp *rsp)
{
	int err = lxc_abstract_unix_send_fds(fd, &fd_send, 1, rsp, sizeof(*rsp));

	return (err < 0) ? err : LXC_CMD_REAP_CLIENT_FD;
}
/*
 * Send the response header @rsp together with @fds_len file descriptors,
 * followed by the response payload if there is one.
 *
 * @fd:      client socket
 * @fds_len: number of descriptors in @fds
 * @fds:     descriptors to send
 * @rsp:     response header; rsp->data/rsp->datalen describe the payload
 *
 * If @fds_len is 0 or exceeds KERNEL_SCM_MAX_FD no descriptors are sent;
 * instead rsp->ret is set to -ENOENT or -E2BIG respectively and a plain
 * response is sent.
 *
 * Returns LXC_CMD_REAP_CLIENT_FD on success and a negative value on
 * failure.
 */
static inline int rsp_many_fds(int fd, __u32 fds_len,
			       const __s32 fds[KERNEL_SCM_MAX_FD],
			       struct lxc_cmd_rsp *rsp)
{
	ssize_t ret;

	if (fds_len > KERNEL_SCM_MAX_FD) {
		rsp->ret = -E2BIG;
		return lxc_cmd_rsp_send_reap(fd, rsp);
	} else if (fds_len == 0) {
		rsp->ret = -ENOENT;
		return lxc_cmd_rsp_send_reap(fd, rsp);
	}

	ret = lxc_abstract_unix_send_fds(fd, fds, fds_len, rsp, sizeof(*rsp));
	if (ret < 0)
		return ret;

	if (rsp->data && rsp->datalen > 0) {
		/*
		 * A short write does not set errno. Pre-set it, as
		 * __lxc_cmd_rsp_send() does, so -errno below is never 0
		 * or a stale value on that path.
		 */
		errno = EMSGSIZE;
		ret = lxc_send_nointr(fd, rsp->data, rsp->datalen, MSG_NOSIGNAL);
		if (ret < 0 || ret != (ssize_t)rsp->datalen)
			return syswarn(-errno, "Failed to send command response %zd", ret);
	}

	return LXC_CMD_REAP_CLIENT_FD;
}
static int lxc_cmd_send(const char *name, struct lxc_cmd_rr *cmd,
const char *lxcpath, const char *hashed_sock_name)
{
......@@ -349,7 +444,6 @@ int lxc_try_cmd(const char *name, const char *lxcpath)
*/
static int validate_string_request(int fd, const struct lxc_cmd_req *req)
{
int ret;
size_t maxlen = req->datalen - 1;
const char *data = req->data;
......@@ -357,16 +451,12 @@ static int validate_string_request(int fd, const struct lxc_cmd_req *req)
return 0;
struct lxc_cmd_rsp rsp = {
.ret = -EINVAL,
.datalen = 0,
.data = NULL,
.ret = -EINVAL,
.datalen = 0,
.data = NULL,
};
ret = lxc_cmd_rsp_send(fd, &rsp);
if (ret < 0)
return LXC_CMD_REAP_CLIENT_FD;
return -1;
return lxc_cmd_rsp_send_reap(fd, &rsp);
}
/* Implementations of the commands and their callbacks */
......@@ -410,25 +500,25 @@ static int lxc_cmd_get_init_pid_callback(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler,
struct lxc_epoll_descr *descr)
{
int ret;
struct lxc_cmd_rsp rsp = {
.data = PID_TO_PTR(handler->pid)
.data = PID_TO_PTR(handler->pid),
};
ret = lxc_cmd_rsp_send(fd, &rsp);
if (ret < 0)
return LXC_CMD_REAP_CLIENT_FD;
return 0;
return lxc_cmd_rsp_send_reap(fd, &rsp);
}
int lxc_cmd_get_init_pidfd(const char *name, const char *lxcpath)
{
int pidfd;
int ret, stopped;
struct lxc_cmd_rr cmd = {
.req = {
.cmd = LXC_CMD_GET_INIT_PIDFD,
},
.rsp = {
.data = INT_TO_PTR(-EBADF),
.ret = ENOSYS,
},
};
ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
......@@ -436,9 +526,13 @@ int lxc_cmd_get_init_pidfd(const char *name, const char *lxcpath)
return log_debug_errno(-1, errno, "Failed to process init pidfd command");
if (cmd.rsp.ret < 0)
return log_debug_errno(-EBADF, errno, "Failed to receive init pidfd");
return syserrno_set(cmd.rsp.ret, "Failed to receive init pidfd");
return PTR_TO_INT(cmd.rsp.data);
pidfd = PTR_TO_INT(cmd.rsp.data);
if (pidfd < 0)
return syserrno_set(pidfd, "Failed to receive init pidfd");
return pidfd;
}
static int lxc_cmd_get_init_pidfd_callback(int fd, struct lxc_cmd_req *req,
......@@ -446,17 +540,14 @@ static int lxc_cmd_get_init_pidfd_callback(int fd, struct lxc_cmd_req *req,
struct lxc_epoll_descr *descr)
{
struct lxc_cmd_rsp rsp = {
.ret = 0,
.ret = -EBADF,
};
int ret;
if (handler->pidfd < 0)
rsp.ret = -EBADF;
ret = lxc_abstract_unix_send_fds(fd, &handler->pidfd, 1, &rsp, sizeof(rsp));
if (ret < 0)
return log_error(LXC_CMD_REAP_CLIENT_FD, "Failed to send init pidfd");
return lxc_cmd_rsp_send_reap(fd, &rsp);
return 0;
rsp.ret = 0;
return rsp_one_fd(fd, handler->pidfd, &rsp);
}
int lxc_cmd_get_devpts_fd(const char *name, const char *lxcpath)
......@@ -483,20 +574,14 @@ static int lxc_cmd_get_devpts_fd_callback(int fd, struct lxc_cmd_req *req,
struct lxc_epoll_descr *descr)
{
struct lxc_cmd_rsp rsp = {
.ret = 0,
.ret = -EBADF,
};
int ret;
if (!handler->conf || handler->conf->devpts_fd < 0) {
rsp.ret = -EBADF;
ret = lxc_abstract_unix_send_fds(fd, NULL, 0, &rsp, sizeof(rsp));
} else {
ret = lxc_abstract_unix_send_fds(fd, &handler->conf->devpts_fd, 1, &rsp, sizeof(rsp));
}
if (ret < 0)
return log_error(LXC_CMD_REAP_CLIENT_FD, "Failed to send devpts fd");
if (!handler->conf || handler->conf->devpts_fd < 0)
return lxc_cmd_rsp_send_reap(fd, &rsp);
return 0;
rsp.ret = 0;
return rsp_one_fd(fd, handler->conf->devpts_fd, &rsp);
}
int lxc_cmd_get_seccomp_notify_fd(const char *name, const char *lxcpath)
......@@ -528,22 +613,75 @@ static int lxc_cmd_get_seccomp_notify_fd_callback(int fd, struct lxc_cmd_req *re
{
#ifdef HAVE_SECCOMP_NOTIFY
struct lxc_cmd_rsp rsp = {
.ret = 0,
.ret = -EBADF,
};
int ret;
if (!handler->conf || handler->conf->seccomp.notifier.notify_fd < 0)
rsp.ret = -EBADF;
ret = lxc_abstract_unix_send_fds(fd, &handler->conf->seccomp.notifier.notify_fd, 1, &rsp, sizeof(rsp));
if (ret < 0)
return log_error(LXC_CMD_REAP_CLIENT_FD, "Failed to send seccomp notify fd");
return lxc_cmd_rsp_send_reap(fd, &rsp);
return 0;
rsp.ret = 0;
return rsp_one_fd(fd, handler->conf->seccomp.notifier.notify_fd, &rsp);
#else
return ret_errno(EOPNOTSUPP);
return syserrno_set(-EOPNOTSUPP, "Seccomp notifier not supported");
#endif
}
/*
 * Ask the container's command server for its cgroup context.
 *
 * @name:         container name
 * @lxcpath:      container path
 * @controller:   must be empty or NULL when @batch is set
 * @batch:        request batch mode
 * @size_ret_ctx: size of the buffer @ret_ctx points to
 * @ret_ctx:      passed to the server as request data
 *
 * Returns 0 on success, -EINVAL if @batch is combined with a controller,
 * -1 if the command could not be processed and -EBADF if the server
 * reported a failure.
 */
int lxc_cmd_get_cgroup_ctx(const char *name, const char *lxcpath,
			   const char *controller, bool batch,
			   size_t size_ret_ctx, struct cgroup_ctx *ret_ctx)
{
	int stopped;
	int err;
	struct lxc_cmd_rr cmd = {
		.req = {
			.cmd		= LXC_CMD_GET_CGROUP_CTX,
			.datalen	= size_ret_ctx,
			.data		= ret_ctx,
		},
		.rsp = {
			.ret		= -ENOSYS,
		},
	};

	/* Batch mode and a specific controller are mutually exclusive. */
	if (batch && !is_empty_string(controller))
		return ret_errno(EINVAL);

	err = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
	if (err < 0)
		return log_debug_errno(-1, errno, "Failed to process cgroup context command");

	if (cmd.rsp.ret >= 0)
		return 0;

	return log_debug_errno(-EBADF, errno, "Failed to receive cgroup fds");
}
/*
 * Server-side handler for LXC_CMD_GET_CGROUP_CTX.
 *
 * Copies the client's cgroup context into a server-side copy, fills it via
 * prepare_cgroup_ctx() and sends it back together with the cgroup file
 * descriptors. At most the smaller of the client's buffer size and
 * sizeof(struct cgroup_ctx) is sent as payload.
 *
 * On failure a plain response with a negative rsp.ret is sent. The default
 * must be negative because clients only treat rsp.ret < 0 as an error.
 *
 * Returns the result of the helper used to send the response.
 */
static int lxc_cmd_get_cgroup_ctx_callback(int fd, struct lxc_cmd_req *req,
					   struct lxc_handler *handler,
					   struct lxc_epoll_descr *descr)
{
	struct lxc_cmd_rsp rsp = {
		.ret = -EINVAL,
	};
	struct cgroup_ops *cgroup_ops = handler->cgroup_ops;
	struct cgroup_ctx ctx_server = {};
	int ret;

	ret = copy_struct_from_client(sizeof(struct cgroup_ctx), &ctx_server,
				      req->datalen, req->data);
	if (ret < 0)
		return lxc_cmd_rsp_send_reap(fd, &rsp);

	ret = prepare_cgroup_ctx(cgroup_ops, &ctx_server);
	if (ret < 0) {
		rsp.ret = ret;
		return lxc_cmd_rsp_send_reap(fd, &rsp);
	}

	rsp.data = &ctx_server;
	rsp.datalen = min(sizeof(struct cgroup_ctx), (size_t)req->datalen);
	return rsp_many_fds(fd, ctx_server.fd_len, ctx_server.fd, &rsp);
}
/*
* lxc_cmd_get_clone_flags: Get clone flags container was spawned with
*
......@@ -572,16 +710,11 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler,
struct lxc_epoll_descr *descr)
{
int ret;
struct lxc_cmd_rsp rsp = {
.data = INT_TO_PTR(handler->ns_clone_flags),
};
ret = lxc_cmd_rsp_send(fd, &rsp);
if (ret < 0)
return LXC_CMD_REAP_CLIENT_FD;
return 0;
return lxc_cmd_rsp_send_reap(fd, &rsp);
}
static char *lxc_cmd_get_cgroup_path_do(const char *name, const char *lxcpath,
......@@ -701,11 +834,7 @@ static int lxc_cmd_get_cgroup_callback_do(int fd, struct lxc_cmd_req *req,
rsp.datalen = strlen(path) + 1;
rsp.data = (char *)path;
ret = lxc_cmd_rsp_send(fd, &rsp);
if (ret < 0)
return LXC_CMD_REAP_CLIENT_FD;
return 0;
return lxc_cmd_rsp_send_reap(fd, &rsp);
}
static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
......@@ -784,11 +913,7 @@ static int lxc_cmd_get_config_item_callback(int fd, struct lxc_cmd_req *req,
err1:
rsp.ret = -1;
out:
cilen = lxc_cmd_rsp_send(fd, &rsp);
if (cilen < 0)
return LXC_CMD_REAP_CLIENT_FD;
return 0;
return lxc_cmd_rsp_send_reap(fd, &rsp);
}
/*
......@@ -827,16 +952,11 @@ static int lxc_cmd_get_state_callback(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler,
struct lxc_epoll_descr *descr)
{
int ret;
struct lxc_cmd_rsp rsp = {
.data = INT_TO_PTR(handler->state),
};
ret = lxc_cmd_rsp_send(fd, &rsp);
if (ret < 0)
return LXC_CMD_REAP_CLIENT_FD;
return 0;
return lxc_cmd_rsp_send_reap(fd, &rsp);
}
/*
......@@ -909,11 +1029,7 @@ static int lxc_cmd_stop_callback(int fd, struct lxc_cmd_req *req,
rsp.ret = -errno;
}
ret = lxc_cmd_rsp_send(fd, &rsp);
if (ret < 0)
return LXC_CMD_REAP_CLIENT_FD;
return 0;
return lxc_cmd_rsp_send_reap(fd, &rsp);
}
/*
......@@ -985,23 +1101,24 @@ static int lxc_cmd_console_callback(int fd, struct lxc_cmd_req *req,
struct lxc_epoll_descr *descr)
{
int ptxfd, ret;
struct lxc_cmd_rsp rsp;
struct lxc_cmd_rsp rsp = {
.ret = -EBADF,
};
int ttynum = PTR_TO_INT(req->data);
ptxfd = lxc_terminal_allocate(handler->conf, fd, &ttynum);
if (ptxfd < 0)
return LXC_CMD_REAP_CLIENT_FD;
return lxc_cmd_rsp_send_reap(fd, &rsp);
memset(&rsp, 0, sizeof(rsp));
rsp.ret = 0;
rsp.data = INT_TO_PTR(ttynum);
ret = lxc_abstract_unix_send_fds(fd, &ptxfd, 1, &rsp, sizeof(rsp));
if (ret < 0) {
lxc_terminal_free(handler->conf, fd);
return log_error_errno(LXC_CMD_REAP_CLIENT_FD, errno,
"Failed to send tty to client");
return ret;
}
return 0;
return log_debug(0, "Send tty to client");
}
/*
......@@ -1034,7 +1151,6 @@ static int lxc_cmd_get_name_callback(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler,
struct lxc_epoll_descr *descr)
{
int ret;
struct lxc_cmd_rsp rsp;
memset(&rsp, 0, sizeof(rsp));
......@@ -1043,11 +1159,7 @@ static int lxc_cmd_get_name_callback(int fd, struct lxc_cmd_req *req,
rsp.datalen = strlen(handler->name) + 1;
rsp.ret = 0;
ret = lxc_cmd_rsp_send(fd, &rsp);
if (ret < 0)
return LXC_CMD_REAP_CLIENT_FD;
return 0;
return lxc_cmd_rsp_send_reap(fd, &rsp);
}
/*
......@@ -1080,18 +1192,13 @@ static int lxc_cmd_get_lxcpath_callback(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler,
struct lxc_epoll_descr *descr)
{
int ret;
struct lxc_cmd_rsp rsp = {
.ret = 0,
.data = (char *)handler->lxcpath,
.datalen = strlen(handler->lxcpath) + 1,
};
ret = lxc_cmd_rsp_send(fd, &rsp);
if (ret < 0)
return LXC_CMD_REAP_CLIENT_FD;
return 0;
return lxc_cmd_rsp_send_reap(fd, &rsp);
}
int lxc_cmd_add_state_client(const char *name, const char *lxcpath,
......@@ -1140,29 +1247,29 @@ static int lxc_cmd_add_state_client_callback(__owns int fd, struct lxc_cmd_req *
struct lxc_handler *handler,
struct lxc_epoll_descr *descr)
{
int ret;
struct lxc_cmd_rsp rsp = {0};
struct lxc_cmd_rsp rsp = {
.ret = -EINVAL,
};
if (req->datalen < 0)
return LXC_CMD_REAP_CLIENT_FD;
goto reap_fd;
if (req->datalen != (sizeof(lxc_state_t) * MAX_STATE))
return LXC_CMD_REAP_CLIENT_FD;
goto reap_fd;
if (!req->data)
return LXC_CMD_REAP_CLIENT_FD;
goto reap_fd;
rsp.ret = lxc_add_state_client(fd, handler, (lxc_state_t *)req->data);
if (rsp.ret < 0)
return LXC_CMD_REAP_CLIENT_FD;
goto reap_fd;
rsp.data = INT_TO_PTR(rsp.ret);
ret = lxc_cmd_rsp_send(fd, &rsp);
if (ret < 0)
return LXC_CMD_REAP_CLIENT_FD;
return lxc_cmd_rsp_send_keep(fd, &rsp);
return 0;
reap_fd:
return lxc_cmd_rsp_send_reap(fd, &rsp);
}
int lxc_cmd_add_bpf_device_cgroup(const char *name, const char *lxcpath,
......@@ -1193,18 +1300,19 @@ static int lxc_cmd_add_bpf_device_cgroup_callback(int fd, struct lxc_cmd_req *re
struct lxc_handler *handler,
struct lxc_epoll_descr *descr)
{
int ret;
struct lxc_cmd_rsp rsp = {};
struct lxc_cmd_rsp rsp = {
.ret = -EINVAL,
};
struct lxc_conf *conf;
if (req->datalen <= 0)
return LXC_CMD_REAP_CLIENT_FD;
goto out;
if (req->datalen != sizeof(struct device_item))
return LXC_CMD_REAP_CLIENT_FD;
goto out;
if (!req->data)
return LXC_CMD_REAP_CLIENT_FD;
goto out;
conf = handler->conf;
if (!bpf_cgroup_devices_update(handler->cgroup_ops,
......@@ -1214,11 +1322,8 @@ static int lxc_cmd_add_bpf_device_cgroup_callback(int fd, struct lxc_cmd_req *re
else
rsp.ret = 0;
ret = lxc_cmd_rsp_send(fd, &rsp);
if (ret < 0)
return LXC_CMD_REAP_CLIENT_FD;
return 0;
out:
return lxc_cmd_rsp_send_reap(fd, &rsp);
}
int lxc_cmd_console_log(const char *name, const char *lxcpath,
......@@ -1294,7 +1399,7 @@ static int lxc_cmd_console_log_callback(int fd, struct lxc_cmd_req *req,
lxc_ringbuf_move_read_addr(buf, rsp.datalen);
out:
return lxc_cmd_rsp_send(fd, &rsp);
return lxc_cmd_rsp_send_reap(fd, &rsp);
}
int lxc_cmd_serve_state_clients(const char *name, const char *lxcpath,
......@@ -1326,13 +1431,9 @@ static int lxc_cmd_serve_state_clients_callback(int fd, struct lxc_cmd_req *req,
ret = lxc_serve_state_clients(handler->name, handler, state);
if (ret < 0)
return LXC_CMD_REAP_CLIENT_FD;
ret = lxc_cmd_rsp_send(fd, &rsp);
if (ret < 0)
return LXC_CMD_REAP_CLIENT_FD;
return ret;
return 0;
return lxc_cmd_rsp_send_reap(fd, &rsp);
}
int lxc_cmd_seccomp_notify_add_listener(const char *name, const char *lxcpath,
......@@ -1371,7 +1472,7 @@ static int lxc_cmd_seccomp_notify_add_listener_callback(int fd,
int ret;
__do_close int recv_fd = -EBADF;
ret = lxc_abstract_unix_recv_fds(fd, &recv_fd, 1, NULL, 0);
ret = lxc_abstract_unix_recv_one_fd(fd, &recv_fd, NULL, 0);
if (ret <= 0) {
rsp.ret = -errno;
goto out;
......@@ -1397,7 +1498,7 @@ out:
rsp.ret = -ENOSYS;
#endif
return lxc_cmd_rsp_send(fd, &rsp);
return lxc_cmd_rsp_send_reap(fd, &rsp);
}
int lxc_cmd_freeze(const char *name, const char *lxcpath, int timeout)
......@@ -1430,7 +1531,7 @@ static int lxc_cmd_freeze_callback(int fd, struct lxc_cmd_req *req,
if (pure_unified_layout(ops))
rsp.ret = ops->freeze(ops, timeout);
return lxc_cmd_rsp_send(fd, &rsp);
return lxc_cmd_rsp_send_reap(fd, &rsp);
}
int lxc_cmd_unfreeze(const char *name, const char *lxcpath, int timeout)
......@@ -1463,7 +1564,7 @@ static int lxc_cmd_unfreeze_callback(int fd, struct lxc_cmd_req *req,
if (pure_unified_layout(ops))
rsp.ret = ops->unfreeze(ops, timeout);
return lxc_cmd_rsp_send(fd, &rsp);
return lxc_cmd_rsp_send_reap(fd, &rsp);
}
int lxc_cmd_get_cgroup2_fd(const char *name, const char *lxcpath)
......@@ -1473,6 +1574,9 @@ int lxc_cmd_get_cgroup2_fd(const char *name, const char *lxcpath)
.req = {
.cmd = LXC_CMD_GET_CGROUP2_FD,
},
.rsp = {
	/*
	 * Use a designated initializer. The bare spelling `ret = -ENOSYS`
	 * is an assignment expression to a variable named ret, not an
	 * initializer of the rsp.ret member.
	 */
	.ret = -ENOSYS,
},
};
ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
......@@ -1492,6 +1596,9 @@ int lxc_cmd_get_limiting_cgroup2_fd(const char *name, const char *lxcpath)
.req = {
.cmd = LXC_CMD_GET_LIMITING_CGROUP2_FD,
},
.rsp = {
.ret = -ENOSYS,
},
};
ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
......@@ -1499,7 +1606,7 @@ int lxc_cmd_get_limiting_cgroup2_fd(const char *name, const char *lxcpath)
return -1;
if (cmd.rsp.ret < 0)
return log_debug_errno(cmd.rsp.ret, -cmd.rsp.ret, "Failed to receive cgroup2 fd");
return syswarn_set(cmd.rsp.ret, "Failed to receive cgroup2 limit fd");
return PTR_TO_INT(cmd.rsp.data);
}
......@@ -1513,20 +1620,21 @@ static int lxc_cmd_get_cgroup2_fd_callback_do(int fd, struct lxc_cmd_req *req,
.ret = -EINVAL,
};
struct cgroup_ops *ops = handler->cgroup_ops;
int ret, send_fd;
int send_fd;
if (!pure_unified_layout(ops) || !ops->unified)
return lxc_cmd_rsp_send(fd, &rsp);
return lxc_cmd_rsp_send_reap(fd, &rsp);
send_fd = limiting_cgroup ? ops->unified->dfd_lim
: ops->unified->dfd_con;
rsp.ret = 0;
ret = lxc_abstract_unix_send_fds(fd, &send_fd, 1, &rsp, sizeof(rsp));
if (ret < 0)
return log_error(LXC_CMD_REAP_CLIENT_FD, "Failed to send cgroup2 fd");
if (send_fd < 0) {
rsp.ret = -EBADF;
return lxc_cmd_rsp_send_reap(fd, &rsp);
}
return 0;
rsp.ret = 0;
return rsp_one_fd(fd, send_fd, &rsp);
}
static int lxc_cmd_get_cgroup2_fd_callback(int fd, struct lxc_cmd_req *req,
......@@ -1546,6 +1654,16 @@ static int lxc_cmd_get_limiting_cgroup2_fd_callback(int fd,
true);
}
static int lxc_cmd_rsp_send_enosys(int fd, int id)
{
struct lxc_cmd_rsp rsp = {
.ret = -ENOSYS,
};
__lxc_cmd_rsp_send(fd, &rsp);
return syserrno_set(-ENOSYS, "Invalid command id %d", id);
}
static int lxc_cmd_process(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler,
struct lxc_epoll_descr *descr)
......@@ -1577,10 +1695,11 @@ static int lxc_cmd_process(int fd, struct lxc_cmd_req *req,
[LXC_CMD_GET_LIMITING_CGROUP2_FD] = lxc_cmd_get_limiting_cgroup2_fd_callback,
[LXC_CMD_GET_DEVPTS_FD] = lxc_cmd_get_devpts_fd_callback,
[LXC_CMD_GET_SECCOMP_NOTIFY_FD] = lxc_cmd_get_seccomp_notify_fd_callback,
[LXC_CMD_GET_CGROUP_CTX] = lxc_cmd_get_cgroup_ctx_callback,
};
if (req->cmd >= LXC_CMD_MAX)
return log_trace_errno(-1, EINVAL, "Invalid command id %d", req->cmd);
return lxc_cmd_rsp_send_enosys(fd, req->cmd);
return cb[req->cmd](fd, req, handler, descr);
}
......@@ -1648,7 +1767,7 @@ static int lxc_cmd_handler(int fd, uint32_t events, void *data,
.ret = -EPERM,
};
lxc_cmd_rsp_send(fd, &rsp);
__lxc_cmd_rsp_send(fd, &rsp);
}
goto out_close;
......@@ -1679,9 +1798,14 @@ static int lxc_cmd_handler(int fd, uint32_t events, void *data,
}
ret = lxc_cmd_process(fd, &req, handler, descr);
if (ret) {
/* This is not an error, but only a request to close fd. */
if (ret < 0) {
DEBUG("Failed to process command %s; cleaning up client fd %d", lxc_cmd_str(req.cmd), fd);
goto out_close;
} else if (ret == LXC_CMD_REAP_CLIENT_FD) {
TRACE("Processed command %s; cleaning up client fd %d", lxc_cmd_str(req.cmd), fd);
goto out_close;
} else {
TRACE("Processed command %s; keeping client fd %d", lxc_cmd_str(req.cmd), fd);
}
out:
......
......@@ -8,6 +8,7 @@
#include <unistd.h>
#include "compiler.h"
#include "cgroups/cgroup.h"
#include "lxccontainer.h"
#include "macro.h"
#include "state.h"
......@@ -20,29 +21,30 @@
#define LXC_CMD_REAP_CLIENT_FD 1
/*
 * Identifiers of the commands understood by the command server.
 *
 * Every command carries an explicit value; LXC_CMD_MAX is one past the last
 * valid command and is used as the upper bound when validating a request's
 * command id.
 * NOTE(review): the values are presumably pinned so ids stay stable between
 * client and server builds -- confirm before renumbering or inserting.
 */
typedef enum {
	LXC_CMD_CONSOLE				= 0,
	LXC_CMD_TERMINAL_WINCH			= 1,
	LXC_CMD_STOP				= 2,
	LXC_CMD_GET_STATE			= 3,
	LXC_CMD_GET_INIT_PID			= 4,
	LXC_CMD_GET_CLONE_FLAGS			= 5,
	LXC_CMD_GET_CGROUP			= 6,
	LXC_CMD_GET_CONFIG_ITEM			= 7,
	LXC_CMD_GET_NAME			= 8,
	LXC_CMD_GET_LXCPATH			= 9,
	LXC_CMD_ADD_STATE_CLIENT		= 10,
	LXC_CMD_CONSOLE_LOG			= 11,
	LXC_CMD_SERVE_STATE_CLIENTS		= 12,
	LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER	= 13,
	LXC_CMD_ADD_BPF_DEVICE_CGROUP		= 14,
	LXC_CMD_FREEZE				= 15,
	LXC_CMD_UNFREEZE			= 16,
	LXC_CMD_GET_CGROUP2_FD			= 17,
	LXC_CMD_GET_INIT_PIDFD			= 18,
	LXC_CMD_GET_LIMITING_CGROUP		= 19,
	LXC_CMD_GET_LIMITING_CGROUP2_FD		= 20,
	LXC_CMD_GET_DEVPTS_FD			= 21,
	LXC_CMD_GET_SECCOMP_NOTIFY_FD		= 22,
	LXC_CMD_GET_CGROUP_CTX			= 23,
	LXC_CMD_MAX,
} lxc_cmd_t;
......@@ -122,6 +124,11 @@ __hidden extern int lxc_try_cmd(const char *name, const char *lxcpath);
__hidden extern int lxc_cmd_console_log(const char *name, const char *lxcpath,
struct lxc_console_log *log);
__hidden extern int lxc_cmd_get_seccomp_notify_fd(const char *name, const char *lxcpath);
__hidden extern int lxc_cmd_get_cgroup_ctx(const char *name, const char *lxcpath,
const char *controller, bool batch,
size_t size_ret_ctx,
struct cgroup_ctx *ret_ctx)
__access_r(6, 5);
__hidden extern int lxc_cmd_seccomp_notify_add_listener(const char *name, const char *lxcpath, int fd,
/* unused */ unsigned int command,
/* unused */ unsigned int flags);
......
......@@ -1509,8 +1509,10 @@ int lxc_setup_devpts_parent(struct lxc_handler *handler)
if (handler->conf->pty_max <= 0)
return 0;
ret = lxc_abstract_unix_recv_fds(handler->data_sock[1], &handler->conf->devpts_fd, 1,
&handler->conf->devpts_fd, sizeof(handler->conf->devpts_fd));
ret = lxc_abstract_unix_recv_one_fd(handler->data_sock[1],
&handler->conf->devpts_fd,
&handler->conf->devpts_fd,
sizeof(handler->conf->devpts_fd));
if (ret < 0)
return log_error_errno(-1, errno, "Failed to receive devpts fd from child");
......
......@@ -530,6 +530,14 @@ __lxc_unused static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo, \
__internal_ret__; \
})
/*
 * syswarn_set: Set errno to the absolute value of @__ret__, emit a SYSWARN()
 * with the given format and arguments, and evaluate to @__ret__.
 *
 * @__ret__ is evaluated exactly once: its value is captured in a temporary
 * that feeds both errno and the result, so an argument with side effects is
 * safe to pass.
 */
#define syswarn_set(__ret__, format, ...)                     \
	({                                                    \
		typeof(__ret__) __internal_ret__ = (__ret__); \
		errno = abs(__internal_ret__);                \
		SYSWARN(format, ##__VA_ARGS__);               \
		__internal_ret__;                             \
	})
#define log_error(__ret__, format, ...) \
({ \
typeof(__ret__) __internal_ret__ = (__ret__); \
......
......@@ -737,4 +737,12 @@ static inline int PTR_RET(const void *ptr)
return 0;
}
/*
 * min: Evaluate to the smaller of @x and @y.
 *
 * Both arguments are copied into temporaries first, so each is evaluated
 * exactly once. The discarded pointer comparison between the two temporaries
 * exists only for type checking: if @x and @y have different types the
 * compiler diagnoses a comparison of distinct pointer types. When the values
 * are equal, the second argument's copy is the result.
 *
 * Relies on the GNU statement-expression and typeof extensions.
 */
#define min(x, y) \
({ \
typeof(x) _min1 = (x); \
typeof(y) _min2 = (y); \
(void)(&_min1 == &_min2); \
_min1 < _min2 ? _min1 : _min2; \
})
#endif /* __LXC_MACRO_H */
......@@ -101,7 +101,7 @@ static inline void *memdup(const void *data, size_t len)
({ \
if (a >= 0 && a != b) \
close(a); \
if (close >= 0) \
if (b >= 0) \
close(b); \
a = b = -EBADF; \
})
......
......@@ -1637,9 +1637,9 @@ int lxc_seccomp_recv_notifier_fd(struct lxc_seccomp *seccomp, int socket_fd)
if (seccomp->notifier.wants_supervision) {
int ret;
ret = lxc_abstract_unix_recv_fds(socket_fd,
&seccomp->notifier.notify_fd,
1, NULL, 0);
ret = lxc_abstract_unix_recv_one_fd(socket_fd,
&seccomp->notifier.notify_fd,
NULL, 0);
if (ret < 0)
return -1;
}
......
......@@ -1041,7 +1041,7 @@ static int do_start(void *data)
lxc_sync_fini_parent(handler);
if (lxc_abstract_unix_recv_fds(data_sock1, &status_fd, 1, NULL, 0) < 0) {
if (lxc_abstract_unix_recv_one_fd(data_sock1, &status_fd, NULL, 0) < 0) {
ERROR("Failed to receive status file descriptor to child process");
goto out_warn_father;
}
......@@ -1460,7 +1460,7 @@ static int lxc_recv_ttys_from_child(struct lxc_handler *handler)
for (i = 0; i < conf->ttys.max; i++) {
int ttyfds[2];
ret = lxc_abstract_unix_recv_fds(sock, ttyfds, 2, NULL, 0);
ret = lxc_abstract_unix_recv_two_fds(sock, ttyfds);
if (ret < 0)
break;
......@@ -1888,7 +1888,7 @@ static int lxc_spawn(struct lxc_handler *handler)
}
}
cgroup_ops->payload_finalize(cgroup_ops);
cgroup_ops->finalize(cgroup_ops);
TRACE("Finished setting up cgroups");
if (handler->ns_unshare_flags & CLONE_NEWTIME) {
......
......@@ -36,7 +36,7 @@ static const char *const strstate[] = {
/*
 * lxc_state2str: Map a container state to its name.
 *
 * @state : state to translate
 *
 * Returns the matching entry of strstate[] for states in the range
 * [STOPPED, MAX_STATE - 1] and the string "INVALID STATE" for anything else,
 * so the result can always be passed to a "%s" conversion.
 */
const char *lxc_state2str(lxc_state_t state)
{
	if (state < STOPPED || state > MAX_STATE - 1)
		return "INVALID STATE";

	return strstate[state];
}
......
......@@ -245,4 +245,30 @@ __hidden extern int safe_mount_beneath_at(int beneat_fd, const char *src, const
const char *fstype, unsigned int flags, const void *data);
__hidden __lxc_unused int print_r(int fd, const char *path);
/*
 * copy_struct_from_client: Copy a client-supplied structure into a
 * server-side structure that may have a different size.
 *
 * @server_size : size of the object @dst points to
 * @dst         : destination; must be at least @server_size bytes
 * @client_size : number of bytes available at @src
 * @src         : client data; may be NULL when @client_size is 0
 *
 * The first min(@server_size, @client_size) bytes are copied. If the client
 * structure is shorter, the remaining bytes of @dst are zeroed so members the
 * client does not know about read as 0. If the client structure is longer,
 * the excess bytes are ignored.
 *
 * Always returns 0.
 */
static inline int copy_struct_from_client(__u32 server_size, void *dst,
					  __u32 client_size, const void *src)
{
	__u32 size = server_size < client_size ? server_size : client_size;

	/* Deal with trailing bytes the client did not provide. */
	if (client_size < server_size)
		memset((char *)dst + size, 0, server_size - size);

	/*
	 * TODO: When the client structure is larger, verify that the bytes
	 * beyond server_size are all 0 before accepting the request.
	 */
	if (size > 0)
		memcpy(dst, src, size);

	return 0;
}
/*
 * copy_struct_to_client: Copy a server-side structure into a client buffer
 * that may have a different size.
 *
 * @client_size : size of the object @dst points to
 * @dst         : destination buffer
 * @server_size : size of the object @src points to
 * @src         : server-side structure
 *
 * Copies the smaller of the two sizes and returns the number of bytes copied.
 */
static inline __u32 copy_struct_to_client(__u32 client_size, void *dst,
					  __u32 server_size, const void *src)
{
	__u32 copied = server_size;

	if (client_size < copied)
		copied = client_size;

	memcpy(dst, src, copied);
	return copied;
}
#endif /* __LXC_UTILS_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment