Unverified Commit b6f48637 by Stéphane Graber Committed by GitHub

Merge pull request #3689 from brauner/2021-02-21/fixes

cgroups: introduce fd-only cgroup attach via LXC_CMD_GET_CGROUP_CTX
parents 79399658 3a6678c7
...@@ -112,22 +112,18 @@ int lxc_abstract_unix_connect(const char *path) ...@@ -112,22 +112,18 @@ int lxc_abstract_unix_connect(const char *path)
return move_fd(fd); return move_fd(fd);
} }
int lxc_abstract_unix_send_fds_iov(int fd, int *sendfds, int num_sendfds, int lxc_abstract_unix_send_fds_iov(int fd, const int *sendfds, int num_sendfds,
struct iovec *iov, size_t iovlen) struct iovec *iov, size_t iovlen)
{ {
__do_free char *cmsgbuf = NULL; __do_free char *cmsgbuf = NULL;
int ret; int ret;
struct msghdr msg; struct msghdr msg = {};
struct cmsghdr *cmsg = NULL; struct cmsghdr *cmsg = NULL;
size_t cmsgbufsize = CMSG_SPACE(num_sendfds * sizeof(int)); size_t cmsgbufsize = CMSG_SPACE(num_sendfds * sizeof(int));
memset(&msg, 0, sizeof(msg));
cmsgbuf = malloc(cmsgbufsize); cmsgbuf = malloc(cmsgbufsize);
if (!cmsgbuf) { if (!cmsgbuf)
errno = ENOMEM; return ret_errno(-ENOMEM);
return -1;
}
msg.msg_control = cmsgbuf; msg.msg_control = cmsgbuf;
msg.msg_controllen = cmsgbufsize; msg.msg_controllen = cmsgbufsize;
...@@ -151,10 +147,10 @@ int lxc_abstract_unix_send_fds_iov(int fd, int *sendfds, int num_sendfds, ...@@ -151,10 +147,10 @@ int lxc_abstract_unix_send_fds_iov(int fd, int *sendfds, int num_sendfds,
return ret; return ret;
} }
int lxc_abstract_unix_send_fds(int fd, int *sendfds, int num_sendfds, int lxc_abstract_unix_send_fds(int fd, const int *sendfds, int num_sendfds,
void *data, size_t size) void *data, size_t size)
{ {
char buf[1] = {0}; char buf[1] = {};
struct iovec iov = { struct iovec iov = {
.iov_base = data ? data : buf, .iov_base = data ? data : buf,
.iov_len = data ? size : sizeof(buf), .iov_len = data ? size : sizeof(buf),
...@@ -168,60 +164,174 @@ int lxc_unix_send_fds(int fd, int *sendfds, int num_sendfds, void *data, ...@@ -168,60 +164,174 @@ int lxc_unix_send_fds(int fd, int *sendfds, int num_sendfds, void *data,
return lxc_abstract_unix_send_fds(fd, sendfds, num_sendfds, data, size); return lxc_abstract_unix_send_fds(fd, sendfds, num_sendfds, data, size);
} }
static int lxc_abstract_unix_recv_fds_iov(int fd, int *recvfds, int num_recvfds, static ssize_t lxc_abstract_unix_recv_fds_iov(int fd,
struct iovec *iov, size_t iovlen) struct unix_fds *ret_fds,
struct iovec *ret_iov,
size_t size_ret_iov)
{ {
__do_free char *cmsgbuf = NULL; __do_free char *cmsgbuf = NULL;
int ret; ssize_t ret;
struct msghdr msg; struct msghdr msg = {};
struct cmsghdr *cmsg = NULL;
size_t cmsgbufsize = CMSG_SPACE(sizeof(struct ucred)) + size_t cmsgbufsize = CMSG_SPACE(sizeof(struct ucred)) +
CMSG_SPACE(num_recvfds * sizeof(int)); CMSG_SPACE(ret_fds->fd_count_max * sizeof(int));
memset(&msg, 0, sizeof(msg));
cmsgbuf = malloc(cmsgbufsize); cmsgbuf = zalloc(cmsgbufsize);
if (!cmsgbuf) if (!cmsgbuf)
return ret_errno(ENOMEM); return ret_errno(ENOMEM);
msg.msg_control = cmsgbuf; msg.msg_control = cmsgbuf;
msg.msg_controllen = cmsgbufsize; msg.msg_controllen = cmsgbufsize;
msg.msg_iov = iov; msg.msg_iov = ret_iov;
msg.msg_iovlen = iovlen; msg.msg_iovlen = size_ret_iov;
do { again:
ret = recvmsg(fd, &msg, MSG_CMSG_CLOEXEC); ret = recvmsg(fd, &msg, MSG_CMSG_CLOEXEC);
} while (ret < 0 && errno == EINTR); if (ret < 0) {
if (ret < 0 || ret == 0) if (errno == EINTR)
return ret; goto again;
return syserrno(-errno, "Failed to receive response");
}
if (ret == 0)
return 0;
/* If SO_PASSCRED is set we will always get a ucred message. */
for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
__u32 idx;
/* /*
* If SO_PASSCRED is set we will always get a ucred message. * This causes some compilers to complain about
* increased alignment requirements but I haven't found
* a better way to deal with this yet. Suggestions
* welcome!
*/ */
for (struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { #pragma GCC diagnostic push
if (cmsg->cmsg_type != SCM_RIGHTS) #pragma GCC diagnostic ignored "-Wcast-align"
continue; int *fds_raw = (int *)CMSG_DATA(cmsg);
#pragma GCC diagnostic pop
memset(recvfds, -1, num_recvfds * sizeof(int)); __u32 num_raw = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
if (cmsg &&
cmsg->cmsg_len == CMSG_LEN(num_recvfds * sizeof(int)) && /*
cmsg->cmsg_level == SOL_SOCKET) * We received an insane amount of file descriptors
memcpy(recvfds, CMSG_DATA(cmsg), num_recvfds * sizeof(int)); * which exceeds the kernel limit we know about so
* close them and return an error.
*/
if (num_raw > KERNEL_SCM_MAX_FD) {
for (idx = 0; idx < num_raw; idx++)
close(fds_raw[idx]);
return syserrno_set(-EFBIG, "Received excessive number of file descriptors");
}
if (ret_fds->fd_count_max > num_raw) {
/*
* Make sure any excess entries in the fd array
* are set to -EBADF so our cleanup functions
* can safely be called.
*/
for (idx = num_raw; idx < ret_fds->fd_count_max; idx++)
ret_fds->fd[idx] = -EBADF;
WARN("Received fewer file descriptors than we expected %u != %u", ret_fds->fd_count_max, num_raw);
} else if (ret_fds->fd_count_max < num_raw) {
/* Make sure we close any excess fds we received. */
for (idx = ret_fds->fd_count_max; idx < num_raw; idx++)
close(fds_raw[idx]);
WARN("Received more file descriptors than we expected %u != %u", ret_fds->fd_count_max, num_raw);
/* Cap the number of received file descriptors. */
num_raw = ret_fds->fd_count_max;
}
memcpy(ret_fds->fd, CMSG_DATA(cmsg), num_raw * sizeof(int));
ret_fds->fd_count_ret = num_raw;
break; break;
} }
}
return ret; return ret;
} }
int lxc_abstract_unix_recv_fds(int fd, int *recvfds, int num_recvfds, ssize_t lxc_abstract_unix_recv_fds(int fd, struct unix_fds *ret_fds,
void *data, size_t size) void *ret_data, size_t size_ret_data)
{ {
char buf[1] = {0}; char buf[1] = {};
struct iovec iov = { struct iovec iov = {
.iov_base = data ? data : buf, .iov_base = ret_data ? ret_data : buf,
.iov_len = data ? size : sizeof(buf), .iov_len = ret_data ? size_ret_data : sizeof(buf),
}; };
return lxc_abstract_unix_recv_fds_iov(fd, recvfds, num_recvfds, &iov, 1); ssize_t ret;
ret = lxc_abstract_unix_recv_fds_iov(fd, ret_fds, &iov, 1);
if (ret < 0)
return ret;
return ret;
}
ssize_t lxc_abstract_unix_recv_one_fd(int fd, int *ret_fd, void *ret_data,
size_t size_ret_data)
{
call_cleaner(put_unix_fds) struct unix_fds *fds = NULL;
char buf[1] = {};
struct iovec iov = {
.iov_base = ret_data ? ret_data : buf,
.iov_len = ret_data ? size_ret_data : sizeof(buf),
};
ssize_t ret;
fds = &(struct unix_fds){
.fd_count_max = 1,
};
ret = lxc_abstract_unix_recv_fds_iov(fd, fds, &iov, 1);
if (ret < 0)
return ret;
if (ret == 0)
return ret_errno(ENODATA);
if (fds->fd_count_ret != fds->fd_count_max)
*ret_fd = -EBADF;
else
*ret_fd = move_fd(fds->fd[0]);
return ret;
}
ssize_t lxc_abstract_unix_recv_two_fds(int fd, int *ret_fd)
{
call_cleaner(put_unix_fds) struct unix_fds *fds = NULL;
char buf[1] = {};
struct iovec iov = {
.iov_base = buf,
.iov_len = sizeof(buf),
};
ssize_t ret;
fds = &(struct unix_fds){
.fd_count_max = 2,
};
ret = lxc_abstract_unix_recv_fds_iov(fd, fds, &iov, 1);
if (ret < 0)
return ret;
if (ret == 0)
return ret_errno(ENODATA);
if (fds->fd_count_ret != fds->fd_count_max) {
ret_fd[0] = -EBADF;
ret_fd[1] = -EBADF;
} else {
ret_fd[0] = move_fd(fds->fd[0]);
ret_fd[1] = move_fd(fds->fd[1]);
}
return 0;
} }
int lxc_abstract_unix_send_credential(int fd, void *data, size_t size) int lxc_abstract_unix_send_credential(int fd, void *data, size_t size)
......
...@@ -5,9 +5,24 @@ ...@@ -5,9 +5,24 @@
#include <stdio.h> #include <stdio.h>
#include <sys/socket.h> #include <sys/socket.h>
#include <stddef.h>
#include <sys/un.h> #include <sys/un.h>
#include "compiler.h" #include "compiler.h"
#include "macro.h"
#include "memory_utils.h"
/*
* Technically 253 is the kernel limit but we want the struct to be a
* multiple of 8.
*/
#define KERNEL_SCM_MAX_FD 252
struct unix_fds {
__u32 fd_count_max;
__u32 fd_count_ret;
__s32 fd[KERNEL_SCM_MAX_FD];
} __attribute__((aligned(8)));
/* does not enforce \0-termination */ /* does not enforce \0-termination */
__hidden extern int lxc_abstract_unix_open(const char *path, int type, int flags); __hidden extern int lxc_abstract_unix_open(const char *path, int type, int flags);
...@@ -15,14 +30,29 @@ __hidden extern void lxc_abstract_unix_close(int fd); ...@@ -15,14 +30,29 @@ __hidden extern void lxc_abstract_unix_close(int fd);
/* does not enforce \0-termination */ /* does not enforce \0-termination */
__hidden extern int lxc_abstract_unix_connect(const char *path); __hidden extern int lxc_abstract_unix_connect(const char *path);
__hidden extern int lxc_abstract_unix_send_fds(int fd, int *sendfds, int num_sendfds, void *data, __hidden extern int lxc_abstract_unix_send_fds(int fd, const int *sendfds,
size_t size) __access_r(2, 3) __access_r(4, 5); int num_sendfds, void *data,
size_t size) __access_r(2, 3)
__access_r(4, 5);
__hidden extern int lxc_abstract_unix_send_fds_iov(int fd, const int *sendfds,
int num_sendfds,
struct iovec *iov,
size_t iovlen)
__access_r(2, 3);
__hidden extern ssize_t lxc_abstract_unix_recv_fds(int fd,
struct unix_fds *ret_fds,
void *ret_data,
size_t size_ret_data)
__access_r(3, 4);
__hidden extern int lxc_abstract_unix_send_fds_iov(int fd, int *sendfds, int num_sendfds, __hidden extern ssize_t lxc_abstract_unix_recv_one_fd(int fd, int *ret_fd,
struct iovec *iov, size_t iovlen) __access_r(2, 3); void *ret_data,
size_t size_ret_data)
__access_r(3, 4);
__hidden extern int lxc_abstract_unix_recv_fds(int fd, int *recvfds, int num_recvfds, void *data, __hidden extern ssize_t lxc_abstract_unix_recv_two_fds(int fd, int *ret_fd);
size_t size) __access_r(2, 3) __access_r(4, 5);
__hidden extern int lxc_unix_send_fds(int fd, int *sendfds, int num_sendfds, void *data, size_t size); __hidden extern int lxc_unix_send_fds(int fd, int *sendfds, int num_sendfds, void *data, size_t size);
...@@ -37,4 +67,13 @@ __hidden extern int lxc_unix_connect(struct sockaddr_un *addr); ...@@ -37,4 +67,13 @@ __hidden extern int lxc_unix_connect(struct sockaddr_un *addr);
__hidden extern int lxc_unix_connect_type(struct sockaddr_un *addr, int type); __hidden extern int lxc_unix_connect_type(struct sockaddr_un *addr, int type);
__hidden extern int lxc_socket_set_timeout(int fd, int rcv_timeout, int snd_timeout); __hidden extern int lxc_socket_set_timeout(int fd, int rcv_timeout, int snd_timeout);
static inline void put_unix_fds(struct unix_fds *fds)
{
if (!IS_ERR_OR_NULL(fds)) {
for (size_t idx = 0; idx < fds->fd_count_ret; idx++)
close_prot_errno_disarm(fds->fd[idx]);
}
}
define_cleanup_function(struct unix_fds *, put_unix_fds);
#endif /* __LXC_AF_UNIX_H */ #endif /* __LXC_AF_UNIX_H */
...@@ -164,7 +164,7 @@ static inline bool sync_wake_fd(int fd, int fd_send) ...@@ -164,7 +164,7 @@ static inline bool sync_wake_fd(int fd, int fd_send)
static inline bool sync_wait_fd(int fd, int *fd_recv) static inline bool sync_wait_fd(int fd, int *fd_recv)
{ {
return lxc_abstract_unix_recv_fds(fd, fd_recv, 1, NULL, 0) > 0; return lxc_abstract_unix_recv_one_fd(fd, fd_recv, NULL, 0) > 0;
} }
static bool attach_lsm(lxc_attach_options_t *options) static bool attach_lsm(lxc_attach_options_t *options)
...@@ -400,7 +400,6 @@ static int get_attach_context(struct attach_context *ctx, ...@@ -400,7 +400,6 @@ static int get_attach_context(struct attach_context *ctx,
ctx->init_pid = pidfd_get_pid(ctx->dfd_self_pid, ctx->init_pidfd); ctx->init_pid = pidfd_get_pid(ctx->dfd_self_pid, ctx->init_pidfd);
else else
ctx->init_pid = lxc_cmd_get_init_pid(container->name, container->config_path); ctx->init_pid = lxc_cmd_get_init_pid(container->name, container->config_path);
if (ctx->init_pid < 0) if (ctx->init_pid < 0)
return log_error(-1, "Failed to get init pid"); return log_error(-1, "Failed to get init pid");
...@@ -488,16 +487,16 @@ static int same_nsfd(int dfd_pid1, int dfd_pid2, const char *ns_path) ...@@ -488,16 +487,16 @@ static int same_nsfd(int dfd_pid1, int dfd_pid2, const char *ns_path)
ret = fstatat(dfd_pid1, ns_path, &ns_st1, 0); ret = fstatat(dfd_pid1, ns_path, &ns_st1, 0);
if (ret) if (ret)
return -1; return -errno;
ret = fstatat(dfd_pid2, ns_path, &ns_st2, 0); ret = fstatat(dfd_pid2, ns_path, &ns_st2, 0);
if (ret) if (ret)
return -1; return -errno;
/* processes are in the same namespace */ /* processes are in the same namespace */
if ((ns_st1.st_dev == ns_st2.st_dev) && if ((ns_st1.st_dev == ns_st2.st_dev) &&
(ns_st1.st_ino == ns_st2.st_ino)) (ns_st1.st_ino == ns_st2.st_ino))
return -EINVAL; return 1;
return 0; return 0;
} }
...@@ -511,19 +510,23 @@ static int same_ns(int dfd_pid1, int dfd_pid2, const char *ns_path) ...@@ -511,19 +510,23 @@ static int same_ns(int dfd_pid1, int dfd_pid2, const char *ns_path)
(PROTECT_LOOKUP_BENEATH_WITH_MAGICLINKS & (PROTECT_LOOKUP_BENEATH_WITH_MAGICLINKS &
~(RESOLVE_NO_XDEV | RESOLVE_BENEATH)), 0); ~(RESOLVE_NO_XDEV | RESOLVE_BENEATH)), 0);
if (ns_fd2 < 0) { if (ns_fd2 < 0) {
/* The kernel does not support this namespace. This is not an error. */
if (errno == ENOENT) if (errno == ENOENT)
return -EINVAL; return -ENOENT;
return log_error_errno(-errno, errno, "Failed to open %d(%s)", return syserrno(-errno, "Failed to open %d(%s)", dfd_pid2, ns_path);
dfd_pid2, ns_path);
} }
ret = same_nsfd(dfd_pid1, dfd_pid2, ns_path); ret = same_nsfd(dfd_pid1, dfd_pid2, ns_path);
if (ret < 0) switch (ret) {
return ret; case -ENOENT:
__fallthrough;
case 1:
return ret_errno(ENOENT);
case 0:
/* processes are in different namespaces */ /* processes are in different namespaces */
return move_fd(ns_fd2); return move_fd(ns_fd2);
}
return ret;
} }
static int __prepare_namespaces_pidfd(struct attach_context *ctx) static int __prepare_namespaces_pidfd(struct attach_context *ctx)
...@@ -537,14 +540,19 @@ static int __prepare_namespaces_pidfd(struct attach_context *ctx) ...@@ -537,14 +540,19 @@ static int __prepare_namespaces_pidfd(struct attach_context *ctx)
ret = same_nsfd(ctx->dfd_self_pid, ret = same_nsfd(ctx->dfd_self_pid,
ctx->dfd_init_pid, ctx->dfd_init_pid,
ns_info[i].proc_path); ns_info[i].proc_path);
if (ret == -EINVAL) switch (ret) {
case -ENOENT:
__fallthrough;
case 1:
ctx->ns_inherited &= ~ns_info[i].clone_flag; ctx->ns_inherited &= ~ns_info[i].clone_flag;
else if (ret < 0) break;
return log_error_errno(-1, errno, case 0:
"Failed to determine whether %s namespace is shared",
ns_info[i].proc_name);
else
TRACE("Shared %s namespace needs attach", ns_info[i].proc_name); TRACE("Shared %s namespace needs attach", ns_info[i].proc_name);
break;
}
return syserrno(-errno, "Failed to determine whether %s namespace is shared",
ns_info[i].proc_name);
} }
return 0; return 0;
...@@ -573,7 +581,7 @@ static int __prepare_namespaces_nsfd(struct attach_context *ctx, ...@@ -573,7 +581,7 @@ static int __prepare_namespaces_nsfd(struct attach_context *ctx,
if (ctx->ns_fd[i] >= 0) if (ctx->ns_fd[i] >= 0)
continue; continue;
if (ctx->ns_fd[i] == -EINVAL) { if (ctx->ns_fd[i] == -ENOENT) {
ctx->ns_inherited &= ~ns_info[i].clone_flag; ctx->ns_inherited &= ~ns_info[i].clone_flag;
continue; continue;
} }
......
...@@ -1305,6 +1305,9 @@ static int chown_cgroup_wrapper(void *data) ...@@ -1305,6 +1305,9 @@ static int chown_cgroup_wrapper(void *data)
for (int i = 0; arg->hierarchies[i]; i++) { for (int i = 0; arg->hierarchies[i]; i++) {
int dirfd = arg->hierarchies[i]->dfd_con; int dirfd = arg->hierarchies[i]->dfd_con;
if (dirfd < 0)
return syserrno_set(-EBADF, "Invalid cgroup file descriptor");
(void)fchowmodat(dirfd, "", destuid, nsgid, 0775); (void)fchowmodat(dirfd, "", destuid, nsgid, 0775);
/* /*
...@@ -1361,7 +1364,7 @@ __cgfsng_ops static bool cgfsng_chown(struct cgroup_ops *ops, ...@@ -1361,7 +1364,7 @@ __cgfsng_ops static bool cgfsng_chown(struct cgroup_ops *ops,
return true; return true;
} }
__cgfsng_ops static void cgfsng_payload_finalize(struct cgroup_ops *ops) __cgfsng_ops static void cgfsng_finalize(struct cgroup_ops *ops)
{ {
if (!ops) if (!ops)
return; return;
...@@ -1371,15 +1374,12 @@ __cgfsng_ops static void cgfsng_payload_finalize(struct cgroup_ops *ops) ...@@ -1371,15 +1374,12 @@ __cgfsng_ops static void cgfsng_payload_finalize(struct cgroup_ops *ops)
for (int i = 0; ops->hierarchies[i]; i++) { for (int i = 0; ops->hierarchies[i]; i++) {
struct hierarchy *h = ops->hierarchies[i]; struct hierarchy *h = ops->hierarchies[i];
/*
* we don't keep the fds for non-unified hierarchies around /* Close all monitor cgroup file descriptors. */
* mainly because we don't make use of them anymore after the close_prot_errno_disarm(h->dfd_mon);
* core cgroup setup is done but also because there are quite a
* lot of them.
*/
if (!is_unified_hierarchy(h))
close_prot_errno_disarm(h->dfd_con);
} }
/* Close the cgroup root file descriptor. */
close_prot_errno_disarm(ops->dfd_mnt);
/* /*
* The checking for freezer support should obviously be done at cgroup * The checking for freezer support should obviously be done at cgroup
...@@ -2183,8 +2183,8 @@ static int cgroup_attach_move_into_leaf(const struct lxc_conf *conf, ...@@ -2183,8 +2183,8 @@ static int cgroup_attach_move_into_leaf(const struct lxc_conf *conf,
size_t pidstr_len; size_t pidstr_len;
ssize_t ret; ssize_t ret;
ret = lxc_abstract_unix_recv_fds(sk, target_fds, 2, NULL, 0); ret = lxc_abstract_unix_recv_two_fds(sk, target_fds);
if (ret <= 0) if (ret < 0)
return log_error_errno(-1, errno, "Failed to receive target cgroup fd"); return log_error_errno(-1, errno, "Failed to receive target cgroup fd");
target_fd0 = target_fds[0]; target_fd0 = target_fds[0];
target_fd1 = target_fds[1]; target_fd1 = target_fds[1];
...@@ -3322,7 +3322,7 @@ struct cgroup_ops *cgroup_ops_init(struct lxc_conf *conf) ...@@ -3322,7 +3322,7 @@ struct cgroup_ops *cgroup_ops_init(struct lxc_conf *conf)
cgfsng_ops->payload_delegate_controllers = cgfsng_payload_delegate_controllers; cgfsng_ops->payload_delegate_controllers = cgfsng_payload_delegate_controllers;
cgfsng_ops->payload_create = cgfsng_payload_create; cgfsng_ops->payload_create = cgfsng_payload_create;
cgfsng_ops->payload_enter = cgfsng_payload_enter; cgfsng_ops->payload_enter = cgfsng_payload_enter;
cgfsng_ops->payload_finalize = cgfsng_payload_finalize; cgfsng_ops->finalize = cgfsng_finalize;
cgfsng_ops->get_cgroup = cgfsng_get_cgroup; cgfsng_ops->get_cgroup = cgfsng_get_cgroup;
cgfsng_ops->get = cgfsng_get; cgfsng_ops->get = cgfsng_get;
cgfsng_ops->set = cgfsng_set; cgfsng_ops->set = cgfsng_set;
...@@ -3345,23 +3345,14 @@ struct cgroup_ops *cgroup_ops_init(struct lxc_conf *conf) ...@@ -3345,23 +3345,14 @@ struct cgroup_ops *cgroup_ops_init(struct lxc_conf *conf)
return move_ptr(cgfsng_ops); return move_ptr(cgfsng_ops);
} }
int cgroup_attach(const struct lxc_conf *conf, const char *name, static int __unified_attach_fd(const struct lxc_conf *conf, int fd_unified, pid_t pid)
const char *lxcpath, pid_t pid)
{ {
__do_close int unified_fd = -EBADF;
int ret; int ret;
if (!conf || is_empty_string(name) || is_empty_string(lxcpath) || pid <= 0)
return ret_errno(EINVAL);
unified_fd = lxc_cmd_get_cgroup2_fd(name, lxcpath);
if (unified_fd < 0)
return ret_errno(ENOCGROUP2);
if (!lxc_list_empty(&conf->id_map)) { if (!lxc_list_empty(&conf->id_map)) {
struct userns_exec_unified_attach_data args = { struct userns_exec_unified_attach_data args = {
.conf = conf, .conf = conf,
.unified_fd = unified_fd, .unified_fd = fd_unified,
.pid = pid, .pid = pid,
}; };
...@@ -3375,7 +3366,76 @@ int cgroup_attach(const struct lxc_conf *conf, const char *name, ...@@ -3375,7 +3366,76 @@ int cgroup_attach(const struct lxc_conf *conf, const char *name,
cgroup_unified_attach_child_wrapper, cgroup_unified_attach_child_wrapper,
&args); &args);
} else { } else {
ret = cgroup_attach_leaf(conf, unified_fd, pid); ret = cgroup_attach_leaf(conf, fd_unified, pid);
}
return ret;
}
static int __cgroup_attach_many(const struct lxc_conf *conf, const char *name,
const char *lxcpath, pid_t pid)
{
call_cleaner(put_cgroup_ctx) struct cgroup_ctx *ctx = &(struct cgroup_ctx){};
int ret;
char pidstr[INTTYPE_TO_STRLEN(pid_t)];
size_t idx;
ssize_t pidstr_len;
ret = lxc_cmd_get_cgroup_ctx(name, lxcpath, NULL, true,
sizeof(struct cgroup_ctx), ctx);
if (ret < 0)
return ret_errno(ENOSYS);
pidstr_len = strnprintf(pidstr, sizeof(pidstr), "%d", pid);
if (pidstr_len < 0)
return pidstr_len;
for (idx = 0; idx < ctx->fd_len; idx++) {
int dfd_con = ctx->fd[idx];
if (unified_cgroup_fd(dfd_con))
ret = __unified_attach_fd(conf, dfd_con, pid);
else
ret = lxc_writeat(dfd_con, "cgroup.procs", pidstr, pidstr_len);
if (ret)
return syserrno(ret, "Failed to attach to cgroup fd %d", dfd_con);
else
TRACE("Attached to cgroup fd %d", dfd_con);
}
if (idx == 0)
return syserrno_set(-ENOENT, "Failed to attach to cgroups");
TRACE("Attached to %s cgroup layout", cgroup_layout_name(ctx->cgroup_layout));
return 0;
}
static int __cgroup_attach_unified(const struct lxc_conf *conf, const char *name,
const char *lxcpath, pid_t pid)
{
__do_close int dfd_unified = -EBADF;
if (!conf || is_empty_string(name) || is_empty_string(lxcpath) || pid <= 0)
return ret_errno(EINVAL);
dfd_unified = lxc_cmd_get_cgroup2_fd(name, lxcpath);
if (dfd_unified < 0)
return ret_errno(ENOCGROUP2);
return __unified_attach_fd(conf, dfd_unified, pid);
}
int cgroup_attach(const struct lxc_conf *conf, const char *name,
const char *lxcpath, pid_t pid)
{
int ret;
ret = __cgroup_attach_many(conf, name, lxcpath, pid);
if (ret < 0) {
if (ret != ENOSYS)
return ret;
ret = __cgroup_attach_unified(conf, name, lxcpath, pid);
} }
return ret; return ret;
......
...@@ -5,9 +5,11 @@ ...@@ -5,9 +5,11 @@
#include <stdbool.h> #include <stdbool.h>
#include <stddef.h> #include <stddef.h>
#include <linux/types.h>
#include <sys/types.h> #include <sys/types.h>
#include <linux/magic.h> #include <linux/magic.h>
#include "af_unix.h"
#include "compiler.h" #include "compiler.h"
#include "macro.h" #include "macro.h"
#include "memory_utils.h" #include "memory_utils.h"
...@@ -33,6 +35,22 @@ typedef enum { ...@@ -33,6 +35,22 @@ typedef enum {
CGROUP_LAYOUT_UNIFIED = 2, CGROUP_LAYOUT_UNIFIED = 2,
} cgroup_layout_t; } cgroup_layout_t;
static inline const char *cgroup_layout_name(cgroup_layout_t layout)
{
switch (layout) {
case CGROUP_LAYOUT_LEGACY:
return "legacy";
case CGROUP_LAYOUT_HYBRID:
return "hybrid";
case CGROUP_LAYOUT_UNIFIED:
return "unified";
case CGROUP_LAYOUT_UNKNOWN:
break;
}
return "unknown";
}
typedef enum { typedef enum {
LEGACY_HIERARCHY = CGROUP_SUPER_MAGIC, LEGACY_HIERARCHY = CGROUP_SUPER_MAGIC,
UNIFIED_HIERARCHY = CGROUP2_SUPER_MAGIC, UNIFIED_HIERARCHY = CGROUP2_SUPER_MAGIC,
...@@ -41,6 +59,17 @@ typedef enum { ...@@ -41,6 +59,17 @@ typedef enum {
#define DEVICES_CONTROLLER (1U << 0) #define DEVICES_CONTROLLER (1U << 0)
#define FREEZER_CONTROLLER (1U << 1) #define FREEZER_CONTROLLER (1U << 1)
/* That's plenty of hierarchies. */
#define CGROUP_CTX_MAX_FD 20
// BUILD_BUG_ON(CGROUP_CTX_MAX_FD > KERNEL_SCM_MAX_FD);
struct cgroup_ctx {
__s32 cgroup_layout;
__u32 utilities;
__u32 fd_len;
__s32 fd[CGROUP_CTX_MAX_FD];
} __attribute__((aligned(8)));
/* A descriptor for a mounted hierarchy /* A descriptor for a mounted hierarchy
* *
* @controllers * @controllers
...@@ -218,7 +247,7 @@ struct cgroup_ops { ...@@ -218,7 +247,7 @@ struct cgroup_ops {
struct lxc_handler *handler); struct lxc_handler *handler);
bool (*monitor_delegate_controllers)(struct cgroup_ops *ops); bool (*monitor_delegate_controllers)(struct cgroup_ops *ops);
bool (*payload_delegate_controllers)(struct cgroup_ops *ops); bool (*payload_delegate_controllers)(struct cgroup_ops *ops);
void (*payload_finalize)(struct cgroup_ops *ops); void (*finalize)(struct cgroup_ops *ops);
const char *(*get_limiting_cgroup)(struct cgroup_ops *ops, const char *controller); const char *(*get_limiting_cgroup)(struct cgroup_ops *ops, const char *controller);
}; };
...@@ -257,4 +286,36 @@ static inline int cgroup_unified_fd(const struct cgroup_ops *ops) ...@@ -257,4 +286,36 @@ static inline int cgroup_unified_fd(const struct cgroup_ops *ops)
__first, __VA_ARGS__); \ __first, __VA_ARGS__); \
}) })
static void put_cgroup_ctx(struct cgroup_ctx *ctx)
{
if (!IS_ERR_OR_NULL(ctx)) {
for (__u32 idx = 0; idx < ctx->fd_len; idx++)
close_prot_errno_disarm(ctx->fd[idx]);
}
}
define_cleanup_function(struct cgroup_ctx *, put_cgroup_ctx);
static inline int prepare_cgroup_ctx(struct cgroup_ops *ops,
struct cgroup_ctx *ctx)
{
__u32 idx;
for (idx = 0; ops->hierarchies[idx]; idx++) {
if (idx >= CGROUP_CTX_MAX_FD)
return ret_errno(E2BIG);
ctx->fd[idx] = ops->hierarchies[idx]->dfd_con;
}
if (idx == 0)
return ret_errno(ENOENT);
ctx->fd_len = idx;
ctx->cgroup_layout = ops->cgroup_layout;
if (ops->unified && ops->unified->dfd_con > 0)
ctx->utilities = ops->unified->utilities;
return 0;
}
#endif /* __LXC_CGROUP_H */ #endif /* __LXC_CGROUP_H */
...@@ -88,6 +88,7 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd) ...@@ -88,6 +88,7 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd)
[LXC_CMD_GET_LIMITING_CGROUP2_FD] = "get_limiting_cgroup2_fd", [LXC_CMD_GET_LIMITING_CGROUP2_FD] = "get_limiting_cgroup2_fd",
[LXC_CMD_GET_DEVPTS_FD] = "get_devpts_fd", [LXC_CMD_GET_DEVPTS_FD] = "get_devpts_fd",
[LXC_CMD_GET_SECCOMP_NOTIFY_FD] = "get_seccomp_notify_fd", [LXC_CMD_GET_SECCOMP_NOTIFY_FD] = "get_seccomp_notify_fd",
[LXC_CMD_GET_CGROUP_CTX] = "get_cgroup_ctx",
}; };
if (cmd >= LXC_CMD_MAX) if (cmd >= LXC_CMD_MAX)
...@@ -96,6 +97,19 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd) ...@@ -96,6 +97,19 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd)
return cmdname[cmd]; return cmdname[cmd];
} }
static int __transfer_cgroup_ctx_fds(struct unix_fds *fds, struct cgroup_ctx *ctx)
{
/* This shouldn't be able to happen but better safe than sorry. */
if (ctx->fd_len != fds->fd_count_ret ||
fds->fd_count_ret > CGROUP_CTX_MAX_FD)
return syswarn_set(-EINVAL, "Unexpected number of file descriptors received %u != %u",
ctx->fd_len, fds->fd_count_ret);
memcpy(ctx->fd, fds->fd, ctx->fd_len * sizeof(__s32));
fds->fd_count_ret = 0;
return 0;
}
/* /*
* lxc_cmd_rsp_recv: Receive a response to a command * lxc_cmd_rsp_recv: Receive a response to a command
* *
...@@ -115,16 +129,43 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd) ...@@ -115,16 +129,43 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd)
*/ */
static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd) static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd)
{ {
__do_close int fd_rsp = -EBADF; call_cleaner(put_unix_fds) struct unix_fds *fds = &(struct unix_fds){};
int ret;
struct lxc_cmd_rsp *rsp = &cmd->rsp; struct lxc_cmd_rsp *rsp = &cmd->rsp;
const char *reqstr = lxc_cmd_str(cmd->req.cmd);
int fret = 0;
int ret;
ret = lxc_abstract_unix_recv_fds(sock, &fd_rsp, 1, rsp, sizeof(*rsp)); switch (cmd->req.cmd) {
case LXC_CMD_GET_CGROUP2_FD:
__fallthrough;
case LXC_CMD_GET_LIMITING_CGROUP2_FD:
__fallthrough;
case LXC_CMD_GET_INIT_PIDFD:
__fallthrough;
case LXC_CMD_GET_SECCOMP_NOTIFY_FD:
__fallthrough;
case LXC_CMD_GET_DEVPTS_FD:
__fallthrough;
case LXC_CMD_CONSOLE:
fds->fd_count_max = 1;
break;
case LXC_CMD_GET_CGROUP_CTX:
fds->fd_count_max = CGROUP_CTX_MAX_FD;
break;
default:
fds->fd_count_max = 0;
break;
}
ret = lxc_abstract_unix_recv_fds(sock, fds, rsp, sizeof(*rsp));
if (ret < 0) if (ret < 0)
return log_warn_errno(-1, return syserrno(ret, "Failed to receive response for command \"%s\"", reqstr);
errno, "Failed to receive response for command \"%s\"",
lxc_cmd_str(cmd->req.cmd)); if (fds->fd_count_max == 0) {
TRACE("Command \"%s\" received response", lxc_cmd_str(cmd->req.cmd)); TRACE("Command \"%s\" received response with %u file descriptors", reqstr, fds->fd_count_ret);
} else if (fds->fd_count_ret == 0) {
WARN("Command \"%s\" received response without expected file descriptors", reqstr);
fret = -EBADF;
}
if (cmd->req.cmd == LXC_CMD_CONSOLE) { if (cmd->req.cmd == LXC_CMD_CONSOLE) {
struct lxc_cmd_console_rsp_data *rspdata; struct lxc_cmd_console_rsp_data *rspdata;
...@@ -137,66 +178,62 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd) ...@@ -137,66 +178,62 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd)
rspdata = malloc(sizeof(*rspdata)); rspdata = malloc(sizeof(*rspdata));
if (!rspdata) if (!rspdata)
return log_warn_errno(-1, return syserrno_set(-ENOMEM, "Failed to receive response for command \"%s\"", reqstr);
ENOMEM, "Failed to receive response for command \"%s\"",
lxc_cmd_str(cmd->req.cmd));
rspdata->ptxfd = move_fd(fd_rsp); rspdata->ptxfd = move_fd(fds->fd[0]);
rspdata->ttynum = PTR_TO_INT(rsp->data); rspdata->ttynum = PTR_TO_INT(rsp->data);
rsp->data = rspdata; rsp->data = rspdata;
} }
if (cmd->req.cmd == LXC_CMD_GET_CGROUP2_FD || switch (cmd->req.cmd) {
cmd->req.cmd == LXC_CMD_GET_LIMITING_CGROUP2_FD) case LXC_CMD_GET_CGROUP2_FD:
{ __fallthrough;
int cgroup2_fd = move_fd(fd_rsp); case LXC_CMD_GET_LIMITING_CGROUP2_FD:
rsp->data = INT_TO_PTR(cgroup2_fd); __fallthrough;
} case LXC_CMD_GET_INIT_PIDFD:
__fallthrough;
if (cmd->req.cmd == LXC_CMD_GET_INIT_PIDFD) { case LXC_CMD_GET_DEVPTS_FD:
int init_pidfd = move_fd(fd_rsp); __fallthrough;
rsp->data = INT_TO_PTR(init_pidfd); case LXC_CMD_GET_SECCOMP_NOTIFY_FD:
} rsp->data = INT_TO_PTR(move_fd(fds->fd[0]));
return log_debug(fret ?: ret, "Finished processing \"%s\"", reqstr);
if (cmd->req.cmd == LXC_CMD_GET_DEVPTS_FD) { case LXC_CMD_GET_CGROUP_CTX:
int devpts_fd = move_fd(fd_rsp); if (rsp->datalen > sizeof(struct cgroup_ctx))
rsp->data = INT_TO_PTR(devpts_fd); return syserrno_set(-EINVAL, "Invalid response size from server for \"%s\"", reqstr);
}
/* Don't pointlessly allocate. */
if (cmd->req.cmd == LXC_CMD_GET_SECCOMP_NOTIFY_FD) { rsp->data = (void *)cmd->req.data;
int seccomp_notify_fd = move_fd(fd_rsp); break;
rsp->data = INT_TO_PTR(seccomp_notify_fd); default:
break;
} }
if (rsp->datalen == 0) if (rsp->datalen == 0)
return log_debug(ret, return log_debug(fret ?: ret, "Response data length for command \"%s\" is 0", reqstr);
"Response data length for command \"%s\" is 0",
lxc_cmd_str(cmd->req.cmd));
if ((rsp->datalen > LXC_CMD_DATA_MAX) && if ((rsp->datalen > LXC_CMD_DATA_MAX) &&
(cmd->req.cmd != LXC_CMD_CONSOLE_LOG)) (cmd->req.cmd != LXC_CMD_CONSOLE_LOG))
return log_error(-1, "Response data for command \"%s\" is too long: %d bytes > %d", return syserrno_set(-E2BIG, "Response data for command \"%s\" is too long: %d bytes > %d",
lxc_cmd_str(cmd->req.cmd), rsp->datalen, reqstr, rsp->datalen, LXC_CMD_DATA_MAX);
LXC_CMD_DATA_MAX);
if (cmd->req.cmd == LXC_CMD_CONSOLE_LOG) { if (cmd->req.cmd == LXC_CMD_CONSOLE_LOG)
rsp->data = malloc(rsp->datalen + 1); rsp->data = zalloc(rsp->datalen + 1);
((char *)rsp->data)[rsp->datalen] = '\0'; else if (cmd->req.cmd != LXC_CMD_GET_CGROUP_CTX)
} else {
rsp->data = malloc(rsp->datalen); rsp->data = malloc(rsp->datalen);
}
if (!rsp->data) if (!rsp->data)
return log_error_errno(-1, return syserrno_set(-ENOMEM, "Failed to allocate response buffer for command \"%s\"", reqstr);
ENOMEM, "Failed to allocate response buffer for command \"%s\"",
lxc_cmd_str(cmd->req.cmd));
ret = lxc_recv_nointr(sock, rsp->data, rsp->datalen, 0); ret = lxc_recv_nointr(sock, rsp->data, rsp->datalen, 0);
if (ret != rsp->datalen) if (ret != rsp->datalen)
return log_error_errno(-1, return syserrno(-errno, "Failed to receive response data for command \"%s\"", reqstr);
errno, "Failed to receive response data for command \"%s\"",
lxc_cmd_str(cmd->req.cmd));
return ret; if (cmd->req.cmd == LXC_CMD_GET_CGROUP_CTX) {
ret = __transfer_cgroup_ctx_fds(fds, rsp->data);
if (ret < 0)
return syserrno(ret, "Failed to transfer file descriptors for \"%s\"", reqstr);
}
return fret ?: ret;
} }
/* /*
...@@ -207,26 +244,84 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd) ...@@ -207,26 +244,84 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd)
* *
* Returns 0 on success, < 0 on failure * Returns 0 on success, < 0 on failure
*/ */
static int lxc_cmd_rsp_send(int fd, struct lxc_cmd_rsp *rsp) static int __lxc_cmd_rsp_send(int fd, struct lxc_cmd_rsp *rsp)
{ {
ssize_t ret; ssize_t ret;
errno = EMSGSIZE;
ret = lxc_send_nointr(fd, rsp, sizeof(*rsp), MSG_NOSIGNAL); ret = lxc_send_nointr(fd, rsp, sizeof(*rsp), MSG_NOSIGNAL);
if (ret < 0 || (size_t)ret != sizeof(*rsp)) if (ret < 0 || (size_t)ret != sizeof(*rsp))
return log_error_errno(-1, errno, "Failed to send command response %zd", ret); return syserrno(-errno, "Failed to send command response %zd", ret);
if (!rsp->data || rsp->datalen <= 0) if (!rsp->data || rsp->datalen <= 0)
return 0; return 0;
errno = EMSGSIZE;
ret = lxc_send_nointr(fd, rsp->data, rsp->datalen, MSG_NOSIGNAL); ret = lxc_send_nointr(fd, rsp->data, rsp->datalen, MSG_NOSIGNAL);
if (ret < 0 || ret != (ssize_t)rsp->datalen) if (ret < 0 || ret != (ssize_t)rsp->datalen)
return log_warn_errno(-1, errno, "Failed to send command response data %zd", ret); return syswarn(-errno, "Failed to send command response %zd", ret);
return 0; return 0;
} }
/*
 * lxc_cmd_rsp_send_reap: Send a response and report that the client fd
 * should be reaped
 *
 * @fd  : file descriptor the response is written to
 * @rsp : response to send
 *
 * Returns LXC_CMD_REAP_CLIENT_FD when __lxc_cmd_rsp_send() did not fail,
 * otherwise the negative value it returned.
 */
static inline int lxc_cmd_rsp_send_reap(int fd, struct lxc_cmd_rsp *rsp)
{
	int err = __lxc_cmd_rsp_send(fd, rsp);

	return err < 0 ? err : LXC_CMD_REAP_CLIENT_FD;
}
/*
 * lxc_cmd_rsp_send_keep: Send a response without requesting that the client
 * fd be reaped
 *
 * @fd  : file descriptor the response is written to
 * @rsp : response to send
 *
 * Returns 0 when __lxc_cmd_rsp_send() did not fail, otherwise the negative
 * value it returned.
 */
static inline int lxc_cmd_rsp_send_keep(int fd, struct lxc_cmd_rsp *rsp)
{
	int err = __lxc_cmd_rsp_send(fd, rsp);

	if (err >= 0)
		return 0;

	return err;
}
/*
 * rsp_one_fd: Send a response together with exactly one file descriptor
 *
 * @fd      : socket the response is written to
 * @fd_send : file descriptor passed along with the response
 * @rsp     : response header, sent as the message payload
 *
 * Returns LXC_CMD_REAP_CLIENT_FD when lxc_abstract_unix_send_fds() did not
 * fail, otherwise the negative value it returned.
 */
static inline int rsp_one_fd(int fd, int fd_send, struct lxc_cmd_rsp *rsp)
{
	int err;

	err = lxc_abstract_unix_send_fds(fd, &fd_send, 1, rsp, sizeof(*rsp));
	if (err >= 0)
		return LXC_CMD_REAP_CLIENT_FD;

	return err;
}
/*
 * rsp_many_fds: Send a response together with an array of file descriptors
 *
 * @fd      : socket the response is written to
 * @fds_len : number of entries of @fds to send
 * @fds     : file descriptors to pass along with the response
 * @rsp     : response header; rsp->data (if any) is sent as a second message
 *
 * When @fds_len is 0 or exceeds KERNEL_SCM_MAX_FD no descriptors are sent;
 * rsp->ret is set to -ENOENT or -E2BIG respectively and the response is sent
 * via lxc_cmd_rsp_send_reap().
 *
 * Returns LXC_CMD_REAP_CLIENT_FD when everything was sent, < 0 otherwise.
 */
static inline int rsp_many_fds(int fd, __u32 fds_len,
			       const __s32 fds[KERNEL_SCM_MAX_FD],
			       struct lxc_cmd_rsp *rsp)
{
	ssize_t sent;
	int err = 0;

	if (fds_len == 0)
		err = -ENOENT;
	else if (fds_len > KERNEL_SCM_MAX_FD)
		err = -E2BIG;

	if (err) {
		rsp->ret = err;
		return lxc_cmd_rsp_send_reap(fd, rsp);
	}

	/* The header travels in the same message as the file descriptors. */
	sent = lxc_abstract_unix_send_fds(fd, fds, fds_len, rsp, sizeof(*rsp));
	if (sent < 0)
		return sent;

	/* Nothing more to do when the response carries no payload. */
	if (!rsp->data || rsp->datalen <= 0)
		return LXC_CMD_REAP_CLIENT_FD;

	sent = lxc_send_nointr(fd, rsp->data, rsp->datalen, MSG_NOSIGNAL);
	if (sent < 0 || sent != (ssize_t)rsp->datalen)
		return syswarn(-errno, "Failed to send command response %zd", sent);

	return LXC_CMD_REAP_CLIENT_FD;
}
static int lxc_cmd_send(const char *name, struct lxc_cmd_rr *cmd, static int lxc_cmd_send(const char *name, struct lxc_cmd_rr *cmd,
const char *lxcpath, const char *hashed_sock_name) const char *lxcpath, const char *hashed_sock_name)
{ {
...@@ -349,7 +444,6 @@ int lxc_try_cmd(const char *name, const char *lxcpath) ...@@ -349,7 +444,6 @@ int lxc_try_cmd(const char *name, const char *lxcpath)
*/ */
static int validate_string_request(int fd, const struct lxc_cmd_req *req) static int validate_string_request(int fd, const struct lxc_cmd_req *req)
{ {
int ret;
size_t maxlen = req->datalen - 1; size_t maxlen = req->datalen - 1;
const char *data = req->data; const char *data = req->data;
...@@ -362,11 +456,7 @@ static int validate_string_request(int fd, const struct lxc_cmd_req *req) ...@@ -362,11 +456,7 @@ static int validate_string_request(int fd, const struct lxc_cmd_req *req)
.data = NULL, .data = NULL,
}; };
ret = lxc_cmd_rsp_send(fd, &rsp); return lxc_cmd_rsp_send_reap(fd, &rsp);
if (ret < 0)
return LXC_CMD_REAP_CLIENT_FD;
return -1;
} }
/* Implementations of the commands and their callbacks */ /* Implementations of the commands and their callbacks */
...@@ -410,25 +500,25 @@ static int lxc_cmd_get_init_pid_callback(int fd, struct lxc_cmd_req *req, ...@@ -410,25 +500,25 @@ static int lxc_cmd_get_init_pid_callback(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler, struct lxc_handler *handler,
struct lxc_epoll_descr *descr) struct lxc_epoll_descr *descr)
{ {
int ret;
struct lxc_cmd_rsp rsp = { struct lxc_cmd_rsp rsp = {
.data = PID_TO_PTR(handler->pid) .data = PID_TO_PTR(handler->pid),
}; };
ret = lxc_cmd_rsp_send(fd, &rsp); return lxc_cmd_rsp_send_reap(fd, &rsp);
if (ret < 0)
return LXC_CMD_REAP_CLIENT_FD;
return 0;
} }
int lxc_cmd_get_init_pidfd(const char *name, const char *lxcpath) int lxc_cmd_get_init_pidfd(const char *name, const char *lxcpath)
{ {
int pidfd;
int ret, stopped; int ret, stopped;
struct lxc_cmd_rr cmd = { struct lxc_cmd_rr cmd = {
.req = { .req = {
.cmd = LXC_CMD_GET_INIT_PIDFD, .cmd = LXC_CMD_GET_INIT_PIDFD,
}, },
.rsp = {
.data = INT_TO_PTR(-EBADF),
.ret = ENOSYS,
},
}; };
ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL); ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
...@@ -436,9 +526,13 @@ int lxc_cmd_get_init_pidfd(const char *name, const char *lxcpath) ...@@ -436,9 +526,13 @@ int lxc_cmd_get_init_pidfd(const char *name, const char *lxcpath)
return log_debug_errno(-1, errno, "Failed to process init pidfd command"); return log_debug_errno(-1, errno, "Failed to process init pidfd command");
if (cmd.rsp.ret < 0) if (cmd.rsp.ret < 0)
return log_debug_errno(-EBADF, errno, "Failed to receive init pidfd"); return syserrno_set(cmd.rsp.ret, "Failed to receive init pidfd");
return PTR_TO_INT(cmd.rsp.data); pidfd = PTR_TO_INT(cmd.rsp.data);
if (pidfd < 0)
return syserrno_set(pidfd, "Failed to receive init pidfd");
return pidfd;
} }
static int lxc_cmd_get_init_pidfd_callback(int fd, struct lxc_cmd_req *req, static int lxc_cmd_get_init_pidfd_callback(int fd, struct lxc_cmd_req *req,
...@@ -446,17 +540,14 @@ static int lxc_cmd_get_init_pidfd_callback(int fd, struct lxc_cmd_req *req, ...@@ -446,17 +540,14 @@ static int lxc_cmd_get_init_pidfd_callback(int fd, struct lxc_cmd_req *req,
struct lxc_epoll_descr *descr) struct lxc_epoll_descr *descr)
{ {
struct lxc_cmd_rsp rsp = { struct lxc_cmd_rsp rsp = {
.ret = 0, .ret = -EBADF,
}; };
int ret;
if (handler->pidfd < 0) if (handler->pidfd < 0)
rsp.ret = -EBADF; return lxc_cmd_rsp_send_reap(fd, &rsp);
ret = lxc_abstract_unix_send_fds(fd, &handler->pidfd, 1, &rsp, sizeof(rsp));
if (ret < 0)
return log_error(LXC_CMD_REAP_CLIENT_FD, "Failed to send init pidfd");
return 0; rsp.ret = 0;
return rsp_one_fd(fd, handler->pidfd, &rsp);
} }
int lxc_cmd_get_devpts_fd(const char *name, const char *lxcpath) int lxc_cmd_get_devpts_fd(const char *name, const char *lxcpath)
...@@ -483,20 +574,14 @@ static int lxc_cmd_get_devpts_fd_callback(int fd, struct lxc_cmd_req *req, ...@@ -483,20 +574,14 @@ static int lxc_cmd_get_devpts_fd_callback(int fd, struct lxc_cmd_req *req,
struct lxc_epoll_descr *descr) struct lxc_epoll_descr *descr)
{ {
struct lxc_cmd_rsp rsp = { struct lxc_cmd_rsp rsp = {
.ret = 0, .ret = -EBADF,
}; };
int ret;
if (!handler->conf || handler->conf->devpts_fd < 0) { if (!handler->conf || handler->conf->devpts_fd < 0)
rsp.ret = -EBADF; return lxc_cmd_rsp_send_reap(fd, &rsp);
ret = lxc_abstract_unix_send_fds(fd, NULL, 0, &rsp, sizeof(rsp));
} else {
ret = lxc_abstract_unix_send_fds(fd, &handler->conf->devpts_fd, 1, &rsp, sizeof(rsp));
}
if (ret < 0)
return log_error(LXC_CMD_REAP_CLIENT_FD, "Failed to send devpts fd");
return 0; rsp.ret = 0;
return rsp_one_fd(fd, handler->conf->devpts_fd, &rsp);
} }
int lxc_cmd_get_seccomp_notify_fd(const char *name, const char *lxcpath) int lxc_cmd_get_seccomp_notify_fd(const char *name, const char *lxcpath)
...@@ -528,22 +613,75 @@ static int lxc_cmd_get_seccomp_notify_fd_callback(int fd, struct lxc_cmd_req *re ...@@ -528,22 +613,75 @@ static int lxc_cmd_get_seccomp_notify_fd_callback(int fd, struct lxc_cmd_req *re
{ {
#ifdef HAVE_SECCOMP_NOTIFY #ifdef HAVE_SECCOMP_NOTIFY
struct lxc_cmd_rsp rsp = { struct lxc_cmd_rsp rsp = {
.ret = 0, .ret = -EBADF,
}; };
int ret;
if (!handler->conf || handler->conf->seccomp.notifier.notify_fd < 0) if (!handler->conf || handler->conf->seccomp.notifier.notify_fd < 0)
rsp.ret = -EBADF; return lxc_cmd_rsp_send_reap(fd, &rsp);
ret = lxc_abstract_unix_send_fds(fd, &handler->conf->seccomp.notifier.notify_fd, 1, &rsp, sizeof(rsp));
if (ret < 0)
return log_error(LXC_CMD_REAP_CLIENT_FD, "Failed to send seccomp notify fd");
return 0; rsp.ret = 0;
return rsp_one_fd(fd, handler->conf->seccomp.notifier.notify_fd, &rsp);
#else #else
return ret_errno(EOPNOTSUPP); return syserrno_set(-EOPNOTSUPP, "Seccomp notifier not supported");
#endif #endif
} }
/*
 * lxc_cmd_get_cgroup_ctx: Issue LXC_CMD_GET_CGROUP_CTX to a container
 *
 * @name         : forwarded to lxc_cmd()
 * @lxcpath      : forwarded to lxc_cmd()
 * @controller   : only validated here: must be empty when @batch is set
 * @batch        : only validated here, see @controller
 * @size_ret_ctx : size of @ret_ctx; sent as the request data length
 * @ret_ctx      : cgroup context buffer; sent as the request data
 *
 * Returns 0 on success. Returns -1 when lxc_cmd() fails and -EBADF when the
 * server answered with a negative return code. An invalid @controller/@batch
 * combination is rejected through ret_errno(EINVAL).
 */
int lxc_cmd_get_cgroup_ctx(const char *name, const char *lxcpath,
			   const char *controller, bool batch,
			   size_t size_ret_ctx, struct cgroup_ctx *ret_ctx)
{
	struct lxc_cmd_rr cmd = {
		.req = {
			.cmd		= LXC_CMD_GET_CGROUP_CTX,
			.datalen	= size_ret_ctx,
			.data		= ret_ctx,
		},
		.rsp = {
			/* Stays in place if the server never fills in a result. */
			.ret = -ENOSYS,
		},
	};
	int ret, stopped;

	if (batch && !is_empty_string(controller))
		return ret_errno(EINVAL);

	ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
	if (ret < 0)
		return log_debug_errno(-1, errno, "Failed to process cgroup context command");

	/*
	 * The transport succeeded, so errno carries no information about this
	 * failure. Report the error code the server sent back instead.
	 */
	if (cmd.rsp.ret < 0)
		return log_debug_errno(-EBADF, -cmd.rsp.ret, "Failed to receive cgroup fds");

	return 0;
}
/*
 * lxc_cmd_get_cgroup_ctx_callback: Server side of LXC_CMD_GET_CGROUP_CTX
 *
 * @fd      : client connection the response is written to
 * @req     : request; req->data / req->datalen hold the client's cgroup_ctx
 * @handler : handler whose cgroup_ops are used to fill in the context
 * @descr   : unused
 *
 * Copies the client's struct into a server-side cgroup_ctx, lets
 * prepare_cgroup_ctx() fill it in and sends it back together with the file
 * descriptors it lists. On failure only an error response is sent.
 *
 * Returns whatever the response helper that was used returns.
 */
static int lxc_cmd_get_cgroup_ctx_callback(int fd, struct lxc_cmd_req *req,
					   struct lxc_handler *handler,
					   struct lxc_epoll_descr *descr)
{
	struct lxc_cmd_rsp rsp = {
		/*
		 * Must be negative: the client only treats rsp.ret < 0 as a
		 * failure.
		 */
		.ret = -EINVAL,
	};
	struct cgroup_ops *cgroup_ops = handler->cgroup_ops;
	struct cgroup_ctx ctx_server = {};
	int ret;

	ret = copy_struct_from_client(sizeof(struct cgroup_ctx), &ctx_server,
				      req->datalen, req->data);
	if (ret < 0)
		return lxc_cmd_rsp_send_reap(fd, &rsp);

	ret = prepare_cgroup_ctx(cgroup_ops, &ctx_server);
	if (ret < 0) {
		rsp.ret = ret;
		return lxc_cmd_rsp_send_reap(fd, &rsp);
	}

	/* Never send more bytes than the client announced it can take. */
	rsp.data = &ctx_server;
	rsp.datalen = min(sizeof(struct cgroup_ctx), (size_t)req->datalen);
	return rsp_many_fds(fd, ctx_server.fd_len, ctx_server.fd, &rsp);
}
/* /*
* lxc_cmd_get_clone_flags: Get clone flags container was spawned with * lxc_cmd_get_clone_flags: Get clone flags container was spawned with
* *
...@@ -572,16 +710,11 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req, ...@@ -572,16 +710,11 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler, struct lxc_handler *handler,
struct lxc_epoll_descr *descr) struct lxc_epoll_descr *descr)
{ {
int ret;
struct lxc_cmd_rsp rsp = { struct lxc_cmd_rsp rsp = {
.data = INT_TO_PTR(handler->ns_clone_flags), .data = INT_TO_PTR(handler->ns_clone_flags),
}; };
ret = lxc_cmd_rsp_send(fd, &rsp); return lxc_cmd_rsp_send_reap(fd, &rsp);
if (ret < 0)
return LXC_CMD_REAP_CLIENT_FD;
return 0;
} }
static char *lxc_cmd_get_cgroup_path_do(const char *name, const char *lxcpath, static char *lxc_cmd_get_cgroup_path_do(const char *name, const char *lxcpath,
...@@ -701,11 +834,7 @@ static int lxc_cmd_get_cgroup_callback_do(int fd, struct lxc_cmd_req *req, ...@@ -701,11 +834,7 @@ static int lxc_cmd_get_cgroup_callback_do(int fd, struct lxc_cmd_req *req,
rsp.datalen = strlen(path) + 1; rsp.datalen = strlen(path) + 1;
rsp.data = (char *)path; rsp.data = (char *)path;
ret = lxc_cmd_rsp_send(fd, &rsp); return lxc_cmd_rsp_send_reap(fd, &rsp);
if (ret < 0)
return LXC_CMD_REAP_CLIENT_FD;
return 0;
} }
static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req, static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
...@@ -784,11 +913,7 @@ static int lxc_cmd_get_config_item_callback(int fd, struct lxc_cmd_req *req, ...@@ -784,11 +913,7 @@ static int lxc_cmd_get_config_item_callback(int fd, struct lxc_cmd_req *req,
err1: err1:
rsp.ret = -1; rsp.ret = -1;
out: out:
cilen = lxc_cmd_rsp_send(fd, &rsp); return lxc_cmd_rsp_send_reap(fd, &rsp);
if (cilen < 0)
return LXC_CMD_REAP_CLIENT_FD;
return 0;
} }
/* /*
...@@ -827,16 +952,11 @@ static int lxc_cmd_get_state_callback(int fd, struct lxc_cmd_req *req, ...@@ -827,16 +952,11 @@ static int lxc_cmd_get_state_callback(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler, struct lxc_handler *handler,
struct lxc_epoll_descr *descr) struct lxc_epoll_descr *descr)
{ {
int ret;
struct lxc_cmd_rsp rsp = { struct lxc_cmd_rsp rsp = {
.data = INT_TO_PTR(handler->state), .data = INT_TO_PTR(handler->state),
}; };
ret = lxc_cmd_rsp_send(fd, &rsp); return lxc_cmd_rsp_send_reap(fd, &rsp);
if (ret < 0)
return LXC_CMD_REAP_CLIENT_FD;
return 0;
} }
/* /*
...@@ -909,11 +1029,7 @@ static int lxc_cmd_stop_callback(int fd, struct lxc_cmd_req *req, ...@@ -909,11 +1029,7 @@ static int lxc_cmd_stop_callback(int fd, struct lxc_cmd_req *req,
rsp.ret = -errno; rsp.ret = -errno;
} }
ret = lxc_cmd_rsp_send(fd, &rsp); return lxc_cmd_rsp_send_reap(fd, &rsp);
if (ret < 0)
return LXC_CMD_REAP_CLIENT_FD;
return 0;
} }
/* /*
...@@ -985,23 +1101,24 @@ static int lxc_cmd_console_callback(int fd, struct lxc_cmd_req *req, ...@@ -985,23 +1101,24 @@ static int lxc_cmd_console_callback(int fd, struct lxc_cmd_req *req,
struct lxc_epoll_descr *descr) struct lxc_epoll_descr *descr)
{ {
int ptxfd, ret; int ptxfd, ret;
struct lxc_cmd_rsp rsp; struct lxc_cmd_rsp rsp = {
.ret = -EBADF,
};
int ttynum = PTR_TO_INT(req->data); int ttynum = PTR_TO_INT(req->data);
ptxfd = lxc_terminal_allocate(handler->conf, fd, &ttynum); ptxfd = lxc_terminal_allocate(handler->conf, fd, &ttynum);
if (ptxfd < 0) if (ptxfd < 0)
return LXC_CMD_REAP_CLIENT_FD; return lxc_cmd_rsp_send_reap(fd, &rsp);
memset(&rsp, 0, sizeof(rsp)); rsp.ret = 0;
rsp.data = INT_TO_PTR(ttynum); rsp.data = INT_TO_PTR(ttynum);
ret = lxc_abstract_unix_send_fds(fd, &ptxfd, 1, &rsp, sizeof(rsp)); ret = lxc_abstract_unix_send_fds(fd, &ptxfd, 1, &rsp, sizeof(rsp));
if (ret < 0) { if (ret < 0) {
lxc_terminal_free(handler->conf, fd); lxc_terminal_free(handler->conf, fd);
return log_error_errno(LXC_CMD_REAP_CLIENT_FD, errno, return ret;
"Failed to send tty to client");
} }
return 0; return log_debug(0, "Send tty to client");
} }
/* /*
...@@ -1034,7 +1151,6 @@ static int lxc_cmd_get_name_callback(int fd, struct lxc_cmd_req *req, ...@@ -1034,7 +1151,6 @@ static int lxc_cmd_get_name_callback(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler, struct lxc_handler *handler,
struct lxc_epoll_descr *descr) struct lxc_epoll_descr *descr)
{ {
int ret;
struct lxc_cmd_rsp rsp; struct lxc_cmd_rsp rsp;
memset(&rsp, 0, sizeof(rsp)); memset(&rsp, 0, sizeof(rsp));
...@@ -1043,11 +1159,7 @@ static int lxc_cmd_get_name_callback(int fd, struct lxc_cmd_req *req, ...@@ -1043,11 +1159,7 @@ static int lxc_cmd_get_name_callback(int fd, struct lxc_cmd_req *req,
rsp.datalen = strlen(handler->name) + 1; rsp.datalen = strlen(handler->name) + 1;
rsp.ret = 0; rsp.ret = 0;
ret = lxc_cmd_rsp_send(fd, &rsp); return lxc_cmd_rsp_send_reap(fd, &rsp);
if (ret < 0)
return LXC_CMD_REAP_CLIENT_FD;
return 0;
} }
/* /*
...@@ -1080,18 +1192,13 @@ static int lxc_cmd_get_lxcpath_callback(int fd, struct lxc_cmd_req *req, ...@@ -1080,18 +1192,13 @@ static int lxc_cmd_get_lxcpath_callback(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler, struct lxc_handler *handler,
struct lxc_epoll_descr *descr) struct lxc_epoll_descr *descr)
{ {
int ret;
struct lxc_cmd_rsp rsp = { struct lxc_cmd_rsp rsp = {
.ret = 0, .ret = 0,
.data = (char *)handler->lxcpath, .data = (char *)handler->lxcpath,
.datalen = strlen(handler->lxcpath) + 1, .datalen = strlen(handler->lxcpath) + 1,
}; };
ret = lxc_cmd_rsp_send(fd, &rsp); return lxc_cmd_rsp_send_reap(fd, &rsp);
if (ret < 0)
return LXC_CMD_REAP_CLIENT_FD;
return 0;
} }
int lxc_cmd_add_state_client(const char *name, const char *lxcpath, int lxc_cmd_add_state_client(const char *name, const char *lxcpath,
...@@ -1140,29 +1247,29 @@ static int lxc_cmd_add_state_client_callback(__owns int fd, struct lxc_cmd_req * ...@@ -1140,29 +1247,29 @@ static int lxc_cmd_add_state_client_callback(__owns int fd, struct lxc_cmd_req *
struct lxc_handler *handler, struct lxc_handler *handler,
struct lxc_epoll_descr *descr) struct lxc_epoll_descr *descr)
{ {
int ret; struct lxc_cmd_rsp rsp = {
struct lxc_cmd_rsp rsp = {0}; .ret = -EINVAL,
};
if (req->datalen < 0) if (req->datalen < 0)
return LXC_CMD_REAP_CLIENT_FD; goto reap_fd;
if (req->datalen != (sizeof(lxc_state_t) * MAX_STATE)) if (req->datalen != (sizeof(lxc_state_t) * MAX_STATE))
return LXC_CMD_REAP_CLIENT_FD; goto reap_fd;
if (!req->data) if (!req->data)
return LXC_CMD_REAP_CLIENT_FD; goto reap_fd;
rsp.ret = lxc_add_state_client(fd, handler, (lxc_state_t *)req->data); rsp.ret = lxc_add_state_client(fd, handler, (lxc_state_t *)req->data);
if (rsp.ret < 0) if (rsp.ret < 0)
return LXC_CMD_REAP_CLIENT_FD; goto reap_fd;
rsp.data = INT_TO_PTR(rsp.ret); rsp.data = INT_TO_PTR(rsp.ret);
ret = lxc_cmd_rsp_send(fd, &rsp); return lxc_cmd_rsp_send_keep(fd, &rsp);
if (ret < 0)
return LXC_CMD_REAP_CLIENT_FD;
return 0; reap_fd:
return lxc_cmd_rsp_send_reap(fd, &rsp);
} }
int lxc_cmd_add_bpf_device_cgroup(const char *name, const char *lxcpath, int lxc_cmd_add_bpf_device_cgroup(const char *name, const char *lxcpath,
...@@ -1193,18 +1300,19 @@ static int lxc_cmd_add_bpf_device_cgroup_callback(int fd, struct lxc_cmd_req *re ...@@ -1193,18 +1300,19 @@ static int lxc_cmd_add_bpf_device_cgroup_callback(int fd, struct lxc_cmd_req *re
struct lxc_handler *handler, struct lxc_handler *handler,
struct lxc_epoll_descr *descr) struct lxc_epoll_descr *descr)
{ {
int ret; struct lxc_cmd_rsp rsp = {
struct lxc_cmd_rsp rsp = {}; .ret = -EINVAL,
};
struct lxc_conf *conf; struct lxc_conf *conf;
if (req->datalen <= 0) if (req->datalen <= 0)
return LXC_CMD_REAP_CLIENT_FD; goto out;
if (req->datalen != sizeof(struct device_item)) if (req->datalen != sizeof(struct device_item))
return LXC_CMD_REAP_CLIENT_FD; goto out;
if (!req->data) if (!req->data)
return LXC_CMD_REAP_CLIENT_FD; goto out;
conf = handler->conf; conf = handler->conf;
if (!bpf_cgroup_devices_update(handler->cgroup_ops, if (!bpf_cgroup_devices_update(handler->cgroup_ops,
...@@ -1214,11 +1322,8 @@ static int lxc_cmd_add_bpf_device_cgroup_callback(int fd, struct lxc_cmd_req *re ...@@ -1214,11 +1322,8 @@ static int lxc_cmd_add_bpf_device_cgroup_callback(int fd, struct lxc_cmd_req *re
else else
rsp.ret = 0; rsp.ret = 0;
ret = lxc_cmd_rsp_send(fd, &rsp); out:
if (ret < 0) return lxc_cmd_rsp_send_reap(fd, &rsp);
return LXC_CMD_REAP_CLIENT_FD;
return 0;
} }
int lxc_cmd_console_log(const char *name, const char *lxcpath, int lxc_cmd_console_log(const char *name, const char *lxcpath,
...@@ -1294,7 +1399,7 @@ static int lxc_cmd_console_log_callback(int fd, struct lxc_cmd_req *req, ...@@ -1294,7 +1399,7 @@ static int lxc_cmd_console_log_callback(int fd, struct lxc_cmd_req *req,
lxc_ringbuf_move_read_addr(buf, rsp.datalen); lxc_ringbuf_move_read_addr(buf, rsp.datalen);
out: out:
return lxc_cmd_rsp_send(fd, &rsp); return lxc_cmd_rsp_send_reap(fd, &rsp);
} }
int lxc_cmd_serve_state_clients(const char *name, const char *lxcpath, int lxc_cmd_serve_state_clients(const char *name, const char *lxcpath,
...@@ -1326,13 +1431,9 @@ static int lxc_cmd_serve_state_clients_callback(int fd, struct lxc_cmd_req *req, ...@@ -1326,13 +1431,9 @@ static int lxc_cmd_serve_state_clients_callback(int fd, struct lxc_cmd_req *req,
ret = lxc_serve_state_clients(handler->name, handler, state); ret = lxc_serve_state_clients(handler->name, handler, state);
if (ret < 0) if (ret < 0)
return LXC_CMD_REAP_CLIENT_FD; return ret;
ret = lxc_cmd_rsp_send(fd, &rsp);
if (ret < 0)
return LXC_CMD_REAP_CLIENT_FD;
return 0; return lxc_cmd_rsp_send_reap(fd, &rsp);
} }
int lxc_cmd_seccomp_notify_add_listener(const char *name, const char *lxcpath, int lxc_cmd_seccomp_notify_add_listener(const char *name, const char *lxcpath,
...@@ -1371,7 +1472,7 @@ static int lxc_cmd_seccomp_notify_add_listener_callback(int fd, ...@@ -1371,7 +1472,7 @@ static int lxc_cmd_seccomp_notify_add_listener_callback(int fd,
int ret; int ret;
__do_close int recv_fd = -EBADF; __do_close int recv_fd = -EBADF;
ret = lxc_abstract_unix_recv_fds(fd, &recv_fd, 1, NULL, 0); ret = lxc_abstract_unix_recv_one_fd(fd, &recv_fd, NULL, 0);
if (ret <= 0) { if (ret <= 0) {
rsp.ret = -errno; rsp.ret = -errno;
goto out; goto out;
...@@ -1397,7 +1498,7 @@ out: ...@@ -1397,7 +1498,7 @@ out:
rsp.ret = -ENOSYS; rsp.ret = -ENOSYS;
#endif #endif
return lxc_cmd_rsp_send(fd, &rsp); return lxc_cmd_rsp_send_reap(fd, &rsp);
} }
int lxc_cmd_freeze(const char *name, const char *lxcpath, int timeout) int lxc_cmd_freeze(const char *name, const char *lxcpath, int timeout)
...@@ -1430,7 +1531,7 @@ static int lxc_cmd_freeze_callback(int fd, struct lxc_cmd_req *req, ...@@ -1430,7 +1531,7 @@ static int lxc_cmd_freeze_callback(int fd, struct lxc_cmd_req *req,
if (pure_unified_layout(ops)) if (pure_unified_layout(ops))
rsp.ret = ops->freeze(ops, timeout); rsp.ret = ops->freeze(ops, timeout);
return lxc_cmd_rsp_send(fd, &rsp); return lxc_cmd_rsp_send_reap(fd, &rsp);
} }
int lxc_cmd_unfreeze(const char *name, const char *lxcpath, int timeout) int lxc_cmd_unfreeze(const char *name, const char *lxcpath, int timeout)
...@@ -1463,7 +1564,7 @@ static int lxc_cmd_unfreeze_callback(int fd, struct lxc_cmd_req *req, ...@@ -1463,7 +1564,7 @@ static int lxc_cmd_unfreeze_callback(int fd, struct lxc_cmd_req *req,
if (pure_unified_layout(ops)) if (pure_unified_layout(ops))
rsp.ret = ops->unfreeze(ops, timeout); rsp.ret = ops->unfreeze(ops, timeout);
return lxc_cmd_rsp_send(fd, &rsp); return lxc_cmd_rsp_send_reap(fd, &rsp);
} }
int lxc_cmd_get_cgroup2_fd(const char *name, const char *lxcpath) int lxc_cmd_get_cgroup2_fd(const char *name, const char *lxcpath)
...@@ -1473,6 +1574,9 @@ int lxc_cmd_get_cgroup2_fd(const char *name, const char *lxcpath) ...@@ -1473,6 +1574,9 @@ int lxc_cmd_get_cgroup2_fd(const char *name, const char *lxcpath)
.req = { .req = {
.cmd = LXC_CMD_GET_CGROUP2_FD, .cmd = LXC_CMD_GET_CGROUP2_FD,
}, },
.rsp = {
ret = -ENOSYS,
},
}; };
ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL); ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
...@@ -1492,6 +1596,9 @@ int lxc_cmd_get_limiting_cgroup2_fd(const char *name, const char *lxcpath) ...@@ -1492,6 +1596,9 @@ int lxc_cmd_get_limiting_cgroup2_fd(const char *name, const char *lxcpath)
.req = { .req = {
.cmd = LXC_CMD_GET_LIMITING_CGROUP2_FD, .cmd = LXC_CMD_GET_LIMITING_CGROUP2_FD,
}, },
.rsp = {
.ret = -ENOSYS,
},
}; };
ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL); ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
...@@ -1499,7 +1606,7 @@ int lxc_cmd_get_limiting_cgroup2_fd(const char *name, const char *lxcpath) ...@@ -1499,7 +1606,7 @@ int lxc_cmd_get_limiting_cgroup2_fd(const char *name, const char *lxcpath)
return -1; return -1;
if (cmd.rsp.ret < 0) if (cmd.rsp.ret < 0)
return log_debug_errno(cmd.rsp.ret, -cmd.rsp.ret, "Failed to receive cgroup2 fd"); return syswarn_set(cmd.rsp.ret, "Failed to receive cgroup2 limit fd");
return PTR_TO_INT(cmd.rsp.data); return PTR_TO_INT(cmd.rsp.data);
} }
...@@ -1513,20 +1620,21 @@ static int lxc_cmd_get_cgroup2_fd_callback_do(int fd, struct lxc_cmd_req *req, ...@@ -1513,20 +1620,21 @@ static int lxc_cmd_get_cgroup2_fd_callback_do(int fd, struct lxc_cmd_req *req,
.ret = -EINVAL, .ret = -EINVAL,
}; };
struct cgroup_ops *ops = handler->cgroup_ops; struct cgroup_ops *ops = handler->cgroup_ops;
int ret, send_fd; int send_fd;
if (!pure_unified_layout(ops) || !ops->unified) if (!pure_unified_layout(ops) || !ops->unified)
return lxc_cmd_rsp_send(fd, &rsp); return lxc_cmd_rsp_send_reap(fd, &rsp);
send_fd = limiting_cgroup ? ops->unified->dfd_lim send_fd = limiting_cgroup ? ops->unified->dfd_lim
: ops->unified->dfd_con; : ops->unified->dfd_con;
rsp.ret = 0; if (send_fd < 0) {
ret = lxc_abstract_unix_send_fds(fd, &send_fd, 1, &rsp, sizeof(rsp)); rsp.ret = -EBADF;
if (ret < 0) return lxc_cmd_rsp_send_reap(fd, &rsp);
return log_error(LXC_CMD_REAP_CLIENT_FD, "Failed to send cgroup2 fd"); }
return 0; rsp.ret = 0;
return rsp_one_fd(fd, send_fd, &rsp);
} }
static int lxc_cmd_get_cgroup2_fd_callback(int fd, struct lxc_cmd_req *req, static int lxc_cmd_get_cgroup2_fd_callback(int fd, struct lxc_cmd_req *req,
...@@ -1546,6 +1654,16 @@ static int lxc_cmd_get_limiting_cgroup2_fd_callback(int fd, ...@@ -1546,6 +1654,16 @@ static int lxc_cmd_get_limiting_cgroup2_fd_callback(int fd,
true); true);
} }
static int lxc_cmd_rsp_send_enosys(int fd, int id)
{
struct lxc_cmd_rsp rsp = {
.ret = -ENOSYS,
};
__lxc_cmd_rsp_send(fd, &rsp);
return syserrno_set(-ENOSYS, "Invalid command id %d", id);
}
static int lxc_cmd_process(int fd, struct lxc_cmd_req *req, static int lxc_cmd_process(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler, struct lxc_handler *handler,
struct lxc_epoll_descr *descr) struct lxc_epoll_descr *descr)
...@@ -1577,10 +1695,11 @@ static int lxc_cmd_process(int fd, struct lxc_cmd_req *req, ...@@ -1577,10 +1695,11 @@ static int lxc_cmd_process(int fd, struct lxc_cmd_req *req,
[LXC_CMD_GET_LIMITING_CGROUP2_FD] = lxc_cmd_get_limiting_cgroup2_fd_callback, [LXC_CMD_GET_LIMITING_CGROUP2_FD] = lxc_cmd_get_limiting_cgroup2_fd_callback,
[LXC_CMD_GET_DEVPTS_FD] = lxc_cmd_get_devpts_fd_callback, [LXC_CMD_GET_DEVPTS_FD] = lxc_cmd_get_devpts_fd_callback,
[LXC_CMD_GET_SECCOMP_NOTIFY_FD] = lxc_cmd_get_seccomp_notify_fd_callback, [LXC_CMD_GET_SECCOMP_NOTIFY_FD] = lxc_cmd_get_seccomp_notify_fd_callback,
[LXC_CMD_GET_CGROUP_CTX] = lxc_cmd_get_cgroup_ctx_callback,
}; };
if (req->cmd >= LXC_CMD_MAX) if (req->cmd >= LXC_CMD_MAX)
return log_trace_errno(-1, EINVAL, "Invalid command id %d", req->cmd); return lxc_cmd_rsp_send_enosys(fd, req->cmd);
return cb[req->cmd](fd, req, handler, descr); return cb[req->cmd](fd, req, handler, descr);
} }
...@@ -1648,7 +1767,7 @@ static int lxc_cmd_handler(int fd, uint32_t events, void *data, ...@@ -1648,7 +1767,7 @@ static int lxc_cmd_handler(int fd, uint32_t events, void *data,
.ret = -EPERM, .ret = -EPERM,
}; };
lxc_cmd_rsp_send(fd, &rsp); __lxc_cmd_rsp_send(fd, &rsp);
} }
goto out_close; goto out_close;
...@@ -1679,9 +1798,14 @@ static int lxc_cmd_handler(int fd, uint32_t events, void *data, ...@@ -1679,9 +1798,14 @@ static int lxc_cmd_handler(int fd, uint32_t events, void *data,
} }
ret = lxc_cmd_process(fd, &req, handler, descr); ret = lxc_cmd_process(fd, &req, handler, descr);
if (ret) { if (ret < 0) {
/* This is not an error, but only a request to close fd. */ DEBUG("Failed to process command %s; cleaning up client fd %d", lxc_cmd_str(req.cmd), fd);
goto out_close;
} else if (ret == LXC_CMD_REAP_CLIENT_FD) {
TRACE("Processed command %s; cleaning up client fd %d", lxc_cmd_str(req.cmd), fd);
goto out_close; goto out_close;
} else {
TRACE("Processed command %s; keeping client fd %d", lxc_cmd_str(req.cmd), fd);
} }
out: out:
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <unistd.h> #include <unistd.h>
#include "compiler.h" #include "compiler.h"
#include "cgroups/cgroup.h"
#include "lxccontainer.h" #include "lxccontainer.h"
#include "macro.h" #include "macro.h"
#include "state.h" #include "state.h"
...@@ -20,29 +21,30 @@ ...@@ -20,29 +21,30 @@
#define LXC_CMD_REAP_CLIENT_FD 1 #define LXC_CMD_REAP_CLIENT_FD 1
typedef enum { typedef enum {
LXC_CMD_CONSOLE, LXC_CMD_CONSOLE = 0,
LXC_CMD_TERMINAL_WINCH, LXC_CMD_TERMINAL_WINCH = 1,
LXC_CMD_STOP, LXC_CMD_STOP = 2,
LXC_CMD_GET_STATE, LXC_CMD_GET_STATE = 3,
LXC_CMD_GET_INIT_PID, LXC_CMD_GET_INIT_PID = 4,
LXC_CMD_GET_CLONE_FLAGS, LXC_CMD_GET_CLONE_FLAGS = 5,
LXC_CMD_GET_CGROUP, LXC_CMD_GET_CGROUP = 6,
LXC_CMD_GET_CONFIG_ITEM, LXC_CMD_GET_CONFIG_ITEM = 7,
LXC_CMD_GET_NAME, LXC_CMD_GET_NAME = 8,
LXC_CMD_GET_LXCPATH, LXC_CMD_GET_LXCPATH = 9,
LXC_CMD_ADD_STATE_CLIENT, LXC_CMD_ADD_STATE_CLIENT = 10,
LXC_CMD_CONSOLE_LOG, LXC_CMD_CONSOLE_LOG = 11,
LXC_CMD_SERVE_STATE_CLIENTS, LXC_CMD_SERVE_STATE_CLIENTS = 12,
LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER, LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER = 13,
LXC_CMD_ADD_BPF_DEVICE_CGROUP, LXC_CMD_ADD_BPF_DEVICE_CGROUP = 14,
LXC_CMD_FREEZE, LXC_CMD_FREEZE = 15,
LXC_CMD_UNFREEZE, LXC_CMD_UNFREEZE = 16,
LXC_CMD_GET_CGROUP2_FD, LXC_CMD_GET_CGROUP2_FD = 17,
LXC_CMD_GET_INIT_PIDFD, LXC_CMD_GET_INIT_PIDFD = 18,
LXC_CMD_GET_LIMITING_CGROUP, LXC_CMD_GET_LIMITING_CGROUP = 19,
LXC_CMD_GET_LIMITING_CGROUP2_FD, LXC_CMD_GET_LIMITING_CGROUP2_FD = 20,
LXC_CMD_GET_DEVPTS_FD, LXC_CMD_GET_DEVPTS_FD = 21,
LXC_CMD_GET_SECCOMP_NOTIFY_FD, LXC_CMD_GET_SECCOMP_NOTIFY_FD = 22,
LXC_CMD_GET_CGROUP_CTX = 23,
LXC_CMD_MAX, LXC_CMD_MAX,
} lxc_cmd_t; } lxc_cmd_t;
...@@ -122,6 +124,11 @@ __hidden extern int lxc_try_cmd(const char *name, const char *lxcpath); ...@@ -122,6 +124,11 @@ __hidden extern int lxc_try_cmd(const char *name, const char *lxcpath);
__hidden extern int lxc_cmd_console_log(const char *name, const char *lxcpath, __hidden extern int lxc_cmd_console_log(const char *name, const char *lxcpath,
struct lxc_console_log *log); struct lxc_console_log *log);
__hidden extern int lxc_cmd_get_seccomp_notify_fd(const char *name, const char *lxcpath); __hidden extern int lxc_cmd_get_seccomp_notify_fd(const char *name, const char *lxcpath);
/*
 * lxc_cmd_get_cgroup_ctx: Issue LXC_CMD_GET_CGROUP_CTX to a container
 *
 * @controller must be empty when @batch is set. @size_ret_ctx is the size of
 * the buffer @ret_ctx points to.
 *
 * Returns 0 on success, < 0 on failure.
 *
 * NOTE(review): __access_r presumably marks @ret_ctx as read-only, yet
 * @ret_ctx looks like an output buffer - confirm the annotation is intended.
 */
__hidden extern int lxc_cmd_get_cgroup_ctx(const char *name, const char *lxcpath,
const char *controller, bool batch,
size_t size_ret_ctx,
struct cgroup_ctx *ret_ctx)
__access_r(6, 5);
__hidden extern int lxc_cmd_seccomp_notify_add_listener(const char *name, const char *lxcpath, int fd, __hidden extern int lxc_cmd_seccomp_notify_add_listener(const char *name, const char *lxcpath, int fd,
/* unused */ unsigned int command, /* unused */ unsigned int command,
/* unused */ unsigned int flags); /* unused */ unsigned int flags);
......
...@@ -1509,8 +1509,10 @@ int lxc_setup_devpts_parent(struct lxc_handler *handler) ...@@ -1509,8 +1509,10 @@ int lxc_setup_devpts_parent(struct lxc_handler *handler)
if (handler->conf->pty_max <= 0) if (handler->conf->pty_max <= 0)
return 0; return 0;
ret = lxc_abstract_unix_recv_fds(handler->data_sock[1], &handler->conf->devpts_fd, 1, ret = lxc_abstract_unix_recv_one_fd(handler->data_sock[1],
&handler->conf->devpts_fd, sizeof(handler->conf->devpts_fd)); &handler->conf->devpts_fd,
&handler->conf->devpts_fd,
sizeof(handler->conf->devpts_fd));
if (ret < 0) if (ret < 0)
return log_error_errno(-1, errno, "Failed to receive devpts fd from child"); return log_error_errno(-1, errno, "Failed to receive devpts fd from child");
......
...@@ -530,6 +530,14 @@ __lxc_unused static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo, \ ...@@ -530,6 +530,14 @@ __lxc_unused static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo, \
__internal_ret__; \ __internal_ret__; \
}) })
/*
 * syswarn_set: set errno to the absolute value of @__ret__, emit a SYSWARN()
 * message and evaluate to @__ret__.
 *
 * @__ret__ is expanded exactly once, so an argument with side effects (for
 * example a function call) is not executed twice.
 */
#define syswarn_set(__ret__, format, ...)                      \
	({                                                     \
		typeof(__ret__) __internal_ret__ = (__ret__);  \
		errno = abs(__internal_ret__);                 \
		SYSWARN(format, ##__VA_ARGS__);                \
		__internal_ret__;                              \
	})
#define log_error(__ret__, format, ...) \ #define log_error(__ret__, format, ...) \
({ \ ({ \
typeof(__ret__) __internal_ret__ = (__ret__); \ typeof(__ret__) __internal_ret__ = (__ret__); \
......
...@@ -737,4 +737,12 @@ static inline int PTR_RET(const void *ptr) ...@@ -737,4 +737,12 @@ static inline int PTR_RET(const void *ptr)
return 0; return 0;
} }
/*
 * min: evaluate to the smaller of @x and @y.
 *
 * Each argument is evaluated exactly once. Comparing the addresses of the two
 * temporaries makes the compiler complain when @x and @y have different
 * types. __typeof__ is used instead of typeof so that the macro also works
 * when the compiler runs in a strict ISO C mode.
 */
#define min(x, y)                          \
	({                                 \
		__typeof__(x) _min1 = (x); \
		__typeof__(y) _min2 = (y); \
		(void)(&_min1 == &_min2);  \
		_min1 < _min2 ? _min1 : _min2; \
	})
#endif /* __LXC_MACRO_H */ #endif /* __LXC_MACRO_H */
...@@ -101,7 +101,7 @@ static inline void *memdup(const void *data, size_t len) ...@@ -101,7 +101,7 @@ static inline void *memdup(const void *data, size_t len)
({ \ ({ \
if (a >= 0 && a != b) \ if (a >= 0 && a != b) \
close(a); \ close(a); \
if (close >= 0) \ if (b >= 0) \
close(b); \ close(b); \
a = b = -EBADF; \ a = b = -EBADF; \
}) })
......
...@@ -1637,9 +1637,9 @@ int lxc_seccomp_recv_notifier_fd(struct lxc_seccomp *seccomp, int socket_fd) ...@@ -1637,9 +1637,9 @@ int lxc_seccomp_recv_notifier_fd(struct lxc_seccomp *seccomp, int socket_fd)
if (seccomp->notifier.wants_supervision) { if (seccomp->notifier.wants_supervision) {
int ret; int ret;
ret = lxc_abstract_unix_recv_fds(socket_fd, ret = lxc_abstract_unix_recv_one_fd(socket_fd,
&seccomp->notifier.notify_fd, &seccomp->notifier.notify_fd,
1, NULL, 0); NULL, 0);
if (ret < 0) if (ret < 0)
return -1; return -1;
} }
......
...@@ -1041,7 +1041,7 @@ static int do_start(void *data) ...@@ -1041,7 +1041,7 @@ static int do_start(void *data)
lxc_sync_fini_parent(handler); lxc_sync_fini_parent(handler);
if (lxc_abstract_unix_recv_fds(data_sock1, &status_fd, 1, NULL, 0) < 0) { if (lxc_abstract_unix_recv_one_fd(data_sock1, &status_fd, NULL, 0) < 0) {
ERROR("Failed to receive status file descriptor to child process"); ERROR("Failed to receive status file descriptor to child process");
goto out_warn_father; goto out_warn_father;
} }
...@@ -1460,7 +1460,7 @@ static int lxc_recv_ttys_from_child(struct lxc_handler *handler) ...@@ -1460,7 +1460,7 @@ static int lxc_recv_ttys_from_child(struct lxc_handler *handler)
for (i = 0; i < conf->ttys.max; i++) { for (i = 0; i < conf->ttys.max; i++) {
int ttyfds[2]; int ttyfds[2];
ret = lxc_abstract_unix_recv_fds(sock, ttyfds, 2, NULL, 0); ret = lxc_abstract_unix_recv_two_fds(sock, ttyfds);
if (ret < 0) if (ret < 0)
break; break;
...@@ -1888,7 +1888,7 @@ static int lxc_spawn(struct lxc_handler *handler) ...@@ -1888,7 +1888,7 @@ static int lxc_spawn(struct lxc_handler *handler)
} }
} }
cgroup_ops->payload_finalize(cgroup_ops); cgroup_ops->finalize(cgroup_ops);
TRACE("Finished setting up cgroups"); TRACE("Finished setting up cgroups");
if (handler->ns_unshare_flags & CLONE_NEWTIME) { if (handler->ns_unshare_flags & CLONE_NEWTIME) {
......
...@@ -36,7 +36,7 @@ static const char *const strstate[] = { ...@@ -36,7 +36,7 @@ static const char *const strstate[] = {
const char *lxc_state2str(lxc_state_t state) const char *lxc_state2str(lxc_state_t state)
{ {
if (state < STOPPED || state > MAX_STATE - 1) if (state < STOPPED || state > MAX_STATE - 1)
return NULL; return "INVALID STATE";
return strstate[state]; return strstate[state];
} }
......
...@@ -245,4 +245,30 @@ __hidden extern int safe_mount_beneath_at(int beneat_fd, const char *src, const ...@@ -245,4 +245,30 @@ __hidden extern int safe_mount_beneath_at(int beneat_fd, const char *src, const
const char *fstype, unsigned int flags, const void *data); const char *fstype, unsigned int flags, const void *data);
__hidden __lxc_unused int print_r(int fd, const char *path); __hidden __lxc_unused int print_r(int fd, const char *path);
/*
 * copy_struct_from_client - copy a size-versioned struct sent by a client.
 * @server_size: size of the struct as the server knows it; @dst must be able
 *               to hold this many bytes
 * @dst:         destination buffer
 * @client_size: size of the struct as the client sent it
 * @src:         source buffer holding the client's struct
 *
 * When the client's struct is smaller than the server's, the bytes both sides
 * know about are copied and the remaining tail of @dst is zero-filled, so
 * fields unknown to the client read as 0.
 *
 * When the client's struct is larger than the server's, nothing is copied and
 * @dst is left untouched.
 * NOTE(review): this still reports success; presumably the trailing client
 * bytes should be verified to be zero and the common prefix copied - confirm
 * against callers before changing.
 *
 * Returns 0 in all cases.
 */
static inline int copy_struct_from_client(__u32 server_size, void *dst,
					  __u32 client_size, const void *src)
{
	__u32 size = min(server_size, client_size);

	/* Deal with trailing bytes. */
	if (client_size < server_size) {
		/*
		 * Zero the part of dst the client did not provide. Here
		 * size == client_size, so the tail is server_size - size
		 * bytes long.
		 */
		memset((char *)dst + size, 0, server_size - size);
	} else if (client_size > server_size) {
		/* TODO: Actually come up with a nice way to test for 0. */
		return 0;
	}

	memcpy(dst, src, size);
	return 0;
}
/*
 * copy_struct_to_client - copy a size-versioned struct out to a client.
 * @client_size: size of the struct as the client knows it
 * @dst:         destination buffer
 * @server_size: size of the struct as the server knows it
 * @src:         source buffer holding the server's struct
 *
 * Copies only as many bytes as the smaller of the two sizes.
 *
 * Returns the number of bytes copied.
 */
static inline __u32 copy_struct_to_client(__u32 client_size, void *dst,
					  __u32 server_size, const void *src)
{
	__u32 nbytes = server_size;

	/* Clamp to the smaller of the two struct sizes. */
	if (client_size < nbytes)
		nbytes = client_size;

	memcpy(dst, src, nbytes);

	return nbytes;
}
#endif /* __LXC_UTILS_H */ #endif /* __LXC_UTILS_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment