commands: add LXC_CMD_GET_CGROUP_FD and LXC_CMD_GET_LIMIT_CGROUP_FD

and port cgroup_get() and cgroup_set(). This means no more useless cgroup driver initialization on every get or set. Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
parent 5d8caeb9
......@@ -104,7 +104,7 @@ static bool string_in_list(char **list, const char *entry)
/* Given a handler's cgroup data, return the struct hierarchy for the controller
* @c, or NULL if there is none.
*/
static struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *controller)
static struct hierarchy *get_hierarchy(const struct cgroup_ops *ops, const char *controller)
{
if (!ops->hierarchies)
return log_trace_errno(NULL, errno, "There are no useable cgroup controllers");
......@@ -148,6 +148,38 @@ static struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *contr
return ret_set_errno(NULL, ENOENT);
}
/*
 * prepare_cgroup_fd: Fill in a cgroup_fd reply for the controller named in @fd
 *
 * @ops   : cgroup driver state whose hierarchies are searched
 * @fd    : in:  fd->controller names the controller to look up; a non-zero
 *               fd->type additionally requires the hierarchy to be of that
 *               cgroup filesystem type
 *          out: layout, type, fd and (for the unified hierarchy only)
 *               utilities are filled in
 * @limit : true to hand out the directory fd of the limiting cgroup
 *          (dfd_lim), false to hand out the directory fd of the container's
 *          cgroup (dfd_con)
 *
 * Returns 0 on success. On failure the result of ret_errno() is returned for
 * ENOENT (no hierarchy for the controller), EINVAL (hierarchy has a different
 * type than requested) or EBADF (the selected directory fd is not open).
 *
 * The descriptor stored in fd->fd is the one held by the hierarchy; it is not
 * duplicated here.
 */
int prepare_cgroup_fd(const struct cgroup_ops *ops, struct cgroup_fd *fd, bool limit)
{
	int dfd;
	const struct hierarchy *h;

	h = get_hierarchy(ops, fd->controller);
	if (!h)
		return ret_errno(ENOENT);

	/*
	 * The client requested that the controller must be in a specific
	 * cgroup version.
	 */
	if (fd->type != 0 && fd->type != h->fs_type)
		return ret_errno(EINVAL);

	/* The limiting cgroup is tracked by dfd_lim, the container's by dfd_con. */
	dfd = limit ? h->dfd_lim : h->dfd_con;
	if (dfd < 0)
		return ret_errno(EBADF);

	fd->layout = ops->cgroup_layout;
	fd->type = h->fs_type;

	/* Utilities are only reported for the unified hierarchy. */
	if (fd->type == UNIFIED_HIERARCHY)
		fd->utilities = h->utilities;

	fd->fd = dfd;

	return 0;
}
/* Taken over modified from the kernel sources. */
#define NBITS 32 /* bits in uint32_t */
#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
......@@ -3442,55 +3474,105 @@ int cgroup_attach(const struct lxc_conf *conf, const char *name,
}
/*
 * cgroup_get: Read a cgroup setting of a running container.
 *
 * Connects to command socket therefore isn't callable from command handler.
 *
 * NOTE(review): this listing interleaves the removed and the added lines of
 * the change (two signatures, two sets of locals, the old cgroup2-only read
 * path next to the new per-controller path). The comments below describe the
 * added lines.
 *
 * @name, @lxcpath : identify the container; both must be non-empty
 * @key            : setting to read; the part before the first '.' is used as
 *                   the controller name
 * @buf, @len      : output buffer; either both set or both unset
 *
 * Returns the result of lxc_read_try_buf_at() on the added path, or an error
 * from ret_errno() / the command helpers.
 */
int cgroup_get(const char *name, const char *lxcpath,
const char *filename, char *buf, size_t len)
int cgroup_get(const char *name, const char *lxcpath, const char *key, char *buf, size_t len)
{
__do_close int unified_fd = -EBADF;
ssize_t ret;
__do_close int dfd = -EBADF;
struct cgroup_fd fd = {
.fd = -EBADF,
};
size_t len_controller;
int ret;
if (is_empty_string(filename) || is_empty_string(name) ||
is_empty_string(lxcpath))
if (is_empty_string(name) || is_empty_string(lxcpath) ||
is_empty_string(key))
return ret_errno(EINVAL);
/* A buffer without a length, or a length without a buffer, is rejected. */
if ((buf && !len) || (len && !buf))
return ret_errno(EINVAL);
unified_fd = lxc_cmd_get_limit_cgroup2_fd(name, lxcpath);
if (unified_fd < 0)
return ret_errno(ENOSYS);
/* Length of the controller prefix of @key, i.e. everything before the first '.'. */
len_controller = strcspn(key, ".");
len_controller++; /* Don't forget the \0 byte. */
if (len_controller >= MAX_CGROUP_ROOT_NAMELEN)
return ret_errno(EINVAL);
/* The size argument limits the copy to the controller prefix only. */
(void)strlcpy(fd.controller, key, len_controller);
ret = lxc_read_try_buf_at(unified_fd, filename, buf, len);
if (ret < 0)
SYSERROR("Failed to read cgroup value");
/* Ask the container's command server for the limiting cgroup fd of this controller. */
ret = lxc_cmd_get_limit_cgroup_fd(name, lxcpath, sizeof(struct cgroup_fd), &fd);
if (ret < 0) {
if (!ERRNO_IS_NOT_SUPPORTED(ret))
return ret;
/* Fall back to the cgroup2-only command when the new one isn't supported. */
dfd = lxc_cmd_get_limit_cgroup2_fd(name, lxcpath);
if (dfd < 0) {
/*
 * NOTE(review): ret was already established to be a "not supported"
 * error a few lines up, so this condition can never be true as
 * written; testing dfd was presumably intended - confirm.
 */
if (!ERRNO_IS_NOT_SUPPORTED(ret))
return ret;
return ret_errno(ENOSYS);
}
fd.type = UNIFIED_HIERARCHY;
fd.fd = move_fd(dfd);
}
/* From here on dfd carries the descriptor and is subject to __do_close. */
dfd = move_fd(fd.fd);
TRACE("Reading %s from %s cgroup hierarchy", key, cgroup_hierarchy_name(fd.type));
/* Reading "devices" settings is refused on the unified hierarchy. */
if (fd.type == UNIFIED_HIERARCHY && strequal(fd.controller, "devices"))
return ret_errno(EOPNOTSUPP);
else
ret = lxc_read_try_buf_at(dfd, key, buf, len);
return ret;
}
/* Connects to command socket therefore isn't callable from command handler. */
int cgroup_set(const char *name, const char *lxcpath,
const char *filename, const char *value)
int cgroup_set(const char *name, const char *lxcpath, const char *key, const char *value)
{
__do_close int unified_fd = -EBADF;
ssize_t ret;
__do_close int dfd = -EBADF;
struct cgroup_fd fd = {
.fd = -EBADF,
};
size_t len_controller;
int ret;
if (is_empty_string(filename) || is_empty_string(value) ||
is_empty_string(name) || is_empty_string(lxcpath))
if (is_empty_string(name) || is_empty_string(lxcpath) ||
is_empty_string(key) || is_empty_string(value))
return ret_errno(EINVAL);
unified_fd = lxc_cmd_get_limit_cgroup2_fd(name, lxcpath);
if (unified_fd < 0)
return ret_errno(ENOSYS);
len_controller = strcspn(key, ".");
len_controller++; /* Don't forget the \0 byte. */
if (len_controller >= MAX_CGROUP_ROOT_NAMELEN)
return ret_errno(EINVAL);
(void)strlcpy(fd.controller, key, len_controller);
ret = lxc_cmd_get_limit_cgroup_fd(name, lxcpath, sizeof(struct cgroup_fd), &fd);
if (ret < 0) {
if (!ERRNO_IS_NOT_SUPPORTED(ret))
return ret;
dfd = lxc_cmd_get_limit_cgroup2_fd(name, lxcpath);
if (dfd < 0) {
if (!ERRNO_IS_NOT_SUPPORTED(ret))
return ret;
if (strnequal(filename, "devices.", STRLITERALLEN("devices."))) {
return ret_errno(ENOSYS);
}
fd.type = UNIFIED_HIERARCHY;
fd.fd = move_fd(dfd);
}
dfd = move_fd(fd.fd);
TRACE("Setting %s to %s in %s cgroup hierarchy", key, value, cgroup_hierarchy_name(fd.type));
if (fd.type == UNIFIED_HIERARCHY && strequal(fd.controller, "devices")) {
struct device_item device = {};
ret = device_cgroup_rule_parse(&device, filename, value);
ret = device_cgroup_rule_parse(&device, key, value);
if (ret < 0)
return log_error_errno(-1, EINVAL, "Failed to parse device string %s=%s", filename, value);
return log_error_errno(-1, EINVAL, "Failed to parse device string %s=%s",
key, value);
ret = lxc_cmd_add_bpf_device_cgroup(name, lxcpath, &device);
} else {
ret = lxc_writeat(unified_fd, filename, value, strlen(value));
ret = lxc_writeat(dfd, key, value, strlen(value));
}
return ret;
......
......@@ -52,16 +52,41 @@ static inline const char *cgroup_layout_name(cgroup_layout_t layout)
}
typedef enum {
LEGACY_HIERARCHY = CGROUP_SUPER_MAGIC,
LEGACY_HIERARCHY = CGROUP_SUPER_MAGIC,
UNIFIED_HIERARCHY = CGROUP2_SUPER_MAGIC,
} cgroupfs_type_magic_t;
/*
 * Translate a cgroup filesystem type into a short name suitable for log
 * output. Any value that is neither the legacy nor the unified hierarchy
 * yields "unknown".
 */
static inline const char *cgroup_hierarchy_name(cgroupfs_type_magic_t type)
{
	if (type == LEGACY_HIERARCHY)
		return "legacy";

	if (type == UNIFIED_HIERARCHY)
		return "unified";

	return "unknown";
}
#define DEVICES_CONTROLLER (1U << 0)
#define FREEZER_CONTROLLER (1U << 1)
/*
* This is the maximum length of a cgroup controller in the kernel.
* This includes the \0 byte.
*/
#define MAX_CGROUP_ROOT_NAMELEN 64
/* That's plenty of hierarchies. */
#define CGROUP_CTX_MAX_FD 20
// BUILD_BUG_ON(CGROUP_CTX_MAX_FD > KERNEL_SCM_MAX_FD);
/*
 * Request/response payload for retrieving the cgroup directory file
 * descriptor of a single controller. The client fills in @controller (and
 * optionally @type) before sending; prepare_cgroup_fd() fills in the
 * remaining fields on the server side.
 */
struct cgroup_fd {
/* Cgroup layout of the server, copied from its cgroup_ops. */
__s32 layout;
/* Utilities of the hierarchy; only set for the unified hierarchy. */
__u32 utilities;
/*
 * Cgroup filesystem type. In a request, 0 means "any type" and a non-zero
 * value must match the hierarchy's type; in a response it is the hierarchy's
 * actual type.
 */
__s32 type;
/* Directory file descriptor for the controller's cgroup. */
__s32 fd;
/* Name of the controller the request is about. */
char controller[MAX_CGROUP_ROOT_NAMELEN];
} __attribute__((aligned(8)));
struct cgroup_ctx {
__s32 layout;
......@@ -259,9 +284,9 @@ define_cleanup_function(struct cgroup_ops *, cgroup_exit);
__hidden extern int cgroup_attach(const struct lxc_conf *conf, const char *name,
const char *lxcpath, pid_t pid);
__hidden extern int cgroup_get(const char *name, const char *lxcpath,
const char *filename, char *buf, size_t len);
const char *key, char *buf, size_t len);
__hidden extern int cgroup_set(const char *name, const char *lxcpath,
const char *filename, const char *value);
const char *key, const char *value);
__hidden extern int cgroup_freeze(const char *name, const char *lxcpath, int timeout);
__hidden extern int cgroup_unfreeze(const char *name, const char *lxcpath, int timeout);
__hidden extern int __cgroup_unfreeze(int unified_fd, int timeout);
......@@ -317,5 +342,7 @@ static inline int prepare_cgroup_ctx(struct cgroup_ops *ops,
return 0;
}
__hidden extern int prepare_cgroup_fd(const struct cgroup_ops *ops,
struct cgroup_fd *fd, bool limit);
#endif /* __LXC_CGROUP_H */
......@@ -112,6 +112,12 @@ static int __transfer_cgroup_ctx_fds(struct unix_fds *fds, struct cgroup_ctx *ct
return 0;
}
/*
 * Hand the first file descriptor received with the response over to the
 * cgroup_fd payload. Always returns 0.
 *
 * NOTE(review): move_fd() presumably also invalidates fds->fd[0] so that the
 * descriptor is not closed again when @fds is cleaned up - confirm.
 */
static int __transfer_cgroup_fd(struct unix_fds *fds, struct cgroup_fd *fd)
{
	int received = move_fd(fds->fd[0]);

	fd->fd = received;
	return 0;
}
/*
* lxc_cmd_rsp_recv: Receive a response to a command
*
......@@ -131,13 +137,18 @@ static int __transfer_cgroup_ctx_fds(struct unix_fds *fds, struct cgroup_ctx *ct
*/
static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd)
{
__do_free void *__private_ptr = NULL;
struct lxc_cmd_console_rsp_data *data_console = NULL;
call_cleaner(put_unix_fds) struct unix_fds *fds = &(struct unix_fds){};
struct lxc_cmd_rsp *rsp = &cmd->rsp;
int cur_cmd = cmd->req.cmd;
int cur_cmd = cmd->req.cmd, fret = 0;
const char *cur_cmdstr;
int fret = 0;
int ret;
/*
* Determine whether this command will receive file descriptors and how
* many at most.
*/
cur_cmdstr = lxc_cmd_str(cur_cmd);
switch (cur_cmd) {
case LXC_CMD_GET_CGROUP_FD:
......@@ -164,87 +175,140 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd)
fds->fd_count_max = 0;
break;
}
/* Receive the first response including file descriptors if any. */
ret = lxc_abstract_unix_recv_fds(sock, fds, rsp, sizeof(*rsp));
if (ret < 0)
return syserrno(ret, "Failed to receive response for command \"%s\"", cur_cmdstr);
/*
* Verify that we actually received any file descriptors if the command
* expects to do so.
*/
if (fds->fd_count_max == 0) {
TRACE("Command \"%s\" received response with %u file descriptors", cur_cmdstr, fds->fd_count_ret);
WARN("Command \"%s\" received response", cur_cmdstr);
} else if (fds->fd_count_ret == 0) {
WARN("Command \"%s\" received response without expected file descriptors", cur_cmdstr);
TRACE("Command \"%s\" received response without any of the expected %u file descriptors", cur_cmdstr, fds->fd_count_max);
fret = -EBADF;
} else {
TRACE("Command \"%s\" received response with %u of %u expected file descriptors", cur_cmdstr, fds->fd_count_ret, fds->fd_count_max);
}
if (cur_cmd == LXC_CMD_CONSOLE) {
struct lxc_cmd_console_rsp_data *rspdata;
/* recv() returns 0 bytes when a tty cannot be allocated,
* rsp->ret is < 0 when the peer permission check failed
*/
if (ret == 0 || rsp->ret < 0)
return 0;
rspdata = malloc(sizeof(*rspdata));
if (!rspdata)
return syserrno_set(fret ?: -ENOMEM, "Failed to receive response for command \"%s\"", cur_cmdstr);
rspdata->ptxfd = move_fd(fds->fd[0]);
rspdata->ttynum = PTR_TO_INT(rsp->data);
rsp->data = rspdata;
}
/*
* Ensure that no excessive data is sent unless someone retrieves the
* console ringbuffer.
*/
if ((rsp->datalen > LXC_CMD_DATA_MAX) &&
(cur_cmd != LXC_CMD_CONSOLE_LOG))
return syserrno_set(fret ?: -E2BIG, "Response data for command \"%s\" is too long: %d bytes > %d",
cur_cmdstr, rsp->datalen, LXC_CMD_DATA_MAX);
/*
* Prepare buffer for any command that expects to receive additional
* data. Note that some don't want any additional data.
*/
switch (cur_cmd) {
case LXC_CMD_GET_CGROUP_FD:
case LXC_CMD_GET_CGROUP2_FD: /* no data */
__fallthrough;
case LXC_CMD_GET_LIMIT_CGROUP_FD:
case LXC_CMD_GET_LIMIT_CGROUP2_FD: /* no data */
__fallthrough;
case LXC_CMD_GET_CGROUP2_FD:
case LXC_CMD_GET_INIT_PIDFD: /* no data */
__fallthrough;
case LXC_CMD_GET_LIMIT_CGROUP2_FD:
case LXC_CMD_GET_DEVPTS_FD: /* no data */
__fallthrough;
case LXC_CMD_GET_INIT_PIDFD:
__fallthrough;
case LXC_CMD_GET_DEVPTS_FD:
case LXC_CMD_GET_SECCOMP_NOTIFY_FD: /* no data */
if (!fret)
rsp->data = INT_TO_PTR(move_fd(fds->fd[0]));
/* Return for any command that doesn't expect additional data. */
return log_debug(fret ?: ret, "Finished processing \"%s\" with file descriptor %d", cur_cmdstr, PTR_TO_INT(rsp->data));
case LXC_CMD_GET_CGROUP_FD: /* data */
__fallthrough;
case LXC_CMD_GET_SECCOMP_NOTIFY_FD:
rsp->data = INT_TO_PTR(move_fd(fds->fd[0]));
return log_debug(fret ?: ret, "Finished processing \"%s\"", cur_cmdstr);
case LXC_CMD_GET_CGROUP_CTX:
if ((rsp->datalen == 0) || (rsp->datalen > sizeof(struct cgroup_ctx)))
case LXC_CMD_GET_LIMIT_CGROUP_FD: /* data */
if (rsp->datalen > sizeof(struct cgroup_fd))
return syserrno_set(fret ?: -EINVAL, "Invalid response size from server for \"%s\"", cur_cmdstr);
/* Don't pointlessly allocate. */
rsp->data = (void *)cmd->req.data;
break;
default:
case LXC_CMD_GET_CGROUP_CTX: /* data */
if (rsp->datalen > sizeof(struct cgroup_ctx))
return syserrno_set(fret ?: -EINVAL, "Invalid response size from server for \"%s\"", cur_cmdstr);
/* Don't pointlessly allocate. */
rsp->data = (void *)cmd->req.data;
break;
}
case LXC_CMD_CONSOLE: /* data */
/*
* recv() returns 0 bytes when a tty cannot be allocated,
* rsp->ret is < 0 when the peer permission check failed
*/
if (ret == 0 || rsp->ret < 0)
return 0;
if (rsp->datalen == 0)
return log_debug(fret ?: ret, "Response data length for command \"%s\" is 0", cur_cmdstr);
__private_ptr = malloc(sizeof(struct lxc_cmd_console_rsp_data));
if (!__private_ptr)
return syserrno_set(fret ?: -ENOMEM, "Failed to receive response for command \"%s\"", cur_cmdstr);
data_console = (struct lxc_cmd_console_rsp_data *)__private_ptr;
data_console->ptxfd = move_fd(fds->fd[0]);
data_console->ttynum = PTR_TO_INT(rsp->data);
if ((rsp->datalen > LXC_CMD_DATA_MAX) &&
(cur_cmd != LXC_CMD_CONSOLE_LOG))
return syserrno_set(fret ?: -E2BIG, "Response data for command \"%s\" is too long: %d bytes > %d",
cur_cmdstr, rsp->datalen, LXC_CMD_DATA_MAX);
rsp->datalen = 0;
rsp->data = data_console;
break;
case LXC_CMD_CONSOLE_LOG: /* data */
__private_ptr = zalloc(rsp->datalen + 1);
rsp->data = __private_ptr;
break;
default: /* catch any additional command */
if (rsp->datalen > 0) {
__private_ptr = zalloc(rsp->datalen);
rsp->data = __private_ptr;
}
break;
}
if (cur_cmd == LXC_CMD_CONSOLE_LOG)
rsp->data = zalloc(rsp->datalen + 1);
else if (cur_cmd != LXC_CMD_GET_CGROUP_CTX)
rsp->data = malloc(rsp->datalen);
if (rsp->datalen == 0) {
DEBUG("Command \"%s\" requested no additional data", cur_cmdstr);
/*
* Note that LXC_CMD_CONSOLE historically allocates memory to
* return info to the caller. That's why we jump to no_data so
* we ensure that the allocated data is wiped if we return
* early here.
*/
goto no_data;
}
/*
* All commands ending up here expect data so rsp->data must be valid.
* Either static or allocated memory.
*/
if (!rsp->data)
return syserrno_set(fret ?: -ENOMEM, "Failed to allocate response buffer for command \"%s\"", cur_cmdstr);
return syserrno_set(fret ?: -ENOMEM, "Failed to prepare response buffer for command \"%s\"", cur_cmdstr);
ret = lxc_recv_nointr(sock, rsp->data, rsp->datalen, 0);
if (ret != rsp->datalen)
return syserrno(-errno, "Failed to receive response data for command \"%s\"", cur_cmdstr);
return syserrno(-errno, "Failed to receive response data for command \"%s\": %d != %d", cur_cmdstr, ret, rsp->datalen);
if (cur_cmd == LXC_CMD_GET_CGROUP_CTX) {
ret = __transfer_cgroup_ctx_fds(fds, rsp->data);
if (ret < 0)
return syserrno(ret, "Failed to transfer file descriptors for \"%s\"", cur_cmdstr);
switch (cur_cmd) {
case LXC_CMD_GET_CGROUP_CTX:
if (!fret)
ret = __transfer_cgroup_ctx_fds(fds, rsp->data);
/* Make sure any received fds are wiped by us. */
break;
case LXC_CMD_GET_CGROUP_FD:
__fallthrough;
case LXC_CMD_GET_LIMIT_CGROUP_FD:
if (!fret)
ret = __transfer_cgroup_fd(fds, rsp->data);
/* Make sure any received fds are wiped by us. */
break;
}
no_data:
if (!fret && ret >= 0)
move_ptr(__private_ptr);
return fret ?: ret;
}
......@@ -1583,12 +1647,14 @@ static int lxc_cmd_unfreeze_callback(int fd, struct lxc_cmd_req *req,
}
int lxc_cmd_get_cgroup_fd(const char *name, const char *lxcpath,
const char *controller, cgroupfs_type_magic_t type)
size_t size_ret_fd, struct cgroup_fd *ret_fd)
{
int ret, stopped;
struct lxc_cmd_rr cmd = {
.req = {
.cmd = LXC_CMD_GET_CGROUP_FD,
.datalen = sizeof(struct cgroup_fd),
.data = ret_fd,
},
.rsp = {
ret = -ENOSYS,
......@@ -1597,22 +1663,23 @@ int lxc_cmd_get_cgroup_fd(const char *name, const char *lxcpath,
ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
if (ret < 0)
return -1;
return log_debug_errno(-1, errno, "Failed to process cgroup fd command");
if (cmd.rsp.ret < 0)
return syserrno_set(cmd.rsp.ret, "Failed to receive cgroup fd");
return log_debug_errno(-EBADF, errno, "Failed to receive cgroup fd");
return PTR_TO_INT(cmd.rsp.data);
return 0;
}
int lxc_cmd_get_limit_cgroup_fd(const char *name, const char *lxcpath,
const char *controller,
cgroupfs_type_magic_t type)
size_t size_ret_fd, struct cgroup_fd *ret_fd)
{
int ret, stopped;
struct lxc_cmd_rr cmd = {
.req = {
.cmd = LXC_CMD_GET_LIMIT_CGROUP_FD,
.datalen = sizeof(struct cgroup_fd),
.data = ret_fd,
},
.rsp = {
ret = -ENOSYS,
......@@ -1621,39 +1688,44 @@ int lxc_cmd_get_limit_cgroup_fd(const char *name, const char *lxcpath,
ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
if (ret < 0)
return -1;
return log_debug_errno(-1, errno, "Failed to process limit cgroup fd command");
if (cmd.rsp.ret < 0)
return syserrno_set(cmd.rsp.ret, "Failed to receive cgroup fd");
return log_debug_errno(-EBADF, errno, "Failed to receive limit cgroup fd");
return PTR_TO_INT(cmd.rsp.data);
return 0;
}
/*
 * Server-side handler shared by the "get cgroup fd" and "get limit cgroup fd"
 * commands: looks up the directory fd for the controller named in the request
 * and sends it back together with the filled-in struct cgroup_fd.
 *
 * NOTE(review): this listing interleaves the removed and the added lines of
 * the change (e.g. both the limiting_cgroup and the limit parameter, both the
 * send_fd and the fd_server paths). The comments below describe the added
 * lines.
 *
 * @limit selects between the limiting cgroup and the container's cgroup and
 * is passed through to prepare_cgroup_fd().
 */
static int __lxc_cmd_get_cgroup_fd_callback(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler,
struct lxc_epoll_descr *descr,
bool limiting_cgroup)
bool limit)
{
/* Default reply is -EINVAL; it is sent unchanged for malformed requests. */
struct lxc_cmd_rsp rsp = {
.ret = -EINVAL,
};
struct cgroup_ops *ops = handler->cgroup_ops;
int send_fd;
struct cgroup_fd fd_server = {};
int ret;
if (!pure_unified_layout(ops) || !ops->unified)
/* Copy the client's request into a server-side struct cgroup_fd. */
ret = copy_struct_from_client(sizeof(struct cgroup_fd), &fd_server,
req->datalen, req->data);
if (ret < 0)
return lxc_cmd_rsp_send_reap(fd, &rsp);
/* FIXME */
send_fd = limiting_cgroup ? ops->unified->dfd_lim
: ops->unified->dfd_con;
/* A request without a controller name is rejected. */
if (strnlen(fd_server.controller, MAX_CGROUP_ROOT_NAMELEN) == 0)
return lxc_cmd_rsp_send_reap(fd, &rsp);
if (send_fd < 0) {
rsp.ret = -EBADF;
ret = prepare_cgroup_fd(ops, &fd_server, limit);
if (ret < 0) {
/* Report the lookup error to the client instead of the default. */
rsp.ret = ret;
return lxc_cmd_rsp_send_reap(fd, &rsp);
}
rsp.ret = 0;
return rsp_one_fd(fd, send_fd, &rsp);
rsp.data = &fd_server;
/* Never send back more bytes than the client's request carried. */
rsp.datalen = min(sizeof(struct cgroup_fd), (size_t)req->datalen);
return rsp_one_fd(fd, fd_server.fd, &rsp);
}
static int lxc_cmd_get_cgroup_fd_callback(int fd, struct lxc_cmd_req *req,
......
......@@ -140,7 +140,8 @@ __hidden extern int lxc_cmd_freeze(const char *name, const char *lxcpath, int ti
__hidden extern int lxc_cmd_unfreeze(const char *name, const char *lxcpath, int timeout);
__hidden extern int lxc_cmd_get_cgroup2_fd(const char *name, const char *lxcpath);
__hidden extern int lxc_cmd_get_cgroup_fd(const char *name, const char *lxcpath,
const char *controller, cgroupfs_type_magic_t type);
size_t size_ret_fd,
struct cgroup_fd *ret_fd);
__hidden extern char *lxc_cmd_get_limit_cgroup_path(const char *name,
const char *lxcpath,
const char *subsystem);
......@@ -148,8 +149,8 @@ __hidden extern int lxc_cmd_get_limit_cgroup2_fd(const char *name,
const char *lxcpath);
__hidden extern int lxc_cmd_get_limit_cgroup_fd(const char *name,
const char *lxcpath,
const char *controller,
cgroupfs_type_magic_t type);
size_t size_ret_fd,
struct cgroup_fd *ret_fd);
__hidden extern int lxc_cmd_get_devpts_fd(const char *name, const char *lxcpath);
#endif /* __commands_h */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment