cgfsng: rework cgroup2 attach

On pure unified systems we can use a single file descriptor to interact with the cgroup filesystem. Add a method to retrieve it and, as a start, use it in our unified attach codepath. Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
parent 60a22a65
...@@ -1432,7 +1432,7 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops, ...@@ -1432,7 +1432,7 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
struct lxc_handler *handler) struct lxc_handler *handler)
{ {
__do_free char *container_cgroup = NULL, *tmp = NULL; __do_free char *container_cgroup = NULL, *tmp = NULL;
int i; int i, ret;
size_t len; size_t len;
char *offset; char *offset;
int idx = 0; int idx = 0;
...@@ -1463,7 +1463,7 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops, ...@@ -1463,7 +1463,7 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
do { do {
if (idx) { if (idx) {
int ret = snprintf(offset, 5, "-%d", idx); ret = snprintf(offset, 5, "-%d", idx);
if (ret < 0 || (size_t)ret >= 5) if (ret < 0 || (size_t)ret >= 5)
return false; return false;
} }
...@@ -1488,6 +1488,16 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops, ...@@ -1488,6 +1488,16 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
INFO("The container process uses \"%s\" as cgroup", container_cgroup); INFO("The container process uses \"%s\" as cgroup", container_cgroup);
ops->container_cgroup = move_ptr(container_cgroup); ops->container_cgroup = move_ptr(container_cgroup);
if (ops->unified && ops->unified->container_full_path) {
ret = open(ops->unified->container_full_path,
O_DIRECTORY | O_RDONLY | O_CLOEXEC);
if (ret < 0)
return log_error_errno(false,
errno, "Failed to open file descriptor for unified hierarchy");
ops->unified_fd = ret;
}
return true; return true;
} }
...@@ -2205,61 +2215,64 @@ static int __cg_unified_attach(const struct hierarchy *h, const char *name, ...@@ -2205,61 +2215,64 @@ static int __cg_unified_attach(const struct hierarchy *h, const char *name,
const char *lxcpath, const char *pidstr, const char *lxcpath, const char *pidstr,
size_t pidstr_len, const char *controller) size_t pidstr_len, const char *controller)
{ {
__do_free char *base_path = NULL, *container_cgroup = NULL, __do_close_prot_errno int unified_fd = -EBADF;
*full_path = NULL; int idx = 0;
int ret; int ret;
size_t len;
int fret = -1, idx = 0;
container_cgroup = lxc_cmd_get_cgroup_path(name, lxcpath, controller); unified_fd = lxc_cmd_get_cgroup2_fd(name, lxcpath);
/* not running */ if (unified_fd < 0) {
if (!container_cgroup) __do_free char *base_path = NULL, *container_cgroup = NULL;
return 0;
base_path = must_make_path(h->mountpoint, container_cgroup, NULL); container_cgroup = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
full_path = must_make_path(base_path, "cgroup.procs", NULL); /* not running */
/* cgroup is populated */ if (!container_cgroup)
ret = lxc_write_to_file(full_path, pidstr, pidstr_len, false, 0666); return 0;
if (ret < 0 && errno != EBUSY)
goto on_error;
base_path = must_make_path(h->mountpoint, container_cgroup, NULL);
unified_fd = open(base_path, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
}
if (unified_fd < 0)
return -1;
ret = lxc_writeat(unified_fd, "cgroup.procs", pidstr, pidstr_len);
if (ret == 0) if (ret == 0)
goto on_success; return 0;
/* this is a non-leaf node */
if (errno != EBUSY)
return error_log_errno(errno, "Failed to attach to unified cgroup");
len = strlen(base_path) + STRLITERALLEN("/lxc-1000") +
STRLITERALLEN("/cgroup-procs");
full_path = must_realloc(NULL, len + 1);
do { do {
char *slash;
char attach_cgroup[STRLITERALLEN("lxc-1000/cgroup.procs") + 1];
if (idx) if (idx)
ret = snprintf(full_path, len + 1, "%s/lxc-%d", ret = snprintf(attach_cgroup, sizeof(attach_cgroup),
base_path, idx); "lxc-%d/cgroup.procs", idx);
else else
ret = snprintf(full_path, len + 1, "%s/lxc", base_path); ret = snprintf(attach_cgroup, sizeof(attach_cgroup),
if (ret < 0 || (size_t)ret >= len + 1) "lxc/cgroup.procs");
goto on_error; if (ret < 0 || (size_t)ret >= sizeof(attach_cgroup))
return -1;
ret = mkdir_p(full_path, 0755); slash = &attach_cgroup[ret] - STRLITERALLEN("/cgroup.procs");
*slash = '\0';
ret = mkdirat(unified_fd, attach_cgroup, 0755);
if (ret < 0 && errno != EEXIST) if (ret < 0 && errno != EEXIST)
goto on_error; return error_log_errno(errno, "Failed to create cgroup %s", attach_cgroup);
(void)strlcat(full_path, "/cgroup.procs", len + 1); *slash = '/';
ret = lxc_write_to_file(full_path, pidstr, len, false, 0666); ret = lxc_writeat(unified_fd, attach_cgroup, pidstr, pidstr_len);
if (ret == 0) if (ret == 0)
goto on_success; return 0;
/* this is a non-leaf node */ /* this is a non-leaf node */
if (errno != EBUSY) if (errno != EBUSY)
goto on_error; return error_log_errno(errno, "Failed to attach to unified cgroup");
idx++; idx++;
} while (idx < 1000); } while (idx < 1000);
on_success: return -1;
if (idx < 1000)
fret = 0;
on_error:
return fret;
} }
__cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops, const char *name, __cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops, const char *name,
...@@ -3145,6 +3158,8 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf) ...@@ -3145,6 +3158,8 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
if (!cg_init(cgfsng_ops, conf)) if (!cg_init(cgfsng_ops, conf))
return NULL; return NULL;
cgfsng_ops->unified_fd = -EBADF;
cgfsng_ops->data_init = cgfsng_data_init; cgfsng_ops->data_init = cgfsng_data_init;
cgfsng_ops->payload_destroy = cgfsng_payload_destroy; cgfsng_ops->payload_destroy = cgfsng_payload_destroy;
cgfsng_ops->monitor_destroy = cgfsng_monitor_destroy; cgfsng_ops->monitor_destroy = cgfsng_monitor_destroy;
......
...@@ -90,6 +90,9 @@ void cgroup_exit(struct cgroup_ops *ops) ...@@ -90,6 +90,9 @@ void cgroup_exit(struct cgroup_ops *ops)
if (ops->cgroup2_devices) if (ops->cgroup2_devices)
bpf_program_free(ops->cgroup2_devices); bpf_program_free(ops->cgroup2_devices);
if (ops->unified_fd >= 0)
close(ops->unified_fd);
for (it = ops->hierarchies; it && *it; it++) { for (it = ops->hierarchies; it && *it; it++) {
char **p; char **p;
......
...@@ -121,6 +121,8 @@ struct cgroup_ops { ...@@ -121,6 +121,8 @@ struct cgroup_ops {
struct hierarchy **hierarchies; struct hierarchy **hierarchies;
/* Pointer to the unified hierarchy. Do not free! */ /* Pointer to the unified hierarchy. Do not free! */
struct hierarchy *unified; struct hierarchy *unified;
/* File descriptor to the container's cgroup. */
int unified_fd;
/* /*
* @cgroup2_devices * @cgroup2_devices
......
...@@ -103,6 +103,7 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd) ...@@ -103,6 +103,7 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd)
[LXC_CMD_ADD_BPF_DEVICE_CGROUP] = "add_bpf_device_cgroup", [LXC_CMD_ADD_BPF_DEVICE_CGROUP] = "add_bpf_device_cgroup",
[LXC_CMD_FREEZE] = "freeze", [LXC_CMD_FREEZE] = "freeze",
[LXC_CMD_UNFREEZE] = "unfreeze", [LXC_CMD_UNFREEZE] = "unfreeze",
[LXC_CMD_GET_CGROUP2_FD] = "get_cgroup2_fd",
}; };
if (cmd >= LXC_CMD_MAX) if (cmd >= LXC_CMD_MAX)
...@@ -167,6 +168,9 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd) ...@@ -167,6 +168,9 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd)
rsp->data = rspdata; rsp->data = rspdata;
} }
if (cmd->req.cmd == LXC_CMD_GET_CGROUP2_FD)
rsp->data = INT_TO_PTR(rspfd);
if (rsp->datalen == 0) { if (rsp->datalen == 0) {
DEBUG("Response data length for command \"%s\" is 0", DEBUG("Response data length for command \"%s\" is 0",
lxc_cmd_str(cmd->req.cmd)); lxc_cmd_str(cmd->req.cmd));
...@@ -1321,6 +1325,44 @@ static int lxc_cmd_unfreeze_callback(int fd, struct lxc_cmd_req *req, ...@@ -1321,6 +1325,44 @@ static int lxc_cmd_unfreeze_callback(int fd, struct lxc_cmd_req *req,
return lxc_cmd_rsp_send(fd, &rsp); return lxc_cmd_rsp_send(fd, &rsp);
} }
/*
 * lxc_cmd_get_cgroup2_fd: ask the container's command server for its
 * cgroup2 file descriptor.
 *
 * @name    : name of the container to query
 * @lxcpath : the lxcpath in which the container lives
 *
 * Issues LXC_CMD_GET_CGROUP2_FD through lxc_cmd(). On success the value
 * stored in the response's data pointer is converted back to an int and
 * returned. If lxc_cmd() returns <= 0, or the response carries a negative
 * ret, the result of error_log_errno() is returned instead.
 */
int lxc_cmd_get_cgroup2_fd(const char *name, const char *lxcpath)
{
	struct lxc_cmd_rr cmd = {
		.req = {
			.cmd = LXC_CMD_GET_CGROUP2_FD,
		},
	};
	int stopped;
	int cmd_ret;

	cmd_ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);

	/* Transport-level failure (or nothing received). */
	if (cmd_ret <= 0)
		return error_log_errno(errno, "Failed to retrieve cgroup2 fd");

	/* The command server answered, but with an error code. */
	if (cmd.rsp.ret < 0)
		return error_log_errno(errno, "Failed to retrieve cgroup2 fd");

	return PTR_TO_INT(cmd.rsp.data);
}
/*
 * lxc_cmd_get_cgroup2_fd_callback: server-side handler for
 * LXC_CMD_GET_CGROUP2_FD.
 *
 * @fd      : client connection to reply on
 * @req     : the received request (not inspected here)
 * @handler : container handler; its cgroup_ops supplies the descriptor
 * @descr   : epoll descriptor (not used here)
 *
 * When the cgroup layout is not CGROUP_LAYOUT_UNIFIED, or no unified cgroup
 * descriptor has been opened, the client receives a plain response with
 * ret == -EINVAL and no descriptor. Otherwise ops->unified_fd is sent
 * together with a response whose ret is 0.
 *
 * Returns the result of lxc_cmd_rsp_send() for the error reply, 0 after
 * the descriptor was sent, and the value produced by log_error(1, ...) if
 * sending the descriptor failed.
 */
static int lxc_cmd_get_cgroup2_fd_callback(int fd, struct lxc_cmd_req *req,
					   struct lxc_handler *handler,
					   struct lxc_epoll_descr *descr)
{
	struct lxc_cmd_rsp rsp = {
		.ret = -EINVAL,
	};
	struct cgroup_ops *ops = handler->cgroup_ops;
	int ret;

	if (ops->cgroup_layout != CGROUP_LAYOUT_UNIFIED)
		return lxc_cmd_rsp_send(fd, &rsp);

	/*
	 * unified_fd stays negative until the payload cgroup has been opened.
	 * Trying to pass a negative descriptor cannot succeed, so tell the
	 * client explicitly instead of failing the send and leaving it
	 * without a reply.
	 */
	if (ops->unified_fd < 0)
		return lxc_cmd_rsp_send(fd, &rsp);

	rsp.ret = 0;
	ret = lxc_abstract_unix_send_fds(fd, &ops->unified_fd, 1, &rsp,
					 sizeof(rsp));
	if (ret < 0)
		return log_error(1, "Failed to send cgroup2 fd");

	return 0;
}
static int lxc_cmd_process(int fd, struct lxc_cmd_req *req, static int lxc_cmd_process(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler, struct lxc_handler *handler,
struct lxc_epoll_descr *descr) struct lxc_epoll_descr *descr)
...@@ -1346,6 +1388,7 @@ static int lxc_cmd_process(int fd, struct lxc_cmd_req *req, ...@@ -1346,6 +1388,7 @@ static int lxc_cmd_process(int fd, struct lxc_cmd_req *req,
[LXC_CMD_ADD_BPF_DEVICE_CGROUP] = lxc_cmd_add_bpf_device_cgroup_callback, [LXC_CMD_ADD_BPF_DEVICE_CGROUP] = lxc_cmd_add_bpf_device_cgroup_callback,
[LXC_CMD_FREEZE] = lxc_cmd_freeze_callback, [LXC_CMD_FREEZE] = lxc_cmd_freeze_callback,
[LXC_CMD_UNFREEZE] = lxc_cmd_unfreeze_callback, [LXC_CMD_UNFREEZE] = lxc_cmd_unfreeze_callback,
[LXC_CMD_GET_CGROUP2_FD] = lxc_cmd_get_cgroup2_fd_callback,
}; };
if (req->cmd >= LXC_CMD_MAX) { if (req->cmd >= LXC_CMD_MAX) {
......
...@@ -50,6 +50,7 @@ typedef enum { ...@@ -50,6 +50,7 @@ typedef enum {
LXC_CMD_ADD_BPF_DEVICE_CGROUP, LXC_CMD_ADD_BPF_DEVICE_CGROUP,
LXC_CMD_FREEZE, LXC_CMD_FREEZE,
LXC_CMD_UNFREEZE, LXC_CMD_UNFREEZE,
LXC_CMD_GET_CGROUP2_FD,
LXC_CMD_MAX, LXC_CMD_MAX,
} lxc_cmd_t; } lxc_cmd_t;
...@@ -139,5 +140,6 @@ extern int lxc_cmd_add_bpf_device_cgroup(const char *name, const char *lxcpath, ...@@ -139,5 +140,6 @@ extern int lxc_cmd_add_bpf_device_cgroup(const char *name, const char *lxcpath,
struct device_item *device); struct device_item *device);
extern int lxc_cmd_freeze(const char *name, const char *lxcpath, int timeout); extern int lxc_cmd_freeze(const char *name, const char *lxcpath, int timeout);
extern int lxc_cmd_unfreeze(const char *name, const char *lxcpath, int timeout); extern int lxc_cmd_unfreeze(const char *name, const char *lxcpath, int timeout);
extern int lxc_cmd_get_cgroup2_fd(const char *name, const char *lxcpath);
#endif /* __commands_h */ #endif /* __commands_h */
...@@ -36,6 +36,22 @@ ...@@ -36,6 +36,22 @@
#include "string_utils.h" #include "string_utils.h"
#include "utils.h" #include "utils.h"
int lxc_writeat(int dirfd, const char *filename, const void *buf, size_t count)
{
__do_close_prot_errno int fd = -EBADF;
ssize_t ret;
fd = openat(dirfd, filename, O_WRONLY | O_CLOEXEC);
if (fd < 0)
return -1;
ret = lxc_write_nointr(fd, buf, count);
if (ret < 0 || (size_t)ret != count)
return -1;
return 0;
}
int lxc_write_to_file(const char *filename, const void *buf, size_t count, int lxc_write_to_file(const char *filename, const void *buf, size_t count,
bool add_newline, mode_t mode) bool add_newline, mode_t mode)
{ {
......
...@@ -32,6 +32,8 @@ ...@@ -32,6 +32,8 @@
/* read and write whole files */ /* read and write whole files */
extern int lxc_write_to_file(const char *filename, const void *buf, extern int lxc_write_to_file(const char *filename, const void *buf,
size_t count, bool add_newline, mode_t mode); size_t count, bool add_newline, mode_t mode);
extern int lxc_writeat(int dirfd, const char *filename, const void *buf,
size_t count);
extern int lxc_read_from_file(const char *filename, void *buf, size_t count); extern int lxc_read_from_file(const char *filename, void *buf, size_t count);
/* send and receive buffers completely */ /* send and receive buffers completely */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment