Unverified Commit 8a7d02b7 by Stéphane Graber Committed by GitHub

Merge pull request #3207 from brauner/cgroup2_improvements_2

cgroups: improve container cgroup attaching
parents c10ac1b1 9994db51
...@@ -1230,16 +1230,21 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, ...@@ -1230,16 +1230,21 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function,
/* Attach to cgroup, if requested. */ /* Attach to cgroup, if requested. */
if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) { if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) {
struct cgroup_ops *cgroup_ops; /*
* If this is the unified hierarchy cgroup_attach() is
* enough.
*/
ret = cgroup_attach(name, lxcpath, pid);
if (ret) {
__do_cgroup_exit struct cgroup_ops *cgroup_ops = NULL;
cgroup_ops = cgroup_init(conf); cgroup_ops = cgroup_init(conf);
if (!cgroup_ops) if (!cgroup_ops)
goto on_error; goto on_error;
if (!cgroup_ops->attach(cgroup_ops, name, lxcpath, pid)) if (!cgroup_ops->attach(cgroup_ops, name, lxcpath, pid))
goto on_error; goto on_error;
}
cgroup_exit(cgroup_ops);
TRACE("Moved intermediate process %d into container's cgroups", pid); TRACE("Moved intermediate process %d into container's cgroups", pid);
} }
......
...@@ -2202,38 +2202,14 @@ static inline char *build_full_cgpath_from_monitorpath(struct hierarchy *h, ...@@ -2202,38 +2202,14 @@ static inline char *build_full_cgpath_from_monitorpath(struct hierarchy *h,
return must_make_path(h->mountpoint, inpath, filename, NULL); return must_make_path(h->mountpoint, inpath, filename, NULL);
} }
/* Technically, we're always at a delegation boundary here (This is especially static int cgroup_attach_leaf(int unified_fd, int64_t pid)
* true when cgroup namespaces are available.). The reasoning is that in order
* for us to have been able to start a container in the first place the root
* cgroup must have been a leaf node. Now, either the container's init system
* has populated the cgroup and kept it as a leaf node or it has created
* subtrees. In the former case we will simply attach to the leaf node we
* created when we started the container in the latter case we create our own
* cgroup for the attaching process.
*/
static int __cg_unified_attach(const struct hierarchy *h, const char *name,
const char *lxcpath, const char *pidstr,
size_t pidstr_len, const char *controller)
{ {
__do_close_prot_errno int unified_fd = -EBADF;
int idx = 0; int idx = 0;
int ret; int ret;
char pidstr[INTTYPE_TO_STRLEN(int64_t) + 1];
size_t pidstr_len;
unified_fd = lxc_cmd_get_cgroup2_fd(name, lxcpath); pidstr_len = sprintf(pidstr, INT64_FMT, pid);
if (unified_fd < 0) {
__do_free char *base_path = NULL, *container_cgroup = NULL;
container_cgroup = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
/* not running */
if (!container_cgroup)
return 0;
base_path = must_make_path(h->mountpoint, container_cgroup, NULL);
unified_fd = open(base_path, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
}
if (unified_fd < 0)
return -1;
ret = lxc_writeat(unified_fd, "cgroup.procs", pidstr, pidstr_len); ret = lxc_writeat(unified_fd, "cgroup.procs", pidstr, pidstr_len);
if (ret == 0) if (ret == 0)
return 0; return 0;
...@@ -2275,6 +2251,51 @@ static int __cg_unified_attach(const struct hierarchy *h, const char *name, ...@@ -2275,6 +2251,51 @@ static int __cg_unified_attach(const struct hierarchy *h, const char *name,
return -1; return -1;
} }
int cgroup_attach(const char *name, const char *lxcpath, int64_t pid)
{
__do_close_prot_errno int unified_fd = -EBADF;
unified_fd = lxc_cmd_get_cgroup2_fd(name, lxcpath);
if (unified_fd < 0)
return -1;
return cgroup_attach_leaf(unified_fd, pid);
}
/*
 * Move @pid into the container's cgroup on the unified hierarchy @h.
 *
 * Technically, we're always at a delegation boundary here (This is especially
 * true when cgroup namespaces are available.). The reasoning is that in order
 * for us to have been able to start a container in the first place the root
 * cgroup must have been a leaf node. Now, either the container's init system
 * has populated the cgroup and kept it as a leaf node or it has created
 * subtrees. In the former case we will simply attach to the leaf node we
 * created when we started the container; in the latter case we create our own
 * cgroup for the attaching process.
 *
 * cgroup_attach() is tried first. Only if it fails do we fall back to looking
 * up the container's cgroup path for @controller and opening that directory
 * below the hierarchy's mountpoint ourselves.
 *
 * Returns 0 on success or when the container is not running (no cgroup path
 * could be retrieved), -1 on failure.
 */
static int __cg_unified_attach(const struct hierarchy *h, const char *name,
			       const char *lxcpath, pid_t pid,
			       const char *controller)
{
	__do_close_prot_errno int unified_fd = -EBADF;
	__do_free char *path = NULL, *cgroup = NULL;

	/*
	 * A successful attach must be reported right away: unified_fd is still
	 * -EBADF at this point, so falling through to the fd check below would
	 * turn success into a failure.
	 */
	if (cgroup_attach(name, lxcpath, pid) == 0)
		return 0;

	cgroup = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
	/* not running */
	if (!cgroup)
		return 0;

	path = must_make_path(h->mountpoint, cgroup, NULL);
	unified_fd = open(path, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
	if (unified_fd < 0)
		return -1;

	return cgroup_attach_leaf(unified_fd, pid);
}
__cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops, const char *name, __cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops, const char *name,
const char *lxcpath, pid_t pid) const char *lxcpath, pid_t pid)
{ {
...@@ -2293,7 +2314,7 @@ __cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops, const char *name, ...@@ -2293,7 +2314,7 @@ __cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops, const char *name,
struct hierarchy *h = ops->hierarchies[i]; struct hierarchy *h = ops->hierarchies[i];
if (h->version == CGROUP2_SUPER_MAGIC) { if (h->version == CGROUP2_SUPER_MAGIC) {
ret = __cg_unified_attach(h, name, lxcpath, pidstr, len, ret = __cg_unified_attach(h, name, lxcpath, pid,
h->controllers[0]); h->controllers[0]);
if (ret < 0) if (ret < 0)
return false; return false;
...@@ -2750,10 +2771,8 @@ __cgfsng_ops bool cgfsng_devices_activate(struct cgroup_ops *ops, ...@@ -2750,10 +2771,8 @@ __cgfsng_ops bool cgfsng_devices_activate(struct cgroup_ops *ops,
struct lxc_list *it; struct lxc_list *it;
struct bpf_program *devices_old; struct bpf_program *devices_old;
if (!unified) if (!unified || !unified->bpf_device_controller ||
return false; !unified->container_full_path || lxc_list_empty(&conf->devices))
if (lxc_list_empty(&conf->devices))
return true; return true;
devices = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE); devices = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE);
......
...@@ -192,6 +192,8 @@ static inline void __auto_cgroup_exit__(struct cgroup_ops **ops) ...@@ -192,6 +192,8 @@ static inline void __auto_cgroup_exit__(struct cgroup_ops **ops)
cgroup_exit(*ops); cgroup_exit(*ops);
} }
/*
 * Attach process @pid to the cgroup of container @name under @lxcpath using
 * the container's cgroup2 file descriptor.
 *
 * Returns -1 if that file descriptor cannot be retrieved; otherwise returns
 * the result of attaching @pid through it (0 on success).
 */
extern int cgroup_attach(const char *name, const char *lxcpath, int64_t pid);
#define __do_cgroup_exit __attribute__((__cleanup__(__auto_cgroup_exit__))) #define __do_cgroup_exit __attribute__((__cleanup__(__auto_cgroup_exit__)))
#endif #endif
...@@ -131,19 +131,15 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd) ...@@ -131,19 +131,15 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd)
*/ */
static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd) static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd)
{ {
int ret, rspfd; __do_close_prot_errno int fd_rsp = -EBADF;
int ret;
struct lxc_cmd_rsp *rsp = &cmd->rsp; struct lxc_cmd_rsp *rsp = &cmd->rsp;
ret = lxc_abstract_unix_recv_fds(sock, &rspfd, 1, rsp, sizeof(*rsp)); ret = lxc_abstract_unix_recv_fds(sock, &fd_rsp, 1, rsp, sizeof(*rsp));
if (ret < 0) { if (ret < 0)
SYSWARN("Failed to receive response for command \"%s\"", return log_warn_errno(-1,
lxc_cmd_str(cmd->req.cmd)); errno, "Failed to receive response for command \"%s\"",
lxc_cmd_str(cmd->req.cmd));
if (errno == ECONNRESET)
return -1;
return -1;
}
TRACE("Command \"%s\" received response", lxc_cmd_str(cmd->req.cmd)); TRACE("Command \"%s\" received response", lxc_cmd_str(cmd->req.cmd));
if (cmd->req.cmd == LXC_CMD_CONSOLE) { if (cmd->req.cmd == LXC_CMD_CONSOLE) {
...@@ -156,33 +152,31 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd) ...@@ -156,33 +152,31 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd)
return 0; return 0;
rspdata = malloc(sizeof(*rspdata)); rspdata = malloc(sizeof(*rspdata));
if (!rspdata) { if (!rspdata)
errno = ENOMEM; return log_warn_errno(-1,
ERROR("Failed to allocate response buffer for command \"%s\"", ENOMEM, "Failed to receive response for command \"%s\"",
lxc_cmd_str(cmd->req.cmd)); lxc_cmd_str(cmd->req.cmd));
return -1;
}
rspdata->masterfd = rspfd; rspdata->masterfd = move_fd(fd_rsp);
rspdata->ttynum = PTR_TO_INT(rsp->data); rspdata->ttynum = PTR_TO_INT(rsp->data);
rsp->data = rspdata; rsp->data = rspdata;
} }
if (cmd->req.cmd == LXC_CMD_GET_CGROUP2_FD) if (cmd->req.cmd == LXC_CMD_GET_CGROUP2_FD) {
rsp->data = INT_TO_PTR(rspfd); int cgroup2_fd = move_fd(fd_rsp);
rsp->data = INT_TO_PTR(cgroup2_fd);
if (rsp->datalen == 0) {
DEBUG("Response data length for command \"%s\" is 0",
lxc_cmd_str(cmd->req.cmd));
return ret;
} }
if (rsp->datalen == 0)
return log_debug(ret,
"Response data length for command \"%s\" is 0",
lxc_cmd_str(cmd->req.cmd));
if ((rsp->datalen > LXC_CMD_DATA_MAX) && if ((rsp->datalen > LXC_CMD_DATA_MAX) &&
(cmd->req.cmd != LXC_CMD_CONSOLE_LOG)) { (cmd->req.cmd != LXC_CMD_CONSOLE_LOG))
ERROR("Response data for command \"%s\" is too long: %d bytes > %d", return log_error(-1, "Response data for command \"%s\" is too long: %d bytes > %d",
lxc_cmd_str(cmd->req.cmd), rsp->datalen, LXC_CMD_DATA_MAX); lxc_cmd_str(cmd->req.cmd), rsp->datalen,
return -1; LXC_CMD_DATA_MAX);
}
if (cmd->req.cmd == LXC_CMD_CONSOLE_LOG) { if (cmd->req.cmd == LXC_CMD_CONSOLE_LOG) {
rsp->data = malloc(rsp->datalen + 1); rsp->data = malloc(rsp->datalen + 1);
...@@ -190,19 +184,16 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd) ...@@ -190,19 +184,16 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd)
} else { } else {
rsp->data = malloc(rsp->datalen); rsp->data = malloc(rsp->datalen);
} }
if (!rsp->data) { if (!rsp->data)
errno = ENOMEM; return log_error_errno(-1,
ERROR("Failed to allocate response buffer for command \"%s\"", ENOMEM, "Failed to allocate response buffer for command \"%s\"",
lxc_cmd_str(cmd->req.cmd)); lxc_cmd_str(cmd->req.cmd));
return -1;
}
ret = lxc_recv_nointr(sock, rsp->data, rsp->datalen, 0); ret = lxc_recv_nointr(sock, rsp->data, rsp->datalen, 0);
if (ret != rsp->datalen) { if (ret != rsp->datalen)
SYSERROR("Failed to receive response data for command \"%s\"", return log_error_errno(-1,
lxc_cmd_str(cmd->req.cmd)); errno, "Failed to receive response data for command \"%s\"",
return -1; lxc_cmd_str(cmd->req.cmd));
}
return ret; return ret;
} }
...@@ -1305,8 +1296,11 @@ int lxc_cmd_get_cgroup2_fd(const char *name, const char *lxcpath) ...@@ -1305,8 +1296,11 @@ int lxc_cmd_get_cgroup2_fd(const char *name, const char *lxcpath)
}; };
ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL); ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
if (ret <= 0 || cmd.rsp.ret < 0) if (ret < 0)
return error_log_errno(errno, "Failed to retrieve cgroup2 fd"); return -1;
if (cmd.rsp.ret < 0)
return log_debug_errno(-1, errno, "Failed to receive cgroup2 fd");
return PTR_TO_INT(cmd.rsp.data); return PTR_TO_INT(cmd.rsp.data);
} }
...@@ -1361,10 +1355,9 @@ static int lxc_cmd_process(int fd, struct lxc_cmd_req *req, ...@@ -1361,10 +1355,9 @@ static int lxc_cmd_process(int fd, struct lxc_cmd_req *req,
[LXC_CMD_GET_CGROUP2_FD] = lxc_cmd_get_cgroup2_fd_callback, [LXC_CMD_GET_CGROUP2_FD] = lxc_cmd_get_cgroup2_fd_callback,
}; };
if (req->cmd >= LXC_CMD_MAX) { if (req->cmd >= LXC_CMD_MAX)
ERROR("Undefined command id %d", req->cmd); return log_error_errno(-1, ENOENT, "Undefined command id %d", req->cmd);
return -1;
}
return cb[req->cmd](fd, req, handler, descr); return cb[req->cmd](fd, req, handler, descr);
} }
......
...@@ -531,6 +531,25 @@ ATTR_UNUSED static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo, \ ...@@ -531,6 +531,25 @@ ATTR_UNUSED static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo, \
__ret__; \ __ret__; \
}) })
/*
 * Assign __errno__ to errno, log a warning through SYSWARN() and evaluate to
 * __ret__, so that a caller can log and return in a single statement.
 */
#define log_warn_errno(__ret__, __errno__, format, ...) \
({ \
errno = __errno__; \
SYSWARN(format, ##__VA_ARGS__); \
__ret__; \
})
/*
 * Log a debug message through DEBUG() and evaluate to __ret__, so that a
 * caller can log and return in a single statement. errno is not touched.
 */
#define log_debug(__ret__, format, ...) \
({ \
DEBUG(format, ##__VA_ARGS__); \
__ret__; \
})
/*
 * Assign __errno__ to errno, log a debug message through SYSDEBUG() and
 * evaluate to __ret__, so that a caller can log and return in a single
 * statement.
 *
 * errno is set before logging, matching log_warn_errno(); previously the
 * __errno__ argument was accepted but silently ignored.
 */
#define log_debug_errno(__ret__, __errno__, format, ...) \
	({                                                \
		errno = (__errno__);                      \
		SYSDEBUG(format, ##__VA_ARGS__);          \
		__ret__;                                  \
	})
extern int lxc_log_fd; extern int lxc_log_fd;
extern int lxc_log_syslog(int facility); extern int lxc_log_syslog(int facility);
......
...@@ -21,6 +21,10 @@ ...@@ -21,6 +21,10 @@
#ifndef __LXC_MACRO_H #ifndef __LXC_MACRO_H
#define __LXC_MACRO_H #define __LXC_MACRO_H
#ifndef _GNU_SOURCE
#define _GNU_SOURCE 1
#endif
#define __STDC_FORMAT_MACROS
#include <asm/types.h> #include <asm/types.h>
#include <limits.h> #include <limits.h>
#include <linux/if_link.h> #include <linux/if_link.h>
...@@ -39,6 +43,8 @@ ...@@ -39,6 +43,8 @@
#define PATH_MAX 4096 #define PATH_MAX 4096
#endif #endif
#define INT64_FMT "%" PRId64
/* Define __S_ISTYPE if missing from the C library. */ /* Define __S_ISTYPE if missing from the C library. */
#ifndef __S_ISTYPE #ifndef __S_ISTYPE
#define __S_ISTYPE(mode, mask) (((mode)&S_IFMT) == (mask)) #define __S_ISTYPE(mode, mask) (((mode)&S_IFMT) == (mask))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment