cgroups: rework cgroup tree creation

parent 6c880cdf
...@@ -859,6 +859,11 @@ __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops, ...@@ -859,6 +859,11 @@ __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops,
return; return;
} }
if (!ops->container_limit_cgroup) {
WARN("Uninitialized limit cgroup");
return;
}
#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX #ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
ret = bpf_program_cgroup_detach(handler->cgroup_ops->cgroup2_devices); ret = bpf_program_cgroup_detach(handler->cgroup_ops->cgroup2_devices);
if (ret < 0) if (ret < 0)
...@@ -1097,30 +1102,27 @@ static int __cgroup_tree_create(int dfd_base, const char *path, mode_t mode, ...@@ -1097,30 +1102,27 @@ static int __cgroup_tree_create(int dfd_base, const char *path, mode_t mode,
} }
static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf, static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
struct hierarchy *h, const char *cgroup_leaf, struct hierarchy *h, const char *cgroup_limit_dir,
const char *cgroup_limit_dir, bool payload) const char *cgroup_leaf, bool payload)
{ {
__do_close int fd_limit = -EBADF, fd_final = -EBADF; __do_close int fd_limit = -EBADF, fd_final = -EBADF;
__do_free char *path = NULL, *limit_path = NULL; __do_free char *path = NULL, *limit_path = NULL;
bool cpuset_v1 = false; bool cpuset_v1 = false;
/* Don't bother with all the rest if the final cgroup already exists. */
if (exists_dir_at(h->dfd_base, cgroup_leaf))
return syswarn(false, "The %d(%s) cgroup already existed", h->dfd_base, cgroup_leaf);
/* /*
* The legacy cpuset controller needs massaging in case inheriting * The legacy cpuset controller needs massaging in case inheriting
* settings from its immediate ancestor cgroup hasn't been turned on. * settings from its immediate ancestor cgroup hasn't been turned on.
*/ */
cpuset_v1 = !is_unified_hierarchy(h) && string_in_list(h->controllers, "cpuset"); cpuset_v1 = !is_unified_hierarchy(h) && string_in_list(h->controllers, "cpuset");
if (payload && cgroup_limit_dir) { if (payload && cgroup_leaf) {
/* With isolation both parts need to not already exist. */ /* With isolation both parts need to not already exist. */
fd_limit = __cgroup_tree_create(h->dfd_base, cgroup_limit_dir, 0755, cpuset_v1, false); fd_limit = __cgroup_tree_create(h->dfd_base, cgroup_limit_dir, 0755, cpuset_v1, false);
if (fd_limit < 0) if (fd_limit < 0)
return syserrno(false, "Failed to create limiting cgroup %d(%s)", h->dfd_base, cgroup_limit_dir); return syserrno(false, "Failed to create limiting cgroup %d(%s)", h->dfd_base, cgroup_limit_dir);
limit_path = must_make_path(h->mountpoint, h->container_base_path, cgroup_limit_dir, NULL); TRACE("Created limit cgroup %d->%d(%s)",
fd_limit, h->dfd_base, cgroup_limit_dir);
/* /*
* With isolation the devices legacy cgroup needs to be * With isolation the devices legacy cgroup needs to be
...@@ -1131,13 +1133,27 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf, ...@@ -1131,13 +1133,27 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
if (string_in_list(h->controllers, "devices") && if (string_in_list(h->controllers, "devices") &&
!ops->setup_limits_legacy(ops, conf, true)) !ops->setup_limits_legacy(ops, conf, true))
return log_error(false, "Failed to setup legacy device limits"); return log_error(false, "Failed to setup legacy device limits");
}
fd_final = __cgroup_tree_create(h->dfd_base, cgroup_leaf, 0755, cpuset_v1, false); limit_path = must_make_path(h->mountpoint, h->container_base_path, cgroup_limit_dir, NULL);
path = must_make_path(limit_path, cgroup_leaf, NULL);
/*
* If we use a separate limit cgroup, the leaf cgroup, i.e. the
* cgroup the container actually resides in, is below fd_limit.
*/
fd_final = __cgroup_tree_create(fd_limit, cgroup_leaf, 0755, cpuset_v1, false);
TRACE("Created container cgroup %d->%d(%s)",
fd_final, fd_limit, cgroup_leaf);
} else {
fd_final = __cgroup_tree_create(h->dfd_base, cgroup_limit_dir, 0755, cpuset_v1, false);
TRACE("Created %s cgroup %d->%d(%s)", payload ? "payload" : "monitor",
fd_final, h->dfd_base, cgroup_leaf);
path = must_make_path(h->mountpoint, h->container_base_path, cgroup_limit_dir, NULL);
}
if (fd_final < 0) if (fd_final < 0)
return syserrno(false, "Failed to create %s cgroup %d(%s)", payload ? "payload" : "monitor", h->dfd_base, cgroup_limit_dir); return syserrno(false, "Failed to create %s cgroup %d(%s)", payload ? "payload" : "monitor", h->dfd_base, cgroup_limit_dir);
path = must_make_path(h->mountpoint, h->container_base_path, cgroup_leaf, NULL);
if (payload) { if (payload) {
h->cgfd_con = move_fd(fd_final); h->cgfd_con = move_fd(fd_final);
h->container_full_path = move_ptr(path); h->container_full_path = move_ptr(path);
...@@ -1147,10 +1163,10 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf, ...@@ -1147,10 +1163,10 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
else else
h->cgfd_limit = move_fd(fd_limit); h->cgfd_limit = move_fd(fd_limit);
if (!limit_path) if (limit_path)
h->container_limit_path = h->container_full_path;
else
h->container_limit_path = move_ptr(limit_path); h->container_limit_path = move_ptr(limit_path);
else
h->container_limit_path = h->container_full_path;
} else { } else {
h->cgfd_mon = move_fd(fd_final); h->cgfd_mon = move_fd(fd_final);
h->monitor_full_path = move_ptr(path); h->monitor_full_path = move_ptr(path);
...@@ -1379,7 +1395,8 @@ __cgfsng_ops static bool cgfsng_monitor_create(struct cgroup_ops *ops, struct lx ...@@ -1379,7 +1395,8 @@ __cgfsng_ops static bool cgfsng_monitor_create(struct cgroup_ops *ops, struct lx
*/ */
__cgfsng_ops static bool cgfsng_payload_create(struct cgroup_ops *ops, struct lxc_handler *handler) __cgfsng_ops static bool cgfsng_payload_create(struct cgroup_ops *ops, struct lxc_handler *handler)
{ {
__do_free char *container_cgroup = NULL, *limiting_cgroup = NULL; __do_free char *container_cgroup = NULL, *__limit_cgroup = NULL;
char *limit_cgroup;
int idx = 0; int idx = 0;
int i; int i;
size_t len; size_t len;
...@@ -1404,23 +1421,26 @@ __cgfsng_ops static bool cgfsng_payload_create(struct cgroup_ops *ops, struct lx ...@@ -1404,23 +1421,26 @@ __cgfsng_ops static bool cgfsng_payload_create(struct cgroup_ops *ops, struct lx
return false; return false;
if (conf->cgroup_meta.container_dir) { if (conf->cgroup_meta.container_dir) {
limiting_cgroup = strdup(conf->cgroup_meta.container_dir); __limit_cgroup = strdup(conf->cgroup_meta.container_dir);
if (!limiting_cgroup) if (!__limit_cgroup)
return ret_set_errno(false, ENOMEM); return ret_set_errno(false, ENOMEM);
if (conf->cgroup_meta.namespace_dir) { if (conf->cgroup_meta.namespace_dir) {
container_cgroup = must_make_path(limiting_cgroup, container_cgroup = must_make_path(__limit_cgroup,
conf->cgroup_meta.namespace_dir, conf->cgroup_meta.namespace_dir,
NULL); NULL);
limit_cgroup = __limit_cgroup;
} else { } else {
/* explicit paths but without isolation */ /* explicit paths but without isolation */
container_cgroup = move_ptr(limiting_cgroup); limit_cgroup = move_ptr(__limit_cgroup);
container_cgroup = limit_cgroup;
} }
} else if (conf->cgroup_meta.dir) { } else if (conf->cgroup_meta.dir) {
container_cgroup = must_concat(&len, conf->cgroup_meta.dir, "/", limit_cgroup = must_concat(&len, conf->cgroup_meta.dir, "/",
DEFAULT_PAYLOAD_CGROUP_PREFIX, DEFAULT_PAYLOAD_CGROUP_PREFIX,
handler->name, handler->name,
CGROUP_CREATE_RETRY, NULL); CGROUP_CREATE_RETRY, NULL);
container_cgroup = limit_cgroup;
} else if (ops->cgroup_pattern) { } else if (ops->cgroup_pattern) {
__do_free char *cgroup_tree = NULL; __do_free char *cgroup_tree = NULL;
...@@ -1428,15 +1448,17 @@ __cgfsng_ops static bool cgfsng_payload_create(struct cgroup_ops *ops, struct lx ...@@ -1428,15 +1448,17 @@ __cgfsng_ops static bool cgfsng_payload_create(struct cgroup_ops *ops, struct lx
if (!cgroup_tree) if (!cgroup_tree)
return ret_set_errno(false, ENOMEM); return ret_set_errno(false, ENOMEM);
container_cgroup = must_concat(&len, cgroup_tree, "/", limit_cgroup = must_concat(&len, cgroup_tree, "/",
DEFAULT_PAYLOAD_CGROUP, DEFAULT_PAYLOAD_CGROUP,
CGROUP_CREATE_RETRY, NULL); CGROUP_CREATE_RETRY, NULL);
container_cgroup = limit_cgroup;
} else { } else {
container_cgroup = must_concat(&len, DEFAULT_PAYLOAD_CGROUP_PREFIX, limit_cgroup = must_concat(&len, DEFAULT_PAYLOAD_CGROUP_PREFIX,
handler->name, handler->name,
CGROUP_CREATE_RETRY, NULL); CGROUP_CREATE_RETRY, NULL);
container_cgroup = limit_cgroup;
} }
if (!container_cgroup) if (!limit_cgroup)
return ret_set_errno(false, ENOMEM); return ret_set_errno(false, ENOMEM);
if (!conf->cgroup_meta.container_dir) { if (!conf->cgroup_meta.container_dir) {
...@@ -1449,17 +1471,15 @@ __cgfsng_ops static bool cgfsng_payload_create(struct cgroup_ops *ops, struct lx ...@@ -1449,17 +1471,15 @@ __cgfsng_ops static bool cgfsng_payload_create(struct cgroup_ops *ops, struct lx
for (i = 0; ops->hierarchies[i]; i++) { for (i = 0; ops->hierarchies[i]; i++) {
if (cgroup_tree_create(ops, handler->conf, if (cgroup_tree_create(ops, handler->conf,
ops->hierarchies[i], ops->hierarchies[i], limit_cgroup,
container_cgroup, conf->cgroup_meta.namespace_dir,
limiting_cgroup,
true)) true))
continue; continue;
DEBUG("Failed to create cgroup \"%s\"", ops->hierarchies[i]->container_full_path ?: "(null)"); DEBUG("Failed to create cgroup \"%s\"", ops->hierarchies[i]->container_full_path ?: "(null)");
for (int j = 0; j <= i; j++) for (int j = 0; j <= i; j++)
cgroup_tree_prune_leaf(ops->hierarchies[j], cgroup_tree_prune_leaf(ops->hierarchies[j],
limiting_cgroup ?: container_cgroup, limit_cgroup, true);
true);
idx++; idx++;
break; break;
...@@ -1470,11 +1490,12 @@ __cgfsng_ops static bool cgfsng_payload_create(struct cgroup_ops *ops, struct lx ...@@ -1470,11 +1490,12 @@ __cgfsng_ops static bool cgfsng_payload_create(struct cgroup_ops *ops, struct lx
return log_error_errno(false, ERANGE, "Failed to create container cgroup"); return log_error_errno(false, ERANGE, "Failed to create container cgroup");
ops->container_cgroup = move_ptr(container_cgroup); ops->container_cgroup = move_ptr(container_cgroup);
if (limiting_cgroup) if (__limit_cgroup)
ops->container_limit_cgroup = move_ptr(limiting_cgroup); ops->container_limit_cgroup = move_ptr(__limit_cgroup);
else else
ops->container_limit_cgroup = ops->container_cgroup; ops->container_limit_cgroup = ops->container_cgroup;
INFO("The container process uses \"%s\" as cgroup", ops->container_cgroup); INFO("The container process uses \"%s\" as inner and \"%s\" as limit cgroup",
ops->container_cgroup, ops->container_limit_cgroup);
return true; return true;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment