cgroups: stash host's cgroupfs file descriptor

parent 7414bc72
...@@ -3474,18 +3474,9 @@ static char *cg_unified_get_current_cgroup(bool relative) ...@@ -3474,18 +3474,9 @@ static char *cg_unified_get_current_cgroup(bool relative)
static int cg_unified_init(struct cgroup_ops *ops, bool relative, static int cg_unified_init(struct cgroup_ops *ops, bool relative,
bool unprivileged) bool unprivileged)
{ {
__do_close int cgroup_root_fd = -EBADF;
__do_free char *base_cgroup = NULL, *controllers_path = NULL; __do_free char *base_cgroup = NULL, *controllers_path = NULL;
__do_free_string_list char **delegatable = NULL; __do_free_string_list char **delegatable = NULL;
__do_free struct hierarchy *new = NULL; __do_free struct hierarchy *new = NULL;
int ret;
ret = unified_cgroup_hierarchy();
if (ret == -ENOMEDIUM)
return ret_errno(ENOMEDIUM);
if (ret != CGROUP2_SUPER_MAGIC)
return 0;
base_cgroup = cg_unified_get_current_cgroup(relative); base_cgroup = cg_unified_get_current_cgroup(relative);
if (!base_cgroup) if (!base_cgroup)
...@@ -3493,18 +3484,13 @@ static int cg_unified_init(struct cgroup_ops *ops, bool relative, ...@@ -3493,18 +3484,13 @@ static int cg_unified_init(struct cgroup_ops *ops, bool relative,
if (!relative) if (!relative)
prune_init_scope(base_cgroup); prune_init_scope(base_cgroup);
cgroup_root_fd = openat(-EBADF, DEFAULT_CGROUP_MOUNTPOINT,
O_NOCTTY | O_CLOEXEC | O_NOFOLLOW | O_DIRECTORY);
if (cgroup_root_fd < 0)
return -errno;
/* /*
* We assume that the cgroup we're currently in has been delegated to * We assume that the cgroup we're currently in has been delegated to
* us and we are free to further delege all of the controllers listed * us and we are free to further delege all of the controllers listed
* in cgroup.controllers further down the hierarchy. * in cgroup.controllers further down the hierarchy.
*/ */
controllers_path = must_make_path_relative(base_cgroup, "cgroup.controllers", NULL); controllers_path = must_make_path_relative(base_cgroup, "cgroup.controllers", NULL);
delegatable = cg_unified_get_controllers(cgroup_root_fd, controllers_path); delegatable = cg_unified_get_controllers(ops->dfd_mnt_cgroupfs_host, controllers_path);
if (!delegatable) if (!delegatable)
delegatable = cg_unified_make_empty_controller(); delegatable = cg_unified_make_empty_controller();
if (!delegatable[0]) if (!delegatable[0])
...@@ -3539,9 +3525,23 @@ static int cg_unified_init(struct cgroup_ops *ops, bool relative, ...@@ -3539,9 +3525,23 @@ static int cg_unified_init(struct cgroup_ops *ops, bool relative,
static int __cgroup_init(struct cgroup_ops *ops, struct lxc_conf *conf) static int __cgroup_init(struct cgroup_ops *ops, struct lxc_conf *conf)
{ {
__do_close int dfd = -EBADF;
bool relative = conf->cgroup_meta.relative;
int ret; int ret;
const char *tmp; const char *tmp;
bool relative = conf->cgroup_meta.relative;
if (ops->dfd_mnt_cgroupfs_host >= 0)
return ret_errno(EINVAL);
/*
* I don't see the need for allowing symlinks here. If users want to
* have their hierarchy available in different locations I strongly
* suggest bind-mounts.
*/
dfd = open_at(-EBADF, DEFAULT_CGROUP_MOUNTPOINT,
PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_ABSOLUTE_XDEV, 0);
if (dfd < 0)
return syserrno(-errno, "Failed to open " DEFAULT_CGROUP_MOUNTPOINT);
tmp = lxc_global_config_value("lxc.cgroup.use"); tmp = lxc_global_config_value("lxc.cgroup.use");
if (tmp) { if (tmp) {
...@@ -3555,14 +3555,23 @@ static int __cgroup_init(struct cgroup_ops *ops, struct lxc_conf *conf) ...@@ -3555,14 +3555,23 @@ static int __cgroup_init(struct cgroup_ops *ops, struct lxc_conf *conf)
must_append_string(&ops->cgroup_use, cur); must_append_string(&ops->cgroup_use, cur);
} }
ret = cg_unified_init(ops, relative, !lxc_list_empty(&conf->id_map)); /*
if (ret < 0) * Keep dfd referenced by the cleanup function and actually move the fd
return -1; * once we know the initialization succeeded. So if we fail we clean up
* the dfd.
*/
ops->dfd_mnt_cgroupfs_host = dfd;
if (ret == CGROUP2_SUPER_MAGIC) if (unified_cgroup_fd(dfd))
return 0; ret = cg_unified_init(ops, relative, !lxc_list_empty(&conf->id_map));
else
ret = cg_hybrid_init(ops, relative, !lxc_list_empty(&conf->id_map));
if (ret < 0)
return syserrno(ret, "Failed to initialize cgroups");
return cg_hybrid_init(ops, relative, !lxc_list_empty(&conf->id_map)); /* Transfer ownership to cgroup_ops. */
move_fd(dfd);
return 0;
} }
__cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops) __cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops)
...@@ -3589,6 +3598,7 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf) ...@@ -3589,6 +3598,7 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
return ret_set_errno(NULL, ENOMEM); return ret_set_errno(NULL, ENOMEM);
cgfsng_ops->cgroup_layout = CGROUP_LAYOUT_UNKNOWN; cgfsng_ops->cgroup_layout = CGROUP_LAYOUT_UNKNOWN;
cgfsng_ops->dfd_mnt_cgroupfs_host = -EBADF;
if (__cgroup_init(cgfsng_ops, conf)) if (__cgroup_init(cgfsng_ops, conf))
return NULL; return NULL;
......
...@@ -68,6 +68,9 @@ void cgroup_exit(struct cgroup_ops *ops) ...@@ -68,6 +68,9 @@ void cgroup_exit(struct cgroup_ops *ops)
if (ops->cgroup2_devices) if (ops->cgroup2_devices)
bpf_program_free(ops->cgroup2_devices); bpf_program_free(ops->cgroup2_devices);
if (ops->dfd_mnt_cgroupfs_host >= 0)
close(ops->dfd_mnt_cgroupfs_host);
for (struct hierarchy **it = ops->hierarchies; it && *it; it++) { for (struct hierarchy **it = ops->hierarchies; it && *it; it++) {
for (char **p = (*it)->controllers; p && *p; p++) for (char **p = (*it)->controllers; p && *p; p++)
free(*p); free(*p);
......
...@@ -106,6 +106,18 @@ struct cgroup_ops { ...@@ -106,6 +106,18 @@ struct cgroup_ops {
/* string constant */ /* string constant */
const char *version; const char *version;
/*
* File descriptor for the host's cgroupfs mount. On
* CGROUP_LAYOUT_LEGACY or CGROUP_LAYOUT_HYBRID hybrid systems
* @dfd_mnt_cgroupfs_host will be a tmpfs fd and the individual
* controllers will be cgroupfs fds. On CGROUP_LAYOUT_UNIFIED it will
* be a cgroupfs fd itself.
*
* So for CGROUP_LAYOUT_LEGACY or CGROUP_LAYOUT_HYBRID we allow
* mountpoint crossing iff we cross from a tmpfs into a cgroupfs mount.
* */
int dfd_mnt_cgroupfs_host;
/* What controllers is the container supposed to use. */ /* What controllers is the container supposed to use. */
char **cgroup_use; char **cgroup_use;
char *cgroup_pattern; char *cgroup_pattern;
......
...@@ -83,22 +83,6 @@ bool test_writeable_v2(char *mountpoint, char *path) ...@@ -83,22 +83,6 @@ bool test_writeable_v2(char *mountpoint, char *path)
return (access(cgroup_threads_file, W_OK) == 0); return (access(cgroup_threads_file, W_OK) == 0);
} }
int unified_cgroup_hierarchy(void)
{
int ret;
struct statfs fs;
ret = statfs(DEFAULT_CGROUP_MOUNTPOINT, &fs);
if (ret < 0)
return -ENOMEDIUM;
if (is_fs_type(&fs, CGROUP2_SUPER_MAGIC))
return CGROUP2_SUPER_MAGIC;
return 0;
}
int unified_cgroup_fd(int fd) int unified_cgroup_fd(int fd)
{ {
......
...@@ -29,8 +29,6 @@ __hidden extern bool test_writeable_v1(char *mountpoint, char *path); ...@@ -29,8 +29,6 @@ __hidden extern bool test_writeable_v1(char *mountpoint, char *path);
*/ */
__hidden extern bool test_writeable_v2(char *mountpoint, char *path); __hidden extern bool test_writeable_v2(char *mountpoint, char *path);
__hidden extern int unified_cgroup_hierarchy(void);
__hidden extern int unified_cgroup_fd(int fd); __hidden extern int unified_cgroup_fd(int fd);
static inline bool cgns_supported(void) static inline bool cgns_supported(void)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment