cgroups: switch to fd-based cgroup mounting

parent c689b58a
......@@ -1759,50 +1759,72 @@ static int cg_legacy_mount_controllers(int type, struct hierarchy *h,
* cgroups for the LXC_AUTO_CGROUP_FULL option.
*/
static int __cg_mount_direct(int type, struct hierarchy *h,
const char *controllerpath)
struct lxc_rootfs *rootfs,
int dfd_mnt_cgroupfs, const char *hierarchy_mnt)
{
__do_free char *controllers = NULL;
char *fstype = "cgroup2";
unsigned long flags = 0;
int ret;
__do_free char *controllers = NULL;
unsigned long flags = 0;
char *fstype;
int ret;
if (dfd_mnt_cgroupfs < 0)
return ret_errno(EINVAL);
flags |= MS_NOSUID;
flags |= MS_NOEXEC;
flags |= MS_NODEV;
flags |= MS_RELATIME;
flags |= MS_NOSUID;
flags |= MS_NOEXEC;
flags |= MS_NODEV;
flags |= MS_RELATIME;
if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_FULL_RO)
flags |= MS_RDONLY;
if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_FULL_RO)
flags |= MS_RDONLY;
if (is_unified_hierarchy(h)) {
fstype = "cgroup2";
} else {
fstype = "cgroup";
if (h->version != CGROUP2_SUPER_MAGIC) {
controllers = lxc_string_join(",", (const char **)h->controllers, false);
if (!controllers)
return -ENOMEM;
fstype = "cgroup";
controllers = lxc_string_join(",", (const char **)h->controllers, false);
if (!controllers)
return ret_errno(ENOMEM);
}
ret = mount("cgroup", controllerpath, fstype, flags, controllers);
ret = mount_at(dfd_mnt_cgroupfs, NULL, hierarchy_mnt,
PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH, fstype,
flags, controllers);
if (ret < 0 && errno == ENOSYS) {
__do_free char *target = NULL;
const char *rootfs_mnt;
rootfs_mnt = get_rootfs_mnt(rootfs);
target = must_make_path(rootfs_mnt, DEFAULT_CGROUP_MOUNTPOINT, hierarchy_mnt, NULL);
ret = safe_mount(NULL, target, fstype, flags, controllers, rootfs_mnt);
}
if (ret < 0)
return log_error_errno(-1, errno, "Failed to mount \"%s\" with cgroup filesystem type %s",
controllerpath, fstype);
return log_error_errno(ret, errno, "Failed to mount %s filesystem onto %d(%s)",
fstype, dfd_mnt_cgroupfs, maybe_empty(hierarchy_mnt));
DEBUG("Mounted \"%s\" with cgroup filesystem type %s", controllerpath, fstype);
DEBUG("Mounted cgroup filesystem %s onto %d(%s)",
fstype, dfd_mnt_cgroupfs, maybe_empty(hierarchy_mnt));
return 0;
}
static inline int cg_mount_in_cgroup_namespace(int type, struct hierarchy *h,
const char *controllerpath)
struct lxc_rootfs *rootfs,
int dfd_mnt_cgroupfs,
const char *hierarchy_mnt)
{
return __cg_mount_direct(type, h, controllerpath);
return __cg_mount_direct(type, h, rootfs, dfd_mnt_cgroupfs, hierarchy_mnt);
}
static inline int cg_mount_cgroup_full(int type, struct hierarchy *h,
const char *controllerpath)
struct lxc_rootfs *rootfs,
int dfd_mnt_cgroupfs,
const char *hierarchy_mnt)
{
if (type < LXC_AUTO_CGROUP_FULL_RO || type > LXC_AUTO_CGROUP_FULL_MIXED)
return 0;
return __cg_mount_direct(type, h, controllerpath);
return __cg_mount_direct(type, h, rootfs, dfd_mnt_cgroupfs, hierarchy_mnt);
}
__cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
......@@ -1812,7 +1834,7 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
__do_free char *cgroup_root = NULL;
bool has_cgns = false, wants_force_mount = false;
struct lxc_rootfs *rootfs = &conf->rootfs;
const char *root = rootfs->path ? rootfs->mount : "";
const char *rootfs_mnt = get_rootfs_mnt(rootfs);
int ret;
if (!ops)
......@@ -1858,18 +1880,26 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
else if (type == LXC_AUTO_CGROUP_FULL_NOSPEC)
type = LXC_AUTO_CGROUP_FULL_MIXED;
cgroup_root = must_make_path(root, DEFAULT_CGROUP_MOUNTPOINT, NULL);
if (ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED) {
/* This is really the codepath that we want. */
if (pure_unified_layout(ops)) {
dfd_mnt_cgroupfs = open_at(rootfs->mntpt_fd,
DEFAULT_CGROUP_MOUNTPOINT_RELATIVE,
PROTECT_OPATH_DIRECTORY,
PROTECT_LOOKUP_BENEATH_XDEV, 0);
if (dfd_mnt_cgroupfs < 0)
return log_error_errno(-errno, errno, "Failed to open %d(%s)",
rootfs->mntpt_fd, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
if (has_cgns && wants_force_mount) {
/*
* If cgroup namespaces are supported but the container
* will not have CAP_SYS_ADMIN after it has started we
* need to mount the cgroups manually.
*/
return cg_mount_in_cgroup_namespace(type, ops->unified, cgroup_root) == 0;
return cg_mount_in_cgroup_namespace(type, ops->unified, rootfs, dfd_mnt_cgroupfs, "") == 0;
}
return cg_mount_cgroup_full(type, ops->unified, cgroup_root) == 0;
return cg_mount_cgroup_full(type, ops->unified, rootfs, dfd_mnt_cgroupfs, "") == 0;
}
/*
......@@ -1881,18 +1911,16 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH_XDEV,
"tmpfs", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME,
"size=10240k,mode=755");
if (ret < 0) {
if (errno != ENOSYS)
return log_error_errno(false, errno,
"Failed to mount tmpfs on %s",
DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
if (ret < 0 && errno == ENOSYS) {
cgroup_root = must_make_path(rootfs_mnt, DEFAULT_CGROUP_MOUNTPOINT, NULL);
ret = safe_mount(NULL, cgroup_root, "tmpfs",
MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME,
"size=10240k,mode=755", root);
"size=10240k,mode=755", rootfs_mnt);
}
if (ret < 0)
return false;
return log_error_errno(false, errno, "Failed to mount tmpfs on %s",
DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
dfd_mnt_cgroupfs = open_at(rootfs->mntpt_fd,
DEFAULT_CGROUP_MOUNTPOINT_RELATIVE,
......@@ -1911,41 +1939,41 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
continue;
controller++;
controllerpath = must_make_path(cgroup_root, controller, NULL);
if (dir_exists(controllerpath))
continue;
ret = mkdirat(dfd_mnt_cgroupfs, controller, 0000);
if (ret < 0)
return log_error_errno(false, errno, "Error creating cgroup path: %s", controllerpath);
return log_error_errno(false, errno, "Failed to create cgroup mountpoint %d(%s)", dfd_mnt_cgroupfs, controller);
if (has_cgns && wants_force_mount) {
/* If cgroup namespaces are supported but the container
/*
* If cgroup namespaces are supported but the container
* will not have CAP_SYS_ADMIN after it has started we
* need to mount the cgroups manually.
*/
ret = cg_mount_in_cgroup_namespace(type, h, controllerpath);
ret = cg_mount_in_cgroup_namespace(type, h, rootfs, dfd_mnt_cgroupfs, controller);
if (ret < 0)
return false;
continue;
}
ret = cg_mount_cgroup_full(type, h, controllerpath);
/* Here is where the ancient kernel section begins. */
ret = cg_mount_cgroup_full(type, h, rootfs, dfd_mnt_cgroupfs, controller);
if (ret < 0)
return false;
if (!cg_mount_needs_subdirs(type))
continue;
path2 = must_make_path(controllerpath, h->container_base_path,
ops->container_cgroup, NULL);
controllerpath = must_make_path(cgroup_root, controller, NULL);
if (dir_exists(controllerpath))
continue;
path2 = must_make_path(controllerpath, h->container_base_path, ops->container_cgroup, NULL);
ret = mkdir_p(path2, 0755);
if (ret < 0)
return false;
ret = cg_legacy_mount_controllers(type, h, controllerpath,
path2, ops->container_cgroup);
ret = cg_legacy_mount_controllers(type, h, controllerpath, path2, ops->container_cgroup);
if (ret < 0)
return false;
}
......
......@@ -23,6 +23,7 @@
#include "memory_utils.h"
#include "ringbuf.h"
#include "start.h"
#include "string_utils.h"
#include "terminal.h"
#if HAVE_SYS_RESOURCE_H
......@@ -547,4 +548,11 @@ static inline int chown_mapped_root(const char *path, const struct lxc_conf *con
__hidden int lxc_setup_devpts_parent(struct lxc_handler *handler);
static inline const char *get_rootfs_mnt(const struct lxc_rootfs *rootfs)
{
static const char *s = "/";
return !is_empty_string(rootfs->path) ? rootfs->mount : s;
}
#endif /* __LXC_CONF_H */
......@@ -117,6 +117,8 @@ static inline bool is_empty_string(const char *s)
return !s || strcmp(s, "") == 0;
}
#define maybe_empty(s) ((!is_empty_string(s)) ? (s) : ("(null)"))
static inline ssize_t safe_strlcat(char *src, const char *append, size_t len)
{
size_t new_len;
......
......@@ -1231,9 +1231,6 @@ int mount_at(int dfd,
if (!is_empty_string(src_buf) && *src_buf == '/')
return log_error_errno(-EINVAL, EINVAL, "Absolute path specified");
if (is_empty_string(dst_under_dfd))
return log_error_errno(-EINVAL, EINVAL, "No target path specified");
if (!is_empty_string(src_under_dfd)) {
source_fd = openat2(dfd, src_under_dfd, &how, sizeof(how));
if (source_fd < 0)
......@@ -1244,11 +1241,17 @@ int mount_at(int dfd,
return -EIO;
}
target_fd = openat2(dfd, dst_under_dfd, &how, sizeof(how));
if (target_fd < 0)
return log_error_errno(-errno, errno, "Failed to open %d(%s)", dfd, dst_under_dfd);
if (!is_empty_string(dst_under_dfd)) {
target_fd = openat2(dfd, dst_under_dfd, &how, sizeof(how));
if (target_fd < 0)
return log_error_errno(-errno, errno, "Failed to open %d(%s)", dfd, dst_under_dfd);
ret = snprintf(dst_buf, sizeof(dst_buf), "/proc/self/fd/%d", target_fd);
TRACE("Mounting %d(%s) through /proc/self/fd/%d", target_fd, dst_under_dfd, target_fd);
ret = snprintf(dst_buf, sizeof(dst_buf), "/proc/self/fd/%d", target_fd);
} else {
TRACE("Mounting %d through /proc/self/fd/%d", dfd, dfd);
ret = snprintf(dst_buf, sizeof(dst_buf), "/proc/self/fd/%d", dfd);
}
if (ret < 0 || ret >= sizeof(dst_buf))
return -EIO;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment