cgroups: switch to fd-based cgroup mounting

parent c689b58a
...@@ -1759,50 +1759,72 @@ static int cg_legacy_mount_controllers(int type, struct hierarchy *h, ...@@ -1759,50 +1759,72 @@ static int cg_legacy_mount_controllers(int type, struct hierarchy *h,
* cgroups for the LXC_AUTO_CGROUP_FULL option. * cgroups for the LXC_AUTO_CGROUP_FULL option.
*/ */
static int __cg_mount_direct(int type, struct hierarchy *h, static int __cg_mount_direct(int type, struct hierarchy *h,
const char *controllerpath) struct lxc_rootfs *rootfs,
int dfd_mnt_cgroupfs, const char *hierarchy_mnt)
{ {
__do_free char *controllers = NULL; __do_free char *controllers = NULL;
char *fstype = "cgroup2"; unsigned long flags = 0;
unsigned long flags = 0; char *fstype;
int ret; int ret;
if (dfd_mnt_cgroupfs < 0)
return ret_errno(EINVAL);
flags |= MS_NOSUID;
flags |= MS_NOEXEC;
flags |= MS_NODEV;
flags |= MS_RELATIME;
flags |= MS_NOSUID; if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_FULL_RO)
flags |= MS_NOEXEC; flags |= MS_RDONLY;
flags |= MS_NODEV;
flags |= MS_RELATIME;
if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_FULL_RO) if (is_unified_hierarchy(h)) {
flags |= MS_RDONLY; fstype = "cgroup2";
} else {
fstype = "cgroup";
if (h->version != CGROUP2_SUPER_MAGIC) { controllers = lxc_string_join(",", (const char **)h->controllers, false);
controllers = lxc_string_join(",", (const char **)h->controllers, false); if (!controllers)
if (!controllers) return ret_errno(ENOMEM);
return -ENOMEM;
fstype = "cgroup";
} }
ret = mount("cgroup", controllerpath, fstype, flags, controllers); ret = mount_at(dfd_mnt_cgroupfs, NULL, hierarchy_mnt,
PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH, fstype,
flags, controllers);
if (ret < 0 && errno == ENOSYS) {
__do_free char *target = NULL;
const char *rootfs_mnt;
rootfs_mnt = get_rootfs_mnt(rootfs);
target = must_make_path(rootfs_mnt, DEFAULT_CGROUP_MOUNTPOINT, hierarchy_mnt, NULL);
ret = safe_mount(NULL, target, fstype, flags, controllers, rootfs_mnt);
}
if (ret < 0) if (ret < 0)
return log_error_errno(-1, errno, "Failed to mount \"%s\" with cgroup filesystem type %s", return log_error_errno(ret, errno, "Failed to mount %s filesystem onto %d(%s)",
controllerpath, fstype); fstype, dfd_mnt_cgroupfs, maybe_empty(hierarchy_mnt));
DEBUG("Mounted \"%s\" with cgroup filesystem type %s", controllerpath, fstype); DEBUG("Mounted cgroup filesystem %s onto %d(%s)",
fstype, dfd_mnt_cgroupfs, maybe_empty(hierarchy_mnt));
return 0; return 0;
} }
static inline int cg_mount_in_cgroup_namespace(int type, struct hierarchy *h, static inline int cg_mount_in_cgroup_namespace(int type, struct hierarchy *h,
const char *controllerpath) struct lxc_rootfs *rootfs,
int dfd_mnt_cgroupfs,
const char *hierarchy_mnt)
{ {
return __cg_mount_direct(type, h, controllerpath); return __cg_mount_direct(type, h, rootfs, dfd_mnt_cgroupfs, hierarchy_mnt);
} }
static inline int cg_mount_cgroup_full(int type, struct hierarchy *h, static inline int cg_mount_cgroup_full(int type, struct hierarchy *h,
const char *controllerpath) struct lxc_rootfs *rootfs,
int dfd_mnt_cgroupfs,
const char *hierarchy_mnt)
{ {
if (type < LXC_AUTO_CGROUP_FULL_RO || type > LXC_AUTO_CGROUP_FULL_MIXED) if (type < LXC_AUTO_CGROUP_FULL_RO || type > LXC_AUTO_CGROUP_FULL_MIXED)
return 0; return 0;
return __cg_mount_direct(type, h, controllerpath); return __cg_mount_direct(type, h, rootfs, dfd_mnt_cgroupfs, hierarchy_mnt);
} }
__cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops, __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
...@@ -1812,7 +1834,7 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops, ...@@ -1812,7 +1834,7 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
__do_free char *cgroup_root = NULL; __do_free char *cgroup_root = NULL;
bool has_cgns = false, wants_force_mount = false; bool has_cgns = false, wants_force_mount = false;
struct lxc_rootfs *rootfs = &conf->rootfs; struct lxc_rootfs *rootfs = &conf->rootfs;
const char *root = rootfs->path ? rootfs->mount : ""; const char *rootfs_mnt = get_rootfs_mnt(rootfs);
int ret; int ret;
if (!ops) if (!ops)
...@@ -1858,18 +1880,26 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops, ...@@ -1858,18 +1880,26 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
else if (type == LXC_AUTO_CGROUP_FULL_NOSPEC) else if (type == LXC_AUTO_CGROUP_FULL_NOSPEC)
type = LXC_AUTO_CGROUP_FULL_MIXED; type = LXC_AUTO_CGROUP_FULL_MIXED;
cgroup_root = must_make_path(root, DEFAULT_CGROUP_MOUNTPOINT, NULL); /* This is really the codepath that we want. */
if (ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED) { if (pure_unified_layout(ops)) {
dfd_mnt_cgroupfs = open_at(rootfs->mntpt_fd,
DEFAULT_CGROUP_MOUNTPOINT_RELATIVE,
PROTECT_OPATH_DIRECTORY,
PROTECT_LOOKUP_BENEATH_XDEV, 0);
if (dfd_mnt_cgroupfs < 0)
return log_error_errno(-errno, errno, "Failed to open %d(%s)",
rootfs->mntpt_fd, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
if (has_cgns && wants_force_mount) { if (has_cgns && wants_force_mount) {
/* /*
* If cgroup namespaces are supported but the container * If cgroup namespaces are supported but the container
* will not have CAP_SYS_ADMIN after it has started we * will not have CAP_SYS_ADMIN after it has started we
* need to mount the cgroups manually. * need to mount the cgroups manually.
*/ */
return cg_mount_in_cgroup_namespace(type, ops->unified, cgroup_root) == 0; return cg_mount_in_cgroup_namespace(type, ops->unified, rootfs, dfd_mnt_cgroupfs, "") == 0;
} }
return cg_mount_cgroup_full(type, ops->unified, cgroup_root) == 0; return cg_mount_cgroup_full(type, ops->unified, rootfs, dfd_mnt_cgroupfs, "") == 0;
} }
/* /*
...@@ -1881,18 +1911,16 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops, ...@@ -1881,18 +1911,16 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH_XDEV, PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH_XDEV,
"tmpfs", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, "tmpfs", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME,
"size=10240k,mode=755"); "size=10240k,mode=755");
if (ret < 0) { if (ret < 0 && errno == ENOSYS) {
if (errno != ENOSYS) cgroup_root = must_make_path(rootfs_mnt, DEFAULT_CGROUP_MOUNTPOINT, NULL);
return log_error_errno(false, errno,
"Failed to mount tmpfs on %s",
DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
ret = safe_mount(NULL, cgroup_root, "tmpfs", ret = safe_mount(NULL, cgroup_root, "tmpfs",
MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME,
"size=10240k,mode=755", root); "size=10240k,mode=755", rootfs_mnt);
} }
if (ret < 0) if (ret < 0)
return false; return log_error_errno(false, errno, "Failed to mount tmpfs on %s",
DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
dfd_mnt_cgroupfs = open_at(rootfs->mntpt_fd, dfd_mnt_cgroupfs = open_at(rootfs->mntpt_fd,
DEFAULT_CGROUP_MOUNTPOINT_RELATIVE, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE,
...@@ -1911,41 +1939,41 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops, ...@@ -1911,41 +1939,41 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
continue; continue;
controller++; controller++;
controllerpath = must_make_path(cgroup_root, controller, NULL);
if (dir_exists(controllerpath))
continue;
ret = mkdirat(dfd_mnt_cgroupfs, controller, 0000); ret = mkdirat(dfd_mnt_cgroupfs, controller, 0000);
if (ret < 0) if (ret < 0)
return log_error_errno(false, errno, "Error creating cgroup path: %s", controllerpath); return log_error_errno(false, errno, "Failed to create cgroup mountpoint %d(%s)", dfd_mnt_cgroupfs, controller);
if (has_cgns && wants_force_mount) { if (has_cgns && wants_force_mount) {
/* If cgroup namespaces are supported but the container /*
* If cgroup namespaces are supported but the container
* will not have CAP_SYS_ADMIN after it has started we * will not have CAP_SYS_ADMIN after it has started we
* need to mount the cgroups manually. * need to mount the cgroups manually.
*/ */
ret = cg_mount_in_cgroup_namespace(type, h, controllerpath); ret = cg_mount_in_cgroup_namespace(type, h, rootfs, dfd_mnt_cgroupfs, controller);
if (ret < 0) if (ret < 0)
return false; return false;
continue; continue;
} }
ret = cg_mount_cgroup_full(type, h, controllerpath); /* Here is where the ancient kernel section begins. */
ret = cg_mount_cgroup_full(type, h, rootfs, dfd_mnt_cgroupfs, controller);
if (ret < 0) if (ret < 0)
return false; return false;
if (!cg_mount_needs_subdirs(type)) if (!cg_mount_needs_subdirs(type))
continue; continue;
path2 = must_make_path(controllerpath, h->container_base_path, controllerpath = must_make_path(cgroup_root, controller, NULL);
ops->container_cgroup, NULL); if (dir_exists(controllerpath))
continue;
path2 = must_make_path(controllerpath, h->container_base_path, ops->container_cgroup, NULL);
ret = mkdir_p(path2, 0755); ret = mkdir_p(path2, 0755);
if (ret < 0) if (ret < 0)
return false; return false;
ret = cg_legacy_mount_controllers(type, h, controllerpath, ret = cg_legacy_mount_controllers(type, h, controllerpath, path2, ops->container_cgroup);
path2, ops->container_cgroup);
if (ret < 0) if (ret < 0)
return false; return false;
} }
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include "memory_utils.h" #include "memory_utils.h"
#include "ringbuf.h" #include "ringbuf.h"
#include "start.h" #include "start.h"
#include "string_utils.h"
#include "terminal.h" #include "terminal.h"
#if HAVE_SYS_RESOURCE_H #if HAVE_SYS_RESOURCE_H
...@@ -547,4 +548,11 @@ static inline int chown_mapped_root(const char *path, const struct lxc_conf *con ...@@ -547,4 +548,11 @@ static inline int chown_mapped_root(const char *path, const struct lxc_conf *con
__hidden int lxc_setup_devpts_parent(struct lxc_handler *handler); __hidden int lxc_setup_devpts_parent(struct lxc_handler *handler);
/*
 * Return the mount path recorded for the container's rootfs.
 *
 * When no rootfs path is configured (rootfs->path is NULL or the empty
 * string) the function returns "/" instead of rootfs->mount.
 */
static inline const char *get_rootfs_mnt(const struct lxc_rootfs *rootfs)
{
	if (is_empty_string(rootfs->path))
		return "/";

	return rootfs->mount;
}
#endif /* __LXC_CONF_H */ #endif /* __LXC_CONF_H */
...@@ -117,6 +117,8 @@ static inline bool is_empty_string(const char *s) ...@@ -117,6 +117,8 @@ static inline bool is_empty_string(const char *s)
return !s || strcmp(s, "") == 0; return !s || strcmp(s, "") == 0;
} }
/*
 * Yield the literal "(null)" in place of a NULL or empty string, otherwise the
 * string itself. The argument may be evaluated twice, so avoid side effects.
 */
#define maybe_empty(s) (is_empty_string(s) ? "(null)" : (s))
static inline ssize_t safe_strlcat(char *src, const char *append, size_t len) static inline ssize_t safe_strlcat(char *src, const char *append, size_t len)
{ {
size_t new_len; size_t new_len;
......
...@@ -1231,9 +1231,6 @@ int mount_at(int dfd, ...@@ -1231,9 +1231,6 @@ int mount_at(int dfd,
if (!is_empty_string(src_buf) && *src_buf == '/') if (!is_empty_string(src_buf) && *src_buf == '/')
return log_error_errno(-EINVAL, EINVAL, "Absolute path specified"); return log_error_errno(-EINVAL, EINVAL, "Absolute path specified");
if (is_empty_string(dst_under_dfd))
return log_error_errno(-EINVAL, EINVAL, "No target path specified");
if (!is_empty_string(src_under_dfd)) { if (!is_empty_string(src_under_dfd)) {
source_fd = openat2(dfd, src_under_dfd, &how, sizeof(how)); source_fd = openat2(dfd, src_under_dfd, &how, sizeof(how));
if (source_fd < 0) if (source_fd < 0)
...@@ -1244,11 +1241,17 @@ int mount_at(int dfd, ...@@ -1244,11 +1241,17 @@ int mount_at(int dfd,
return -EIO; return -EIO;
} }
target_fd = openat2(dfd, dst_under_dfd, &how, sizeof(how)); if (!is_empty_string(dst_under_dfd)) {
if (target_fd < 0) target_fd = openat2(dfd, dst_under_dfd, &how, sizeof(how));
return log_error_errno(-errno, errno, "Failed to open %d(%s)", dfd, dst_under_dfd); if (target_fd < 0)
return log_error_errno(-errno, errno, "Failed to open %d(%s)", dfd, dst_under_dfd);
ret = snprintf(dst_buf, sizeof(dst_buf), "/proc/self/fd/%d", target_fd); TRACE("Mounting %d(%s) through /proc/self/fd/%d", target_fd, dst_under_dfd, target_fd);
ret = snprintf(dst_buf, sizeof(dst_buf), "/proc/self/fd/%d", target_fd);
} else {
TRACE("Mounting %d through /proc/self/fd/%d", dfd, dfd);
ret = snprintf(dst_buf, sizeof(dst_buf), "/proc/self/fd/%d", dfd);
}
if (ret < 0 || ret >= sizeof(dst_buf)) if (ret < 0 || ret >= sizeof(dst_buf))
return -EIO; return -EIO;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment