Merge pull request #3680 from brauner/2021-02-17/cgroups_2

cgroups: fourth batch of cgroup fixes

Merge pull request #3680 from brauner/2021-02-17/cgroups_2
cca7d405 · Stéphane Graber · GitHub · b3ad27fb · 77410c98 · cca7d405
Unverified Commit cca7d405 authored Feb 17, 2021 by Stéphane Graber Committed by GitHub Feb 17, 2021
Hide whitespace changes
Inline Side-by-side

Showing with 80 additions and 38 deletions

cgfsng.c src/lxc/cgroups/cgfsng.c +79 -37

conf.c src/lxc/conf.c +1 -1

No files found.
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -1142,8 +1142,13 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
 		 * cgroup the container actually resides in, is below fd_limit.
 		 */
 		fd_final = __cgroup_tree_create(fd_limit, cgroup_leaf, 0755, cpuset_v1, false);
-		if (fd_final < 0) /* Ensure we don't leave any garbage behind. */
-			cgroup_tree_prune(h->dfd_base, cgroup_limit_dir);
+		if (fd_final < 0) {
+			/* Ensure we don't leave any garbage behind. */
+			if (cgroup_tree_prune(h->dfd_base, cgroup_limit_dir))
+				SYSWARN("Failed to destroy %d(%s)", h->dfd_base, cgroup_limit_dir);
+			else
+				TRACE("Removed cgroup tree %d(%s)", h->dfd_base, cgroup_limit_dir);
+		}
 	} else {
 		path = must_make_path(h->mountpoint, h->container_base_path, cgroup_limit_dir, NULL);

@@ -1767,7 +1772,16 @@ __cgfsng_ops static void cgfsng_payload_finalize(struct cgroup_ops *ops)
 /* cgroup-full:* is done, no need to create subdirs */
 static inline bool cg_mount_needs_subdirs(int cg_flags)
 {
-	return !(cg_flags >= LXC_AUTO_CGROUP_FULL_RO);
+	switch (cg_flags) {
+	case LXC_AUTO_CGROUP_RO:
+		return true;
+	case LXC_AUTO_CGROUP_RW:
+		return true;
+	case LXC_AUTO_CGROUP_MIXED:
+		return true;
+	}
+
+	return false;
 }

 /* After $rootfs/sys/fs/container/controller/the/cg/path has been created,
@@ -1782,7 +1796,7 @@ static int cg_legacy_mount_controllers(int cg_flags, struct hierarchy *h,
 	int ret, remount_flags;
 	int flags = MS_BIND;

-	if ((cg_flags & LXC_AUTO_CGROUP_RO) || (cg_flags & LXC_AUTO_CGROUP_MIXED)) {
+	if ((cg_flags == LXC_AUTO_CGROUP_RO) || (cg_flags == LXC_AUTO_CGROUP_MIXED)) {
 		ret = mount(controllerpath, controllerpath, "cgroup", MS_BIND, NULL);
 		if (ret < 0)
 			return log_error_errno(-1, errno, "Failed to bind mount \"%s\" onto \"%s\"",
@@ -1802,7 +1816,7 @@ static int cg_legacy_mount_controllers(int cg_flags, struct hierarchy *h,

 	sourcepath = must_make_path(h->mountpoint, h->container_base_path,
 				    container_cgroup, NULL);
-	if ((cg_flags & LXC_AUTO_CGROUP_RO))
+	if (cg_flags == LXC_AUTO_CGROUP_RO)
 		flags |= MS_RDONLY;

 	ret = mount(sourcepath, cgpath, "cgroup", flags, NULL);
@@ -1847,8 +1861,8 @@ static int __cgroupfs_mount(int cg_flags, struct hierarchy *h,
 	flags |= MOUNT_ATTR_NODEV;
 	flags |= MOUNT_ATTR_RELATIME;

-	if ((cg_flags & LXC_AUTO_CGROUP_RO) ||
-	    (cg_flags & LXC_AUTO_CGROUP_FULL_RO))
+	if ((cg_flags == LXC_AUTO_CGROUP_RO) ||
+	    (cg_flags == LXC_AUTO_CGROUP_FULL_RO))
 		flags |= MOUNT_ATTR_RDONLY;

 	if (is_unified_hierarchy(h)) {
@@ -1916,9 +1930,16 @@ static inline int cgroupfs_bind_mount(int cg_flags, struct hierarchy *h,
 				      int dfd_mnt_cgroupfs,
 				      const char *hierarchy_mnt)
 {
-	if (!(cg_flags & LXC_AUTO_CGROUP_FULL_RO) &&
-	    !(cg_flags & LXC_AUTO_CGROUP_FULL_MIXED))
+	switch (cg_flags) {
+	case LXC_AUTO_CGROUP_FULL_RO:
+		break;
+	case LXC_AUTO_CGROUP_FULL_RW:
+		break;
+	case LXC_AUTO_CGROUP_FULL_MIXED:
+		break;
+	default:
 		return 0;
+	}

 	return __cgroupfs_mount(cg_flags, h, rootfs, dfd_mnt_cgroupfs, hierarchy_mnt);
 }
@@ -1926,7 +1947,7 @@ static inline int cgroupfs_bind_mount(int cg_flags, struct hierarchy *h,
 __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
 				      struct lxc_handler *handler, int cg_flags)
 {
-	__do_close int dfd_mnt_cgroupfs = -EBADF, fd_fs = -EBADF;
+	__do_close int dfd_mnt_tmpfs = -EBADF, fd_fs = -EBADF;
 	__do_free char *cgroup_root = NULL;
 	bool in_cgroup_ns = false, wants_force_mount = false;
 	struct lxc_conf *conf = handler->conf;
@@ -1946,8 +1967,33 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
 	if ((cg_flags & LXC_AUTO_CGROUP_MASK) == 0)
 		return log_trace(true, "No cgroup mounts requested");

-	if (cg_flags & LXC_AUTO_CGROUP_FORCE)
+	if (cg_flags & LXC_AUTO_CGROUP_FORCE) {
+		cg_flags &= ~LXC_AUTO_CGROUP_FORCE;
 		wants_force_mount = true;
+	}
+
+	switch (cg_flags) {
+	case LXC_AUTO_CGROUP_RO:
+		TRACE("Read-only cgroup mounts requested");
+		break;
+	case LXC_AUTO_CGROUP_RW:
+		TRACE("Read-write cgroup mounts requested");
+		break;
+	case LXC_AUTO_CGROUP_MIXED:
+		TRACE("Mixed cgroup mounts requested");
+		break;
+	case LXC_AUTO_CGROUP_FULL_RO:
+		TRACE("Full read-only cgroup mounts requested");
+		break;
+	case LXC_AUTO_CGROUP_FULL_RW:
+		TRACE("Full read-write cgroup mounts requested");
+		break;
+	case LXC_AUTO_CGROUP_FULL_MIXED:
+		TRACE("Full mixed cgroup mounts requested");
+		break;
+	default:
+		return log_error_errno(false, EINVAL, "Invalid cgroup mount options specified");
+	}

 	if (!wants_force_mount) {
 		wants_force_mount = !lxc_wants_cap(CAP_SYS_ADMIN, conf);
@@ -1982,25 +2028,15 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
 	if (in_cgroup_ns && !wants_force_mount)
 		return log_trace(true, "Mounting cgroups not requested or needed");

-	/*
-	 * Fallback to a mixed layout when the user did not specify what cgroup
-	 * layout they want.
-	 */
-	if ((cg_flags & LXC_AUTO_CGROUP_NOSPEC))
-		cg_flags = LXC_AUTO_CGROUP_MIXED;
-	else if (cg_flags & LXC_AUTO_CGROUP_FULL_NOSPEC)
-		cg_flags = LXC_AUTO_CGROUP_FULL_MIXED;
-
-	dfd_mnt_cgroupfs = open_at(rootfs->dfd_mnt,
-				   DEFAULT_CGROUP_MOUNTPOINT_RELATIVE,
-				   PROTECT_OPATH_DIRECTORY,
-				   PROTECT_LOOKUP_BENEATH_XDEV, 0);
-	if (dfd_mnt_cgroupfs < 0)
-		return syserrno(-errno, "Failed to open %d(%s)", rootfs->dfd_mnt,
-				DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
-
 	/* This is really the codepath that we want. */
 	if (pure_unified_layout(ops)) {
+		__do_close int dfd_mnt_unified = -EBADF;
+
+		dfd_mnt_unified = open_at(rootfs->dfd_mnt, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE,
+					  PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH_XDEV, 0);
+		if (dfd_mnt_unified < 0)
+			return syserrno(-errno, "Failed to open %d(%s)", rootfs->dfd_mnt,
+					DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
 		/*
 		 * If cgroup namespaces are supported but the container will
 		 * not have CAP_SYS_ADMIN after it has started we need to mount
@@ -2031,7 +2067,7 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
 			 * 11. cgroup-full:ro:force    -> Not supported.
 			 * 12. cgroup-full:mixed:force -> Not supported.
 			 */
-			ret = cgroupfs_mount(cg_flags, ops->unified, rootfs, dfd_mnt_cgroupfs, "");
+			ret = cgroupfs_mount(cg_flags, ops->unified, rootfs, dfd_mnt_unified, "");
 			if (ret < 0)
 				return syserrno(false, "Failed to force mount cgroup filesystem in cgroup namespace");

@@ -2099,6 +2135,12 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
 		return log_error_errno(false, errno, "Failed to mount tmpfs on %s",
 				       DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);

+	dfd_mnt_tmpfs = open_at(rootfs->dfd_mnt, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE,
+				PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH_XDEV, 0);
+	if (dfd_mnt_tmpfs < 0)
+		return syserrno(-errno, "Failed to open %d(%s)", rootfs->dfd_mnt,
+				DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
+
 	for (int i = 0; ops->hierarchies[i]; i++) {
 		__do_free char *controllerpath = NULL, *path2 = NULL;
 		struct hierarchy *h = ops->hierarchies[i];
@@ -2108,9 +2150,9 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
 			continue;
 		controller++;

-		ret = mkdirat(dfd_mnt_cgroupfs, controller, 0000);
+		ret = mkdirat(dfd_mnt_tmpfs, controller, 0000);
 		if (ret < 0)
-			return log_error_errno(false, errno, "Failed to create cgroup mountpoint %d(%s)", dfd_mnt_cgroupfs, controller);
+			return log_error_errno(false, errno, "Failed to create cgroup mountpoint %d(%s)", dfd_mnt_tmpfs, controller);

 		if (in_cgroup_ns && wants_force_mount) {
 			/*
@@ -2118,7 +2160,7 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
 			 * will not have CAP_SYS_ADMIN after it has started we
 			 * need to mount the cgroups manually.
 			 */
-			ret = cgroupfs_mount(cg_flags, h, rootfs, dfd_mnt_cgroupfs, controller);
+			ret = cgroupfs_mount(cg_flags, h, rootfs, dfd_mnt_tmpfs, controller);
 			if (ret < 0)
 				return false;

@@ -2126,20 +2168,20 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
 		}

 		/* Here is where the ancient kernel section begins. */
-		ret = cgroupfs_bind_mount(cg_flags, h, rootfs, dfd_mnt_cgroupfs, controller);
+		ret = cgroupfs_bind_mount(cg_flags, h, rootfs, dfd_mnt_tmpfs, controller);
 		if (ret < 0)
 			return false;

 		if (!cg_mount_needs_subdirs(cg_flags))
 			continue;

-		controllerpath = must_make_path(cgroup_root, controller, NULL);
-		if (dir_exists(controllerpath))
-			continue;
+		if (!cgroup_root)
+			cgroup_root = must_make_path(rootfs_mnt, DEFAULT_CGROUP_MOUNTPOINT, NULL);

+		controllerpath = must_make_path(cgroup_root, controller, NULL);
 		path2 = must_make_path(controllerpath, h->container_base_path, ops->container_cgroup, NULL);
 		ret = mkdir_p(path2, 0755);
-		if (ret < 0)
+		if (ret < 0 && (errno != EEXIST))
 			return false;

 		ret = cg_legacy_mount_controllers(cg_flags, h, controllerpath, path2, ops->container_cgroup);

--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -687,7 +687,7 @@ static int lxc_mount_auto_mounts(struct lxc_handler *handler, int flags)
 		 * CAP_SYS_ADMIN, do only default to :mixed, because then the
 		 * container can't remount it read-write.
 		 */
-		if (cg_flags == LXC_AUTO_CGROUP_NOSPEC || cg_flags == LXC_AUTO_CGROUP_FULL_NOSPEC) {
+		if ((cg_flags == LXC_AUTO_CGROUP_NOSPEC) || (cg_flags == LXC_AUTO_CGROUP_FULL_NOSPEC)) {
 			int has_sys_admin = 0;

 			if (!lxc_list_empty(&conf->keepcaps))