Merge pull request #3675 from brauner/2021-02-16/fixes

cgroups: second batch of cgroup fixes

Merge pull request #3675 from brauner/2021-02-16/fixes
858f6225 · Stéphane Graber · GitHub · 136b349c · 060e54d6 · 858f6225
Unverified commit 858f6225, authored Feb 16, 2021 by Stéphane Graber; committed by GitHub on Feb 16, 2021
9 changed files
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
--- a/src/lxc/cgroups/cgroup.c
+++ b/src/lxc/cgroups/cgroup.c
@@ -33,10 +33,14 @@ struct cgroup_ops *cgroup_init(struct lxc_conf *conf)
 	if (!cgroup_ops)
 		return log_error_errno(NULL, errno, "Failed to initialize cgroup driver");
+	if (!cgroup_ops->hierarchies) {
+		cgroup_exit(cgroup_ops);
+		return log_error_errno(NULL, ENOENT, "No cgroup hierarchies found");
+	}
 	if (cgroup_ops->data_init(cgroup_ops)) {
 		cgroup_exit(cgroup_ops);
-		return log_error_errno(NULL, errno,
+		return log_error_errno(NULL, errno, "Failed to initialize cgroup data");
-				       "Failed to initialize cgroup data");
 	}
 	TRACE("Initialized cgroup driver %s", cgroup_ops->driver);
@@ -68,6 +72,9 @@ void cgroup_exit(struct cgroup_ops *ops)
 	if (ops->cgroup2_devices)
 		bpf_program_free(ops->cgroup2_devices);
+	if (ops->dfd_mnt_cgroupfs_host >= 0)
+		close(ops->dfd_mnt_cgroupfs_host);
 	for (struct hierarchy **it = ops->hierarchies; it && *it; it++) {
 		for (char **p = (*it)->controllers; p && *p; p++)
 			free(*p);
@@ -79,12 +86,34 @@ void cgroup_exit(struct cgroup_ops *ops)
 		free((*it)->mountpoint);
 		free((*it)->container_base_path);
-		free((*it)->container_full_path);
-		free((*it)->monitor_full_path);
+		{
-		if ((*it)->cgfd_con >= 0)
+			free((*it)->container_full_path);
-			close((*it)->cgfd_con);
+			if ((*it)->container_full_path != (*it)->container_limit_path)
+				free((*it)->monitor_full_path);
+		}
+		{
+			if ((*it)->cgfd_limit >= 0 && (*it)->cgfd_con != (*it)->cgfd_limit)
+				close((*it)->cgfd_limit);
+			if ((*it)->cgfd_con >= 0)
+				close((*it)->cgfd_con);
+		}
 		if ((*it)->cgfd_mon >= 0)
 			close((*it)->cgfd_mon);
+		{
+			if ((*it)->dfd_base >= 0 && (*it)->dfd_mnt != (*it)->dfd_base)
+				close((*it)->dfd_base);
+			if ((*it)->dfd_mnt >= 0)
+				close((*it)->dfd_mnt);
+		}
 		free(*it);
 	}
 	free(ops->hierarchies);
@@ -95,21 +124,13 @@ void cgroup_exit(struct cgroup_ops *ops)
 }
 #define INIT_SCOPE "/init.scope"
-void prune_init_scope(char *cg)
+char *prune_init_scope(char *cg)
 {
-	char *point;
+	if (is_empty_string(cg))
+		return NULL;
-	if (!cg)
+	if (strnequal(cg, INIT_SCOPE, STRLITERALLEN(INIT_SCOPE)))
-		return;
+		return cg + STRLITERALLEN(INIT_SCOPE);
-	point = cg + strlen(cg) - strlen(INIT_SCOPE);
-	if (point < cg)
-		return;
-	if (strequal(point, INIT_SCOPE)) {
+	return cg;
-		if (point == cg)
-			*(point + 1) = '\0';
-		else
-			*point = '\0';
-	}
 }
--- a/src/lxc/cgroups/cgroup.h
+++ b/src/lxc/cgroups/cgroup.h
@@ -91,12 +91,24 @@ struct hierarchy {
 	unsigned int bpf_device_controller:1;
 	unsigned int freezer_controller:1;
-	/* container cgroup fd */
+	/* File descriptor for the container's cgroup @container_full_path. */
 	int cgfd_con;
-	/* limiting cgroup fd (may be equal to cgfd_con if not separated) */
+	/*
+	 * File descriptor for the container's limiting cgroup
+	 * @container_limit_path.
+	 * Will be equal to @cgfd_con if no limiting cgroup has been requested.
+	 */
 	int cgfd_limit;
-	/* monitor cgroup fd */
+	/* File descriptor for the monitor's cgroup @monitor_full_path. */
 	int cgfd_mon;
+	/* File descriptor for the controller's mountpoint @mountpoint. */
+	int dfd_mnt;
+	/* File descriptor for the controller's base cgroup path @container_base_path. */
+	int dfd_base;
 };
 struct cgroup_ops {
@@ -106,6 +118,18 @@ struct cgroup_ops {
 	/* string constant */
 	const char *version;
+	/*
+	 * File descriptor for the host's cgroupfs mount. On
+	 * CGROUP_LAYOUT_LEGACY or CGROUP_LAYOUT_HYBRID systems
+	 * @dfd_mnt_cgroupfs_host will be a tmpfs fd and the individual
+	 * controllers will be cgroupfs fds. On CGROUP_LAYOUT_UNIFIED it will
+	 * be a cgroupfs fd itself.
+	 *
+	 * So for CGROUP_LAYOUT_LEGACY or CGROUP_LAYOUT_HYBRID we allow
+	 * mountpoint crossing iff we cross from a tmpfs into a cgroupfs mount.
+	 */
+	int dfd_mnt_cgroupfs_host;
 	/* What controllers is the container supposed to use. */
 	char **cgroup_use;
 	char *cgroup_pattern;
@@ -186,7 +210,7 @@ __hidden extern struct cgroup_ops *cgroup_init(struct lxc_conf *conf);
 __hidden extern void cgroup_exit(struct cgroup_ops *ops);
 define_cleanup_function(struct cgroup_ops *, cgroup_exit);
-__hidden extern void prune_init_scope(char *cg);
+__hidden extern char *prune_init_scope(char *cg);
 __hidden extern int cgroup_attach(const struct lxc_conf *conf, const char *name,
 				  const char *lxcpath, pid_t pid);

--- a/src/lxc/cgroups/cgroup_utils.c
+++ b/src/lxc/cgroups/cgroup_utils.c
@@ -83,22 +83,6 @@ bool test_writeable_v2(char *mountpoint, char *path)
 	return (access(cgroup_threads_file, W_OK) == 0);
 }
-int unified_cgroup_hierarchy(void)
-{
-	int ret;
-	struct statfs fs;
-	ret = statfs(DEFAULT_CGROUP_MOUNTPOINT, &fs);
-	if (ret < 0)
-		return -ENOMEDIUM;
-	if (is_fs_type(&fs, CGROUP2_SUPER_MAGIC))
-		return CGROUP2_SUPER_MAGIC;
-	return 0;
-}
 int unified_cgroup_fd(int fd)
 {

--- a/src/lxc/cgroups/cgroup_utils.h
+++ b/src/lxc/cgroups/cgroup_utils.h
@@ -29,8 +29,6 @@ __hidden extern bool test_writeable_v1(char *mountpoint, char *path);
 */
 __hidden extern bool test_writeable_v2(char *mountpoint, char *path);
-__hidden extern int unified_cgroup_hierarchy(void);
 __hidden extern int unified_cgroup_fd(int fd);
 static inline bool cgns_supported(void)

--- a/src/lxc/file_utils.c
+++ b/src/lxc/file_utils.c
@@ -31,15 +31,15 @@ int lxc_readat(int dirfd, const char *filename, void *buf, size_t count)
 	__do_close int fd = -EBADF;
 	ssize_t ret;
-	fd = openat(dirfd, filename, O_RDONLY | O_CLOEXEC);
+	fd = open_at(dirfd, filename, PROTECT_OPEN, PROTECT_LOOKUP_BENEATH, 0);
 	if (fd < 0)
-		return -1;
+		return -errno;
 	ret = lxc_read_nointr(fd, buf, count);
-	if (ret < 0 || (size_t)ret != count)
+	if (ret < 0)
-		return -1;
+		return -errno;
-	return 0;
+	return ret;
 }
 int lxc_writeat(int dirfd, const char *filename, const void *buf, size_t count)
@@ -630,21 +630,31 @@ int timens_offset_write(clockid_t clk_id, int64_t s_offset, int64_t ns_offset)
 bool exists_dir_at(int dir_fd, const char *path)
 {
-	struct stat sb;
 	int ret;
+	struct stat sb;
 	ret = fstatat(dir_fd, path, &sb, 0);
 	if (ret < 0)
 		return false;
-	return S_ISDIR(sb.st_mode);
+	ret = S_ISDIR(sb.st_mode);
+	if (ret)
+		errno = EEXIST;
+	else
+		errno = ENOTDIR;
+	return ret;
 }
 bool exists_file_at(int dir_fd, const char *path)
 {
+	int ret;
 	struct stat sb;
-	return fstatat(dir_fd, path, &sb, 0) == 0;
+	ret = fstatat(dir_fd, path, &sb, 0);
+	if (ret == 0)
+		errno = EEXIST;
+	return ret == 0;
 }
 int open_at(int dfd, const char *path, unsigned int o_flags,

--- a/src/lxc/log.h
+++ b/src/lxc/log.h
@@ -501,6 +501,20 @@ __lxc_unused static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo,	\
 		__internal_ret__;                             \
 	})
+#define syswarn(__ret__, format, ...)                         \
+	({                                                    \
+		typeof(__ret__) __internal_ret__ = (__ret__); \
+		SYSWARN(format, ##__VA_ARGS__);               \
+		__internal_ret__;                             \
+	})
+#define sysdebug(__ret__, format, ...)                        \
+	({                                                    \
+		typeof(__ret__) __internal_ret__ = (__ret__); \
+		SYSDEBUG(format, ##__VA_ARGS__);              \
+		__internal_ret__;                             \
+	})
 #define syserrno_set(__ret__, format, ...)                    \
 	({                                                    \
 		typeof(__ret__) __internal_ret__ = (__ret__); \

--- a/src/lxc/string_utils.c
+++ b/src/lxc/string_utils.c
@@ -813,6 +813,8 @@ char *must_make_path(const char *first, ...)
 	va_start(args, first);
 	while ((cur = va_arg(args, char *)) != NULL) {
 		buf_len = strlen(cur);
+		if (buf_len == 0)
+			continue;
 		full_len += buf_len;
 		if (cur[0] != '/')

--- a/src/lxc/string_utils.h
+++ b/src/lxc/string_utils.h
@@ -150,6 +150,11 @@ static inline bool abspath(const char *str)
 	return *str == '/';
 }
+static inline char *deabs(char *str)
+{
+	return str + strspn(str, "/");
+}
 #define strnprintf(buf, buf_size, ...)                                            \
 	({                                                                        \
 		int __ret_strnprintf;                                             \