Merge pull request #3207 from brauner/cgroup2_improvements_2

cgroups: improve container cgroup attaching

Merge pull request #3207 from brauner/cgroup2_improvements_2
8a7d02b7 · Stéphane Graber · GitHub · c10ac1b1 · 9994db51 · 8a7d02b7
Unverified Commit 8a7d02b7 authored Dec 04, 2019 by Stéphane Graber Committed by GitHub Dec 04, 2019
6 changed files
--- a/src/lxc/attach.c
+++ b/src/lxc/attach.c
@@ -1230,16 +1230,21 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function,
 		/* Attach to cgroup, if requested. */
 		if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) {
-			struct cgroup_ops *cgroup_ops;
+			/*
+			 * If this is the unified hierarchy cgroup_attach() is
+			 * enough.
+			 */
+			ret = cgroup_attach(name, lxcpath, pid);
+			if (ret) {
+				__do_cgroup_exit struct cgroup_ops *cgroup_ops = NULL;
-			cgroup_ops = cgroup_init(conf);
+				cgroup_ops = cgroup_init(conf);
-			if (!cgroup_ops)
+				if (!cgroup_ops)
-				goto on_error;
+					goto on_error;
-			if (!cgroup_ops->attach(cgroup_ops, name, lxcpath, pid))
+				if (!cgroup_ops->attach(cgroup_ops, name, lxcpath, pid))
-				goto on_error;
+					goto on_error;
+			}
-			cgroup_exit(cgroup_ops);
 			TRACE("Moved intermediate process %d into container's cgroups", pid);
 		}

--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -2202,38 +2202,14 @@ static inline char *build_full_cgpath_from_monitorpath(struct hierarchy *h,
 	return must_make_path(h->mountpoint, inpath, filename, NULL);
 }
-/* Technically, we're always at a delegation boundary here (This is especially
+static int cgroup_attach_leaf(int unified_fd, int64_t pid)
- * true when cgroup namespaces are available.). The reasoning is that in order
- * for us to have been able to start a container in the first place the root
- * cgroup must have been a leaf node. Now, either the container's init system
- * has populated the cgroup and kept it as a leaf node or it has created
- * subtrees. In the former case we will simply attach to the leaf node we
- * created when we started the container in the latter case we create our own
- * cgroup for the attaching process.
- */
-static int __cg_unified_attach(const struct hierarchy *h, const char *name,
-			       const char *lxcpath, const char *pidstr,
-			       size_t pidstr_len, const char *controller)
 {
-	__do_close_prot_errno int unified_fd = -EBADF;
 	int idx = 0;
 	int ret;
+	char pidstr[INTTYPE_TO_STRLEN(int64_t) + 1];
+	size_t pidstr_len;
-	unified_fd = lxc_cmd_get_cgroup2_fd(name, lxcpath);
+	pidstr_len = sprintf(pidstr, INT64_FMT, pid);
-	if (unified_fd < 0) {
-		__do_free char *base_path = NULL, *container_cgroup = NULL;
-		container_cgroup = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
-		/* not running */
-		if (!container_cgroup)
-			return 0;
-		base_path = must_make_path(h->mountpoint, container_cgroup, NULL);
-		unified_fd = open(base_path, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
-	}
-	if (unified_fd < 0)
-		return -1;
 	ret = lxc_writeat(unified_fd, "cgroup.procs", pidstr, pidstr_len);
 	if (ret == 0)
 		return 0;
@@ -2275,6 +2251,51 @@ static int __cg_unified_attach(const struct hierarchy *h, const char *name,
 	return -1;
 }
+/* Move @pid into the container's cgroup using the cgroup2 fd returned by
+ * lxc_cmd_get_cgroup2_fd() for the container @name under @lxcpath.
+ *
+ * Returns -1 if no fd could be retrieved, otherwise the result of
+ * cgroup_attach_leaf(). The fd is released automatically on return.
+ */
+int cgroup_attach(const char *name, const char *lxcpath, int64_t pid)
+{
+	__do_close_prot_errno int cgroup2_fd = lxc_cmd_get_cgroup2_fd(name, lxcpath);
+	if (cgroup2_fd >= 0)
+		return cgroup_attach_leaf(cgroup2_fd, pid);
+	return -1;
+}
+/* Attach @pid to the container's cgroup in the unified hierarchy @h.
+ *
+ * Technically, we're always at a delegation boundary here (This is especially
+ * true when cgroup namespaces are available.). The reasoning is that in order
+ * for us to have been able to start a container in the first place the root
+ * cgroup must have been a leaf node. Now, either the container's init system
+ * has populated the cgroup and kept it as a leaf node or it has created
+ * subtrees. In the former case we will simply attach to the leaf node we
+ * created when we started the container; in the latter case we create our own
+ * cgroup for the attaching process.
+ *
+ * The fd-based cgroup_attach() is tried first. Only when it fails is the
+ * container's cgroup path for @controller looked up, opened below
+ * @h->mountpoint and handed to cgroup_attach_leaf().
+ *
+ * Returns 0 on success or when no cgroup path could be retrieved (treated as
+ * "not running"), -1 on failure.
+ */
+static int __cg_unified_attach(const struct hierarchy *h, const char *name,
+			       const char *lxcpath, pid_t pid,
+			       const char *controller)
+{
+	__do_close_prot_errno int unified_fd = -EBADF;
+	__do_free char *path = NULL, *cgroup = NULL;
+	/*
+	 * If the fd-based attach worked we are done. Falling through here
+	 * would hit the unified_fd check below with the fd still unset and
+	 * wrongly report a successful attach as a failure.
+	 */
+	if (cgroup_attach(name, lxcpath, pid) == 0)
+		return 0;
+	cgroup = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
+	/* not running */
+	if (!cgroup)
+		return 0;
+	path = must_make_path(h->mountpoint, cgroup, NULL);
+	unified_fd = open(path, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
+	if (unified_fd < 0)
+		return -1;
+	return cgroup_attach_leaf(unified_fd, pid);
+}
 __cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops, const char *name,
 					 const char *lxcpath, pid_t pid)
 {
@@ -2293,7 +2314,7 @@ __cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops, const char *name,
 		struct hierarchy *h = ops->hierarchies[i];
 		if (h->version == CGROUP2_SUPER_MAGIC) {
-			ret = __cg_unified_attach(h, name, lxcpath, pidstr, len,
+			ret = __cg_unified_attach(h, name, lxcpath, pid,
 						  h->controllers[0]);
 			if (ret < 0)
 				return false;
@@ -2750,10 +2771,8 @@ __cgfsng_ops bool cgfsng_devices_activate(struct cgroup_ops *ops,
 	struct lxc_list *it;
 	struct bpf_program *devices_old;
-	if (!unified)
+	if (!unified || !unified->bpf_device_controller ||
-		return false;
+	    !unified->container_full_path || lxc_list_empty(&conf->devices))
-	if (lxc_list_empty(&conf->devices))
 		return true;
 	devices = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE);

--- a/src/lxc/cgroups/cgroup.h
+++ b/src/lxc/cgroups/cgroup.h
@@ -192,6 +192,8 @@ static inline void __auto_cgroup_exit__(struct cgroup_ops **ops)
 		cgroup_exit(*ops);
 }
+extern int cgroup_attach(const char *name, const char *lxcpath, int64_t pid);
 #define __do_cgroup_exit __attribute__((__cleanup__(__auto_cgroup_exit__)))
 #endif
--- a/src/lxc/commands.c
+++ b/src/lxc/commands.c
@@ -131,19 +131,15 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd)
 */
 static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd)
 {
-	int ret, rspfd;
+	__do_close_prot_errno int fd_rsp = -EBADF;
+	int ret;
 	struct lxc_cmd_rsp *rsp = &cmd->rsp;
-	ret = lxc_abstract_unix_recv_fds(sock, &rspfd, 1, rsp, sizeof(*rsp));
+	ret = lxc_abstract_unix_recv_fds(sock, &fd_rsp, 1, rsp, sizeof(*rsp));
-	if (ret < 0) {
+	if (ret < 0)
-		SYSWARN("Failed to receive response for command \"%s\"",
+		return log_warn_errno(-1,
-		        lxc_cmd_str(cmd->req.cmd));
+				      errno, "Failed to receive response for command \"%s\"",
+				      lxc_cmd_str(cmd->req.cmd));
-		if (errno == ECONNRESET)
-			return -1;
-		return -1;
-	}
 	TRACE("Command \"%s\" received response", lxc_cmd_str(cmd->req.cmd));
 	if (cmd->req.cmd == LXC_CMD_CONSOLE) {
@@ -156,33 +152,31 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd)
 			return 0;
 		rspdata = malloc(sizeof(*rspdata));
-		if (!rspdata) {
+		if (!rspdata)
-			errno = ENOMEM;
+			return log_warn_errno(-1,
-			ERROR("Failed to allocate response buffer for command \"%s\"",
+					      ENOMEM, "Failed to receive response for command \"%s\"",
-			      lxc_cmd_str(cmd->req.cmd));
+					      lxc_cmd_str(cmd->req.cmd));
-			return -1;
-		}
-		rspdata->masterfd = rspfd;
+		rspdata->masterfd = move_fd(fd_rsp);
 		rspdata->ttynum = PTR_TO_INT(rsp->data);
 		rsp->data = rspdata;
 	}
-	if (cmd->req.cmd == LXC_CMD_GET_CGROUP2_FD)
+	if (cmd->req.cmd == LXC_CMD_GET_CGROUP2_FD) {
-		rsp->data = INT_TO_PTR(rspfd);
+		int cgroup2_fd = move_fd(fd_rsp);
+		rsp->data = INT_TO_PTR(cgroup2_fd);
-	if (rsp->datalen == 0) {
-		DEBUG("Response data length for command \"%s\" is 0",
-		      lxc_cmd_str(cmd->req.cmd));
-		return ret;
 	}
+	if (rsp->datalen == 0)
+		return log_debug(ret,
+				 "Response data length for command \"%s\" is 0",
+				 lxc_cmd_str(cmd->req.cmd));
 	if ((rsp->datalen > LXC_CMD_DATA_MAX) &&
-	    (cmd->req.cmd != LXC_CMD_CONSOLE_LOG)) {
+	    (cmd->req.cmd != LXC_CMD_CONSOLE_LOG))
-		ERROR("Response data for command \"%s\" is too long: %d bytes > %d",
+		return log_error(-1, "Response data for command \"%s\" is too long: %d bytes > %d",
-		      lxc_cmd_str(cmd->req.cmd), rsp->datalen, LXC_CMD_DATA_MAX);
+				 lxc_cmd_str(cmd->req.cmd), rsp->datalen,
-		return -1;
+				 LXC_CMD_DATA_MAX);
-	}
 	if (cmd->req.cmd == LXC_CMD_CONSOLE_LOG) {
 		rsp->data = malloc(rsp->datalen + 1);
@@ -190,19 +184,16 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd)
 	} else {
 		rsp->data = malloc(rsp->datalen);
 	}
-	if (!rsp->data) {
+	if (!rsp->data)
-		errno = ENOMEM;
+		return log_error_errno(-1,
-		ERROR("Failed to allocate response buffer for command \"%s\"",
+				       ENOMEM, "Failed to allocate response buffer for command \"%s\"",
-		      lxc_cmd_str(cmd->req.cmd));
+				       lxc_cmd_str(cmd->req.cmd));
-		return -1;
-	}
 	ret = lxc_recv_nointr(sock, rsp->data, rsp->datalen, 0);
-	if (ret != rsp->datalen) {
+	if (ret != rsp->datalen)
-		SYSERROR("Failed to receive response data for command \"%s\"",
+		return log_error_errno(-1,
-		         lxc_cmd_str(cmd->req.cmd));
+				       errno, "Failed to receive response data for command \"%s\"",
-		return -1;
+				       lxc_cmd_str(cmd->req.cmd));
-	}
 	return ret;
 }
@@ -1305,8 +1296,11 @@ int lxc_cmd_get_cgroup2_fd(const char *name, const char *lxcpath)
 	};
 	ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
-	if (ret <= 0 || cmd.rsp.ret < 0)
+	if (ret < 0)
-		return error_log_errno(errno, "Failed to retrieve cgroup2 fd");
+		return -1;
+	if (cmd.rsp.ret < 0)
+		return log_debug_errno(-1, errno, "Failed to receive cgroup2 fd");
 	return PTR_TO_INT(cmd.rsp.data);
 }
@@ -1361,10 +1355,9 @@ static int lxc_cmd_process(int fd, struct lxc_cmd_req *req,
 		[LXC_CMD_GET_CGROUP2_FD]		= lxc_cmd_get_cgroup2_fd_callback,
 	};
-	if (req->cmd >= LXC_CMD_MAX) {
+	if (req->cmd >= LXC_CMD_MAX)
-		ERROR("Undefined command id %d", req->cmd);
+		return log_error_errno(-1, ENOENT, "Undefined command id %d", req->cmd);
-		return -1;
-	}
 	return cb[req->cmd](fd, req, handler, descr);
 }

--- a/src/lxc/log.h
+++ b/src/lxc/log.h
@@ -531,6 +531,25 @@ ATTR_UNUSED static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo,	\
 		__ret__;		      \
 	})
+/* Set errno to __errno__, emit the message through SYSWARN() and evaluate to
+ * __ret__, so a caller can log and return in a single statement.
+ */
+#define log_warn_errno(__ret__, __errno__, format, ...) \
+	({						\
+		errno = __errno__;			\
+		SYSWARN(format, ##__VA_ARGS__);		\
+		__ret__;				\
+	})
+/* Emit the message through DEBUG() and evaluate to __ret__; errno is not
+ * touched by this macro itself.
+ */
+#define log_debug(__ret__, format, ...)	      \
+	({				      \
+		DEBUG(format, ##__VA_ARGS__); \
+		__ret__;		      \
+	})
+/* Set errno to __errno__, emit the message through SYSDEBUG() and evaluate to
+ * __ret__. errno must be assigned before logging, as log_warn_errno() does;
+ * otherwise the __errno__ argument is silently ignored and a stale errno is
+ * what gets reported.
+ */
+#define log_debug_errno(__ret__, __errno__, format, ...) \
+	({						 \
+		errno = __errno__;			 \
+		SYSDEBUG(format, ##__VA_ARGS__);	 \
+		__ret__;				 \
+	})
 extern int lxc_log_fd;
 extern int lxc_log_syslog(int facility);

--- a/src/lxc/macro.h
+++ b/src/lxc/macro.h
@@ -21,6 +21,10 @@
 #ifndef __LXC_MACRO_H
 #define __LXC_MACRO_H
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#endif
+#define __STDC_FORMAT_MACROS
 #include <asm/types.h>
 #include <limits.h>
 #include <linux/if_link.h>
@@ -39,6 +43,8 @@
 #define PATH_MAX 4096
 #endif
+#define INT64_FMT "%" PRId64
 /* Define __S_ISTYPE if missing from the C library. */
 #ifndef __S_ISTYPE
 #define __S_ISTYPE(mode, mask) (((mode)&S_IFMT) == (mask))