Unverified Commit 2c641705 by Stéphane Graber Committed by GitHub

Merge pull request #3633 from brauner/2021-01-25/criu

cgroups: fixes and cgroup2 improvements
parents 6f2f6590 928b065d
......@@ -153,15 +153,28 @@ static struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *contr
for (int i = 0; ops->hierarchies[i]; i++) {
if (!controller) {
/* This is the empty unified hierarchy. */
if (ops->hierarchies[i]->controllers &&
!ops->hierarchies[i]->controllers[0])
if (ops->hierarchies[i]->controllers && !ops->hierarchies[i]->controllers[0])
return ops->hierarchies[i];
continue;
} else if (pure_unified_layout(ops) &&
strcmp(controller, "devices") == 0) {
if (ops->unified->bpf_device_controller)
return ops->unified;
break;
}
/*
* Handle controllers with significant implementation changes
* from cgroup to cgroup2.
*/
if (pure_unified_layout(ops)) {
if (strcmp(controller, "devices") == 0) {
if (ops->unified->bpf_device_controller)
return ops->unified;
break;
} else if (strcmp(controller, "freezer") == 0) {
if (ops->unified->freezer_controller)
return ops->unified;
break;
}
}
if (string_in_list(ops->hierarchies[i]->controllers, controller))
......@@ -176,45 +189,6 @@ static struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *contr
return ret_set_errno(NULL, ENOENT);
}
#define BATCH_SIZE 50
static void batch_realloc(char **mem, size_t oldlen, size_t newlen)
{
int newbatches = (newlen / BATCH_SIZE) + 1;
int oldbatches = (oldlen / BATCH_SIZE) + 1;
if (!*mem || newbatches > oldbatches)
*mem = must_realloc(*mem, newbatches * BATCH_SIZE);
}
static void append_line(char **dest, size_t oldlen, char *new, size_t newlen)
{
size_t full = oldlen + newlen;
batch_realloc(dest, oldlen, full + 1);
memcpy(*dest + oldlen, new, newlen + 1);
}
/* Slurp in a whole file */
static char *read_file(const char *fnam)
{
__do_free char *buf = NULL, *line = NULL;
__do_fclose FILE *f = NULL;
size_t len = 0, fulllen = 0;
int linelen;
f = fopen(fnam, "re");
if (!f)
return NULL;
while ((linelen = getline(&line, &len, f)) != -1) {
append_line(&buf, fulllen, line, linelen);
fulllen += linelen;
}
return move_ptr(buf);
}
/* Taken over modified from the kernel sources. */
#define NBITS 32 /* bits in uint32_t */
#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
......@@ -350,7 +324,7 @@ static bool cg_legacy_filter_and_set_cpus(const char *parent_cgroup,
bool flipped_bit = false;
fpath = must_make_path(parent_cgroup, "cpuset.cpus", NULL);
posscpus = read_file(fpath);
posscpus = read_file_at(-EBADF, fpath);
if (!posscpus)
return log_error_errno(false, errno, "Failed to read file \"%s\"", fpath);
......@@ -360,7 +334,7 @@ static bool cg_legacy_filter_and_set_cpus(const char *parent_cgroup,
return false;
if (file_exists(__ISOL_CPUS)) {
isolcpus = read_file(__ISOL_CPUS);
isolcpus = read_file_at(-EBADF, __ISOL_CPUS);
if (!isolcpus)
return log_error_errno(false, errno, "Failed to read file \"%s\"", __ISOL_CPUS);
......@@ -379,7 +353,7 @@ static bool cg_legacy_filter_and_set_cpus(const char *parent_cgroup,
}
if (file_exists(__OFFLINE_CPUS)) {
offlinecpus = read_file(__OFFLINE_CPUS);
offlinecpus = read_file_at(-EBADF, __OFFLINE_CPUS);
if (!offlinecpus)
return log_error_errno(false, errno, "Failed to read file \"%s\"", __OFFLINE_CPUS);
......@@ -691,14 +665,14 @@ static char **cg_unified_make_empty_controller(void)
return move_ptr(aret);
}
static char **cg_unified_get_controllers(const char *file)
static char **cg_unified_get_controllers(int dfd, const char *file)
{
__do_free char *buf = NULL;
__do_free_string_list char **aret = NULL;
char *sep = " \t\n";
char *tok;
buf = read_file(file);
buf = read_file_at(dfd, file);
if (!buf)
return NULL;
......@@ -1691,6 +1665,27 @@ __cgfsng_ops static void cgfsng_payload_finalize(struct cgroup_ops *ops)
if (!is_unified_hierarchy(h))
close_prot_errno_disarm(h->cgfd_con);
}
/*
* The checking for freezer support should obviously be done at cgroup
* initialization time but that doesn't work reliable. The freezer
* controller has been demoted (rightly so) to a simple file located in
* each non-root cgroup. At the time when the container is created we
* might still be located in /sys/fs/cgroup and so checking for
* cgroup.freeze won't tell us anything because this file doesn't exist
* in the root cgroup. We could then iterate through /sys/fs/cgroup and
* find an already existing cgroup and then check within that cgroup
* for the existence of cgroup.freeze but that will only work on
* systemd based hosts. Other init systems might not manage cgroups and
* so no cgroup will exist. So we defer until we have created cgroups
* for our container which means we check here.
*/
if (pure_unified_layout(ops) &&
!faccessat(ops->unified->cgfd_con, "cgroup.freeze", F_OK,
AT_SYMLINK_NOFOLLOW)) {
TRACE("Unified hierarchy supports freezer");
ops->unified->freezer_controller = 1;
}
}
/* cgroup-full:* is done, no need to create subdirs */
......@@ -3148,7 +3143,7 @@ static void cg_unified_delegate(char ***delegate)
char *token;
int idx;
buf = read_file("/sys/kernel/cgroup/delegate");
buf = read_file_at(-EBADF, "/sys/kernel/cgroup/delegate");
if (!buf) {
for (char **p = standard; p && *p; p++) {
idx = append_null_to_list((void ***)delegate);
......@@ -3186,9 +3181,9 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg
* cgroups as our base in that case.
*/
if (!relative && (geteuid() == 0))
basecginfo = read_file("/proc/1/cgroup");
basecginfo = read_file_at(-EBADF, "/proc/1/cgroup");
else
basecginfo = read_file("/proc/self/cgroup");
basecginfo = read_file_at(-EBADF, "/proc/self/cgroup");
if (!basecginfo)
return ret_set_errno(-1, ENOMEM);
......@@ -3272,7 +3267,7 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg
"cgroup.controllers",
NULL);
controller_list = cg_unified_get_controllers(cgv2_ctrl_path);
controller_list = cg_unified_get_controllers(-EBADF, cgv2_ctrl_path);
free(cgv2_ctrl_path);
if (!controller_list) {
controller_list = cg_unified_make_empty_controller();
......@@ -3315,9 +3310,9 @@ static char *cg_unified_get_current_cgroup(bool relative)
char *base_cgroup;
if (!relative && (geteuid() == 0))
basecginfo = read_file("/proc/1/cgroup");
basecginfo = read_file_at(-EBADF, "/proc/1/cgroup");
else
basecginfo = read_file("/proc/self/cgroup");
basecginfo = read_file_at(-EBADF, "/proc/self/cgroup");
if (!basecginfo)
return NULL;
......@@ -3336,12 +3331,11 @@ static char *cg_unified_get_current_cgroup(bool relative)
static int cg_unified_init(struct cgroup_ops *ops, bool relative,
bool unprivileged)
{
__do_free char *subtree_path = NULL;
__do_close int cgroup_root_fd = -EBADF;
__do_free char *base_cgroup = NULL, *controllers_path = NULL;
int ret;
char *mountpoint;
char **delegatable;
struct hierarchy *new;
char *base_cgroup = NULL;
ret = unified_cgroup_hierarchy();
if (ret == -ENOMEDIUM)
......@@ -3356,14 +3350,18 @@ static int cg_unified_init(struct cgroup_ops *ops, bool relative,
if (!relative)
prune_init_scope(base_cgroup);
cgroup_root_fd = openat(-EBADF, DEFAULT_CGROUP_MOUNTPOINT,
O_NOCTTY | O_CLOEXEC | O_NOFOLLOW | O_DIRECTORY);
if (cgroup_root_fd < 0)
return -errno;
/*
* We assume that the cgroup we're currently in has been delegated to
* us and we are free to further delege all of the controllers listed
* in cgroup.controllers further down the hierarchy.
*/
mountpoint = must_copy_string(DEFAULT_CGROUP_MOUNTPOINT);
subtree_path = must_make_path(mountpoint, base_cgroup, "cgroup.controllers", NULL);
delegatable = cg_unified_get_controllers(subtree_path);
controllers_path = must_make_path_relative(base_cgroup, "cgroup.controllers", NULL);
delegatable = cg_unified_get_controllers(cgroup_root_fd, controllers_path);
if (!delegatable)
delegatable = cg_unified_make_empty_controller();
if (!delegatable[0])
......@@ -3376,7 +3374,11 @@ static int cg_unified_init(struct cgroup_ops *ops, bool relative,
* controllers per container.
*/
new = add_hierarchy(&ops->hierarchies, delegatable, mountpoint, base_cgroup, CGROUP2_SUPER_MAGIC);
new = add_hierarchy(&ops->hierarchies,
delegatable,
must_copy_string(DEFAULT_CGROUP_MOUNTPOINT),
move_ptr(base_cgroup),
CGROUP2_SUPER_MAGIC);
if (unprivileged)
cg_unified_delegate(&new->cgroup2_chown);
......
......@@ -89,6 +89,7 @@ struct hierarchy {
/* cgroup2 only */
unsigned int bpf_device_controller:1;
unsigned int freezer_controller:1;
/* container cgroup fd */
int cgfd_con;
......
......@@ -404,7 +404,10 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf,
goto err;
}
ret = snprintf(log, sizeof(log), "/sys/fs/cgroup/freezer/%s", freezer_relative);
if (pure_unified_layout(cgroup_ops))
ret = snprintf(log, sizeof(log), "/sys/fs/cgroup/%s", freezer_relative);
else
ret = snprintf(log, sizeof(log), "/sys/fs/cgroup/freezer/%s", freezer_relative);
if (ret < 0 || ret >= sizeof(log))
goto err;
......
......@@ -592,3 +592,49 @@ int fd_make_nonblocking(int fd)
flags &= ~O_NONBLOCK;
return fcntl(fd, F_SETFL, flags);
}
#define BATCH_SIZE 50
static void batch_realloc(char **mem, size_t oldlen, size_t newlen)
{
int newbatches = (newlen / BATCH_SIZE) + 1;
int oldbatches = (oldlen / BATCH_SIZE) + 1;
if (!*mem || newbatches > oldbatches)
*mem = must_realloc(*mem, newbatches * BATCH_SIZE);
}
static void append_line(char **dest, size_t oldlen, char *new, size_t newlen)
{
size_t full = oldlen + newlen;
batch_realloc(dest, oldlen, full + 1);
memcpy(*dest + oldlen, new, newlen + 1);
}
/* Slurp in a whole file */
char *read_file_at(int dfd, const char *fnam)
{
__do_close int fd = -EBADF;
__do_free char *buf = NULL, *line = NULL;
__do_fclose FILE *f = NULL;
size_t len = 0, fulllen = 0;
int linelen;
fd = openat(dfd, fnam, O_NOCTTY | O_CLOEXEC | O_NOFOLLOW | O_RDONLY);
if (fd < 0)
return NULL;
f = fdopen(fd, "re");
if (!f)
return NULL;
/* Transfer ownership to fdopen(). */
move_fd(fd);
while ((linelen = getline(&line, &len, f)) != -1) {
append_line(&buf, fulllen, line, linelen);
fulllen += linelen;
}
return move_ptr(buf);
}
......@@ -81,5 +81,6 @@ __hidden extern bool exists_dir_at(int dir_fd, const char *path);
__hidden extern bool exists_file_at(int dir_fd, const char *path);
__hidden extern int open_beneath(int dir_fd, const char *path, unsigned int flags);
__hidden int fd_make_nonblocking(int fd);
__hidden extern char *read_file_at(int dfd, const char *fnam);
#endif /* __LXC_FILE_UTILS_H */
......@@ -87,6 +87,16 @@ __hidden __attribute__((sentinel)) extern char *must_concat(size_t *len, const c
__hidden __attribute__((sentinel)) extern char *must_make_path(const char *first, ...);
__hidden __attribute__((sentinel)) extern char *must_append_path(char *first, ...);
#define must_make_path_relative(__first__, ...) \
({ \
char *__ptr__; \
if (*__first__ == '/') \
__ptr__ = must_make_path(".", __first__, __VA_ARGS__); \
else \
__ptr__ = must_make_path(__first__, __VA_ARGS__); \
__ptr__; \
})
/* Return copy of string @entry. Do not fail. */
__hidden extern char *must_copy_string(const char *entry);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment