Unverified Commit 858f6225 by Stéphane Graber Committed by GitHub

Merge pull request #3675 from brauner/2021-02-16/fixes

cgroups: second batch of cgroup fixes
parents 136b349c 060e54d6
......@@ -46,6 +46,7 @@
#include "memory_utils.h"
#include "mount_utils.h"
#include "storage/storage.h"
#include "string_utils.h"
#include "syscall_wrappers.h"
#include "utils.h"
......@@ -312,234 +313,11 @@ static ssize_t get_max_cpus(char *cpulist)
return cpus;
}
#define __ISOL_CPUS "/sys/devices/system/cpu/isolated"
#define __OFFLINE_CPUS "/sys/devices/system/cpu/offline"
/*
 * cg_legacy_filter_and_set_cpus: build the cpu list for a new legacy
 * cpuset cgroup.
 *
 * Read the parent cgroup's cpuset.cpus and filter out any cpus the kernel
 * reports as isolated or offline. Unless the child cgroup has already been
 * initialized, write the resulting cpu list to <child_cgroup>/cpuset.cpus.
 *
 * @parent_cgroup  - path of the parent cgroup whose cpuset.cpus is read.
 * @child_cgroup   - path of the child cgroup to initialize.
 * @am_initialized - when true, skip writing the cpu list to the child.
 *
 * Returns true on success, false on failure (errno is set by the failing
 * helper call).
 */
static bool cg_legacy_filter_and_set_cpus(const char *parent_cgroup,
					  char *child_cgroup, bool am_initialized)
{
	__do_free char *cpulist = NULL, *fpath = NULL, *isolcpus = NULL,
		       *offlinecpus = NULL, *posscpus = NULL;
	__do_free uint32_t *isolmask = NULL, *offlinemask = NULL,
			   *possmask = NULL;
	int ret;
	ssize_t i;
	ssize_t maxisol = 0, maxoffline = 0, maxposs = 0;
	bool flipped_bit = false;

	fpath = must_make_path(parent_cgroup, "cpuset.cpus", NULL);
	posscpus = read_file_at(-EBADF, fpath, PROTECT_OPEN, 0);
	if (!posscpus)
		return log_error_errno(false, errno, "Failed to read file \"%s\"", fpath);

	/* Get maximum number of cpus found in possible cpuset. */
	maxposs = get_max_cpus(posscpus);
	if (maxposs < 0 || maxposs >= INT_MAX - 1)
		return false;

	if (file_exists(__ISOL_CPUS)) {
		isolcpus = read_file_at(-EBADF, __ISOL_CPUS, PROTECT_OPEN, 0);
		if (!isolcpus)
			return log_error_errno(false, errno, "Failed to read file \"%s\"", __ISOL_CPUS);

		/* A leading non-digit means the isolated cpu list is empty. */
		if (isdigit(isolcpus[0])) {
			/* Get maximum number of cpus found in isolated cpuset. */
			maxisol = get_max_cpus(isolcpus);
			if (maxisol < 0 || maxisol >= INT_MAX - 1)
				return false;
		}

		if (maxposs < maxisol)
			maxposs = maxisol;
		maxposs++;
	} else {
		TRACE("The path \""__ISOL_CPUS"\" to read isolated cpus from does not exist");
	}

	if (file_exists(__OFFLINE_CPUS)) {
		offlinecpus = read_file_at(-EBADF, __OFFLINE_CPUS, PROTECT_OPEN, 0);
		if (!offlinecpus)
			return log_error_errno(false, errno, "Failed to read file \"%s\"", __OFFLINE_CPUS);

		/* A leading non-digit means the offline cpu list is empty. */
		if (isdigit(offlinecpus[0])) {
			/* Get maximum number of cpus found in offline cpuset. */
			maxoffline = get_max_cpus(offlinecpus);
			if (maxoffline < 0 || maxoffline >= INT_MAX - 1)
				return false;
		}

		if (maxposs < maxoffline)
			maxposs = maxoffline;
		maxposs++;
	} else {
		TRACE("The path \""__OFFLINE_CPUS"\" to read offline cpus from does not exist");
	}

	/* Nothing to filter: inherit the parent's cpu list verbatim. */
	if ((maxisol == 0) && (maxoffline == 0)) {
		cpulist = move_ptr(posscpus);
		goto copy_parent;
	}

	possmask = lxc_cpumask(posscpus, maxposs);
	if (!possmask)
		return log_error_errno(false, errno, "Failed to create cpumask for possible cpus");

	if (maxisol > 0) {
		isolmask = lxc_cpumask(isolcpus, maxposs);
		if (!isolmask)
			return log_error_errno(false, errno, "Failed to create cpumask for isolated cpus");
	}

	if (maxoffline > 0) {
		offlinemask = lxc_cpumask(offlinecpus, maxposs);
		if (!offlinemask)
			return log_error_errno(false, errno, "Failed to create cpumask for offline cpus");
	}

	/* Clear every possible cpu that is also isolated/offline. */
	for (i = 0; i <= maxposs; i++) {
		if ((isolmask && !is_set(i, isolmask)) ||
		    (offlinemask && !is_set(i, offlinemask)) ||
		    !is_set(i, possmask))
			continue;

		flipped_bit = true;
		clear_bit(i, possmask);
	}

	/*
	 * BUGFIX: these two branches were swapped. When bits were cleared
	 * from the mask the code wrote the original, unfiltered parent cpu
	 * list, so isolated/offline cpus were never actually removed; when
	 * nothing changed it needlessly re-serialized the mask. The TRACE
	 * messages document the intended pairing: reuse the parent's list
	 * only when no bit was flipped, otherwise serialize the filtered
	 * mask.
	 */
	if (!flipped_bit) {
		cpulist = move_ptr(posscpus);
		TRACE("No isolated or offline cpus present in cpuset");
	} else {
		cpulist = lxc_cpumask_to_cpulist(possmask, maxposs);
		TRACE("Removed isolated or offline cpus from cpuset");
	}
	if (!cpulist)
		return log_error_errno(false, errno, "Failed to create cpu list");

copy_parent:
	if (!am_initialized) {
		ret = lxc_write_openat(child_cgroup, "cpuset.cpus", cpulist, strlen(cpulist));
		if (ret < 0)
			return log_error_errno(false,
					       errno, "Failed to write cpu list to \"%s/cpuset.cpus\"",
					       child_cgroup);

		TRACE("Copied cpu settings of parent cgroup");
	}

	return true;
}
/* Copy contents of parent(@path)/@file to @path/@file */
static bool copy_parent_file(const char *parent_cgroup,
const char *child_cgroup, const char *file)
{
__do_free char *parent_file = NULL, *value = NULL;
int len = 0;
int ret;
parent_file = must_make_path(parent_cgroup, file, NULL);
len = lxc_read_from_file(parent_file, NULL, 0);
if (len <= 0)
return log_error_errno(false, errno, "Failed to determine buffer size");
value = must_realloc(NULL, len + 1);
value[len] = '\0';
ret = lxc_read_from_file(parent_file, value, len);
if (ret != len)
return log_error_errno(false, errno, "Failed to read from parent file \"%s\"", parent_file);
ret = lxc_write_openat(child_cgroup, file, value, len);
if (ret < 0 && errno != EACCES)
return log_error_errno(false, errno, "Failed to write \"%s\" to file \"%s/%s\"",
value, child_cgroup, file);
return true;
}
/* Check whether @h refers to the unified (cgroup2) hierarchy. */
static inline bool is_unified_hierarchy(const struct hierarchy *h)
{
	if (h->version == CGROUP2_SUPER_MAGIC)
		return true;

	return false;
}
/*
 * Initialize the cpuset hierarchy in first directory of @cgroup_leaf and set
 * cgroup.clone_children so that children inherit settings. Since the
 * h->base_path is populated by init or ourselves, we know it is already
 * initialized.
 *
 * Returns -1 on error, 0 when we didn't create a cgroup (it already
 * existed), 1 if we created a cgroup.
 */
static int cg_legacy_handle_cpuset_hierarchy(struct hierarchy *h,
const char *cgroup_leaf)
{
__do_free char *parent_cgroup = NULL, *child_cgroup = NULL, *dup = NULL;
__do_close int cgroup_fd = -EBADF;
int fret = -1;
int ret;
char v;
char *leaf, *slash;
/* Only the legacy (v1) cpuset controller needs this special handling. */
if (is_unified_hierarchy(h))
return 0;
if (!string_in_list(h->controllers, "cpuset"))
return 0;
if (!cgroup_leaf)
return ret_set_errno(-1, EINVAL);
/* Work on a copy since we temporarily NUL-terminate the first component. */
dup = strdup(cgroup_leaf);
if (!dup)
return ret_set_errno(-1, ENOMEM);
parent_cgroup = must_make_path(h->mountpoint, h->container_base_path, NULL);
/* Isolate the first path component of @cgroup_leaf. */
leaf = dup;
leaf += strspn(leaf, "/");
slash = strchr(leaf, '/');
if (slash)
*slash = '\0';
child_cgroup = must_make_path(parent_cgroup, leaf, NULL);
if (slash)
*slash = '/';
/* Assume we create the cgroup; downgraded to 0 below if it existed. */
fret = 1;
ret = mkdir(child_cgroup, 0755);
if (ret < 0) {
if (errno != EEXIST)
return log_error_errno(-1, errno, "Failed to create directory \"%s\"", child_cgroup);
fret = 0;
}
cgroup_fd = lxc_open_dirfd(child_cgroup);
if (cgroup_fd < 0)
return -1;
/* Check whether cpuset inheritance was already turned on for us. */
ret = lxc_readat(cgroup_fd, "cgroup.clone_children", &v, 1);
if (ret < 0)
return log_error_errno(-1, errno, "Failed to read file \"%s/cgroup.clone_children\"", child_cgroup);
/* Make sure any isolated cpus are removed from cpuset.cpus. */
if (!cg_legacy_filter_and_set_cpus(parent_cgroup, child_cgroup, v == '1'))
return log_error_errno(-1, errno, "Failed to remove isolated cpus");
/* Already set for us by someone else. */
if (v == '1')
TRACE("\"cgroup.clone_children\" was already set to \"1\"");
/* copy parent's settings */
if (!copy_parent_file(parent_cgroup, child_cgroup, "cpuset.mems"))
return log_error_errno(-1, errno, "Failed to copy \"cpuset.mems\" settings");
/* Set clone_children so children inherit our settings */
ret = lxc_writeat(cgroup_fd, "cgroup.clone_children", "1", 1);
if (ret < 0)
return log_error_errno(-1, errno, "Failed to write 1 to \"%s/cgroup.clone_children\"", child_cgroup);
return fret;
}
/* Given two null-terminated lists of strings, return true if any string is in
* both.
*/
......@@ -691,26 +469,101 @@ static char **cg_unified_get_controllers(int dfd, const char *file)
return move_ptr(aret);
}
static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char *mountpoint,
char *container_base_path, int type)
/*
 * Check whether every controller in @controllers was requested via
 * lxc.cgroup.use (ops->cgroup_use). An unset lxc.cgroup.use means no
 * restriction, i.e. all controllers are wanted.
 */
static bool cgroup_use_wants_controllers(const struct cgroup_ops *ops,
					 char **controllers)
{
	if (!ops->cgroup_use)
		return true;

	for (char **ctrl = controllers; ctrl && *ctrl; ctrl++) {
		char **use = ops->cgroup_use;

		/* Scan the requested controllers for a match. */
		while (use && *use && !strequal(*use, *ctrl))
			use++;

		/* Controller not listed in lxc.cgroup.use. */
		if (!use || !*use)
			return false;
	}

	return true;
}
static int add_hierarchy(struct cgroup_ops *ops, char **clist, char *mountpoint,
char *container_base_path, int type)
{
struct hierarchy *new;
__do_close int dfd_base = -EBADF, dfd_mnt = -EBADF;
__do_free struct hierarchy *new = NULL;
__do_free_string_list char **controllers = clist;
int newentry;
if (abspath(container_base_path))
return syserrno(-errno, "Container base path must be relative to controller mount");
if (!controllers && type != CGROUP2_SUPER_MAGIC)
return syserrno_set(-EINVAL, "Empty controller list for non-unified cgroup hierarchy passed");
dfd_mnt = open_at(-EBADF, mountpoint, PROTECT_OPATH_DIRECTORY,
PROTECT_LOOKUP_ABSOLUTE_XDEV, 0);
if (dfd_mnt < 0)
return syserrno(-errno, "Failed to open %s", mountpoint);
if (is_empty_string(container_base_path))
dfd_base = dfd_mnt;
else
dfd_base = open_at(dfd_mnt, container_base_path,
PROTECT_OPATH_DIRECTORY,
PROTECT_LOOKUP_BENEATH_XDEV, 0);
if (dfd_base < 0)
return syserrno(-errno, "Failed to open %d(%s)", dfd_base, container_base_path);
if (!controllers) {
/*
* We assume that the cgroup we're currently in has been delegated to
* us and we are free to further delegate all of the controllers listed
* in cgroup.controllers further down the hierarchy.
*/
controllers = cg_unified_get_controllers(dfd_base, "cgroup.controllers");
if (!controllers)
controllers = cg_unified_make_empty_controller();
if (!controllers[0])
TRACE("No controllers are enabled for delegation");
}
/* Exclude all controllers that cgroup use does not want. */
if (!cgroup_use_wants_controllers(ops, controllers))
return log_trace(0, "Skipping cgroup hiearchy with non-requested controllers");
new = zalloc(sizeof(*new));
if (!new)
return ret_set_errno(NULL, ENOMEM);
new->controllers = clist;
new->mountpoint = mountpoint;
new->container_base_path = container_base_path;
new->version = type;
new->cgfd_con = -EBADF;
new->cgfd_limit = -EBADF;
new->cgfd_mon = -EBADF;
newentry = append_null_to_list((void ***)h);
(*h)[newentry] = new;
return new;
return ret_errno(ENOMEM);
new->version = type;
new->controllers = move_ptr(controllers);
new->mountpoint = mountpoint;
new->container_base_path = container_base_path;
new->cgfd_con = -EBADF;
new->cgfd_limit = -EBADF;
new->cgfd_mon = -EBADF;
TRACE("Adding cgroup hierarchy with mountpoint %s and base cgroup %s",
mountpoint, container_base_path);
for (char *const *it = new->controllers; it && *it; it++)
TRACE("The detected hierarchy contains the %s controller", *it);
newentry = append_null_to_list((void ***)&ops->hierarchies);
new->dfd_mnt = move_fd(dfd_mnt);
new->dfd_base = move_fd(dfd_base);
if (type == CGROUP2_SUPER_MAGIC)
ops->unified = new;
(ops->hierarchies)[newentry] = move_ptr(new);
return 0;
}
/* Get a copy of the mountpoint from @line, which is a line from
......@@ -788,38 +641,69 @@ static bool controller_in_clist(char *cgline, char *c)
return false;
}
/*
 * Strip trailing newline characters from @s in place, always leaving at
 * least one character in the string. Returns @s.
 */
static inline char *trim(char *s)
{
	for (size_t n = strlen(s); n > 1 && s[n - 1] == '\n'; n--)
		s[n - 1] = '\0';

	return s;
}
/* @basecginfo is a copy of /proc/$$/cgroup. Return the current cgroup for
* @controller.
*/
static char *cg_hybrid_get_current_cgroup(char *basecginfo, char *controller,
int type)
static char *cg_hybrid_get_current_cgroup(bool relative, char *basecginfo,
char *controller, int type)
{
char *p = basecginfo;
char *base_cgroup = basecginfo;
for (;;) {
bool is_cgv2_base_cgroup = false;
/* cgroup v2 entry in "/proc/<pid>/cgroup": "0::/some/path" */
if ((type == CGROUP2_SUPER_MAGIC) && (*p == '0'))
if ((type == CGROUP2_SUPER_MAGIC) && (*base_cgroup == '0'))
is_cgv2_base_cgroup = true;
p = strchr(p, ':');
if (!p)
base_cgroup = strchr(base_cgroup, ':');
if (!base_cgroup)
return NULL;
p++;
base_cgroup++;
if (is_cgv2_base_cgroup || (controller && controller_in_clist(base_cgroup, controller))) {
__do_free char *copy = NULL;
if (is_cgv2_base_cgroup || (controller && controller_in_clist(p, controller))) {
p = strchr(p, ':');
if (!p)
base_cgroup = strchr(base_cgroup, ':');
if (!base_cgroup)
return NULL;
p++;
return copy_to_eol(p);
base_cgroup++;
copy = copy_to_eol(base_cgroup);
if (!copy)
return NULL;
trim(copy);
if (!relative) {
base_cgroup = prune_init_scope(copy);
if (!base_cgroup)
return NULL;
} else {
base_cgroup = copy;
}
if (abspath(base_cgroup))
base_cgroup = deabs(base_cgroup);
/* We're allowing base_cgroup to be "". */
return strdup(base_cgroup);
}
p = strchr(p, '\n');
if (!p)
base_cgroup = strchr(base_cgroup, '\n');
if (!base_cgroup)
return NULL;
p++;
base_cgroup++;
}
}
......@@ -877,40 +761,6 @@ static int get_existing_subsystems(char ***klist, char ***nlist)
return 0;
}
/*
 * Strip trailing newline characters from @s in place, always leaving at
 * least one character in the string. Returns @s.
 */
static char *trim(char *s)
{
	char *end = s + strlen(s);

	while ((end - s) > 1 && end[-1] == '\n')
		*--end = '\0';

	return s;
}
/*
 * Log every detected cgroup hierarchy (base cgroup, mountpoint and
 * controller list) at trace level for debugging.
 */
static void lxc_cgfsng_print_hierarchies(struct cgroup_ops *ops)
{
int i;
struct hierarchy **it;
if (!ops->hierarchies) {
TRACE(" No hierarchies found");
return;
}
TRACE(" Hierarchies:");
/* One entry per hierarchy, NULL-terminated. */
for (i = 0, it = ops->hierarchies; it && *it; it++, i++) {
int j;
char **cit;
TRACE(" %d: base_cgroup: %s", i, (*it)->container_base_path ? (*it)->container_base_path : "(null)");
TRACE(" mountpoint: %s", (*it)->mountpoint ? (*it)->mountpoint : "(null)");
TRACE(" controllers:");
/* NULL-terminated list of controller names for this hierarchy. */
for (j = 0, cit = (*it)->controllers; cit && *cit; cit++, j++)
TRACE(" %d: %s", j, *cit);
}
}
static void lxc_cgfsng_print_basecg_debuginfo(char *basecginfo, char **klist,
char **nlist)
{
......@@ -1023,118 +873,223 @@ __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops,
} else {
ret = cgroup_tree_remove(ops->hierarchies, ops->container_cgroup);
}
if (ret < 0)
SYSWARN("Failed to destroy cgroups");
}
if (ret < 0)
SYSWARN("Failed to destroy cgroups");
}
#define __ISOL_CPUS "/sys/devices/system/cpu/isolated"
#define __OFFLINE_CPUS "/sys/devices/system/cpu/offline"
static bool cpuset1_cpus_initialize(int dfd_parent, int dfd_child,
bool am_initialized)
{
__do_free char *cpulist = NULL, *fpath = NULL, *isolcpus = NULL,
*offlinecpus = NULL, *posscpus = NULL;
__do_free uint32_t *isolmask = NULL, *offlinemask = NULL,
*possmask = NULL;
int ret;
ssize_t i;
ssize_t maxisol = 0, maxoffline = 0, maxposs = 0;
bool flipped_bit = false;
posscpus = read_file_at(dfd_parent, "cpuset.cpus", PROTECT_OPEN, 0);
if (!posscpus)
return log_error_errno(false, errno, "Failed to read file \"%s\"", fpath);
/* Get maximum number of cpus found in possible cpuset. */
maxposs = get_max_cpus(posscpus);
if (maxposs < 0 || maxposs >= INT_MAX - 1)
return false;
if (file_exists(__ISOL_CPUS)) {
isolcpus = read_file_at(-EBADF, __ISOL_CPUS, PROTECT_OPEN, 0);
if (!isolcpus)
return log_error_errno(false, errno, "Failed to read file \"%s\"", __ISOL_CPUS);
if (isdigit(isolcpus[0])) {
/* Get maximum number of cpus found in isolated cpuset. */
maxisol = get_max_cpus(isolcpus);
if (maxisol < 0 || maxisol >= INT_MAX - 1)
return false;
}
if (maxposs < maxisol)
maxposs = maxisol;
maxposs++;
} else {
TRACE("The path \""__ISOL_CPUS"\" to read isolated cpus from does not exist");
}
if (file_exists(__OFFLINE_CPUS)) {
offlinecpus = read_file_at(-EBADF, __OFFLINE_CPUS, PROTECT_OPEN, 0);
if (!offlinecpus)
return log_error_errno(false, errno, "Failed to read file \"%s\"", __OFFLINE_CPUS);
if (isdigit(offlinecpus[0])) {
/* Get maximum number of cpus found in offline cpuset. */
maxoffline = get_max_cpus(offlinecpus);
if (maxoffline < 0 || maxoffline >= INT_MAX - 1)
return false;
}
if (maxposs < maxoffline)
maxposs = maxoffline;
maxposs++;
} else {
TRACE("The path \""__OFFLINE_CPUS"\" to read offline cpus from does not exist");
}
if ((maxisol == 0) && (maxoffline == 0)) {
cpulist = move_ptr(posscpus);
goto copy_parent;
}
__cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops,
struct lxc_handler *handler)
{
int len;
char pidstr[INTTYPE_TO_STRLEN(pid_t)];
const struct lxc_conf *conf;
possmask = lxc_cpumask(posscpus, maxposs);
if (!possmask)
return log_error_errno(false, errno, "Failed to create cpumask for possible cpus");
if (!ops) {
ERROR("Called with uninitialized cgroup operations");
return;
if (maxisol > 0) {
isolmask = lxc_cpumask(isolcpus, maxposs);
if (!isolmask)
return log_error_errno(false, errno, "Failed to create cpumask for isolated cpus");
}
if (!ops->hierarchies)
return;
if (maxoffline > 0) {
offlinemask = lxc_cpumask(offlinecpus, maxposs);
if (!offlinemask)
return log_error_errno(false, errno, "Failed to create cpumask for offline cpus");
}
if (!handler) {
ERROR("Called with uninitialized handler");
return;
for (i = 0; i <= maxposs; i++) {
if ((isolmask && !is_set(i, isolmask)) ||
(offlinemask && !is_set(i, offlinemask)) ||
!is_set(i, possmask))
continue;
flipped_bit = true;
clear_bit(i, possmask);
}
if (!handler->conf) {
ERROR("Called with uninitialized conf");
return;
if (!flipped_bit) {
cpulist = lxc_cpumask_to_cpulist(possmask, maxposs);
TRACE("No isolated or offline cpus present in cpuset");
} else {
cpulist = move_ptr(posscpus);
TRACE("Removed isolated or offline cpus from cpuset");
}
conf = handler->conf;
if (!cpulist)
return log_error_errno(false, errno, "Failed to create cpu list");
len = strnprintf(pidstr, sizeof(pidstr), "%d", handler->monitor_pid);
if (len < 0)
return;
copy_parent:
if (!am_initialized) {
ret = lxc_writeat(dfd_child, "cpuset.cpus", cpulist, strlen(cpulist));
if (ret < 0)
return log_error_errno(false, errno, "Failed to write cpu list to \"%d/cpuset.cpus\"", dfd_child);
for (int i = 0; ops->hierarchies[i]; i++) {
__do_free char *pivot_path = NULL;
struct hierarchy *h = ops->hierarchies[i];
size_t offset;
int ret;
TRACE("Copied cpu settings of parent cgroup");
}
if (!h->monitor_full_path)
continue;
return true;
}
/* Monitor might have died before we entered the cgroup. */
if (handler->monitor_pid <= 0) {
WARN("No valid monitor process found while destroying cgroups");
goto try_lxc_rm_rf;
}
static bool cpuset1_initialize(int dfd_base, int dfd_next)
{
char mems[PATH_MAX];
ssize_t bytes;
char v;
if (conf->cgroup_meta.monitor_pivot_dir)
pivot_path = must_make_path(h->mountpoint, h->container_base_path,
conf->cgroup_meta.monitor_pivot_dir, CGROUP_PIVOT, NULL);
else if (conf->cgroup_meta.monitor_dir)
pivot_path = must_make_path(h->mountpoint, h->container_base_path,
conf->cgroup_meta.monitor_dir, CGROUP_PIVOT, NULL);
else if (conf->cgroup_meta.dir)
pivot_path = must_make_path(h->mountpoint, h->container_base_path,
conf->cgroup_meta.dir, CGROUP_PIVOT, NULL);
else
pivot_path = must_make_path(h->mountpoint, h->container_base_path,
CGROUP_PIVOT, NULL);
/*
* Determine whether the base cgroup has cpuset
* inheritance turned on.
*/
bytes = lxc_readat(dfd_base, "cgroup.clone_children", &v, 1);
if (bytes < 0)
return syserrno(false, "Failed to read file %d(cgroup.clone_children)", dfd_base);
offset = strlen(h->mountpoint) + strlen(h->container_base_path);
/*
* Initialize cpuset.cpus and remove any isolated
* and offline cpus.
*/
if (!cpuset1_cpus_initialize(dfd_base, dfd_next, v == '1'))
return syserrno(false, "Failed to initialize cpuset.cpus");
if (cg_legacy_handle_cpuset_hierarchy(h, pivot_path + offset))
SYSWARN("Failed to initialize cpuset %s/" CGROUP_PIVOT, pivot_path);
/* Read cpuset.mems from parent... */
bytes = lxc_readat(dfd_base, "cpuset.mems", mems, sizeof(mems));
if (bytes < 0)
return syserrno(false, "Failed to read file %d(cpuset.mems)", dfd_base);
ret = mkdir_p(pivot_path, 0755);
if (ret < 0 && errno != EEXIST) {
ERROR("Failed to create %s", pivot_path);
goto try_lxc_rm_rf;
}
/* ... and copy to first cgroup in the tree... */
bytes = lxc_writeat(dfd_next, "cpuset.mems", mems, bytes);
if (bytes < 0)
return syserrno(false, "Failed to write %d(cpuset.mems)", dfd_next);
ret = lxc_write_openat(pivot_path, "cgroup.procs", pidstr, len);
if (ret != 0) {
SYSWARN("Failed to move monitor %s to \"%s\"", pidstr, pivot_path);
continue;
}
/* ... and finally turn on cpuset inheritance. */
bytes = lxc_writeat(dfd_next, "cgroup.clone_children", "1", 1);
if (bytes < 0)
return syserrno(false, "Failed to write %d(cgroup.clone_children)", dfd_next);
try_lxc_rm_rf:
ret = lxc_rm_rf(h->monitor_full_path);
if (ret < 0)
WARN("Failed to destroy \"%s\"", h->monitor_full_path);
}
return log_trace(true, "Initialized cpuset in the legacy hierarchy");
}
static int mkdir_eexist_on_last(const char *dir, mode_t mode)
static int __cgroup_tree_create(int dfd_base, const char *path, mode_t mode,
bool cpuset_v1, bool eexist_ignore)
{
const char *tmp = dir;
const char *orig = dir;
size_t orig_len;
__do_close int dfd_final = -EBADF;
int dfd_cur = dfd_base;
int ret = 0;
size_t len;
char *cur;
char buf[PATH_MAX];
orig_len = strlen(dir);
do {
__do_free char *makeme = NULL;
int ret;
size_t cur_len;
if (is_empty_string(path))
return ret_errno(-EINVAL);
len = strlcpy(buf, path, sizeof(buf));
if (len >= sizeof(buf))
return -E2BIG;
lxc_iterate_parts(cur, buf, "/") {
/*
* Even though we vetted the paths when we parsed the config
* we're paranoid here and check that the path is neither
* absolute nor walks upwards.
*/
if (abspath(buf))
return syserrno_set(-EINVAL, "No absolute paths allowed");
dir = tmp + strspn(tmp, "/");
tmp = dir + strcspn(dir, "/");
if (strnequal(buf, "..", STRLITERALLEN("..")))
return syserrno_set(-EINVAL, "No upward walking paths allowed");
cur_len = dir - orig;
makeme = strndup(orig, cur_len);
if (!makeme)
return ret_set_errno(-1, ENOMEM);
ret = mkdirat(dfd_cur, cur, mode);
if (ret < 0) {
if (errno != EEXIST)
return syserrno(-errno, "Failed to create %d(%s)", dfd_cur, cur);
ret = mkdir(makeme, mode);
if (ret < 0 && ((errno != EEXIST) || (orig_len == cur_len)))
return log_warn_errno(-1, errno, "Failed to create directory \"%s\"", makeme);
} while (tmp != dir);
ret = -EEXIST;
}
TRACE("%s %d(%s) cgroup", !ret ? "Created" : "Reusing", dfd_cur, cur);
dfd_final = open_at(dfd_cur, cur, PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH, 0);
if (dfd_final < 0)
return syserrno(-errno, "Fail to open%s directory %d(%s)",
!ret ? " newly created" : "", dfd_base, cur);
if (dfd_cur != dfd_base)
close(dfd_cur);
else if (cpuset_v1 && !cpuset1_initialize(dfd_base, dfd_final))
return syserrno(-EINVAL, "Failed to initialize cpuset controller in the legacy hierarchy");
/*
* Leave dfd_final pointing to the last fd we opened so
* it will be automatically zapped if we return early.
*/
dfd_cur = dfd_final;
}
return 0;
/* The final cgroup must be successfully created by us. */
if (ret) {
if (ret != -EEXIST || !eexist_ignore)
return syserrno_set(ret, "Creating the final cgroup %d(%s) failed", dfd_base, path);
}
return move_fd(dfd_final);
}
static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
......@@ -1142,34 +1097,27 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
const char *cgroup_leaf, bool payload,
const char *cgroup_limit_dir)
{
__do_close int fd_limit = -EBADF, fd_final = -EBADF;
__do_free char *path = NULL, *limit_path = NULL;
int ret, ret_cpuset;
bool cpuset_v1 = false;
path = must_make_path(h->mountpoint, h->container_base_path, cgroup_leaf, NULL);
if (dir_exists(path))
return log_warn_errno(false, errno, "The %s cgroup already existed", path);
/* Don't bother with all the rest if the final cgroup already exists. */
if (exists_dir_at(h->dfd_base, cgroup_leaf))
return syswarn(false, "The %d(%s) cgroup already existed", h->dfd_base, cgroup_leaf);
ret_cpuset = cg_legacy_handle_cpuset_hierarchy(h, cgroup_leaf);
if (ret_cpuset < 0)
return log_error_errno(false, errno, "Failed to handle legacy cpuset controller");
/*
* The legacy cpuset controller needs massaging in case inheriting
* settings from its immediate ancestor cgroup hasn't been turned on.
*/
cpuset_v1 = !is_unified_hierarchy(h) && string_in_list(h->controllers, "cpuset");
if (payload && cgroup_limit_dir) {
/* with isolation both parts need to not already exist */
limit_path = must_make_path(h->mountpoint,
h->container_base_path,
cgroup_limit_dir, NULL);
/* With isolation both parts need to not already exist. */
fd_limit = __cgroup_tree_create(h->dfd_base, cgroup_limit_dir, 0755, cpuset_v1, false);
if (fd_limit < 0)
return syserrno(false, "Failed to create limiting cgroup %d(%s)", h->dfd_base, cgroup_limit_dir);
ret = mkdir_eexist_on_last(limit_path, 0755);
if (ret < 0)
return log_debug_errno(false,
errno, "Failed to create %s limiting cgroup",
limit_path);
h->cgfd_limit = lxc_open_dirfd(limit_path);
if (h->cgfd_limit < 0)
return log_error_errno(false, errno,
"Failed to open %s", path);
h->container_limit_path = move_ptr(limit_path);
limit_path = must_make_path(h->mountpoint, h->container_base_path, cgroup_limit_dir, NULL);
/*
* With isolation the devices legacy cgroup needs to be
......@@ -1182,30 +1130,26 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
return log_error(false, "Failed to setup legacy device limits");
}
ret = mkdir_eexist_on_last(path, 0755);
if (ret < 0) {
/*
* This is the cpuset controller and
* cg_legacy_handle_cpuset_hierarchy() has created our target
* directory for us to ensure correct initialization.
*/
if (ret_cpuset != 1 || cgroup_tree)
return log_debug_errno(false, errno, "Failed to create %s cgroup", path);
}
fd_final = __cgroup_tree_create(h->dfd_base, cgroup_leaf, 0755, cpuset_v1, false);
if (fd_final < 0)
return syserrno(false, "Failed to create %s cgroup %d(%s)", payload ? "payload" : "monitor", h->dfd_base, cgroup_limit_dir);
path = must_make_path(h->mountpoint, h->container_base_path, cgroup_leaf, NULL);
if (payload) {
h->cgfd_con = lxc_open_dirfd(path);
if (h->cgfd_con < 0)
return log_error_errno(false, errno, "Failed to open %s", path);
h->cgfd_con = move_fd(fd_final);
h->container_full_path = move_ptr(path);
if (h->cgfd_limit < 0)
if (fd_limit < 0)
h->cgfd_limit = h->cgfd_con;
if (!h->container_limit_path)
else
h->cgfd_limit = move_fd(fd_limit);
if (!limit_path)
h->container_limit_path = h->container_full_path;
else
h->container_limit_path = move_ptr(limit_path);
} else {
h->cgfd_mon = lxc_open_dirfd(path);
if (h->cgfd_mon < 0)
return log_error_errno(false, errno, "Failed to open %s", path);
h->cgfd_mon = move_fd(fd_final);
h->monitor_full_path = move_ptr(path);
}
......@@ -1234,6 +1178,82 @@ static void cgroup_tree_leaf_remove(struct hierarchy *h, bool payload)
SYSWARN("Failed to rmdir(\"%s\") cgroup", limit_path);
}
/*
 * cgfsng_monitor_destroy: tear down the monitor's cgroups.
 *
 * For every hierarchy, move the monitor process into a pivot cgroup
 * (CGROUP_PIVOT under the configured monitor/payload directory) so that
 * the monitor cgroup itself becomes empty, then recursively remove the
 * monitor cgroup tree. Failures are logged but do not abort the loop.
 */
__cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops,
struct lxc_handler *handler)
{
int len;
char pidstr[INTTYPE_TO_STRLEN(pid_t)];
const struct lxc_conf *conf;
if (!ops) {
ERROR("Called with uninitialized cgroup operations");
return;
}
if (!ops->hierarchies)
return;
if (!handler) {
ERROR("Called with uninitialized handler");
return;
}
if (!handler->conf) {
ERROR("Called with uninitialized conf");
return;
}
conf = handler->conf;
/* Stringify the monitor pid once; reused for every hierarchy below. */
len = strnprintf(pidstr, sizeof(pidstr), "%d", handler->monitor_pid);
if (len < 0)
return;
for (int i = 0; ops->hierarchies[i]; i++) {
__do_close int fd_pivot = -EBADF;
__do_free char *pivot_path = NULL;
struct hierarchy *h = ops->hierarchies[i];
bool cpuset_v1 = false;
int ret;
if (!h->monitor_full_path)
continue;
/* Monitor might have died before we entered the cgroup. */
if (handler->monitor_pid <= 0) {
WARN("No valid monitor process found while destroying cgroups");
goto try_lxc_rm_rf;
}
/* Pick the most specific configured directory for the pivot cgroup. */
if (conf->cgroup_meta.monitor_pivot_dir)
pivot_path = must_make_path(conf->cgroup_meta.monitor_pivot_dir, CGROUP_PIVOT, NULL);
else if (conf->cgroup_meta.monitor_dir)
pivot_path = must_make_path(conf->cgroup_meta.monitor_dir, CGROUP_PIVOT, NULL);
else if (conf->cgroup_meta.dir)
pivot_path = must_make_path(conf->cgroup_meta.dir, CGROUP_PIVOT, NULL);
else
pivot_path = must_make_path(CGROUP_PIVOT, NULL);
/* Legacy cpuset cgroups need explicit initialization on creation. */
cpuset_v1 = !is_unified_hierarchy(h) && string_in_list(h->controllers, "cpuset");
/* An already-existing pivot cgroup is fine (eexist_ignore == true). */
fd_pivot = __cgroup_tree_create(h->dfd_base, pivot_path, 0755, cpuset_v1, true);
if (fd_pivot < 0) {
SYSWARN("Failed to create pivot cgroup %d(%s)", h->dfd_base, pivot_path);
continue;
}
/* Move the monitor out so its cgroup can be removed. */
ret = lxc_writeat(fd_pivot, "cgroup.procs", pidstr, len);
if (ret != 0) {
SYSWARN("Failed to move monitor %s to \"%s\"", pidstr, pivot_path);
continue;
}
try_lxc_rm_rf:
ret = lxc_rm_rf(h->monitor_full_path);
if (ret < 0)
WARN("Failed to destroy \"%s\"", h->monitor_full_path);
}
}
/*
* Check we have no lxc.cgroup.dir, and that lxc.cgroup.dir.limit_prefix is a
* proper prefix directory of lxc.cgroup.dir.payload.
......@@ -1332,7 +1352,7 @@ __cgfsng_ops static bool cgfsng_monitor_create(struct cgroup_ops *ops, struct lx
monitor_cgroup, false, NULL))
continue;
DEBUG("Failed to create cgroup \"%s\"", ops->hierarchies[i]->monitor_full_path ?: "(null)");
DEBUG("Failed to create cgroup \"%s\"", maybe_empty(ops->hierarchies[i]->monitor_full_path));
for (int j = 0; j < i; j++)
cgroup_tree_leaf_remove(ops->hierarchies[j], false);
......@@ -3251,32 +3271,6 @@ __cgfsng_ops static bool cgfsng_payload_delegate_controllers(struct cgroup_ops *
return __cgfsng_delegate_controllers(ops, ops->container_cgroup);
}
/*
 * cgroup_use_wants_controllers: check whether the lxc.cgroup.use setting
 * (ops->cgroup_use) covers every controller in @controllers.
 *
 * Returns true when no lxc.cgroup.use restriction is configured or when
 * every controller was requested; false as soon as one is missing.
 */
static bool cgroup_use_wants_controllers(const struct cgroup_ops *ops,
char **controllers)
{
/* No restriction configured: all controllers are wanted. */
if (!ops->cgroup_use)
return true;
for (char **cur_ctrl = controllers; cur_ctrl && *cur_ctrl; cur_ctrl++) {
bool found = false;
for (char **cur_use = ops->cgroup_use; cur_use && *cur_use; cur_use++) {
if (!strequal(*cur_use, *cur_ctrl))
continue;
found = true;
break;
}
if (found)
continue;
/* Controller not listed in lxc.cgroup.use. */
return false;
}
return true;
}
static void cg_unified_delegate(char ***delegate)
{
__do_free char *buf = NULL;
......@@ -3343,7 +3337,6 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg
__do_free_string_list char **controller_list = NULL;
int type;
bool writeable;
struct hierarchy *new;
type = get_cgroup_version(line);
if (type == 0)
......@@ -3382,16 +3375,14 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg
}
if (type == CGROUP_SUPER_MAGIC)
base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, controller_list[0], CGROUP_SUPER_MAGIC);
base_cgroup = cg_hybrid_get_current_cgroup(relative, basecginfo, controller_list[0], CGROUP_SUPER_MAGIC);
else
base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, NULL, CGROUP2_SUPER_MAGIC);
base_cgroup = cg_hybrid_get_current_cgroup(relative, basecginfo, NULL, CGROUP2_SUPER_MAGIC);
if (!base_cgroup) {
WARN("Failed to find current cgroup");
continue;
}
trim(base_cgroup);
prune_init_scope(base_cgroup);
if (type == CGROUP2_SUPER_MAGIC)
writeable = test_writeable_v2(mountpoint, base_cgroup);
else
......@@ -3401,41 +3392,16 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg
continue;
}
if (type == CGROUP2_SUPER_MAGIC) {
char *cgv2_ctrl_path;
cgv2_ctrl_path = must_make_path(mountpoint, base_cgroup,
"cgroup.controllers",
NULL);
controller_list = cg_unified_get_controllers(-EBADF, cgv2_ctrl_path);
free(cgv2_ctrl_path);
if (!controller_list) {
controller_list = cg_unified_make_empty_controller();
TRACE("No controllers are enabled for "
"delegation in the unified hierarchy");
}
}
/* Exclude all controllers that cgroup use does not want. */
if (!cgroup_use_wants_controllers(ops, controller_list)) {
TRACE("Skipping controller");
continue;
}
new = add_hierarchy(&ops->hierarchies, move_ptr(controller_list), move_ptr(mountpoint), move_ptr(base_cgroup), type);
if (!new)
return log_error_errno(-1, errno, "Failed to add cgroup hierarchy");
if (type == CGROUP2_SUPER_MAGIC && !ops->unified) {
if (unprivileged)
cg_unified_delegate(&new->cgroup2_chown);
ops->unified = new;
}
if (type == CGROUP2_SUPER_MAGIC)
ret = add_hierarchy(ops, NULL, move_ptr(mountpoint), move_ptr(base_cgroup), type);
else
ret = add_hierarchy(ops, move_ptr(controller_list), move_ptr(mountpoint), move_ptr(base_cgroup), type);
if (ret)
return syserrno(ret, "Failed to add cgroup hierarchy");
if (ops->unified && unprivileged)
cg_unified_delegate(&(ops->unified)->cgroup2_chown);
}
TRACE("Writable cgroup hierarchies:");
lxc_cgfsng_print_hierarchies(ops);
/* verify that all controllers in cgroup.use and all crucial
* controllers are accounted for
*/
......@@ -3448,8 +3414,7 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg
/* Get current cgroup from /proc/self/cgroup for the cgroupfs v2 hierarchy. */
static char *cg_unified_get_current_cgroup(bool relative)
{
__do_free char *basecginfo = NULL;
char *copy;
__do_free char *basecginfo = NULL, *copy = NULL;
char *base_cgroup;
if (!relative && (geteuid() == 0))
......@@ -3467,48 +3432,32 @@ static char *cg_unified_get_current_cgroup(bool relative)
copy = copy_to_eol(base_cgroup);
if (!copy)
return NULL;
trim(copy);
if (!relative) {
base_cgroup = prune_init_scope(copy);
if (!base_cgroup)
return NULL;
} else {
base_cgroup = copy;
}
if (abspath(base_cgroup))
base_cgroup = deabs(base_cgroup);
return trim(copy);
/* We're allowing base_cgroup to be "". */
return strdup(base_cgroup);
}
static int cg_unified_init(struct cgroup_ops *ops, bool relative,
bool unprivileged)
{
__do_close int cgroup_root_fd = -EBADF;
__do_free char *base_cgroup = NULL, *controllers_path = NULL;
__do_free_string_list char **delegatable = NULL;
__do_free struct hierarchy *new = NULL;
__do_free char *base_cgroup = NULL;
int ret;
ret = unified_cgroup_hierarchy();
if (ret == -ENOMEDIUM)
return ret_errno(ENOMEDIUM);
if (ret != CGROUP2_SUPER_MAGIC)
return 0;
base_cgroup = cg_unified_get_current_cgroup(relative);
if (!base_cgroup)
return ret_errno(EINVAL);
if (!relative)
prune_init_scope(base_cgroup);
cgroup_root_fd = openat(-EBADF, DEFAULT_CGROUP_MOUNTPOINT,
O_NOCTTY | O_CLOEXEC | O_NOFOLLOW | O_DIRECTORY);
if (cgroup_root_fd < 0)
return -errno;
/*
* We assume that the cgroup we're currently in has been delegated to
* us and we are free to further delege all of the controllers listed
* in cgroup.controllers further down the hierarchy.
*/
controllers_path = must_make_path_relative(base_cgroup, "cgroup.controllers", NULL);
delegatable = cg_unified_get_controllers(cgroup_root_fd, controllers_path);
if (!delegatable)
delegatable = cg_unified_make_empty_controller();
if (!delegatable[0])
TRACE("No controllers are enabled for delegation");
/* TODO: If the user requested specific controllers via lxc.cgroup.use
* we should verify here. The reason I'm not doing it right is that I'm
......@@ -3517,31 +3466,41 @@ static int cg_unified_init(struct cgroup_ops *ops, bool relative,
* controllers per container.
*/
new = add_hierarchy(&ops->hierarchies,
move_ptr(delegatable),
ret = add_hierarchy(ops, NULL,
must_copy_string(DEFAULT_CGROUP_MOUNTPOINT),
move_ptr(base_cgroup),
CGROUP2_SUPER_MAGIC);
if (!new)
return log_error_errno(-1, errno, "Failed to add unified cgroup hierarchy");
move_ptr(base_cgroup), CGROUP2_SUPER_MAGIC);
if (ret)
return syserrno(ret, "Failed to add unified cgroup hierarchy");
if (unprivileged)
cg_unified_delegate(&new->cgroup2_chown);
cg_unified_delegate(&(ops->unified)->cgroup2_chown);
if (bpf_devices_cgroup_supported())
new->bpf_device_controller = 1;
ops->unified->bpf_device_controller = 1;
ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED;
ops->unified = move_ptr(new);
return CGROUP2_SUPER_MAGIC;
}
static int cg_init(struct cgroup_ops *ops, struct lxc_conf *conf)
static int __cgroup_init(struct cgroup_ops *ops, struct lxc_conf *conf)
{
__do_close int dfd = -EBADF;
bool relative = conf->cgroup_meta.relative;
int ret;
const char *tmp;
bool relative = conf->cgroup_meta.relative;
if (ops->dfd_mnt_cgroupfs_host >= 0)
return ret_errno(EINVAL);
/*
* I don't see the need for allowing symlinks here. If users want to
* have their hierarchy available in different locations I strongly
* suggest bind-mounts.
*/
dfd = open_at(-EBADF, DEFAULT_CGROUP_MOUNTPOINT,
PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_ABSOLUTE_XDEV, 0);
if (dfd < 0)
return syserrno(-errno, "Failed to open " DEFAULT_CGROUP_MOUNTPOINT);
tmp = lxc_global_config_value("lxc.cgroup.use");
if (tmp) {
......@@ -3555,14 +3514,23 @@ static int cg_init(struct cgroup_ops *ops, struct lxc_conf *conf)
must_append_string(&ops->cgroup_use, cur);
}
ret = cg_unified_init(ops, relative, !lxc_list_empty(&conf->id_map));
if (ret < 0)
return -1;
/*
* Keep dfd referenced by the cleanup function and actually move the fd
* once we know the initialization succeeded. So if we fail we clean up
* the dfd.
*/
ops->dfd_mnt_cgroupfs_host = dfd;
if (ret == CGROUP2_SUPER_MAGIC)
return 0;
if (unified_cgroup_fd(dfd))
ret = cg_unified_init(ops, relative, !lxc_list_empty(&conf->id_map));
else
ret = cg_hybrid_init(ops, relative, !lxc_list_empty(&conf->id_map));
if (ret < 0)
return syserrno(ret, "Failed to initialize cgroups");
return cg_hybrid_init(ops, relative, !lxc_list_empty(&conf->id_map));
/* Transfer ownership to cgroup_ops. */
move_fd(dfd);
return 0;
}
__cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops)
......@@ -3589,8 +3557,9 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
return ret_set_errno(NULL, ENOMEM);
cgfsng_ops->cgroup_layout = CGROUP_LAYOUT_UNKNOWN;
cgfsng_ops->dfd_mnt_cgroupfs_host = -EBADF;
if (cg_init(cgfsng_ops, conf))
if (__cgroup_init(cgfsng_ops, conf))
return NULL;
cgfsng_ops->data_init = cgfsng_data_init;
......
......@@ -33,10 +33,14 @@ struct cgroup_ops *cgroup_init(struct lxc_conf *conf)
if (!cgroup_ops)
return log_error_errno(NULL, errno, "Failed to initialize cgroup driver");
if (!cgroup_ops->hierarchies) {
cgroup_exit(cgroup_ops);
return log_error_errno(NULL, ENOENT, "No cgroup hierarchies found");
}
if (cgroup_ops->data_init(cgroup_ops)) {
cgroup_exit(cgroup_ops);
return log_error_errno(NULL, errno,
"Failed to initialize cgroup data");
return log_error_errno(NULL, errno, "Failed to initialize cgroup data");
}
TRACE("Initialized cgroup driver %s", cgroup_ops->driver);
......@@ -68,6 +72,9 @@ void cgroup_exit(struct cgroup_ops *ops)
if (ops->cgroup2_devices)
bpf_program_free(ops->cgroup2_devices);
if (ops->dfd_mnt_cgroupfs_host >= 0)
close(ops->dfd_mnt_cgroupfs_host);
for (struct hierarchy **it = ops->hierarchies; it && *it; it++) {
for (char **p = (*it)->controllers; p && *p; p++)
free(*p);
......@@ -79,12 +86,34 @@ void cgroup_exit(struct cgroup_ops *ops)
free((*it)->mountpoint);
free((*it)->container_base_path);
free((*it)->container_full_path);
free((*it)->monitor_full_path);
if ((*it)->cgfd_con >= 0)
close((*it)->cgfd_con);
{
free((*it)->container_full_path);
if ((*it)->container_full_path != (*it)->container_limit_path)
free((*it)->monitor_full_path);
}
{
if ((*it)->cgfd_limit >= 0 && (*it)->cgfd_con != (*it)->cgfd_limit)
close((*it)->cgfd_limit);
if ((*it)->cgfd_con >= 0)
close((*it)->cgfd_con);
}
if ((*it)->cgfd_mon >= 0)
close((*it)->cgfd_mon);
{
if ((*it)->dfd_base >= 0 && (*it)->dfd_mnt != (*it)->dfd_base)
close((*it)->dfd_base);
if ((*it)->dfd_mnt >= 0)
close((*it)->dfd_mnt);
}
free(*it);
}
free(ops->hierarchies);
......@@ -95,21 +124,13 @@ void cgroup_exit(struct cgroup_ops *ops)
}
#define INIT_SCOPE "/init.scope"
void prune_init_scope(char *cg)
char *prune_init_scope(char *cg)
{
char *point;
if (is_empty_string(cg))
return NULL;
if (!cg)
return;
point = cg + strlen(cg) - strlen(INIT_SCOPE);
if (point < cg)
return;
if (strnequal(cg, INIT_SCOPE, STRLITERALLEN(INIT_SCOPE)))
return cg + STRLITERALLEN(INIT_SCOPE);
if (strequal(point, INIT_SCOPE)) {
if (point == cg)
*(point + 1) = '\0';
else
*point = '\0';
}
return cg;
}
......@@ -91,12 +91,24 @@ struct hierarchy {
unsigned int bpf_device_controller:1;
unsigned int freezer_controller:1;
/* container cgroup fd */
/* File descriptor for the container's cgroup @container_full_path. */
int cgfd_con;
/* limiting cgroup fd (may be equal to cgfd_con if not separated) */
/*
* File descriptor for the container's limiting cgroup
* @container_limit_path.
* Will be equal to @cgfd_con if no limiting cgroup has been requested.
*/
int cgfd_limit;
/* monitor cgroup fd */
/* File descriptor for the monitor's cgroup @monitor_full_path. */
int cgfd_mon;
/* File descriptor for the controller's mountpoint @mountpoint. */
int dfd_mnt;
/* File descriptor for the controller's base cgroup path @container_base_path. */
int dfd_base;
};
struct cgroup_ops {
......@@ -106,6 +118,18 @@ struct cgroup_ops {
/* string constant */
const char *version;
/*
* File descriptor for the host's cgroupfs mount. On
* CGROUP_LAYOUT_LEGACY or CGROUP_LAYOUT_HYBRID hybrid systems
* @dfd_mnt_cgroupfs_host will be a tmpfs fd and the individual
* controllers will be cgroupfs fds. On CGROUP_LAYOUT_UNIFIED it will
* be a cgroupfs fd itself.
*
* So for CGROUP_LAYOUT_LEGACY or CGROUP_LAYOUT_HYBRID we allow
* mountpoint crossing iff we cross from a tmpfs into a cgroupfs mount.
* */
int dfd_mnt_cgroupfs_host;
/* What controllers is the container supposed to use. */
char **cgroup_use;
char *cgroup_pattern;
......@@ -186,7 +210,7 @@ __hidden extern struct cgroup_ops *cgroup_init(struct lxc_conf *conf);
__hidden extern void cgroup_exit(struct cgroup_ops *ops);
define_cleanup_function(struct cgroup_ops *, cgroup_exit);
__hidden extern void prune_init_scope(char *cg);
__hidden extern char *prune_init_scope(char *cg);
__hidden extern int cgroup_attach(const struct lxc_conf *conf, const char *name,
const char *lxcpath, pid_t pid);
......
......@@ -83,22 +83,6 @@ bool test_writeable_v2(char *mountpoint, char *path)
return (access(cgroup_threads_file, W_OK) == 0);
}
int unified_cgroup_hierarchy(void)
{
int ret;
struct statfs fs;
ret = statfs(DEFAULT_CGROUP_MOUNTPOINT, &fs);
if (ret < 0)
return -ENOMEDIUM;
if (is_fs_type(&fs, CGROUP2_SUPER_MAGIC))
return CGROUP2_SUPER_MAGIC;
return 0;
}
int unified_cgroup_fd(int fd)
{
......
......@@ -29,8 +29,6 @@ __hidden extern bool test_writeable_v1(char *mountpoint, char *path);
*/
__hidden extern bool test_writeable_v2(char *mountpoint, char *path);
__hidden extern int unified_cgroup_hierarchy(void);
__hidden extern int unified_cgroup_fd(int fd);
static inline bool cgns_supported(void)
......
......@@ -31,15 +31,15 @@ int lxc_readat(int dirfd, const char *filename, void *buf, size_t count)
__do_close int fd = -EBADF;
ssize_t ret;
fd = openat(dirfd, filename, O_RDONLY | O_CLOEXEC);
fd = open_at(dirfd, filename, PROTECT_OPEN, PROTECT_LOOKUP_BENEATH, 0);
if (fd < 0)
return -1;
return -errno;
ret = lxc_read_nointr(fd, buf, count);
if (ret < 0 || (size_t)ret != count)
return -1;
if (ret < 0)
return -errno;
return 0;
return ret;
}
int lxc_writeat(int dirfd, const char *filename, const void *buf, size_t count)
......@@ -630,21 +630,31 @@ int timens_offset_write(clockid_t clk_id, int64_t s_offset, int64_t ns_offset)
bool exists_dir_at(int dir_fd, const char *path)
{
struct stat sb;
int ret;
struct stat sb;
ret = fstatat(dir_fd, path, &sb, 0);
if (ret < 0)
return false;
return S_ISDIR(sb.st_mode);
ret = S_ISDIR(sb.st_mode);
if (ret)
errno = EEXIST;
else
errno = ENOTDIR;
return ret;
}
bool exists_file_at(int dir_fd, const char *path)
{
int ret;
struct stat sb;
return fstatat(dir_fd, path, &sb, 0) == 0;
ret = fstatat(dir_fd, path, &sb, 0);
if (ret == 0)
errno = EEXIST;
return ret == 0;
}
int open_at(int dfd, const char *path, unsigned int o_flags,
......
......@@ -501,6 +501,20 @@ __lxc_unused static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo, \
__internal_ret__; \
})
#define syswarn(__ret__, format, ...) \
({ \
typeof(__ret__) __internal_ret__ = (__ret__); \
SYSWARN(format, ##__VA_ARGS__); \
__internal_ret__; \
})
#define sysdebug(__ret__, format, ...) \
({ \
typeof(__ret__) __internal_ret__ = (__ret__); \
SYSDEBUG(format, ##__VA_ARGS__); \
__internal_ret__; \
})
#define syserrno_set(__ret__, format, ...) \
({ \
typeof(__ret__) __internal_ret__ = (__ret__); \
......
......@@ -813,6 +813,8 @@ char *must_make_path(const char *first, ...)
va_start(args, first);
while ((cur = va_arg(args, char *)) != NULL) {
buf_len = strlen(cur);
if (buf_len == 0)
continue;
full_len += buf_len;
if (cur[0] != '/')
......
......@@ -150,6 +150,11 @@ static inline bool abspath(const char *str)
return *str == '/';
}
static inline char *deabs(char *str)
{
return str + strspn(str, "/");
}
#define strnprintf(buf, buf_size, ...) \
({ \
int __ret_strnprintf; \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment