Commit 33ad9f1a by Christian Seiler Committed by Stéphane Graber

cgroup: Major rewrite of cgroup logic

This patch rewrites most of the cgroup logic. It creates a set of data structures to store the kernel state of the cgroup hierarchies and their mount points.

Mainly, everything is now grouped with respect to the hierarchies of the system. Multiple controllers may be mounted together or separately to different hierarchies; the data structures reflect this. Each hierarchy may have multiple mount points (that were created previously using the bind mount method), and each of these mount points may point to a different prefix inside the cgroup tree. The current code does not make any assumptions regarding the mount points; it just parses /proc/self/mountinfo to acquire the relevant information.

The only requirement is that the current cgroup of either init (if cgroup.pattern starts with '/' and the tools are executed as root) or the current process (otherwise) is accessible. The root cgroup need not be accessible.

The configuration option cgroup.pattern is introduced. For root-executed containers, it specifies which format the cgroups should be in. Example values may include '/lxc/%n', 'lxc/%n', '%n' or '/machine/%n.lxc'. Any occurrence of '%n' is replaced with the name of the container (and if clashes occur in any hierarchy, -1, -2, etc. are appended globally). If the pattern starts with '/', new containers' cgroups will be located relative to init's cgroup; if it doesn't, they will be located relative to the current process's cgroup.

Some changes to the cgroup.h API have been made to make it more consistent, both with respect to naming and with respect to the parameters. This causes some changes in other parts of the code that are included in the patch.

There has been some testing of this functionality, but there are probably still quite a few bugs in there, especially for people with different configurations.

Signed-off-by: Christian Seiler <christian@iwakd.de>
Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
parent bfa3f007
...@@ -727,7 +727,24 @@ int lxc_attach(const char* name, const char* lxcpath, lxc_attach_exec_t exec_fun ...@@ -727,7 +727,24 @@ int lxc_attach(const char* name, const char* lxcpath, lxc_attach_exec_t exec_fun
/* attach to cgroup, if requested */ /* attach to cgroup, if requested */
if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) { if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) {
ret = lxc_cgroup_attach(attached_pid, name, lxcpath); struct cgroup_meta_data *meta_data;
struct cgroup_process_info *container_info;
meta_data = lxc_cgroup_load_meta();
if (!meta_data) {
ERROR("could not move attached process %ld to cgroup of container", (long)attached_pid);
goto cleanup_error;
}
container_info = lxc_cgroup_get_container_info(name, lxcpath, meta_data);
lxc_cgroup_put_meta(meta_data);
if (!container_info) {
ERROR("could not move attached process %ld to cgroup of container", (long)attached_pid);
goto cleanup_error;
}
ret = lxc_cgroup_enter(container_info, attached_pid, false);
lxc_cgroup_process_info_free(container_info);
if (ret < 0) { if (ret < 0) {
ERROR("could not move attached process %ld to cgroup of container", (long)attached_pid); ERROR("could not move attached process %ld to cgroup of container", (long)attached_pid);
goto cleanup_error; goto cleanup_error;
......
...@@ -42,6 +42,7 @@ ...@@ -42,6 +42,7 @@
#include "commands.h" #include "commands.h"
#include "list.h" #include "list.h"
#include "conf.h" #include "conf.h"
#include "utils.h"
#include <lxc/log.h> #include <lxc/log.h>
#include <lxc/cgroup.h> #include <lxc/cgroup.h>
...@@ -61,1245 +62,1365 @@ ...@@ -61,1245 +62,1365 @@
lxc_log_define(lxc_cgroup, lxc); lxc_log_define(lxc_cgroup, lxc);
#define MTAB "/proc/mounts" static struct cgroup_process_info *lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str, struct cgroup_meta_data *meta);
static char **subsystems_from_mount_options(const char *mount_options, char **kernel_list);
/* In the case of a bind mount, there could be two long pathnames in the static void lxc_cgroup_mount_point_free(struct cgroup_mount_point *mp);
* mntent plus options so use large enough buffer size static void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy *h);
*/ static bool is_valid_cgroup(const char *name);
#define LARGE_MAXPATHLEN 4 * MAXPATHLEN static int create_or_remove_cgroup(bool remove, struct cgroup_mount_point *mp, const char *path);
static int create_cgroup(struct cgroup_mount_point *mp, const char *path);
/* Check if a mount is a cgroup hierarchy for any subsystem. static int remove_cgroup(struct cgroup_mount_point *mp, const char *path);
* Return the first subsystem found (or NULL if none). static char *cgroup_to_absolute_path(struct cgroup_mount_point *mp, const char *path, const char *suffix);
*/ static struct cgroup_process_info *find_info_for_subsystem(struct cgroup_process_info *info, const char *subsystem);
static char *mount_has_subsystem(const struct mntent *mntent) static int do_cgroup_get(const char *cgroup_path, const char *sub_filename, char *value, size_t len);
static int do_cgroup_set(const char *cgroup_path, const char *sub_filename, const char *value);
static bool cgroup_devices_has_allow_or_deny(struct lxc_handler *h, char *v, bool for_allow);
static int do_setup_cgroup(struct lxc_handler *h, struct lxc_list *cgroup_settings, bool do_devices);
static int cgroup_recursive_task_count(const char *cgroup_path);
static int count_lines(const char *fn);
static int handle_clone_children(struct cgroup_mount_point *mp, char *cgroup_path);
struct cgroup_meta_data *lxc_cgroup_load_meta()
{ {
FILE *f; const char *cgroup_use = NULL;
char *c, *ret = NULL; char **cgroup_use_list = NULL;
char line[MAXPATHLEN]; struct cgroup_meta_data *md = NULL;
int saved_errno;
/* read the list of subsystems from the kernel */
f = fopen("/proc/cgroups", "r"); errno = 0;
if (!f) cgroup_use = lxc_global_config_value("cgroup.use");
return 0; if (!cgroup_use && errno != 0)
return NULL;
/* skip the first line, which contains column headings */ if (cgroup_use) {
if (!fgets(line, MAXPATHLEN, f)) { cgroup_use_list = lxc_string_split_and_trim(cgroup_use, ',');
fclose(f); if (!cgroup_use_list)
return 0; return NULL;
}
while (fgets(line, MAXPATHLEN, f)) {
c = strchr(line, '\t');
if (!c)
continue;
*c = '\0';
ret = hasmntopt(mntent, line);
if (ret)
break;
} }
fclose(f); md = lxc_cgroup_load_meta2((const char **)cgroup_use_list);
return ret; saved_errno = errno;
lxc_free_array((void **)cgroup_use_list, free);
errno = saved_errno;
return md;
} }
/* struct cgroup_meta_data *lxc_cgroup_load_meta2(const char **subsystem_whitelist)
* Determine mountpoint for a cgroup subsystem. {
* @dest: a passed-in buffer of at least size MAXPATHLEN into which the path FILE *proc_cgroups = NULL;
* is copied. FILE *proc_self_cgroup = NULL;
* @subsystem: cgroup subsystem (i.e. freezer) FILE *proc_self_mountinfo = NULL;
bool all_kernel_subsystems = true;
bool all_named_subsystems = false;
struct cgroup_meta_data *meta_data = NULL;
char **kernel_subsystems = NULL;
size_t kernel_subsystems_count = 0;
size_t kernel_subsystems_capacity = 0;
size_t hierarchy_capacity = 0;
size_t mount_point_capacity = 0;
size_t mount_point_count = 0;
char **tokens = NULL;
size_t token_capacity = 0;
char *line = NULL;
size_t sz = 0;
int r, saved_errno = 0;
/* if the subsystem whitelist is not specified, include all
* hierarchies that contain kernel subsystems by default but
* no hierarchies that only contain named subsystems
* *
* Returns true on success, false on error. * if it is specified, the specifier @all will select all
* hierarchies, @kernel will select all hierarchies with
* kernel subsystems and @named will select all named
* hierarchies
*/ */
bool get_subsys_mount(char *dest, const char *subsystem) all_kernel_subsystems = subsystem_whitelist ?
{ (lxc_string_in_array("@kernel", subsystem_whitelist) || lxc_string_in_array("@all", subsystem_whitelist)) :
struct mntent mntent_r; true;
FILE *file = NULL; all_named_subsystems = subsystem_whitelist ?
int ret; (lxc_string_in_array("@named", subsystem_whitelist) || lxc_string_in_array("@all", subsystem_whitelist)) :
bool retv = false; false;
char buf[LARGE_MAXPATHLEN] = {0};
meta_data = calloc(1, sizeof(struct cgroup_meta_data));
if (!meta_data)
return NULL;
meta_data->ref = 1;
file = setmntent(MTAB, "r"); /* Step 1: determine all kernel subsystems */
if (!file) { proc_cgroups = fopen_cloexec("/proc/cgroups", "r");
SYSERROR("failed to open %s", MTAB); if (!proc_cgroups)
return -1; goto out_error;
}
while (getline(&line, &sz, proc_cgroups) != -1) {
char *tab1;
char *tab2;
int hierarchy_number;
while ((getmntent_r(file, &mntent_r, buf, sizeof(buf)))) { if (line[0] == '#')
if (strcmp(mntent_r.mnt_type, "cgroup")) continue;
if (!line[0])
continue; continue;
if (subsystem) { tab1 = strchr(line, '\t');
if (!hasmntopt(&mntent_r, subsystem)) if (!tab1)
continue; continue;
} else { *tab1++ = '\0';
if (!mount_has_subsystem(&mntent_r)) tab2 = strchr(tab1, '\t');
if (!tab2)
continue; continue;
} *tab2 = '\0';
ret = snprintf(dest, MAXPATHLEN, "%s", mntent_r.mnt_dir); tab2 = NULL;
if (ret < 0 || ret >= MAXPATHLEN) hierarchy_number = strtoul(tab1, &tab2, 10);
goto fail; if (!tab2 || *tab2)
continue;
(void)hierarchy_number;
retv = true; r = lxc_grow_array((void ***)&kernel_subsystems, &kernel_subsystems_capacity, kernel_subsystems_count + 1, 12);
goto out; if (r < 0)
}; goto out_error;
kernel_subsystems[kernel_subsystems_count] = strdup(line);
if (!kernel_subsystems[kernel_subsystems_count])
goto out_error;
kernel_subsystems_count++;
}
fail: fclose(proc_cgroups);
DEBUG("Failed to find cgroup for %s\n", proc_cgroups = NULL;
subsystem ? subsystem : "(NULL)");
out:
endmntent(file);
return retv;
}
/* /* Step 2: determine all hierarchies (by reading /proc/self/cgroup),
* is_in_cgroup: check whether pid is found in the passed-in cgroup tasks * since mount points don't specify hierarchy number and
* file. * /proc/cgroups does not contain named hierarchies
* @path: in full path to a cgroup tasks file
* Note that in most cases the file will simply not exist, which is ok - it
* just means that's not our cgroup.
*/ */
static bool is_in_cgroup(pid_t pid, char *path) proc_self_cgroup = fopen_cloexec("/proc/self/cgroup", "r");
{ /* if for some reason (because of setns() and pid namespace for example),
int cmppid; * /proc/self is not valid, we try /proc/1/cgroup... */
FILE *f = fopen(path, "r"); if (!proc_self_cgroup)
char *line = NULL; proc_self_cgroup = fopen_cloexec("/proc/1/cgroup", "r");
size_t sz = 0; if (!proc_self_cgroup)
goto out_error;
while (getline(&line, &sz, proc_self_cgroup) != -1) {
/* file format: hierarchy:subsystems:group,
* we only extract hierarchy and subsystems
* here */
char *colon1;
char *colon2;
int hierarchy_number;
struct cgroup_hierarchy *h = NULL;
char **p;
if (!line[0])
continue;
if (!f) colon1 = strchr(line, ':');
return false; if (!colon1)
while (getline(&line, &sz, f) != -1) { continue;
if (sscanf(line, "%d", &cmppid) == 1 && cmppid == pid) { *colon1++ = '\0';
fclose(f); colon2 = strchr(colon1, ':');
free(line); if (!colon2)
return true; continue;
} *colon2 = '\0';
}
fclose(f);
if (line)
free(line);
return false;
}
/* colon2 = NULL;
* lxc_cgroup_path_get: Get the absolute pathname for a cgroup hierarchy_number = strtoul(line, &colon2, 10);
* file for a running container. if (!colon2 || *colon2)
* continue;
* @subsystem : subsystem of interest (e.g. "freezer"). If NULL, then
* the first cgroup entry in mtab will be used. if (hierarchy_number > meta_data->maximum_hierarchy) {
* @name : name of container to connect to /* lxc_grow_array will never shrink, so even if we find a lower
* @lxcpath : the lxcpath in which the container is running * hierarchy number here, the array will never be smaller
*
* This is the exported function, which determines cgpath from the
* lxc-start of the @name container running in @lxcpath.
*
* Returns path on success, NULL on error. The caller must free()
* the returned path.
*/ */
char *lxc_cgroup_path_get(const char *subsystem, const char *name, r = lxc_grow_array((void ***)&meta_data->hierarchies, &hierarchy_capacity, hierarchy_number + 1, 12);
const char *lxcpath) if (r < 0)
{ goto out_error;
char *cgpath, *cgp, path[MAXPATHLEN], *pathp, *p;
pid_t initpid = lxc_cmd_get_init_pid(name, lxcpath);
int ret;
if (initpid < 0) meta_data->maximum_hierarchy = hierarchy_number;
return NULL; }
cgpath = lxc_cmd_get_cgroup_path(name, lxcpath, subsystem); /* this shouldn't happen, we had this already */
if (!cgpath) if (meta_data->hierarchies[hierarchy_number])
return NULL; goto out_error;
if (!get_subsys_mount(path, subsystem)) h = calloc(1, sizeof(struct cgroup_hierarchy));
return NULL; if (!h)
goto out_error;
pathp = path + strlen(path); meta_data->hierarchies[hierarchy_number] = h;
/*
* find a mntpt where i have the subsystem mounted, then find h->index = hierarchy_number;
* a subset cgpath under that which has pid in it. h->subsystems = lxc_string_split_and_trim(colon1, ',');
* if (!h->subsystems)
* If d->mntpt is '/a/b/c/d', and the mountpoint is /x/y/z, goto out_error;
* then look for ourselves in: /* see if this hierarchy should be considered */
* /x/y/z/a/b/c/d/tasks if (!all_kernel_subsystems || !all_named_subsystems) {
* /x/y/z/b/c/d/tasks for (p = h->subsystems; *p; p++) {
* /x/y/z/c/d/tasks if (!strncmp(*p, "name=", 5)) {
* /x/y/z/d/tasks if (all_named_subsystems || (subsystem_whitelist && lxc_string_in_array(*p, subsystem_whitelist))) {
* /x/y/z/tasks h->used = true;
*/
cgp = cgpath;
while (cgp[0]) {
ret = snprintf(pathp, MAXPATHLEN - (pathp - path), "%s/tasks", cgp);
if (ret < 0 || ret >= MAXPATHLEN)
return NULL;
if (!is_in_cgroup(initpid, path)) {
// does not exist, try the next one
cgp = index(cgp+1, '/');
if (!cgp)
break; break;
continue;
} }
} else {
if (all_kernel_subsystems || (subsystem_whitelist && lxc_string_in_array(*p, subsystem_whitelist))) {
h->used = true;
break; break;
} }
if (!cgp || !*cgp) {
// try just the path
ret = snprintf(pathp, MAXPATHLEN - (pathp - path), "/tasks");
if (ret < 0 || ret >= MAXPATHLEN)
return NULL;
if (!is_in_cgroup(initpid, path)) {
return NULL;
} }
return strdup("/");
} }
// path still has 'tasks' on the end, drop it } else {
if ((p = strrchr(path, '/')) != NULL) /* we want all hierarchy anyway */
*p = '\0'; h->used = true;
return strdup(path); }
} }
fclose(proc_self_cgroup);
proc_self_cgroup = NULL;
/* Step 3: determine all mount points of each hierarchy */
proc_self_mountinfo = fopen_cloexec("/proc/self/mountinfo", "r");
/* if for some reason (because of setns() and pid namespace for example),
* /proc/self is not valid, we try /proc/1/mountinfo... */
if (!proc_self_mountinfo)
proc_self_mountinfo = fopen_cloexec("/proc/1/mountinfo", "r");
if (!proc_self_mountinfo)
goto out_error;
while (getline(&line, &sz, proc_self_mountinfo) != -1) {
char *token, *saveptr = NULL;
size_t i, j, k;
struct cgroup_mount_point *mount_point;
struct cgroup_hierarchy *h;
char **subsystems;
if (line[0] && line[strlen(line) - 1] == '\n')
line[strlen(line) - 1] = '\0';
for (i = 0; (token = strtok_r(line, " ", &saveptr)); line = NULL) {
r = lxc_grow_array((void ***)&tokens, &token_capacity, i + 1, 64);
if (r < 0)
goto out_error;
tokens[i++] = token;
}
/* layout of /proc/self/mountinfo:
* 0: id
* 1: parent id
* 2: device major:minor
* 3: mount prefix
* 4: mount point
* 5: per-mount options
* [optional X]: additional data
* X+7: "-"
* X+8: type
* X+9: source
* X+10: per-superblock options
*/
for (j = 6; j < i && tokens[j]; j++)
if (!strcmp(tokens[j], "-"))
break;
/* /* could not find separator */
* do_cgroup_set: Write a value into a cgroup file if (j >= i || !tokens[j])
* continue;
* @path : absolute path to cgroup file /* there should be exactly three fields after
* @value : value to write into file * the separator
*
* Returns 0 on success, < 0 on error.
*/ */
static int do_cgroup_set(const char *path, const char *value) if (i != j + 4)
{ continue;
int fd, ret;
if ((fd = open(path, O_WRONLY)) < 0) { /* not a cgroup filesystem */
SYSERROR("open %s : %s", path, strerror(errno)); if (strcmp(tokens[j + 1], "cgroup") != 0)
return -1; continue;
subsystems = subsystems_from_mount_options(tokens[j + 3], kernel_subsystems);
if (!subsystems)
goto out_error;
h = NULL;
for (k = 1; k <= meta_data->maximum_hierarchy; k++) {
if (meta_data->hierarchies[k] &&
meta_data->hierarchies[k]->subsystems[0] &&
lxc_string_in_array(meta_data->hierarchies[k]->subsystems[0], (const char **)subsystems)) {
/* TODO: we could also check if the lists really match completely,
* just to have an additional sanity check */
h = meta_data->hierarchies[k];
break;
}
} }
lxc_free_array((void **)subsystems, free);
if ((ret = write(fd, value, strlen(value))) < 0) { r = lxc_grow_array((void ***)&meta_data->mount_points, &mount_point_capacity, mount_point_count + 1, 12);
close(fd); if (r < 0)
SYSERROR("write %s : %s", path, strerror(errno)); goto out_error;
return ret;
/* create mount point object */
mount_point = calloc(1, sizeof(*mount_point));
if (!mount_point)
goto out_error;
meta_data->mount_points[mount_point_count++] = mount_point;
mount_point->hierarchy = h;
mount_point->mount_point = strdup(tokens[4]);
mount_point->mount_prefix = strdup(tokens[3]);
if (!mount_point->mount_point || !mount_point->mount_prefix)
goto out_error;
mount_point->read_only = !lxc_string_in_list("rw", tokens[5], ',');
if (!strcmp(mount_point->mount_prefix, "/")) {
if (mount_point->read_only) {
if (!h->ro_absolute_mount_point)
h->ro_absolute_mount_point = mount_point;
} else {
if (!h->rw_absolute_mount_point)
h->rw_absolute_mount_point = mount_point;
}
} }
if ((ret = close(fd)) < 0) { k = lxc_array_len((void **)h->all_mount_points);
SYSERROR("close %s : %s", path, strerror(errno)); r = lxc_grow_array((void ***)&h->all_mount_points, &h->all_mount_point_capacity, k + 1, 4);
return ret; if (r < 0)
goto out_error;
h->all_mount_points[k] = mount_point;
} }
return 0;
}
static int in_subsys_list(const char *s, const char *list) /* oops, we couldn't find anything */
{ if (!meta_data->hierarchies || !meta_data->mount_points) {
char *token, *str, *saveptr = NULL; errno = EINVAL;
goto out_error;
}
if (!list || !s) return meta_data;
return 0;
str = alloca(strlen(list)+1); out_error:
strcpy(str, list); saved_errno = errno;
for (; (token = strtok_r(str, ",", &saveptr)); str = NULL) { if (proc_cgroups)
if (strcmp(s, token) == 0) fclose(proc_cgroups);
return 1; if (proc_self_cgroup)
} fclose(proc_self_cgroup);
if (proc_self_mountinfo)
fclose(proc_self_mountinfo);
free(line);
free(tokens);
lxc_free_array((void **)kernel_subsystems, free);
lxc_cgroup_put_meta(meta_data);
errno = saved_errno;
return NULL;
}
return 0; struct cgroup_meta_data *lxc_cgroup_get_meta(struct cgroup_meta_data *meta_data)
{
meta_data->ref++;
return meta_data;
} }
static char *cgroup_get_subsys_abspath(struct lxc_handler *handler, const char *subsys) struct cgroup_meta_data *lxc_cgroup_put_meta(struct cgroup_meta_data *meta_data)
{ {
struct cgroup_desc *d; size_t i;
if (!meta_data)
return NULL;
if (--meta_data->ref > 0)
return meta_data;
lxc_free_array((void **)meta_data->mount_points, (lxc_free_fn)lxc_cgroup_mount_point_free);
if (meta_data->hierarchies) {
for (i = 0; i <= meta_data->maximum_hierarchy; i++)
lxc_cgroup_hierarchy_free(meta_data->hierarchies[i]);
}
free(meta_data->hierarchies);
return NULL;
}
for (d = handler->cgroup; d; d = d->next) { struct cgroup_hierarchy *lxc_cgroup_find_hierarchy(struct cgroup_meta_data *meta_data, const char *subsystem)
if (in_subsys_list(subsys, d->subsystems)) {
return d->curcgroup; size_t i;
for (i = 0; i <= meta_data->maximum_hierarchy; i++) {
struct cgroup_hierarchy *h = meta_data->hierarchies[i];
if (h && lxc_string_in_array(subsystem, (const char **)h->subsystems))
return h;
} }
return NULL; return NULL;
} }
static bool cgroup_devices_has_deny(struct lxc_handler *h, char *v) struct cgroup_mount_point *lxc_cgroup_find_mount_point(struct cgroup_hierarchy *hierarchy, const char *group, bool should_be_writable)
{ {
char *cgabspath, path[MAXPATHLEN]; struct cgroup_mount_point **mps;
FILE *f; struct cgroup_mount_point *current_result = NULL;
char *line = NULL; ssize_t quality = -1;
size_t len = 0;
bool ret = true;
int r;
// XXX FIXME if users could use something other than 'lxc.devices.deny = a'. /* trivial case */
// not sure they ever do, but they *could* if (hierarchy->rw_absolute_mount_point)
// right now, I'm assuming they do NOT return hierarchy->rw_absolute_mount_point;
if (strcmp(v, "a") && strcmp(v, "a *:* rwm")) if (!should_be_writable && hierarchy->ro_absolute_mount_point)
return false; return hierarchy->ro_absolute_mount_point;
cgabspath = cgroup_get_subsys_abspath(h, "devices");
if (!cgabspath)
return false;
r = snprintf(path, MAXPATHLEN, "%s/devices.list", cgabspath); for (mps = hierarchy->all_mount_points; mps && *mps; mps++) {
if (r < 0 || r >= MAXPATHLEN) { struct cgroup_mount_point *mp = *mps;
ERROR("pathname too long for devices.list"); size_t prefix_len = mp->mount_prefix ? strlen(mp->mount_prefix) : 0;
return false;
}
if (!(f = fopen(path, "r"))) if (prefix_len == 1 && mp->mount_prefix[0] == '/')
return false; prefix_len = 0;
while (getline(&line, &len, f) != -1) { if (should_be_writable && mp->read_only)
size_t len = strlen(line); continue;
if (len > 0 && line[len-1] == '\n')
line[len-1] = '\0'; if (!prefix_len ||
if (strcmp(line, "a *:* rwm") == 0) { (strncmp(group, mp->mount_prefix, prefix_len) == 0 &&
ret = false; (group[prefix_len] == '\0' || group[prefix_len] == '/'))) {
goto out; /* search for the best quality match, i.e. the match with the
* shortest prefix where this group is still contained
*/
if (quality == -1 || prefix_len < quality) {
current_result = mp;
quality = prefix_len;
}
} }
} }
out: if (!current_result)
fclose(f); errno = ENOENT;
if (line) return current_result;
free(line);
return ret;
} }
static bool cgroup_devices_has_allow(struct lxc_handler *h, char *v) char *lxc_cgroup_find_abs_path(const char *subsystem, const char *group, bool should_be_writable, const char *suffix)
{ {
char *cgabspath, path[MAXPATHLEN]; struct cgroup_meta_data *meta_data;
int r; struct cgroup_hierarchy *h;
bool ret = false; struct cgroup_mount_point *mp;
FILE *f; char *result;
char *line = NULL; int saved_errno;
size_t len = 0;
meta_data = lxc_cgroup_load_meta();
if (!meta_data)
return NULL;
cgabspath = cgroup_get_subsys_abspath(h, "devices"); h = lxc_cgroup_find_hierarchy(meta_data, subsystem);
if (!cgabspath) if (!h)
return false; goto out_error;
r = snprintf(path, MAXPATHLEN, "%s/devices.list", cgabspath); mp = lxc_cgroup_find_mount_point(h, group, should_be_writable);
if (r < 0 || r >= MAXPATHLEN) { if (!mp)
ERROR("pathname too long to for devices.list"); goto out_error;
return false;
}
if (!(f = fopen(path, "r"))) result = cgroup_to_absolute_path(mp, group, suffix);
return false; if (!result)
goto out_error;
while (getline(&line, &len, f) != -1) { lxc_cgroup_put_meta(meta_data);
if (len < 1) return result;
goto out;
if (line[len-1] == '\n')
line[len-1] = '\0';
if (strcmp(line, "a *:* rwm") == 0 || strcmp(line, v) == 0) {
ret = true;
goto out;
}
}
out: out_error:
if (line) saved_errno = errno;
free(line); lxc_cgroup_put_meta(meta_data);
fclose(f); errno = saved_errno;
return ret; return NULL;
} }
/* struct cgroup_process_info *lxc_cgroup_process_info_get(pid_t pid, struct cgroup_meta_data *meta)
* lxc_cgroup_set_bypath: Write a value into a cgroup file
*
* @cgrelpath : a container's relative cgroup path (e.g. "lxc/c1")
* @filename : the cgroup file to write (e.g. "freezer.state")
* @value : value to write into file
*
* Returns 0 on success, < 0 on error.
*/
int lxc_cgroup_set_value(struct lxc_handler *handler, const char *filename,
const char *value)
{ {
char *cgabspath, path[MAXPATHLEN], *p; char pid_buf[32];
int ret; snprintf(pid_buf, 32, "/proc/%lu/cgroup", (unsigned long)pid);
return lxc_cgroup_process_info_getx(pid_buf, meta);
ret = snprintf(path, MAXPATHLEN, "%s", filename); }
if (ret < 0 || ret >= MAXPATHLEN)
return -1;
if ((p = index(path, '.')) != NULL)
*p = '\0';
cgabspath = cgroup_get_subsys_abspath(handler, path);
if (!cgabspath)
return -1;
ret = snprintf(path, MAXPATHLEN, "%s/%s", cgabspath, filename);
if (ret < 0 || ret >= MAXPATHLEN) {
ERROR("pathname too long to set cgroup value %s to %s",
filename, value);
return -1;
}
return do_cgroup_set(path, value); struct cgroup_process_info *lxc_cgroup_process_info_get_init(struct cgroup_meta_data *meta)
{
return lxc_cgroup_process_info_get(1, meta);
} }
/* struct cgroup_process_info *lxc_cgroup_process_info_get_self(struct cgroup_meta_data *meta)
* lxc_cgroup_set: Write a value into a cgroup file
*
* @name : name of container to connect to
* @filename : the cgroup file to write (e.g. "freezer.state")
* @value : value to write into file
* @lxcpath : the lxcpath in which the container is running
*
* Returns 0 on success, < 0 on error.
*/
int lxc_cgroup_set(const char *name, const char *filename, const char *value,
const char *lxcpath)
{ {
int ret; struct cgroup_process_info *i;
char *cgabspath; i = lxc_cgroup_process_info_getx("/proc/self/cgroup", meta);
char path[MAXPATHLEN]; if (!i)
char *subsystem = alloca(strlen(filename)+1), *p; i = lxc_cgroup_process_info_get(getpid(), meta);
strcpy(subsystem, filename); return i;
}
if ((p = index(subsystem, '.')) != NULL) /* create a new cgroup */
*p = '\0'; extern struct cgroup_process_info *lxc_cgroup_create(const char *name, const char *path_pattern, struct cgroup_meta_data *meta_data, const char *sub_pattern)
{
char **cgroup_path_components;
char **p = NULL;
char *path_so_far = NULL;
char **new_cgroup_paths = NULL;
char **new_cgroup_paths_sub = NULL;
struct cgroup_mount_point *mp;
struct cgroup_hierarchy *h;
struct cgroup_process_info *base_info = NULL;
struct cgroup_process_info *info_ptr;
int saved_errno;
int r;
unsigned suffix = 0;
bool had_sub_pattern = false;
size_t i;
cgabspath = lxc_cgroup_path_get(subsystem, name, lxcpath); if (!is_valid_cgroup(name)) {
if (!cgabspath) ERROR("Invalid cgroup name: '%s'", name);
return -1; errno = EINVAL;
return NULL;
}
ret = snprintf(path, MAXPATHLEN, "%s/%s", cgabspath, filename); if (!strstr(path_pattern, "%n")) {
if (ret < 0 || ret >= MAXPATHLEN) { ERROR("Invalid cgroup path pattern: '%s'; contains no %%n for specifying container name", path_pattern);
ERROR("pathname too long"); errno = EINVAL;
ret = -1; return NULL;
goto out;
} }
ret = do_cgroup_set(path, value); /* we will modify the result of this operation directly,
* so we don't have to copy the data structure
*/
base_info = (path_pattern[0] == '/') ?
lxc_cgroup_process_info_get_init(meta_data) :
lxc_cgroup_process_info_get_self(meta_data);
if (!base_info)
return NULL;
out: new_cgroup_paths = calloc(meta_data->maximum_hierarchy + 1, sizeof(char *));
free(cgabspath); if (!new_cgroup_paths)
return ret; goto out_initial_error;
}
/* new_cgroup_paths_sub = calloc(meta_data->maximum_hierarchy + 1, sizeof(char *));
* lxc_cgroup_get: Read value from a cgroup file if (!new_cgroup_paths_sub)
* goto out_initial_error;
* @name : name of container to connect to
* @filename : the cgroup file to read (e.g. "freezer.state")
* @value : a pre-allocated buffer to copy the answer into
* @len : the length of pre-allocated @value
* @lxcpath : the lxcpath in which the container is running
*
* Returns the number of bytes read on success, < 0 on error
*
* If you pass in NULL value or 0 len, the return value will be the size of
* the file, and @value will not contain the contents.
*
* Note that we can't get the file size quickly through stat or lseek.
* Therefore if you pass in len > 0 but less than the file size, your only
* indication will be that the return value will be equal to the passed-in ret.
* We will not return the actual full file size.
*/
int lxc_cgroup_get(const char *name, const char *filename, char *value,
size_t len, const char *lxcpath)
{
int fd, ret;
char *cgabspath;
char path[MAXPATHLEN];
char *subsystem = alloca(strlen(filename)+1), *p;
strcpy(subsystem, filename); /* find mount points we can use */
for (info_ptr = base_info; info_ptr; info_ptr = info_ptr->next) {
h = info_ptr->hierarchy;
mp = lxc_cgroup_find_mount_point(h, info_ptr->cgroup_path, true);
if (!mp) {
ERROR("Could not find writable mount point for cgroup hierarchy %d while trying to create cgroup.", h->index);
goto out_initial_error;
}
info_ptr->designated_mount_point = mp;
if ((p = index(subsystem, '.')) != NULL) if (handle_clone_children(mp, info_ptr->cgroup_path) < 0) {
*p = '\0'; ERROR("Could not set clone_children to 1 for cpuset hierarchy in parent cgroup.");
goto out_initial_error;
}
}
cgabspath = lxc_cgroup_path_get(subsystem, name, lxcpath); /* normalize the path */
if (!cgabspath) cgroup_path_components = lxc_normalize_path(path_pattern);
return -1; if (!cgroup_path_components)
goto out_initial_error;
ret = snprintf(path, MAXPATHLEN, "%s/%s", cgabspath, filename); /* go through the path components to see if we can create them */
if (ret < 0 || ret >= MAXPATHLEN) { for (p = cgroup_path_components; *p || (sub_pattern && !had_sub_pattern); p++) {
ERROR("pathname too long"); /* we only want to create the same component with -1, -2, etc.
ret = -1; * if the component contains the container name itself, otherwise
goto out; * it's not an error if it already exists
*/
char *p_eff = *p ? *p : (char *)sub_pattern;
bool contains_name = strstr(p_eff, "%n");
char *current_component = NULL;
char *current_subpath = NULL;
char *current_entire_path = NULL;
char *parts[3];
size_t j = 0;
i = 0;
/* if we are processing the subpattern, we want to make sure
* loop is ended the next time around
*/
if (!*p) {
had_sub_pattern = true;
p--;
} }
fd = open(path, O_RDONLY); goto find_name_on_this_level;
if (fd < 0) {
ERROR("open %s : %s", path, strerror(errno)); cleanup_name_on_this_level:
ret = -1; /* This is reached if we found a name clash.
goto out; * In that case, remove the cgroup from all previous hierarchies
*/
for (j = 0, info_ptr = base_info; j < i && info_ptr; info_ptr = info_ptr->next, j++) {
r = remove_cgroup(info_ptr->designated_mount_point, info_ptr->created_paths[info_ptr->created_paths_count - 1]);
if (r < 0)
WARN("could not clean up cgroup we created when trying to create container");
free(info_ptr->created_paths[info_ptr->created_paths_count - 1]);
info_ptr->created_paths[--info_ptr->created_paths_count] = NULL;
}
if (current_component != current_subpath)
free(current_subpath);
if (current_component != p_eff)
free(current_component);
current_component = current_subpath = NULL;
/* try again with another suffix */
++suffix;
find_name_on_this_level:
/* determine name of the path component we should create */
if (contains_name && suffix > 0) {
char *buf = calloc(strlen(name) + 32, 1);
if (!buf)
goto out_initial_error;
snprintf(buf, strlen(name) + 32, "%s-%u", name, suffix);
current_component = lxc_string_replace("%n", buf, p_eff);
free(buf);
} else {
current_component = contains_name ? lxc_string_replace("%n", name, p_eff) : p_eff;
} }
parts[0] = path_so_far;
parts[1] = current_component;
parts[2] = NULL;
current_subpath = path_so_far ? lxc_string_join("/", (const char **)parts, false) : current_component;
if (!len || !value) { /* Now go through each hierarchy and try to create the
char buf[100]; * corresponding cgroup
int count = 0; */
while ((ret = read(fd, buf, 100)) > 0) for (i = 0, info_ptr = base_info; info_ptr; info_ptr = info_ptr->next, i++) {
count += ret; char *parts2[3];
if (ret >= 0) current_entire_path = NULL;
ret = count;
parts2[0] = !strcmp(info_ptr->cgroup_path, "/") ? "" : info_ptr->cgroup_path;
parts2[1] = current_subpath;
parts2[2] = NULL;
current_entire_path = lxc_string_join("/", (const char **)parts2, false);
if (!*p) {
/* we are processing the subpath, so only update that one */
free(new_cgroup_paths_sub[i]);
new_cgroup_paths_sub[i] = strdup(current_entire_path);
if (!new_cgroup_paths_sub[i])
goto cleanup_from_error;
} else { } else {
memset(value, 0, len); /* remember which path was used on this controller */
ret = read(fd, value, len); free(new_cgroup_paths[i]);
new_cgroup_paths[i] = strdup(current_entire_path);
if (!new_cgroup_paths[i])
goto cleanup_from_error;
}
r = create_cgroup(info_ptr->designated_mount_point, current_entire_path);
if (r < 0 && errno == EEXIST && contains_name) {
/* name clash => try new name with new suffix */
free(current_entire_path);
current_entire_path = NULL;
goto cleanup_name_on_this_level;
} else if (r < 0 && errno != EEXIST) {
SYSERROR("Could not create cgroup %s", current_entire_path);
goto cleanup_from_error;
} else if (r == 0) {
/* successfully created */
r = lxc_grow_array((void ***)&info_ptr->created_paths, &info_ptr->created_paths_capacity, info_ptr->created_paths_count + 1, 8);
if (r < 0)
goto cleanup_from_error;
info_ptr->created_paths[info_ptr->created_paths_count++] = current_entire_path;
} else {
/* if we didn't create the cgroup, then we have to make sure that
* further cgroups will be created properly
*/
if (handle_clone_children(mp, info_ptr->cgroup_path) < 0) {
ERROR("Could not set clone_children to 1 for cpuset hierarchy in pre-existing cgroup.");
goto cleanup_from_error;
} }
if (ret < 0) /* already existed but path component of pattern didn't contain '%n',
ERROR("read %s : %s", path, strerror(errno)); * so this is not an error; but then we don't need current_entire_path
* anymore...
*/
free(current_entire_path);
current_entire_path = NULL;
}
}
close(fd); /* save path so far */
out: free(path_so_far);
free(cgabspath); path_so_far = strdup(current_subpath);
return ret; if (!path_so_far)
goto cleanup_from_error;
/* cleanup */
if (current_component != current_subpath)
free(current_subpath);
if (current_component != p_eff)
free(current_component);
current_component = current_subpath = NULL;
continue;
cleanup_from_error:
/* called if an error occured in the loop, so we
* do some additional cleanup here
*/
saved_errno = errno;
if (current_component != current_subpath)
free(current_subpath);
if (current_component != p_eff)
free(current_component);
free(current_entire_path);
errno = saved_errno;
goto out_initial_error;
}
/* we're done, now update the paths */
for (i = 0, info_ptr = base_info; info_ptr; info_ptr = info_ptr->next, i++) {
free(info_ptr->cgroup_path);
info_ptr->cgroup_path = new_cgroup_paths[i];
info_ptr->cgroup_path_sub = new_cgroup_paths_sub[i];
}
/* don't use lxc_free_array since we used the array members
* to store them in our result...
*/
free(new_cgroup_paths);
free(new_cgroup_paths_sub);
free(path_so_far);
lxc_free_array((void **)cgroup_path_components, free);
return base_info;
out_initial_error:
saved_errno = errno;
free(path_so_far);
lxc_cgroup_process_info_free_and_remove(base_info);
lxc_free_array((void **)new_cgroup_paths, free);
lxc_free_array((void **)new_cgroup_paths_sub, free);
lxc_free_array((void **)cgroup_path_components, free);
errno = saved_errno;
return NULL;
} }
int lxc_cgroup_nrtasks(struct lxc_handler *handler) /* get the cgroup membership of a given container */
struct cgroup_process_info *lxc_cgroup_get_container_info(const char *name, const char *lxcpath, struct cgroup_meta_data *meta_data)
{ {
char path[MAXPATHLEN]; struct cgroup_process_info *result = NULL;
int pid, ret; int saved_errno = 0;
FILE *file; size_t i;
struct cgroup_process_info **cptr = &result;
struct cgroup_process_info *entry = NULL;
char *path = NULL;
for (i = 0; i <= meta_data->maximum_hierarchy; i++) {
struct cgroup_hierarchy *h = meta_data->hierarchies[i];
if (!h || !h->used)
continue;
if (!handler->cgroup) /* use the command interface to look for the cgroup */
return -1; path = lxc_cmd_get_cgroup_path(name, lxcpath, h->subsystems[0]);
if (!path)
goto out_error;
entry = calloc(1, sizeof(struct cgroup_process_info));
if (!entry)
goto out_error;
entry->meta_ref = lxc_cgroup_get_meta(meta_data);
entry->hierarchy = h;
entry->cgroup_path = path;
path = NULL;
/* it is not an error if we don't find anything here,
* it is up to the caller to decide what to do in that
* case */
entry->designated_mount_point = lxc_cgroup_find_mount_point(h, entry->cgroup_path, true);
*cptr = entry;
cptr = &entry->next;
entry = NULL;
}
return result;
out_error:
saved_errno = errno;
free(path);
lxc_cgroup_process_info_free(result);
lxc_cgroup_process_info_free(entry);
errno = saved_errno;
return NULL;
}
/* XXX Should we use a specific subsystem rather than the first one we /* move a processs to the cgroups specified by the membership */
* found (handler->cgroup->curcgroup)? */ int lxc_cgroup_enter(struct cgroup_process_info *info, pid_t pid, bool enter_sub)
ret = snprintf(path, MAXPATHLEN, "%s/tasks", handler->cgroup->curcgroup); {
if (ret < 0 || ret >= MAXPATHLEN) { char pid_buf[32];
ERROR("pathname too long"); char *cgroup_tasks_fn;
int r;
struct cgroup_process_info *info_ptr;
snprintf(pid_buf, 32, "%lu", (unsigned long)pid);
for (info_ptr = info; info_ptr; info_ptr = info_ptr->next) {
char *cgroup_path = (enter_sub && info_ptr->cgroup_path_sub) ?
info_ptr->cgroup_path_sub :
info_ptr->cgroup_path;
if (!info_ptr->designated_mount_point) {
info_ptr->designated_mount_point = lxc_cgroup_find_mount_point(info_ptr->hierarchy, cgroup_path, true);
if (!info_ptr->designated_mount_point) {
SYSERROR("Could not add pid %lu to cgroup %s: internal error (couldn't find any writable mountpoint to cgroup filesystem)", (unsigned long)pid, cgroup_path);
return -1; return -1;
} }
}
file = fopen(path, "r"); cgroup_tasks_fn = cgroup_to_absolute_path(info_ptr->designated_mount_point, cgroup_path, "/tasks");
if (!file) { if (!cgroup_tasks_fn) {
SYSERROR("fopen '%s' failed", path); SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid, cgroup_path);
return -1; return -1;
} }
ret = 0; r = lxc_write_to_file(cgroup_tasks_fn, pid_buf, strlen(pid_buf), false);
while (fscanf(file, "%d", &pid) != EOF) if (r < 0) {
ret++; SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid, cgroup_path);
return -1;
}
}
fclose(file); return 0;
return ret;
} }
static int subsys_lists_match(const char *list1, const char *list2) /* free process membership information */
void lxc_cgroup_process_info_free(struct cgroup_process_info *info)
{ {
char *token, *str, *saveptr = NULL; struct cgroup_process_info *next;
if (!info)
if (!list1 || !list2) return;
return 0; next = info->next;
lxc_cgroup_put_meta(info->meta_ref);
if (strlen(list1) != strlen(list2)) free(info->cgroup_path);
return 0; free(info->cgroup_path_sub);
lxc_free_array((void **)info->created_paths, free);
str = alloca(strlen(list1)+1); free(info);
strcpy(str, list1); lxc_cgroup_process_info_free(next);
for (; (token = strtok_r(str, ",", &saveptr)); str = NULL) {
if (in_subsys_list(token, list2) == 0)
return 0;
}
return 1;
} }
static void set_clone_children(struct mntent *m) /* free process membership information and remove cgroups that were created */
void lxc_cgroup_process_info_free_and_remove(struct cgroup_process_info *info)
{ {
char path[MAXPATHLEN]; struct cgroup_process_info *next;
FILE *fout; char **pp;
int ret; if (!info)
if (!in_subsys_list("cpuset", m->mnt_opts))
return;
ret = snprintf(path, MAXPATHLEN, "%s/cgroup.clone_children", m->mnt_dir);
if (ret < 0 || ret > MAXPATHLEN)
return; return;
fout = fopen(path, "w"); next = info->next;
if (!fout) for (pp = info->created_paths; pp && *pp; pp++);
return; for ((void)(pp && --pp); info->created_paths && pp >= info->created_paths; --pp) {
fprintf(fout, "1\n"); struct cgroup_mount_point *mp = info->designated_mount_point;
fclose(fout); if (!mp)
mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
if (mp)
/* ignore return value here, perhaps we created the
* '/lxc' cgroup in this container but another container
* is still running (for example)
*/
(void)remove_cgroup(mp, *pp);
free(*pp);
}
free(info->created_paths);
lxc_cgroup_put_meta(info->meta_ref);
free(info->cgroup_path);
free(info->cgroup_path_sub);
free(info);
lxc_cgroup_process_info_free(next);
} }
static bool have_visited(char *opts, char *visited, char *all_subsystems) char *lxc_cgroup_get_hierarchy_path_handler(const char *subsystem, struct lxc_handler *handler)
{ {
char *str, *s = NULL, *token; struct cgroup_process_info *info = find_info_for_subsystem(handler->cgroup, subsystem);
if (!info)
str = alloca(strlen(opts)+1); return NULL;
strcpy(str, opts); return info->cgroup_path;
for (; (token = strtok_r(str, ",", &s)); str = NULL) { }
if (!in_subsys_list(token, all_subsystems))
continue;
if (visited && in_subsys_list(token, visited))
return true;
}
return false; char *lxc_cgroup_get_hierarchy_path(const char *subsystem, const char *name, const char *lxcpath)
{
return lxc_cmd_get_cgroup_path(name, lxcpath, subsystem);
} }
static bool is_in_desclist(struct cgroup_desc *d, char *opts, char *all_subsystems) char *lxc_cgroup_get_hierarchy_abs_path_handler(const char *subsystem, struct lxc_handler *handler)
{ {
while (d) { struct cgroup_mount_point *mp = NULL;
if (have_visited(opts, d->subsystems, all_subsystems)) struct cgroup_process_info *info = find_info_for_subsystem(handler->cgroup, subsystem);
return true; if (!info)
d = d->next; return NULL;
if (info->designated_mount_point) {
mp = info->designated_mount_point;
} else {
mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
if (!mp)
return NULL;
} }
return false; return cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
} }
static char *record_visited(char *opts, char *all_subsystems) char *lxc_cgroup_get_hierarchy_abs_path(const char *subsystem, const char *name, const char *lxcpath)
{ {
char *s = NULL, *token, *str; struct cgroup_meta_data *meta;
int oldlen = 0, newlen, toklen; struct cgroup_process_info *base_info, *info;
char *visited = NULL; struct cgroup_mount_point *mp;
char *result = NULL;
str = alloca(strlen(opts)+1); int saved_errno;
strcpy(str, opts);
for (; (token = strtok_r(str, ",", &s)); str = NULL) { meta = lxc_cgroup_load_meta();
if (!in_subsys_list(token, all_subsystems)) if (!meta)
continue; return NULL;
toklen = strlen(token); base_info = lxc_cgroup_get_container_info(name, lxcpath, meta);
newlen = oldlen + toklen + 1; // ',' + token or token + '\0' if (!base_info)
visited = realloc(visited, newlen); return NULL;
if (!visited) info = find_info_for_subsystem(base_info, subsystem);
return (char *)-ENOMEM; if (!info)
if (oldlen) return NULL;
strcat(visited, ","); if (info->designated_mount_point) {
else mp = info->designated_mount_point;
*visited = '\0'; } else {
strcat(visited, token); mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
oldlen = newlen; if (!mp)
} return NULL;
}
return visited; result = cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
saved_errno = errno;
lxc_cgroup_process_info_free(base_info);
lxc_cgroup_put_meta(meta);
errno = saved_errno;
return result;
} }
static char *get_all_subsystems(void) int lxc_cgroup_set_handler(const char *filename, const char *value, struct lxc_handler *handler)
{ {
FILE *f; char *subsystem = NULL, *p, *path;
char *line = NULL, *ret = NULL; int ret = -1;
size_t len;
int first = 1;
/* read the list of subsystems from the kernel */
f = fopen("/proc/cgroups", "r");
if (!f)
return NULL;
while (getline(&line, &len, f) != -1) { subsystem = alloca(strlen(filename) + 1);
char *c; strcpy(subsystem, filename);
int oldlen, newlen, inc; if ((p = index(subsystem, '.')) != NULL)
*p = '\0';
/* skip the first line */ path = lxc_cgroup_get_hierarchy_abs_path_handler(subsystem, handler);
if (first) { if (path) {
first=0; ret = do_cgroup_set(path, filename, value);
continue; free(path);
} }
return ret;
}
c = strchr(line, '\t'); int lxc_cgroup_get_handler(const char *filename, char *value, size_t len, struct lxc_handler *handler)
if (!c) {
continue; char *subsystem = NULL, *p, *path;
*c = '\0'; int ret = -1;
oldlen = ret ? strlen(ret) : 0; subsystem = alloca(strlen(filename) + 1);
newlen = oldlen + strlen(line) + 2; strcpy(subsystem, filename);
ret = realloc(ret, newlen); if ((p = index(subsystem, '.')) != NULL)
if (!ret) *p = '\0';
goto out;
inc = snprintf(ret + oldlen, newlen, ",%s", line); path = lxc_cgroup_get_hierarchy_abs_path_handler(subsystem, handler);
if (inc < 0 || inc >= newlen) { if (path) {
free(ret); ret = do_cgroup_get(path, filename, value, len);
ret = NULL; free(path);
goto out;
} }
return ret;
}
int lxc_cgroup_set(const char *filename, const char *value, const char *name, const char *lxcpath)
{
char *subsystem = NULL, *p, *path;
int ret = -1;
subsystem = alloca(strlen(filename) + 1);
strcpy(subsystem, filename);
if ((p = index(subsystem, '.')) != NULL)
*p = '\0';
path = lxc_cgroup_get_hierarchy_abs_path(subsystem, name, lxcpath);
if (path) {
ret = do_cgroup_set(path, filename, value);
free(path);
} }
return ret;
}
out: int lxc_cgroup_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath)
if (line) {
free(line); char *subsystem = NULL, *p, *path;
fclose(f); int ret = -1;
subsystem = alloca(strlen(filename) + 1);
strcpy(subsystem, filename);
if ((p = index(subsystem, '.')) != NULL)
*p = '\0';
path = lxc_cgroup_get_hierarchy_abs_path(subsystem, name, lxcpath);
if (path) {
ret = do_cgroup_get(path, filename, value, len);
free(path);
}
return ret; return ret;
} }
/* /*
* /etc/lxc/lxc.conf can contain lxc.cgroup.use = entries. * lxc_cgroup_path_get: Get the absolute pathname for a cgroup
* If any of those are present, then lxc will ONLY consider * file for a running container.
* cgroup filesystems mounted at one of the listed entries. *
* @filename : the file of interest (e.g. "freezer.state") or
* the subsystem name (e.g. "freezer") in which case
* the directory where the cgroup may be modified
* will be returned
* @name : name of container to connect to
* @lxcpath : the lxcpath in which the container is running
*
* This is the exported function, which determines cgpath from the
* lxc-start of the @name container running in @lxcpath.
*
* Returns path on success, NULL on error. The caller must free()
* the returned path.
*/ */
static char *get_cgroup_uselist() char *lxc_cgroup_path_get(const char *filename, const char *name,
const char *lxcpath)
{ {
FILE *f; char *subsystem = NULL, *longer_file = NULL, *p, *group, *path;
char *line = NULL, *ret = NULL;
size_t sz = 0, retsz = 0, newsz;
if ((f = fopen(LXC_GLOBAL_CONF, "r")) == NULL) subsystem = alloca(strlen(filename) + 1);
return NULL; strcpy(subsystem, filename);
while (getline(&line, &sz, f) != -1) { if ((p = index(subsystem, '.')) != NULL) {
char *p = line; *p = '\0';
while (*p && isblank(*p)) longer_file = alloca(strlen(filename) + 2);
p++; longer_file[0] = '/';
if (strncmp(p, "lxc.cgroup.use", 14) != 0) strcpy(longer_file + 1, filename);
continue;
p = index(p, '=');
if (!p)
continue;
p++;
while (*p && isblank(*p))
p++;
if (strlen(p) < 1)
continue;
newsz = retsz + strlen(p);
if (retsz == 0)
newsz += 1; // for trailing \0
// the last line in the file could lack \n
if (p[strlen(p)-1] != '\n')
newsz += 1;
ret = realloc(ret, newsz);
if (!ret) {
ERROR("Out of memory reading cgroup uselist");
fclose(f);
free(line);
return (char *)-ENOMEM;
}
if (retsz == 0)
strcpy(ret, p);
else
strcat(ret, p);
if (p[strlen(p)-1] != '\n')
ret[newsz-2] = '\0';
ret[newsz-1] = '\0';
retsz = newsz;
} }
if (line) group = lxc_cgroup_get_hierarchy_path(subsystem, name, lxcpath);
free(line); if (!group)
return ret; return NULL;
path = lxc_cgroup_find_abs_path(subsystem, group, true, *p ? longer_file : NULL);
free(group);
return path;
} }
static bool is_in_uselist(char *uselist, struct mntent *m) int lxc_setup_cgroup_without_devices(struct lxc_handler *h, struct lxc_list *cgroup_settings)
{ {
char *p; return do_setup_cgroup(h, cgroup_settings, false);
if (!uselist)
return true;
if (!*uselist)
return false;
while (*uselist) {
p = index(uselist, '\n');
if (strncmp(m->mnt_dir, uselist, p - uselist) == 0)
return true;
uselist = p+1;
}
return false;
} }
static bool find_real_cgroup(struct cgroup_desc *d, char *path) int lxc_setup_cgroup_devices(struct lxc_handler *h, struct lxc_list *cgroup_settings)
{ {
FILE *f; return do_setup_cgroup(h, cgroup_settings, true);
char *line = NULL, *p, *p2; }
int ret = 0;
size_t len;
if ((f = fopen("/proc/self/cgroup", "r")) == NULL) { int lxc_cgroup_nrtasks_handler(struct lxc_handler *handler)
SYSERROR("Error opening /proc/self/cgroups"); {
return false; struct cgroup_process_info *info = handler->cgroup;
} struct cgroup_mount_point *mp = NULL;
char *abs_path = NULL;
int ret;
// If there is no subsystem, ignore the mount. Note we may want if (!info) {
// to change this, so that unprivileged users can use a unbound errno = ENOENT;
// cgroup mount to arrange their container tasks. return -1;
if (!d->subsystems) {
fclose(f);
return false;
} }
while (getline(&line, &len, f) != -1) {
if (!(p = index(line, ':'))) if (info->designated_mount_point) {
continue; mp = info->designated_mount_point;
if (!(p2 = index(++p, ':'))) } else {
continue; mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, false);
*p2 = '\0'; if (!mp)
// remove trailing newlines return -1;
if (*(p2 + 1) && p2[strlen(p2 + 1)] == '\n')
p2[strlen(p2 + 1)] = '\0';
// in case of multiple mounts it may be more correct to
// insist all subsystems be the same
if (subsys_lists_match(p, d->subsystems))
goto found;
} }
if (line) abs_path = cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
free(line); if (!abs_path)
fclose(f); return -1;
return false;;
found: ret = cgroup_recursive_task_count(abs_path);
fclose(f); free(abs_path);
ret = snprintf(path, MAXPATHLEN, "%s", p2+1); return ret;
if (ret < 0 || ret >= MAXPATHLEN) {
free(line);
return false;
}
free(line);
return true;
} }
struct cgroup_process_info *lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str, struct cgroup_meta_data *meta)
/*
* for a given cgroup mount entry, and a to-be-created container,
* 1. Figure out full path of the cgroup we are currently in,
* 2. Find a new free cgroup which is $path / $lxc_name with an
* optional '-$n' where n is an ever-increasing integer.
*/
static char *find_free_cgroup(struct cgroup_desc *d, const char *lxc_name)
{ {
char tail[20], cgpath[MAXPATHLEN], *cgp, path[MAXPATHLEN]; struct cgroup_process_info *result = NULL;
int i = 0, ret; FILE *proc_pid_cgroup = NULL;
size_t l; char *line = NULL;
size_t sz = 0;
int saved_errno = 0;
struct cgroup_process_info **cptr = &result;
struct cgroup_process_info *entry = NULL;
if (!find_real_cgroup(d, cgpath)) { proc_pid_cgroup = fopen_cloexec(proc_pid_cgroup_str, "r");
ERROR("Failed to find current cgroup"); if (!proc_pid_cgroup)
return NULL; return NULL;
}
/* while (getline(&line, &sz, proc_pid_cgroup) != -1) {
* If d->mntpt is '/a/b/c/d', and the mountpoint is /x/y/z, /* file format: hierarchy:subsystems:group */
* then look for ourselves in: char *colon1;
* /x/y/z/a/b/c/d/tasks char *colon2;
* /x/y/z/b/c/d/tasks char *endptr;
* /x/y/z/c/d/tasks int hierarchy_number;
* /x/y/z/d/tasks struct cgroup_hierarchy *h = NULL;
* /x/y/z/tasks
*/ if (!line[0])
cgp = cgpath;
while (cgp[0]) {
ret = snprintf(path, MAXPATHLEN, "%s%s/tasks", d->mntpt, cgp);
if (ret < 0 || ret >= MAXPATHLEN)
return NULL;
if (!is_in_cgroup(getpid(), path)) {
// does not exist, try the next one
cgp = index(cgp+1, '/');
if (!cgp)
break;
continue; continue;
}
break;
}
if (!cgp || !*cgp) {
// try just the path
ret = snprintf(path, MAXPATHLEN, "%s/tasks", d->mntpt);
if (ret < 0 || ret >= MAXPATHLEN)
return NULL;
if (!is_in_cgroup(getpid(), path))
return NULL;
}
// found it
// path has '/tasks' at end, drop that
if (!(cgp = strrchr(path, '/'))) {
ERROR("Got nonsensical path name %s\n", path);
return NULL;
}
*cgp = '\0';
if (strlen(path) + strlen(lxc_name) + 20 > MAXPATHLEN) { if (line[strlen(line) - 1] == '\n')
ERROR("Error: cgroup path too long"); line[strlen(line) - 1] = '\0';
return NULL;
}
tail[0] = '\0';
while (1) {
struct stat sb;
int freebytes = MAXPATHLEN - (cgp - path);
if (i) { colon1 = strchr(line, ':');
ret = snprintf(tail, 20, "-%d", i); if (!colon1)
if (ret < 0 || ret >= 20) continue;
return NULL; *colon1++ = '\0';
} colon2 = strchr(colon1, ':');
ret = snprintf(cgp, freebytes, "/%s%s", lxc_name, tail); if (!colon2)
if (ret < 0 || ret >= freebytes) continue;
return NULL; *colon2++ = '\0';
if (stat(path, &sb) == -1)
break;
i++;
}
l = strlen(cgpath); endptr = NULL;
ret = snprintf(cgpath + l, MAXPATHLEN - l, "/%s%s", lxc_name, tail); hierarchy_number = strtoul(line, &endptr, 10);
if (ret < 0 || ret >= (MAXPATHLEN - l)) { if (!endptr || *endptr)
ERROR("Out of memory"); continue;
return NULL;
}
if ((d->realcgroup = strdup(cgpath)) == NULL) {
ERROR("Out of memory");
return NULL;
}
l = strlen(d->realcgroup);
if (l > 0 && d->realcgroup[l-1] == '\n')
d->realcgroup[l-1] = '\0';
return strdup(path);
}
/* if (hierarchy_number > meta->maximum_hierarchy) {
* For a new container, find a cgroup path which is unique in all cgroup mounts. /* we encountered a hierarchy we didn't have before,
* I.e. if r1 is already running, then /lxc/r1-1 may be used. * so probably somebody remounted some stuff in the
* * mean time...
* @lxcgroup: the cgroup 'group' the contaienr should run in. By default, this
* is just 'lxc'. Admins may wish to group some containers into other groups,
* i.e. 'build', to take advantage of cgroup hierarchy to simplify group
* administration. Also, unprivileged users who are placed into a cgroup by
* libcgroup_pam will be using that cgroup rather than the system-wide 'lxc'
* group.
* @name: the name of the container
*
* The chosen cgpath is returned as a strdup'd string. The caller will have to
* free that eventually, however the lxc monitor will keep that string so as to
* return it in response to a LXC_COMMAND_CGROUP query.
*
* Note the path is relative to cgroup mounts. I.e. if the freezer subsystem
* is at /sys/fs/cgroup/freezer, and this fn returns '/lxc/r1', then the
* freezer cgroup's full path will be /sys/fs/cgroup/freezer/lxc/r1/.
*
* Races won't be determintal, you'll just end up with leftover unused cgroups
*/ */
struct cgroup_desc *lxc_cgroup_path_create(const char *name) errno = EAGAIN;
{ goto out_error;
struct cgroup_desc *retdesc = NULL, *newdesc = NULL;
FILE *file = NULL;
struct mntent mntent_r;
char buf[LARGE_MAXPATHLEN] = {0};
char *all_subsystems = get_all_subsystems();
char *cgroup_uselist = get_cgroup_uselist();
if (cgroup_uselist == (char *)-ENOMEM) {
if (all_subsystems)
free(all_subsystems);
return NULL;
}
if (!all_subsystems) {
ERROR("failed to get a list of all cgroup subsystems");
if (cgroup_uselist)
free(cgroup_uselist);
return NULL;
}
file = setmntent(MTAB, "r");
if (!file) {
SYSERROR("failed to open %s", MTAB);
free(all_subsystems);
if (cgroup_uselist)
free(cgroup_uselist);
return NULL;
} }
while ((getmntent_r(file, &mntent_r, buf, sizeof(buf)))) { h = meta->hierarchies[hierarchy_number];
if (!h) {
/* we encountered a hierarchy that was thought to be
* dead before, so probably somebody remounted some
* stuff in the mean time...
*/
errno = EAGAIN;
goto out_error;
}
if (strcmp(mntent_r.mnt_type, "cgroup")) /* we are told that we should ignore this hierarchy */
if (!h->used)
continue; continue;
if (cgroup_uselist && !is_in_uselist(cgroup_uselist, &mntent_r)) entry = calloc(1, sizeof(struct cgroup_process_info));
continue; if (!entry)
goto out_error;
/* make sure we haven't checked this subsystem already */ entry->meta_ref = lxc_cgroup_get_meta(meta);
if (is_in_desclist(retdesc, mntent_r.mnt_opts, all_subsystems)) entry->hierarchy = h;
continue; entry->cgroup_path = strdup(colon2);
if (!entry->cgroup_path)
goto out_error;
if (!(newdesc = malloc(sizeof(struct cgroup_desc)))) { *cptr = entry;
ERROR("Out of memory reading cgroups"); cptr = &entry->next;
goto fail; entry = NULL;
} }
newdesc->subsystems = record_visited(mntent_r.mnt_opts, all_subsystems);
if (newdesc->subsystems == (char *)-ENOMEM) {
ERROR("Out of memory recording cgroup subsystems");
free(newdesc);
newdesc = NULL;
goto fail;
}
if (!newdesc->subsystems) {
free(newdesc);
newdesc = NULL;
continue;
}
newdesc->mntpt = strdup(mntent_r.mnt_dir);
newdesc->realcgroup = NULL;
newdesc->curcgroup = find_free_cgroup(newdesc, name);
if (!newdesc->mntpt || !newdesc->curcgroup) {
ERROR("Out of memory reading cgroups");
goto fail;
}
set_clone_children(&mntent_r);
if (mkdir(newdesc->curcgroup, 0755)) {
ERROR("Error creating cgroup %s", newdesc->curcgroup);
goto fail;
}
newdesc->next = retdesc;
retdesc = newdesc;
}
endmntent(file);
free(all_subsystems);
if (cgroup_uselist)
free(cgroup_uselist);
return retdesc;
fail:
endmntent(file);
free(all_subsystems);
if (cgroup_uselist)
free(cgroup_uselist);
if (newdesc) {
if (newdesc->mntpt)
free(newdesc->mntpt);
if (newdesc->subsystems)
free(newdesc->subsystems);
if (newdesc->curcgroup)
free(newdesc->curcgroup);
if (newdesc->realcgroup)
free(newdesc->realcgroup);
free(newdesc);
}
while (retdesc) {
struct cgroup_desc *t = retdesc;
retdesc = retdesc->next;
if (t->mntpt)
free(t->mntpt);
if (t->subsystems)
free(t->subsystems);
if (t->curcgroup)
free(t->curcgroup);
if (t->realcgroup)
free(t->realcgroup);
free(t);
} fclose(proc_pid_cgroup);
free(line);
return result;
out_error:
saved_errno = errno;
if (proc_pid_cgroup)
fclose(proc_pid_cgroup);
lxc_cgroup_process_info_free(result);
lxc_cgroup_process_info_free(entry);
free(line);
errno = saved_errno;
return NULL; return NULL;
} }
static bool lxc_cgroup_enter_one(const char *dir, int pid) char **subsystems_from_mount_options(const char *mount_options, char **kernel_list)
{ {
char path[MAXPATHLEN]; char *token, *str, *saveptr = NULL;
int ret; char **result = NULL;
FILE *fout; size_t result_capacity = 0;
size_t result_count = 0;
int saved_errno;
int r;
ret = snprintf(path, MAXPATHLEN, "%s/tasks", dir); str = alloca(strlen(mount_options)+1);
if (ret < 0 || ret >= MAXPATHLEN) { strcpy(str, mount_options);
ERROR("Error entering cgroup"); for (; (token = strtok_r(str, ",", &saveptr)); str = NULL) {
return false; /* we have a subsystem if it's either in the list of
} * subsystems provided by the kernel OR if it starts
fout = fopen(path, "w"); * with name= for named hierarchies
if (!fout) { */
SYSERROR("Error entering cgroup"); if (!strncmp(token, "name=", 5) || lxc_string_in_array(token, (const char **)kernel_list)) {
return false; r = lxc_grow_array((void ***)&result, &result_capacity, result_count + 1, 12);
} if (r < 0)
if (fprintf(fout, "%d\n", (int)pid) < 0) { goto out_free;
ERROR("Error writing pid to %s to enter cgroup", path); result[result_count + 1] = NULL;
fclose(fout); result[result_count] = strdup(token);
return false; if (!result[result_count])
goto out_free;
result_count++;
} }
if (fclose(fout) < 0) {
SYSERROR("Error writing pid to %s to enter cgroup", path);
return false;
} }
return true; return result;
out_free:
saved_errno = errno;
lxc_free_array((void**)result, free);
errno = saved_errno;
return NULL;
} }
int lxc_cgroup_enter(struct cgroup_desc *cgroups, pid_t pid) void lxc_cgroup_mount_point_free(struct cgroup_mount_point *mp)
{ {
while (cgroups) { if (!mp)
if (!cgroups->subsystems) return;
goto next; free(mp->mount_point);
free(mp->mount_prefix);
if (!lxc_cgroup_enter_one(cgroups->curcgroup, pid)) free(mp);
return -1;
next:
cgroups = cgroups->next;
}
return 0;
} }
static int cgroup_rmdir(char *dirname) void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy *h)
{ {
struct dirent dirent, *direntp; if (!h)
DIR *dir; return;
int ret; lxc_free_array((void **)h->subsystems, free);
char pathname[MAXPATHLEN]; free(h);
}
dir = opendir(dirname);
if (!dir) {
WARN("failed to open directory: %m");
return -1;
}
while (!readdir_r(dir, &dirent, &direntp)) {
struct stat mystat;
int rc;
if (!direntp)
break;
if (!strcmp(direntp->d_name, ".") ||
!strcmp(direntp->d_name, ".."))
continue;
rc = snprintf(pathname, MAXPATHLEN, "%s/%s", dirname, direntp->d_name); bool is_valid_cgroup(const char *name)
if (rc < 0 || rc >= MAXPATHLEN) { {
ERROR("pathname too long"); const char *p;
continue; for (p = name; *p; p++) {
} if (*p < 32 || *p == 127 || *p == '/')
ret = stat(pathname, &mystat); return false;
if (ret)
continue;
if (S_ISDIR(mystat.st_mode))
cgroup_rmdir(pathname);
} }
return strcmp(name, ".") != 0 && strcmp(name, "..") != 0;
}
ret = rmdir(dirname); int create_or_remove_cgroup(bool do_remove, struct cgroup_mount_point *mp, const char *path)
{
int r, saved_errno = 0;
char *buf = cgroup_to_absolute_path(mp, path, NULL);
if (!buf)
return -1;
if (closedir(dir)) /* create or remove directory */
ERROR("failed to close directory"); r = do_remove ?
return ret; rmdir(buf) :
mkdir(buf, 0777);
saved_errno = errno;
free(buf);
errno = saved_errno;
return r;
} }
/* int create_cgroup(struct cgroup_mount_point *mp, const char *path)
* for each mounted cgroup, destroy the cgroup for the container
*/
void lxc_cgroup_destroy_desc(struct cgroup_desc *cgroups)
{ {
while (cgroups) { return create_or_remove_cgroup(false, mp, path);
struct cgroup_desc *next = cgroups->next;
if (cgroup_rmdir(cgroups->curcgroup) < 0)
SYSERROR("Error removing cgroup directory %s", cgroups->curcgroup);
free(cgroups->mntpt);
free(cgroups->subsystems);
free(cgroups->curcgroup);
free(cgroups->realcgroup);
free(cgroups);
cgroups = next;
}
} }
int lxc_cgroup_attach(pid_t pid, const char *name, const char *lxcpath) int remove_cgroup(struct cgroup_mount_point *mp, const char *path)
{ {
FILE *f; return create_or_remove_cgroup(true, mp, path);
char *line = NULL, ret = 0; }
size_t len = 0;
int first = 1;
char *dirpath;
/* read the list of subsystems from the kernel */ char *cgroup_to_absolute_path(struct cgroup_mount_point *mp, const char *path, const char *suffix)
f = fopen("/proc/cgroups", "r"); {
if (!f) /* first we have to make sure we subtract the mount point's prefix */
return -1; char *prefix = mp->mount_prefix;
char *buf;
ssize_t len, rv;
/* we want to make sure only absolute paths to cgroups are passed to us */
if (path[0] != '/') {
errno = EINVAL;
return NULL;
}
while (getline(&line, &len, f) != -1) { if (prefix && !strcmp(prefix, "/"))
char *c; prefix = NULL;
/* skip the first line */ /* prefix doesn't match */
if (first) { if (prefix && strncmp(prefix, path, strlen(prefix)) != 0) {
first=0; errno = EINVAL;
continue; return NULL;
}
/* if prefix is /foo and path is /foobar */
if (prefix && path[strlen(prefix)] != '/' && path[strlen(prefix)] != '\0') {
errno = EINVAL;
return NULL;
} }
c = strchr(line, '\t'); /* remove prefix from path */
if (!c) path += prefix ? strlen(prefix) : 0;
continue;
*c = '\0';
dirpath = lxc_cgroup_path_get(line, name, lxcpath);
if (!dirpath)
continue;
INFO("joining pid %d to cgroup %s", pid, dirpath); len = strlen(mp->mount_point) + strlen(path) + (suffix ? strlen(suffix) : 0);
if (!lxc_cgroup_enter_one(dirpath, pid)) { buf = calloc(len + 1, 1);
ERROR("Failed joining %d to %s\n", pid, dirpath); rv = snprintf(buf, len + 1, "%s%s%s", mp->mount_point, path, suffix ? suffix : "");
ret = -1; if (rv > len) {
continue; free(buf);
} errno = ENOMEM;
return NULL;
} }
if (line) return buf;
free(line);
fclose(f);
return ret;
} }
bool is_in_subcgroup(int pid, const char *subsystem, struct cgroup_desc *d) struct cgroup_process_info *find_info_for_subsystem(struct cgroup_process_info *info, const char *subsystem)
{ {
char filepath[MAXPATHLEN], *line = NULL, v1[MAXPATHLEN], v2[MAXPATHLEN]; struct cgroup_process_info *info_ptr;
FILE *f; for (info_ptr = info; info_ptr; info_ptr = info_ptr->next) {
int ret, junk; struct cgroup_hierarchy *h = info_ptr->hierarchy;
size_t sz = 0, l1, l2; if (lxc_string_in_array(subsystem, (const char **)h->subsystems))
char *end = index(subsystem, '.'); return info_ptr;
int len = end ? (end - subsystem) : strlen(subsystem);
const char *cgpath = NULL;
while (d) {
if (in_subsys_list("devices", d->subsystems)) {
cgpath = d->realcgroup;
l1 = strlen(cgpath);
break;
} }
d = d->next; errno = ENOENT;
} return NULL;
if (!d) }
return false;
ret = snprintf(filepath, MAXPATHLEN, "/proc/%d/cgroup", pid); int do_cgroup_get(const char *cgroup_path, const char *sub_filename, char *value, size_t len)
if (ret < 0 || ret >= MAXPATHLEN) {
return false; const char *parts[3] = {
if ((f = fopen(filepath, "r")) == NULL) cgroup_path,
return false; sub_filename,
while (getline(&line, &sz, f) != -1) { NULL
// nr:subsystem:path };
v2[0] = v2[1] = '\0'; char *filename;
ret = sscanf(line, "%d:%[^:]:%s", &junk, v1, v2); int ret, saved_errno;
if (ret != 3) {
fclose(f); filename = lxc_string_join("/", parts, false);
free(line); if (!filename)
return false; return -1;
}
len = end ? end - subsystem : strlen(subsystem); ret = lxc_read_from_file(filename, value, len);
if (strncmp(v1, subsystem, len) != 0) saved_errno = errno;
continue; free(filename);
// v2 will start with '/', skip it by using v2+1 errno = saved_errno;
// we must be in SUBcgroup, so make sure l2 > l1 return ret;
l2 = strlen(v2+1);
if (l2 > l1 && strncmp(v2+1, cgpath, l1) == 0) {
fclose(f);
free(line);
return true;
}
}
fclose(f);
if (line)
free(line);
return false;
} }
char *cgroup_get_subsys_path(struct lxc_handler *handler, const char *subsys) int do_cgroup_set(const char *cgroup_path, const char *sub_filename, const char *value)
{ {
struct cgroup_desc *d; const char *parts[3] = {
cgroup_path,
sub_filename,
NULL
};
char *filename;
int ret, saved_errno;
for (d = handler->cgroup; d; d = d->next) { filename = lxc_string_join("/", parts, false);
if (in_subsys_list(subsys, d->subsystems)) if (!filename)
return d->realcgroup; return -1;
}
return NULL; ret = lxc_write_to_file(filename, value, strlen(value), false);
saved_errno = errno;
free(filename);
errno = saved_errno;
return ret;
} }
static int _setup_cgroup(struct lxc_handler *h, struct lxc_list *cgroups, int do_setup_cgroup(struct lxc_handler *h, struct lxc_list *cgroup_settings, bool do_devices)
int devices)
{ {
struct lxc_list *iterator; struct lxc_list *iterator;
struct lxc_cgroup *cg; struct lxc_cgroup *cg;
int ret = -1; int ret = -1;
if (lxc_list_empty(cgroups)) if (lxc_list_empty(cgroup_settings))
return 0; return 0;
lxc_list_for_each(iterator, cgroups) { lxc_list_for_each(iterator, cgroup_settings) {
cg = iterator->elem; cg = iterator->elem;
if (devices == !strncmp("devices", cg->subsystem, 7)) { if (do_devices == !strncmp("devices", cg->subsystem, 7)) {
if (strcmp(cg->subsystem, "devices.deny") == 0 && if (strcmp(cg->subsystem, "devices.deny") == 0 &&
cgroup_devices_has_deny(h, cg->value)) cgroup_devices_has_allow_or_deny(h, cg->value, false))
continue; continue;
if (strcmp(cg->subsystem, "devices.allow") == 0 && if (strcmp(cg->subsystem, "devices.allow") == 0 &&
cgroup_devices_has_allow(h, cg->value)) cgroup_devices_has_allow_or_deny(h, cg->value, true))
continue; continue;
if (lxc_cgroup_set_value(h, cg->subsystem, cg->value)) { if (lxc_cgroup_set_handler(cg->subsystem, cg->value, h)) {
ERROR("Error setting %s to %s for %s\n", ERROR("Error setting %s to %s for %s\n",
cg->subsystem, cg->value, h->name); cg->subsystem, cg->value, h->name);
goto out; goto out;
...@@ -1315,12 +1436,156 @@ out: ...@@ -1315,12 +1436,156 @@ out:
return ret; return ret;
} }
int setup_cgroup_devices(struct lxc_handler *h, struct lxc_list *cgroups) bool cgroup_devices_has_allow_or_deny(struct lxc_handler *h, char *v, bool for_allow)
{
char *path;
FILE *devices_list;
char *line = NULL;
size_t sz = 0;
bool ret = !for_allow;
const char *parts[3] = {
NULL,
"devices.list",
NULL
};
// XXX FIXME if users could use something other than 'lxc.devices.deny = a'.
// not sure they ever do, but they *could*
// right now, I'm assuming they do NOT
if (!for_allow && strcmp(v, "a") != 0 && strcmp(v, "a *:* rwm") != 0)
return false;
parts[0] = (const char *)lxc_cgroup_get_hierarchy_abs_path_handler("devices", h);
if (!parts[0])
return false;
path = lxc_string_join("/", parts, false);
if (!path) {
free((void *)parts[0]);
return false;
}
devices_list = fopen_cloexec(path, "r");
if (!devices_list) {
free(path);
return false;
}
while (getline(&line, &sz, devices_list) != -1) {
size_t len = strlen(line);
if (len > 0 && line[len-1] == '\n')
line[len-1] = '\0';
if (strcmp(line, "a *:* rwm") == 0) {
ret = for_allow;
goto out;
} else if (for_allow && strcmp(line, v) == 0) {
ret = true;
goto out;
}
}
out:
fclose(devices_list);
free(line);
free(path);
return ret;
}
/*
 * cgroup_recursive_task_count: count the tasks listed in a cgroup directory
 * and in all of its sub-directories.
 *
 * @cgroup_path : filesystem path of the cgroup directory to inspect
 *
 * Every directory entry is stat()ed: sub-directories are visited
 * recursively, and for every regular entry named "tasks" the number of
 * lines it contains is added to the total. Negative results from the
 * recursive call or from count_lines() are ignored, so one unreadable
 * child does not invalidate the whole count.
 *
 * Returns the accumulated count, 0 if the directory cannot be opened, or
 * -1 if the dirent buffer cannot be allocated, a sub-path cannot be built,
 * or stat() on an entry fails.
 */
int cgroup_recursive_task_count(const char *cgroup_path)
{
	DIR *d = NULL;
	struct dirent *dent_buf;
	struct dirent *dent;
	ssize_t name_max;
	int n = 0, r;

	/* see man readdir_r(3): the entry buffer must be sized by hand */
	name_max = pathconf(cgroup_path, _PC_NAME_MAX);
	if (name_max <= 0)
		name_max = 255;
	dent_buf = malloc(offsetof(struct dirent, d_name) + name_max + 1);
	if (!dent_buf)
		return -1;

	d = opendir(cgroup_path);
	if (!d) {
		/* an unopenable directory counts as "no tasks"; the buffer
		 * must still be released on this path
		 */
		n = 0;
		goto out;
	}

	while (readdir_r(d, dent_buf, &dent) == 0 && dent) {
		const char *parts[3] = {
			cgroup_path,
			dent->d_name,
			NULL
		};
		char *sub_path;
		struct stat st;

		if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
			continue;

		sub_path = lxc_string_join("/", parts, false);
		if (!sub_path) {
			n = -1;
			goto out;
		}

		r = stat(sub_path, &st);
		if (r < 0) {
			free(sub_path);
			n = -1;
			goto out;
		}

		if (S_ISDIR(st.st_mode)) {
			r = cgroup_recursive_task_count(sub_path);
			if (r >= 0)
				n += r;
		} else if (!strcmp(dent->d_name, "tasks")) {
			r = count_lines(sub_path);
			if (r >= 0)
				n += r;
		}
		free(sub_path);
	}

out:
	if (d)
		closedir(d);
	free(dent_buf);
	return n;
}
int count_lines(const char *fn)
{ {
return _setup_cgroup(h, cgroups, 1); FILE *f;
char *line = NULL;
size_t sz = 0;
int n = 0;
f = fopen_cloexec(fn, "r");
if (!f)
return -1;
while (getline(&line, &sz, f) != -1) {
n++;
}
free(line);
fclose(f);
return n;
} }
int setup_cgroup(struct lxc_handler *h, struct lxc_list *cgroups) int handle_clone_children(struct cgroup_mount_point *mp, char *cgroup_path)
{ {
return _setup_cgroup(h, cgroups, 0); int r, saved_errno = 0;
/* if this is a cpuset hierarchy, we have to set cgroup.clone_children in
* the base cgroup, otherwise containers will start with an empty cpuset.mems
* and cpuset.cpus and then no task can be attached to their cgroup
*/
if (lxc_string_in_array("cpuset", (const char **)mp->hierarchy->subsystems)) {
char *cc_path = cgroup_to_absolute_path(mp, cgroup_path, "/cgroup.clone_children");
if (!cc_path)
return -1;
r = lxc_write_to_file(cc_path, "1", 1, false);
saved_errno = errno;
free(cc_path);
errno = saved_errno;
return r < 0 ? -1 : 0;
}
return 0;
} }
...@@ -20,38 +20,145 @@ ...@@ -20,38 +20,145 @@
* License along with this library; if not, write to the Free Software * License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/ */
#ifndef _cgroup_h #ifndef _ncgroup_h
#define _cgroup_h #define _ncgroup_h
#include <stdbool.h> #include <stdbool.h>
#include <stdint.h>
#include <stddef.h>
struct cgroup_hierarchy;
struct cgroup_meta_data;
struct cgroup_mount_point;
/*
* cgroup_meta_data: the metadata about the cgroup infrastructure on this
* host
*/
struct cgroup_meta_data {
ptrdiff_t ref; /* simple refcount */
/* table of pointers to the hierarchies known on this host
 * (NOTE(review): presumably NULL-terminated or bounded by
 * maximum_hierarchy - confirm against the loader) */
struct cgroup_hierarchy **hierarchies;
/* table of pointers to every cgroup mount point that was found
 * (NOTE(review): termination convention not visible here - confirm) */
struct cgroup_mount_point **mount_points;
/* NOTE(review): presumably the highest hierarchy index in use - confirm */
int maximum_hierarchy;
};
/*
* cgroup_hierarchy: describes a single cgroup hierarchy
* (may have multiple mount points)
*/
struct cgroup_hierarchy {
/* NOTE(review): presumably the kernel's hierarchy number - confirm */
int index;
bool used; /* false if the hierarchy should be ignored by lxc */
/* array of subsystem name strings belonging to this hierarchy; it is
 * searched with lxc_string_in_array() elsewhere in the code */
char **subsystems;
/* NOTE(review): presumably a writable / read-only mount point exposing
 * the hierarchy's root cgroup, if one exists - confirm */
struct cgroup_mount_point *rw_absolute_mount_point;
struct cgroup_mount_point *ro_absolute_mount_point;
/* every mount point of this hierarchy, plus the allocated capacity of
 * that array (NOTE(review): growth policy not visible here) */
struct cgroup_mount_point **all_mount_points;
size_t all_mount_point_capacity;
};
/* /*
* cgroup_desc: describe a container's cgroup membership * cgroup_mount_point: a mount point to where a hierarchy
* is mounted to
*/ */
struct cgroup_desc { struct cgroup_mount_point {
char *mntpt; /* where this is mounted */ struct cgroup_hierarchy *hierarchy;
char *subsystems; /* comma-separated list of subsystems, or NULL */ char *mount_point;
char *curcgroup; /* task's current cgroup, full pathanme */ char *mount_prefix;
char *realcgroup; /* the cgroup as known in /proc/self/cgroup */ bool read_only;
struct cgroup_desc *next;
}; };
/*
* cgroup_process_info: describes the membership of a
* process to the different cgroup
* hierarchies
*/
struct cgroup_process_info {
/* next entry of the singly linked list; the list is walked until
 * this pointer is NULL */
struct cgroup_process_info *next;
/* NOTE(review): presumably a counted reference to the meta data this
 * entry was derived from - confirm who drops it */
struct cgroup_meta_data *meta_ref;
/* the hierarchy this entry describes the membership for */
struct cgroup_hierarchy *hierarchy;
/* NOTE(review): presumably the process's cgroup inside that hierarchy,
 * and an optional sub-cgroup below it - confirm */
char *cgroup_path;
char *cgroup_path_sub;
/* NOTE(review): looks like a growable list of cgroup paths created on
 * behalf of this entry (array, allocated capacity, used count) -
 * confirm against the create/remove code */
char **created_paths;
size_t created_paths_capacity;
size_t created_paths_count;
/* NOTE(review): presumably the mount point chosen for accessing this
 * cgroup - confirm */
struct cgroup_mount_point *designated_mount_point;
};
/* meta data management:
* lxc_cgroup_load_meta loads the meta data (using subsystem
* whitelist from main lxc configuration)
* lxc_cgroup_load_meta2 does the same, but allows one to specify
* a custom whitelist
* lxc_cgroup_get_meta increments the refcount of a meta data
* object
* lxc_cgroup_put_meta decrements the refcount of a meta data
* object, potentially destroying it
*/
extern struct cgroup_meta_data *lxc_cgroup_load_meta();
extern struct cgroup_meta_data *lxc_cgroup_load_meta2(const char **subsystem_whitelist);
extern struct cgroup_meta_data *lxc_cgroup_get_meta(struct cgroup_meta_data *meta_data);
extern struct cgroup_meta_data *lxc_cgroup_put_meta(struct cgroup_meta_data *meta_data);
/* find the hierarchy corresponding to a given subsystem */
extern struct cgroup_hierarchy *lxc_cgroup_find_hierarchy(struct cgroup_meta_data *meta_data, const char *subsystem);
/* find a mount point for a given hierarchy that has access to the cgroup given in 'group' and (if wanted) is writable */
extern struct cgroup_mount_point *lxc_cgroup_find_mount_point(struct cgroup_hierarchy *hierarchy, const char *group, bool should_be_writable);
/* all-in-one: find a mount point for a given hierarchy that has access to the cgroup and return the correct path within */
extern char *lxc_cgroup_find_abs_path(const char *subsystem, const char *group, bool should_be_writable, const char *suffix);
/* determine the cgroup membership of a given process */
extern struct cgroup_process_info *lxc_cgroup_process_info_get(pid_t pid, struct cgroup_meta_data *meta);
extern struct cgroup_process_info *lxc_cgroup_process_info_get_init(struct cgroup_meta_data *meta);
extern struct cgroup_process_info *lxc_cgroup_process_info_get_self(struct cgroup_meta_data *meta);
/* create a new cgroup */
extern struct cgroup_process_info *lxc_cgroup_create(const char *name, const char *path_pattern, struct cgroup_meta_data *meta_data, const char *sub_pattern);
/* get the cgroup membership of a given container */
extern struct cgroup_process_info *lxc_cgroup_get_container_info(const char *name, const char *lxcpath, struct cgroup_meta_data *meta_data);
/* move a process to the cgroups specified by the membership */
extern int lxc_cgroup_enter(struct cgroup_process_info *info, pid_t pid, bool enter_sub);
/* free process membership information */
extern void lxc_cgroup_process_info_free(struct cgroup_process_info *info);
extern void lxc_cgroup_process_info_free_and_remove(struct cgroup_process_info *info);
struct lxc_handler; struct lxc_handler;
extern void lxc_cgroup_destroy_desc(struct cgroup_desc *cgroups); extern char *lxc_cgroup_get_hierarchy_path_handler(const char *subsystem, struct lxc_handler *handler);
extern char *lxc_cgroup_path_get(const char *subsystem, const char *name, extern char *lxc_cgroup_get_hierarchy_path(const char *subsystem, const char *name, const char *lxcpath);
const char *lxcpath); extern char *lxc_cgroup_get_hierarchy_abs_path_handler(const char *subsystem, struct lxc_handler *handler);
extern int lxc_cgroup_nrtasks(struct lxc_handler *handler); extern char *lxc_cgroup_get_hierarchy_abs_path(const char *subsystem, const char *name, const char *lxcpath);
struct cgroup_desc *lxc_cgroup_path_create(const char *name); extern int lxc_cgroup_set_handler(const char *filename, const char *value, struct lxc_handler *handler);
extern int lxc_cgroup_enter(struct cgroup_desc *cgroups, pid_t pid); extern int lxc_cgroup_get_handler(const char *filename, char *value, size_t len, struct lxc_handler *handler);
extern int lxc_cgroup_attach(pid_t pid, const char *name, const char *lxcpath); extern int lxc_cgroup_set(const char *filename, const char *value, const char *name, const char *lxcpath);
extern char *cgroup_path_get(const char *subsystem, const char *cgpath); extern int lxc_cgroup_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath);
extern bool get_subsys_mount(char *dest, const char *subsystem);
extern bool is_in_subcgroup(int pid, const char *subsystem, struct cgroup_desc *d);
/* /*
* Called by commands.c by a container's monitor to find out the * lxc_cgroup_path_get: Get the absolute pathname for a cgroup
* container's cgroup path in a specific subsystem * file for a running container.
*
* @filename : the file of interest (e.g. "freezer.state") or
* the subsystem name (e.g. "freezer") in which case
* the directory where the cgroup may be modified
* will be returned
* @name : name of container to connect to
* @lxcpath : the lxcpath in which the container is running
*
* This is the exported function, which determines cgpath from the
* lxc-start of the @name container running in @lxcpath.
*
* Returns path on success, NULL on error. The caller must free()
* the returned path.
*/ */
extern char *cgroup_get_subsys_path(struct lxc_handler *handler, const char *subsys); extern char *lxc_cgroup_path_get(const char *subsystem, const char *name,
const char *lxcpath);
struct lxc_list; struct lxc_list;
extern int setup_cgroup(struct lxc_handler *h, struct lxc_list *cgroups); extern int lxc_setup_cgroup_without_devices(struct lxc_handler *h, struct lxc_list *cgroup_settings);
extern int setup_cgroup_devices(struct lxc_handler *h, struct lxc_list *cgroups); extern int lxc_setup_cgroup_devices(struct lxc_handler *h, struct lxc_list *cgroup_settings);
extern int lxc_cgroup_nrtasks_handler(struct lxc_handler *handler);
#endif #endif
...@@ -38,6 +38,7 @@ ...@@ -38,6 +38,7 @@
#include <lxc/conf.h> #include <lxc/conf.h>
#include <lxc/start.h> /* for struct lxc_handler */ #include <lxc/start.h> /* for struct lxc_handler */
#include <lxc/utils.h> #include <lxc/utils.h>
#include <lxc/cgroup.h>
#include "commands.h" #include "commands.h"
#include "console.h" #include "console.h"
...@@ -351,7 +352,6 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req, ...@@ -351,7 +352,6 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req,
return lxc_cmd_rsp_send(fd, &rsp); return lxc_cmd_rsp_send(fd, &rsp);
} }
extern char *cgroup_get_subsys_path(struct lxc_handler *handler, const char *subsys);
/* /*
* lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
* particular subsystem. This is the cgroup path relative to the root * particular subsystem. This is the cgroup path relative to the root
...@@ -404,7 +404,7 @@ static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req, ...@@ -404,7 +404,7 @@ static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
if (req->datalen < 1) if (req->datalen < 1)
return -1; return -1;
path = cgroup_get_subsys_path(handler, req->data); path = lxc_cgroup_get_hierarchy_path_handler(req->data, handler);
if (!path) if (!path)
return -1; return -1;
rsp.datalen = strlen(path) + 1, rsp.datalen = strlen(path) + 1,
...@@ -560,7 +560,7 @@ static int lxc_cmd_stop_callback(int fd, struct lxc_cmd_req *req, ...@@ -560,7 +560,7 @@ static int lxc_cmd_stop_callback(int fd, struct lxc_cmd_req *req,
memset(&rsp, 0, sizeof(rsp)); memset(&rsp, 0, sizeof(rsp));
rsp.ret = kill(handler->pid, stopsignal); rsp.ret = kill(handler->pid, stopsignal);
if (!rsp.ret) { if (!rsp.ret) {
char *path = cgroup_get_subsys_path(handler, "freezer"); char *path = lxc_cgroup_get_hierarchy_path_handler("freezer", handler);
if (!path) { if (!path) {
ERROR("container %s:%s is not in a freezer cgroup", ERROR("container %s:%s is not in a freezer cgroup",
handler->lxcpath, handler->name); handler->lxcpath, handler->name);
......
...@@ -123,7 +123,7 @@ static int freeze_unfreeze(const char *name, int freeze, const char *lxcpath) ...@@ -123,7 +123,7 @@ static int freeze_unfreeze(const char *name, int freeze, const char *lxcpath)
char *cgabspath; char *cgabspath;
int ret; int ret;
cgabspath = lxc_cgroup_path_get("freezer", name, lxcpath); cgabspath = lxc_cgroup_get_hierarchy_abs_path("freezer", name, lxcpath);
if (!cgabspath) if (!cgabspath)
return -1; return -1;
...@@ -145,17 +145,14 @@ int lxc_unfreeze(const char *name, const char *lxcpath) ...@@ -145,17 +145,14 @@ int lxc_unfreeze(const char *name, const char *lxcpath)
int lxc_unfreeze_bypath(const char *cgrelpath) int lxc_unfreeze_bypath(const char *cgrelpath)
{ {
char cgabspath[MAXPATHLEN]; char *cgabspath;
int len, ret; int ret;
if (!get_subsys_mount(cgabspath, "freezer")) cgabspath = lxc_cgroup_find_abs_path("freezer", cgrelpath, true, NULL);
return -1; if (!cgabspath)
len = strlen(cgabspath);
ret = snprintf(cgabspath+len, MAXPATHLEN-len, "/%s", cgrelpath);
if (ret < 0 || ret >= MAXPATHLEN-len) {
ERROR("freezer path name too long");
return -1; return -1;
}
return do_unfreeze(cgabspath, 0, NULL, NULL); ret = do_unfreeze(cgabspath, 0, NULL, NULL);
free(cgabspath);
return ret;
} }
...@@ -141,37 +141,35 @@ struct lxc_handler; ...@@ -141,37 +141,35 @@ struct lxc_handler;
/* /*
* Set a specified value for a specified subsystem. The specified * Set a specified value for a specified subsystem. The specified
* subsystem must be fully specified, eg. "cpu.shares" * subsystem must be fully specified, eg. "cpu.shares"
* @d : the cgroup descriptor for the container
* @filename : the cgroup attribute filename * @filename : the cgroup attribute filename
* @value : the value to be set * @value : the value to be set
* @handler : the lxc_handler structure of the container
* Returns 0 on success, < 0 otherwise * Returns 0 on success, < 0 otherwise
*/ */
extern int lxc_cgroup_set_value(struct lxc_handler *hander, const char *filename, extern int lxc_cgroup_set_handler(const char *filename, const char *value, struct lxc_handler *handler);
const char *value);
/* /*
* Set a specified value for a specified subsystem. The specified * Set a specified value for a specified subsystem. The specified
* subsystem must be fully specified, eg. "cpu.shares" * subsystem must be fully specified, eg. "cpu.shares"
* @name : the name of the container
* @filename : the cgroup attribute filename * @filename : the cgroup attribute filename
* @value : the value to be set * @value : the value to be set
* @name : the name of the container
* @lxcpath : lxc config path for container * @lxcpath : lxc config path for container
* Returns 0 on success, < 0 otherwise * Returns 0 on success, < 0 otherwise
*/ */
extern int lxc_cgroup_set(const char *name, const char *filename, const char *value, const char *lxcpath); extern int lxc_cgroup_set(const char *filename, const char *value, const char *name, const char *lxcpath);
/* /*
* Get a specified value for a specified subsystem. The specified * Get a specified value for a specified subsystem. The specified
* subsystem must be fully specified, eg. "cpu.shares" * subsystem must be fully specified, eg. "cpu.shares"
* @name : the name of the container
* @filename : the cgroup attribute filename * @filename : the cgroup attribute filename
* @value : the value to be set * @value : the value to be set
* @len : the len of the value variable * @len : the len of the value variable
* @name : the name of the container
* @lxcpath : lxc config path for container * @lxcpath : lxc config path for container
* Returns the number of bytes read, < 0 on error * Returns the number of bytes read, < 0 on error
*/ */
extern int lxc_cgroup_get(const char *name, const char *filename, extern int lxc_cgroup_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath);
char *value, size_t len, const char *lxcpath);
/* /*
* Retrieve the error string associated with the error returned by * Retrieve the error string associated with the error returned by
......
...@@ -1673,7 +1673,7 @@ static bool lxcapi_set_cgroup_item(struct lxc_container *c, const char *subsys, ...@@ -1673,7 +1673,7 @@ static bool lxcapi_set_cgroup_item(struct lxc_container *c, const char *subsys,
if (container_disk_lock(c)) if (container_disk_lock(c))
return false; return false;
ret = lxc_cgroup_set(c->name, subsys, value, c->config_path); ret = lxc_cgroup_set(subsys, value, c->name, c->config_path);
container_disk_unlock(c); container_disk_unlock(c);
return ret == 0; return ret == 0;
...@@ -1692,7 +1692,7 @@ static int lxcapi_get_cgroup_item(struct lxc_container *c, const char *subsys, c ...@@ -1692,7 +1692,7 @@ static int lxcapi_get_cgroup_item(struct lxc_container *c, const char *subsys, c
if (container_disk_lock(c)) if (container_disk_lock(c))
return -1; return -1;
ret = lxc_cgroup_get(c->name, subsys, retv, inlen, c->config_path); ret = lxc_cgroup_get(subsys, retv, inlen, c->name, c->config_path);
container_disk_unlock(c); container_disk_unlock(c);
return ret; return ret;
......
...@@ -283,7 +283,7 @@ static int utmp_get_ntasks(struct lxc_handler *handler) ...@@ -283,7 +283,7 @@ static int utmp_get_ntasks(struct lxc_handler *handler)
{ {
int ntasks; int ntasks;
ntasks = lxc_cgroup_nrtasks(handler); ntasks = lxc_cgroup_nrtasks_handler(handler);
if (ntasks < 0) { if (ntasks < 0) {
ERROR("failed to get the number of tasks"); ERROR("failed to get the number of tasks");
......
...@@ -384,7 +384,7 @@ static void lxc_fini(const char *name, struct lxc_handler *handler) ...@@ -384,7 +384,7 @@ static void lxc_fini(const char *name, struct lxc_handler *handler)
handler->conf->maincmd_fd = -1; handler->conf->maincmd_fd = -1;
free(handler->name); free(handler->name);
if (handler->cgroup) { if (handler->cgroup) {
lxc_cgroup_destroy_desc(handler->cgroup); lxc_cgroup_process_info_free_and_remove(handler->cgroup);
handler->cgroup = NULL; handler->cgroup = NULL;
} }
free(handler); free(handler);
...@@ -603,11 +603,12 @@ int save_phys_nics(struct lxc_conf *conf) ...@@ -603,11 +603,12 @@ int save_phys_nics(struct lxc_conf *conf)
return 0; return 0;
} }
extern bool is_in_subcgroup(int pid, const char *subsystem, struct cgroup_desc *d);
int lxc_spawn(struct lxc_handler *handler) int lxc_spawn(struct lxc_handler *handler)
{ {
int failed_before_rename = 0; int failed_before_rename = 0;
const char *name = handler->name; const char *name = handler->name;
struct cgroup_meta_data *cgroup_meta = NULL;
const char *cgroup_pattern = NULL;
if (lxc_sync_init(handler)) if (lxc_sync_init(handler))
return -1; return -1;
...@@ -646,6 +647,22 @@ int lxc_spawn(struct lxc_handler *handler) ...@@ -646,6 +647,22 @@ int lxc_spawn(struct lxc_handler *handler)
goto out_abort; goto out_abort;
} }
cgroup_meta = lxc_cgroup_load_meta();
if (!cgroup_meta) {
ERROR("failed to detect cgroup metadata");
goto out_delete_net;
}
/* if we are running as root, use system cgroup pattern, otherwise
* just create a cgroup under the current one. But also fall back to
* that if for some reason reading the configuration fails and no
* default value is available
*/
if (getuid() == 0)
cgroup_pattern = lxc_global_config_value("cgroup.pattern");
if (!cgroup_pattern)
cgroup_pattern = "%n";
/* /*
* if the rootfs is not a blockdev, prevent the container from * if the rootfs is not a blockdev, prevent the container from
* marking it readonly. * marking it readonly.
...@@ -669,15 +686,17 @@ int lxc_spawn(struct lxc_handler *handler) ...@@ -669,15 +686,17 @@ int lxc_spawn(struct lxc_handler *handler)
if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE)) if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE))
failed_before_rename = 1; failed_before_rename = 1;
if ((handler->cgroup = lxc_cgroup_path_create(name)) == NULL) if ((handler->cgroup = lxc_cgroup_create(name, cgroup_pattern, cgroup_meta, NULL)) == NULL) {
ERROR("failed to create cgroups for '%s'", name);
goto out_delete_net; goto out_delete_net;
}
if (setup_cgroup(handler, &handler->conf->cgroup)) { if (lxc_setup_cgroup_without_devices(handler, &handler->conf->cgroup)) {
ERROR("failed to setup the cgroups for '%s'", name); ERROR("failed to setup the cgroups for '%s'", name);
goto out_delete_net; goto out_delete_net;
} }
if (lxc_cgroup_enter(handler->cgroup, handler->pid) < 0) if (lxc_cgroup_enter(handler->cgroup, handler->pid, false) < 0)
goto out_delete_net; goto out_delete_net;
if (failed_before_rename) if (failed_before_rename)
...@@ -707,7 +726,7 @@ int lxc_spawn(struct lxc_handler *handler) ...@@ -707,7 +726,7 @@ int lxc_spawn(struct lxc_handler *handler)
if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CONFIGURE)) if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CONFIGURE))
goto out_delete_net; goto out_delete_net;
if (setup_cgroup_devices(handler, &handler->conf->cgroup)) { if (lxc_setup_cgroup_devices(handler, &handler->conf->cgroup)) {
ERROR("failed to setup the devices cgroup for '%s'", name); ERROR("failed to setup the devices cgroup for '%s'", name);
goto out_delete_net; goto out_delete_net;
} }
...@@ -739,6 +758,7 @@ int lxc_spawn(struct lxc_handler *handler) ...@@ -739,6 +758,7 @@ int lxc_spawn(struct lxc_handler *handler)
goto out_abort; goto out_abort;
} }
lxc_cgroup_put_meta(cgroup_meta);
lxc_sync_fini(handler); lxc_sync_fini(handler);
return 0; return 0;
...@@ -747,6 +767,7 @@ out_delete_net: ...@@ -747,6 +767,7 @@ out_delete_net:
if (handler->clone_flags & CLONE_NEWNET) if (handler->clone_flags & CLONE_NEWNET)
lxc_delete_network(handler); lxc_delete_network(handler);
out_abort: out_abort:
lxc_cgroup_put_meta(cgroup_meta);
lxc_abort(name, handler); lxc_abort(name, handler);
lxc_sync_fini(handler); lxc_sync_fini(handler);
if (handler->pinfd >= 0) { if (handler->pinfd >= 0) {
......
...@@ -55,7 +55,7 @@ struct lxc_handler { ...@@ -55,7 +55,7 @@ struct lxc_handler {
#endif #endif
int pinfd; int pinfd;
const char *lxcpath; const char *lxcpath;
struct cgroup_desc *cgroup; struct cgroup_process_info *cgroup;
}; };
extern struct lxc_handler *lxc_init(const char *name, struct lxc_conf *, const char *); extern struct lxc_handler *lxc_init(const char *name, struct lxc_conf *, const char *);
......
...@@ -75,7 +75,7 @@ static lxc_state_t freezer_state(const char *name, const char *lxcpath) ...@@ -75,7 +75,7 @@ static lxc_state_t freezer_state(const char *name, const char *lxcpath)
FILE *file; FILE *file;
int ret; int ret;
cgabspath = lxc_cgroup_path_get("freezer", name, lxcpath); cgabspath = lxc_cgroup_get_hierarchy_abs_path("freezer", name, lxcpath);
if (!cgabspath) if (!cgabspath)
return -1; return -1;
......
...@@ -233,6 +233,7 @@ const char *lxc_global_config_value(const char *option_name) ...@@ -233,6 +233,7 @@ const char *lxc_global_config_value(const char *option_name)
{ "zfsroot", DEFAULT_ZFSROOT }, { "zfsroot", DEFAULT_ZFSROOT },
{ "lxcpath", LXCPATH }, { "lxcpath", LXCPATH },
{ "cgroup.pattern", DEFAULT_CGROUP_PATTERN }, { "cgroup.pattern", DEFAULT_CGROUP_PATTERN },
{ "cgroup.use", NULL },
{ NULL, NULL }, { NULL, NULL },
}; };
static const char *values[sizeof(options) / sizeof(options[0])] = { 0 }; static const char *values[sizeof(options) / sizeof(options[0])] = { 0 };
......
...@@ -75,21 +75,21 @@ static int test_running_container(const char *lxcpath, ...@@ -75,21 +75,21 @@ static int test_running_container(const char *lxcpath,
} }
/* test get/set value using memory.swappiness file */ /* test get/set value using memory.swappiness file */
ret = lxc_cgroup_get(c->name, "memory.swappiness", value, ret = lxc_cgroup_get("memory.swappiness", value, sizeof(value),
sizeof(value), c->config_path); c->name, c->config_path);
if (ret < 0) { if (ret < 0) {
TSTERR("lxc_cgroup_get failed"); TSTERR("lxc_cgroup_get failed");
goto err3; goto err3;
} }
strcpy(value_save, value); strcpy(value_save, value);
ret = lxc_cgroup_set(c->name, "memory.swappiness", "100", c->config_path); ret = lxc_cgroup_set("memory.swappiness", "100", c->name, c->config_path);
if (ret < 0) { if (ret < 0) {
TSTERR("lxc_cgroup_set_bypath failed"); TSTERR("lxc_cgroup_set_bypath failed");
goto err3; goto err3;
} }
ret = lxc_cgroup_get(c->name, "memory.swappiness", value, ret = lxc_cgroup_get("memory.swappiness", value, sizeof(value),
sizeof(value), c->config_path); c->name, c->config_path);
if (ret < 0) { if (ret < 0) {
TSTERR("lxc_cgroup_get failed"); TSTERR("lxc_cgroup_get failed");
goto err3; goto err3;
...@@ -100,14 +100,14 @@ static int test_running_container(const char *lxcpath, ...@@ -100,14 +100,14 @@ static int test_running_container(const char *lxcpath,
} }
/* restore original value */ /* restore original value */
ret = lxc_cgroup_set(c->name, "memory.swappiness", value_save, ret = lxc_cgroup_set("memory.swappiness", value_save,
c->config_path); c->name, c->config_path);
if (ret < 0) { if (ret < 0) {
TSTERR("lxc_cgroup_set failed"); TSTERR("lxc_cgroup_set failed");
goto err3; goto err3;
} }
ret = lxc_cgroup_get(c->name, "memory.swappiness", value, ret = lxc_cgroup_get("memory.swappiness", value, sizeof(value),
sizeof(value), c->config_path); c->name, c->config_path);
if (ret < 0) { if (ret < 0) {
TSTERR("lxc_cgroup_get failed"); TSTERR("lxc_cgroup_get failed");
goto err3; goto err3;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment