Commit b98f7d6e by Serge Hallyn

cgroups: rework to handle nested containers with multiple and partial mounts

Currently, if you create a container and use the mountcgruop hook, you get the /lxc/c1/c1.real cgroup mounted to /. If you then try to start containers inside that container, lxc can get confused. This patch addresses that, by accepting that the cgroup as found in /proc/self/cgroup can be partially hidden by bind mounts. In this patch: Add optional 'lxc.cgroup.use' to /etc/lxc/lxc.conf to specify which mounted cgroup filesystems lxc should use. So far only the cgroup creation respects this. Keep separate cgroup information for each cgroup mountpoint. So if the caller is in devices cgroup /a but cpuset cgroup /b that should now be ok. Change how we decide whether to ignore failure to set devices cgroup settings. Actually look to see if our current cgroup already has the settings. If not, add them. Finally, the real reason for this patch: in a nested container, /proc/self/cgroup says nothing about where under /sys/fs/cgroup you might find yourself. Handle this by searching for our pid in tasks files, and keep that info in the cgroup handler. Also remove all strdupa from cgroup.c (not android-friendly). Signed-off-by: 's avatarSerge Hallyn <serge.hallyn@ubuntu.com>
parent 070a4b8e
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include <string.h> #include <string.h>
#include <dirent.h> #include <dirent.h>
#include <fcntl.h> #include <fcntl.h>
#include <ctype.h>
#include <sys/types.h> #include <sys/types.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/param.h> #include <sys/param.h>
...@@ -39,6 +40,8 @@ ...@@ -39,6 +40,8 @@
#include "error.h" #include "error.h"
#include "config.h" #include "config.h"
#include "commands.h" #include "commands.h"
#include "list.h"
#include "conf.h"
#include <lxc/log.h> #include <lxc/log.h>
#include <lxc/cgroup.h> #include <lxc/cgroup.h>
...@@ -96,19 +99,18 @@ static char *mount_has_subsystem(const struct mntent *mntent) ...@@ -96,19 +99,18 @@ static char *mount_has_subsystem(const struct mntent *mntent)
/* /*
* Determine mountpoint for a cgroup subsystem. * Determine mountpoint for a cgroup subsystem.
* @subsystem: cgroup subsystem (i.e. freezer). If this is NULL, the first * @dest: a passed-in buffer of at least size MAXPATHLEN into which the path
* cgroup mountpoint with any subsystems is used.
* @mnt: a passed-in buffer of at least size MAXPATHLEN into which the path
* is copied. * is copied.
* @subsystem: cgroup subsystem (i.e. freezer)
* *
* Returns 0 on success, -1 on error. * Returns true on success, false on error.
*/ */
static int get_cgroup_mount(const char *subsystem, char *mnt) bool get_subsys_mount(char *dest, const char *subsystem)
{ {
struct mntent mntent_r; struct mntent mntent_r;
FILE *file = NULL; FILE *file = NULL;
int ret, err = -1; int ret;
bool retv = false;
char buf[LARGE_MAXPATHLEN] = {0}; char buf[LARGE_MAXPATHLEN] = {0};
file = setmntent(MTAB, "r"); file = setmntent(MTAB, "r");
...@@ -118,7 +120,7 @@ static int get_cgroup_mount(const char *subsystem, char *mnt) ...@@ -118,7 +120,7 @@ static int get_cgroup_mount(const char *subsystem, char *mnt)
} }
while ((getmntent_r(file, &mntent_r, buf, sizeof(buf)))) { while ((getmntent_r(file, &mntent_r, buf, sizeof(buf)))) {
if (strcmp(mntent_r.mnt_type, "cgroup") != 0) if (strcmp(mntent_r.mnt_type, "cgroup"))
continue; continue;
if (subsystem) { if (subsystem) {
...@@ -129,12 +131,11 @@ static int get_cgroup_mount(const char *subsystem, char *mnt) ...@@ -129,12 +131,11 @@ static int get_cgroup_mount(const char *subsystem, char *mnt)
continue; continue;
} }
ret = snprintf(mnt, MAXPATHLEN, "%s", mntent_r.mnt_dir); ret = snprintf(dest, MAXPATHLEN, "%s", mntent_r.mnt_dir);
if (ret < 0 || ret >= MAXPATHLEN) if (ret < 0 || ret >= MAXPATHLEN)
goto fail; goto fail;
DEBUG("using cgroup mounted at '%s'", mnt); retv = true;
err = 0;
goto out; goto out;
}; };
...@@ -143,78 +144,36 @@ fail: ...@@ -143,78 +144,36 @@ fail:
subsystem ? subsystem : "(NULL)"); subsystem ? subsystem : "(NULL)");
out: out:
endmntent(file); endmntent(file);
return err; return retv;
} }
/* /*
* cgroup_path_get: Get the absolute path to a particular subsystem, * is_in_cgroup: check whether pid is found in the passed-in cgroup tasks
* plus a passed-in (to be appended) relative cgpath for a container. * file.
* * @path: in full path to a cgroup tasks file
* @subsystem : subsystem of interest (e.g. "freezer") * Note that in most cases the file will simply not exist, which is ok - it
* @cgrelpath : a container's relative cgroup path (e.g. "lxc/c1") * just means that's not our cgroup.
*
* Returns absolute path on success, NULL on error. The caller must free()
* the returned path.
*
* Note that @subsystem may be the name of an item (e.g. "freezer.state")
* in which case the subsystem will be determined by taking the string up
* to the first '.'
*/ */
char *cgroup_path_get(const char *subsystem, const char *cgrelpath) static bool is_in_cgroup(pid_t pid, char *path)
{ {
int rc; int cmppid;
FILE *f = fopen(path, "r");
char *buf = NULL; char *line = NULL;
char *cgabspath = NULL; size_t sz = 0;
buf = malloc(MAXPATHLEN * sizeof(char));
if (!buf) {
ERROR("malloc failed");
goto out1;
}
cgabspath = malloc(MAXPATHLEN * sizeof(char));
if (!cgabspath) {
ERROR("malloc failed");
goto out2;
}
/* lxc_cgroup_set passes a state object for the subsystem, if (!f)
* so trim it to just the subsystem part */ return false;
if (subsystem) { while (getline(&line, &sz, f) != -1) {
rc = snprintf(cgabspath, MAXPATHLEN, "%s", subsystem); if (sscanf(line, "%d", &cmppid) == 1 && cmppid == pid) {
if (rc < 0 || rc >= MAXPATHLEN) { fclose(f);
ERROR("subsystem name too long"); free(line);
goto err3; return true;
}
char *s = index(cgabspath, '.');
if (s)
*s = '\0';
DEBUG("%s: called for subsys %s name %s\n", __func__,
subsystem, cgrelpath);
}
if (get_cgroup_mount(subsystem ? cgabspath : NULL, buf)) {
ERROR("cgroup is not mounted");
goto err3;
} }
rc = snprintf(cgabspath, MAXPATHLEN, "%s/%s", buf, cgrelpath);
if (rc < 0 || rc >= MAXPATHLEN) {
ERROR("name too long");
goto err3;
} }
fclose(f);
DEBUG("%s: returning %s for subsystem %s relpath %s", __func__, if (line)
cgabspath, subsystem, cgrelpath); free(line);
goto out2; return false;
err3:
free(cgabspath);
cgabspath = NULL;
out2:
free(buf);
out1:
return cgabspath;
} }
/* /*
...@@ -235,16 +194,64 @@ out1: ...@@ -235,16 +194,64 @@ out1:
char *lxc_cgroup_path_get(const char *subsystem, const char *name, char *lxc_cgroup_path_get(const char *subsystem, const char *name,
const char *lxcpath) const char *lxcpath)
{ {
char *cgabspath; char *cgpath, *cgp, path[MAXPATHLEN], *pathp, *p;
char *cgrelpath; pid_t initpid = lxc_cmd_get_init_pid(name, lxcpath);
int ret;
cgrelpath = lxc_cmd_get_cgroup_path(name, lxcpath); if (initpid < 0) {
if (!cgrelpath) ERROR("Error getting init pid for container %s:%s",
lxcpath, name);
return NULL; return NULL;
}
cgabspath = cgroup_path_get(subsystem, cgrelpath); cgpath = lxc_cmd_get_cgroup_path(name, lxcpath, subsystem);
free(cgrelpath); if (!cgpath)
return cgabspath; return NULL;
if (!get_subsys_mount(path, subsystem))
return NULL;
pathp = path + strlen(path);
/*
* find a mntpt where i have the subsystem mounted, then find
* a subset cgpath under that which has pid in it.
*
* If d->mntpt is '/a/b/c/d', and the mountpoint is /x/y/z,
* then look for ourselves in:
* /x/y/z/a/b/c/d/tasks
* /x/y/z/b/c/d/tasks
* /x/y/z/c/d/tasks
* /x/y/z/d/tasks
* /x/y/z/tasks
*/
cgp = cgpath;
while (cgp[0]) {
ret = snprintf(pathp, MAXPATHLEN - (pathp - path), "%s/tasks", cgp);
if (ret < 0 || ret >= MAXPATHLEN)
return NULL;
if (!is_in_cgroup(initpid, path)) {
// does not exist, try the next one
cgp = index(cgp+1, '/');
if (!cgp)
break;
continue;
}
break;
}
if (!cgp || !*cgp) {
// try just the path
ret = snprintf(pathp, MAXPATHLEN - (pathp - path), "/tasks");
if (ret < 0 || ret >= MAXPATHLEN)
return NULL;
if (!is_in_cgroup(initpid, path)) {
return NULL;
}
return strdup("/");
}
// path still has 'tasks' on the end, drop it
if ((p = rindex(path, '/')) != NULL)
*p = '\0';
return strdup(path);
} }
/* /*
...@@ -277,6 +284,91 @@ static int do_cgroup_set(const char *path, const char *value) ...@@ -277,6 +284,91 @@ static int do_cgroup_set(const char *path, const char *value)
return 0; return 0;
} }
static bool cgroup_devices_has_deny(struct lxc_handler *h, char *v)
{
char *cgabspath, path[MAXPATHLEN];
FILE *f;
char *line = NULL;
size_t len = 0;
bool ret = true;
// XXX FIXME if users could use something other than 'lxc.devices.deny = a'.
// not sure they ever do, but they *could*
// right now, I'm assuming they do NOT
if (strcmp(v, "a") && strcmp(v, "a *:* rwm"))
return false;
cgabspath = cgroup_get_subsys_path(h, "devices");
if (!cgabspath)
return -1;
ret = snprintf(path, MAXPATHLEN, "%s/devices.list", cgabspath);
if (ret < 0 || ret >= MAXPATHLEN) {
ERROR("pathname too long for devices.list");
return -1;
}
if (!(f = fopen(path, "r"))) {
SYSERROR("Error opening devices.list");
return -1;
}
while (getline(&line, &len, f) != -1) {
size_t len = strlen(line);
if (len > 0 && line[len-1] == '\n')
line[len-1] = '\0';
if (strcmp(line, "a *:* rwm") == 0) {
ret = false;
goto out;
}
}
out:
fclose(f);
if (line)
free(line);
return ret;
}
static bool cgroup_devices_has_allow(struct lxc_handler *h, char *v)
{
char *cgabspath, path[MAXPATHLEN];
bool ret = false;
FILE *f;
char *line = NULL;
size_t len = 0;
cgabspath = cgroup_get_subsys_path(h, "devices");
if (!cgabspath)
return -1;
ret = snprintf(path, MAXPATHLEN, "%s/devices.list", cgabspath);
if (ret < 0 || ret >= MAXPATHLEN) {
ERROR("pathname too long to for devices.list");
return -1;
}
if (!(f = fopen(path, "r"))) {
SYSERROR("Error opening devices.list");
return -1;
}
while (getline(&line, &len, f) != -1) {
size_t len = strlen(line);
if (len > 0 && line[len-1] == '\n')
line[len-1] = '\0';
if (strcmp(line, v) == 0) {
ret = true;
goto out;
}
}
out:
if (line)
free(line);
fclose(f);
return ret;
}
/* /*
* lxc_cgroup_set_bypath: Write a value into a cgroup file * lxc_cgroup_set_bypath: Write a value into a cgroup file
* *
...@@ -286,28 +378,29 @@ static int do_cgroup_set(const char *path, const char *value) ...@@ -286,28 +378,29 @@ static int do_cgroup_set(const char *path, const char *value)
* *
* Returns 0 on success, < 0 on error. * Returns 0 on success, < 0 on error.
*/ */
int lxc_cgroup_set_bypath(const char *cgrelpath, const char *filename, const char *value) int lxc_cgroup_set_value(struct lxc_handler *handler, const char *filename,
const char *value)
{ {
char *cgabspath, path[MAXPATHLEN], *p;
int ret; int ret;
char *cgabspath;
char path[MAXPATHLEN];
cgabspath = cgroup_path_get(filename, cgrelpath); ret = snprintf(path, MAXPATHLEN, "%s", filename);
if (ret < 0 || ret >= MAXPATHLEN)
return -1;
if ((p = index(path, '.')) != NULL)
*p = '\0';
cgabspath = cgroup_get_subsys_path(handler, path);
if (!cgabspath) if (!cgabspath)
return -1; return -1;
ret = snprintf(path, MAXPATHLEN, "%s/%s", cgabspath, filename); ret = snprintf(path, MAXPATHLEN, "%s/%s", cgabspath, filename);
if (ret < 0 || ret >= MAXPATHLEN) { if (ret < 0 || ret >= MAXPATHLEN) {
ERROR("pathname too long"); ERROR("pathname too long to set cgroup value %s to %s",
ret = -1; filename, value);
goto out; return -1;
} }
ret = do_cgroup_set(path, value); return do_cgroup_set(path, value);
out:
free(cgabspath);
return ret;
} }
/* /*
...@@ -326,8 +419,13 @@ int lxc_cgroup_set(const char *name, const char *filename, const char *value, ...@@ -326,8 +419,13 @@ int lxc_cgroup_set(const char *name, const char *filename, const char *value,
int ret; int ret;
char *cgabspath; char *cgabspath;
char path[MAXPATHLEN]; char path[MAXPATHLEN];
char *subsystem = alloca(strlen(filename)+1), *p;
strcpy(subsystem, filename);
cgabspath = lxc_cgroup_path_get(filename, name, lxcpath); if ((p = index(subsystem, '.')) != NULL)
*p = '\0';
cgabspath = lxc_cgroup_path_get(subsystem, name, lxcpath);
if (!cgabspath) if (!cgabspath)
return -1; return -1;
...@@ -370,8 +468,14 @@ int lxc_cgroup_get(const char *name, const char *filename, char *value, ...@@ -370,8 +468,14 @@ int lxc_cgroup_get(const char *name, const char *filename, char *value,
int fd, ret; int fd, ret;
char *cgabspath; char *cgabspath;
char path[MAXPATHLEN]; char path[MAXPATHLEN];
char *subsystem = alloca(strlen(filename)+1), *p;
strcpy(subsystem, filename);
cgabspath = lxc_cgroup_path_get(filename, name, lxcpath); if ((p = index(subsystem, '.')) != NULL)
*p = '\0';
cgabspath = lxc_cgroup_path_get(subsystem, name, lxcpath);
if (!cgabspath) if (!cgabspath)
return -1; return -1;
...@@ -410,29 +514,27 @@ out: ...@@ -410,29 +514,27 @@ out:
return ret; return ret;
} }
int lxc_cgroup_nrtasks(const char *cgrelpath) int lxc_cgroup_nrtasks(struct lxc_handler *handler)
{ {
char *cgabspath = NULL;
char path[MAXPATHLEN]; char path[MAXPATHLEN];
int pid, ret; int pid, ret;
FILE *file; FILE *file;
cgabspath = cgroup_path_get(NULL, cgrelpath); if (!handler->cgroup)
if (!cgabspath)
return -1; return -1;
ret = snprintf(path, MAXPATHLEN, "%s/tasks", cgabspath); /* XXX Should we use a specific subsystem rather than the first one we
* found (handler->cgroup->curcgroup)? */
ret = snprintf(path, MAXPATHLEN, "%s/tasks", handler->cgroup->curcgroup);
if (ret < 0 || ret >= MAXPATHLEN) { if (ret < 0 || ret >= MAXPATHLEN) {
ERROR("pathname too long"); ERROR("pathname too long");
ret = -1; return -1;
goto out;
} }
file = fopen(path, "r"); file = fopen(path, "r");
if (!file) { if (!file) {
SYSERROR("fopen '%s' failed", path); SYSERROR("fopen '%s' failed", path);
ret = -1; return -1;
goto out;
} }
ret = 0; ret = 0;
...@@ -440,28 +542,35 @@ int lxc_cgroup_nrtasks(const char *cgrelpath) ...@@ -440,28 +542,35 @@ int lxc_cgroup_nrtasks(const char *cgrelpath)
ret++; ret++;
fclose(file); fclose(file);
out:
free(cgabspath);
return ret; return ret;
} }
/* static int in_subsys_list(const char *s, const char *list)
* If first creating the /sys/fs/cgroup/$subsys/lxc container, then {
* try to set clone_children to 1. Some kernels don't support char *token, *str, *saveptr = NULL;
* clone_children, and cgroup maintainer wants to deprecate it. So
* XXX TODO we should instead after each cgroup mkdir (here and in if (!list || !s)
* hooks/mountcgroup) check if cpuset is in the subsystems, and if so return 0;
* manually copy over mems and cpus.
*/ str = alloca(strlen(list)+1);
static void set_clone_children(const char *mntdir) strcpy(str, list);
for (; (token = strtok_r(str, ",", &saveptr)); str = NULL) {
if (strcmp(s, token) == 0)
return 1;
}
return 0;
}
static void set_clone_children(struct mntent *m)
{ {
char path[MAXPATHLEN]; char path[MAXPATHLEN];
FILE *fout; FILE *fout;
int ret; int ret;
ret = snprintf(path, MAXPATHLEN, "%s/cgroup.clone_children", mntdir); if (!in_subsys_list("cpuset", m->mnt_opts))
INFO("writing to %s\n", path); return;
ret = snprintf(path, MAXPATHLEN, "%s/cgroup.clone_children", m->mnt_dir);
if (ret < 0 || ret > MAXPATHLEN) if (ret < 0 || ret > MAXPATHLEN)
return; return;
fout = fopen(path, "w"); fout = fopen(path, "w");
...@@ -471,7 +580,59 @@ static void set_clone_children(const char *mntdir) ...@@ -471,7 +580,59 @@ static void set_clone_children(const char *mntdir)
fclose(fout); fclose(fout);
} }
static char *get_all_cgroups(void) static bool have_visited(char *opts, char *visited, char *all_subsystems)
{
char *str, *s = NULL, *token;
str = alloca(strlen(opts)+1);
strcpy(str, opts);
for (; (token = strtok_r(str, ",", &s)); str = NULL) {
if (!in_subsys_list(token, all_subsystems))
continue;
if (visited && in_subsys_list(token, visited))
return true;
}
return false;
}
static bool is_in_desclist(struct cgroup_desc *d, char *opts, char *all_subsystems)
{
while (d) {
if (have_visited(opts, d->subsystems, all_subsystems))
return true;
d = d->next;
}
return false;
}
static char *record_visited(char *opts, char *all_subsystems)
{
char *s = NULL, *token, *str;
int oldlen = 0, newlen, toklen;
char *visited = NULL;
str = alloca(strlen(opts)+1);
strcpy(str, opts);
for (; (token = strtok_r(str, ",", &s)); str = NULL) {
if (!in_subsys_list(token, all_subsystems))
continue;
toklen = strlen(token);
newlen = oldlen + toklen + 1; // ',' + token or token + '\0'
visited = realloc(visited, newlen);
if (!visited)
return (char *)-ENOMEM;
if (oldlen)
strcat(visited, ",");
else
*visited = '\0';
strcat(visited, token);
}
return visited;
}
static char *get_all_subsystems(void)
{ {
FILE *f; FILE *f;
char *line = NULL, *ret = NULL; char *line = NULL, *ret = NULL;
...@@ -518,112 +679,217 @@ out: ...@@ -518,112 +679,217 @@ out:
return ret; return ret;
} }
static int in_cgroup_list(char *s, char *list) /*
* /etc/lxc/lxc.conf can contain lxc.cgroup.use = entries.
* If any of those are present, then lxc will ONLY consider
* cgroup filesystems mounted at one of the listed entries.
*/
static char *get_cgroup_uselist()
{ {
char *token, *str, *saveptr = NULL; FILE *f;
char *line = NULL, *ret = NULL;
if (!list || !s) size_t sz = 0, retsz = 0, newsz;
return 0;
for (str = strdupa(list); (token = strtok_r(str, ",", &saveptr)); str = NULL) { if ((f = fopen(LXC_GLOBAL_CONF, "r")) == NULL)
if (strcmp(s, token) == 0) return NULL;
return 1; while (getline(&line, &sz, f) != -1) {
char *p = line;
while (*p && isblank(*p))
p++;
if (strncmp(p, "lxc.cgroup.use", 14) != 0)
continue;
p = index(p, '=');
if (!p)
continue;
p++;
while (*p && isblank(*p))
p++;
if (strlen(p) < 1)
continue;
newsz = retsz + strlen(p);
if (retsz == 0)
newsz += 1; // for trailing \0
// the last line in the file could lack \n
if (p[strlen(p)-1] != '\n')
newsz += 1;
ret = realloc(ret, newsz);
if (!ret) {
ERROR("Out of memory reading cgroup uselist");
fclose(f);
free(line);
return (char *)-ENOMEM;
}
if (retsz == 0)
strcpy(ret, p);
else
strcat(ret, p);
if (p[strlen(p)-1] != '\n')
ret[newsz-2] = '\0';
ret[newsz-1] = '\0';
retsz = newsz;
} }
return 0; if (line)
free(line);
return ret;
} }
static int have_visited(char *opts, char *visited, char *allcgroups) static bool is_in_uselist(char *uselist, struct mntent *m)
{ {
char *str, *s = NULL, *token; char *p;
if (!uselist)
for (str = strdupa(opts); (token = strtok_r(str, ",", &s)); str = NULL) { return true;
if (!in_cgroup_list(token, allcgroups)) if (!*uselist)
continue; return false;
if (visited && in_cgroup_list(token, visited)) while (*uselist) {
return 1; p = index(uselist, '\n');
if (strncmp(m->mnt_dir, uselist, p - uselist) == 0)
return true;
uselist = p+1;
} }
return false;
return 0;
} }
static int record_visited(char *opts, char **visitedp, char *allcgroups) static bool find_real_cgroup(struct cgroup_desc *d, char *path)
{ {
char *s = NULL, *token, *str; FILE *f;
int oldlen, newlen, ret; char *line = NULL, *p, *p2;
int ret = 0;
size_t len;
for (str = strdupa(opts); (token = strtok_r(str, ",", &s)); str = NULL) { if ((f = fopen("/proc/self/cgroup", "r")) == NULL) {
if (!in_cgroup_list(token, allcgroups)) SYSERROR("Error opening /proc/self/cgroups");
return false;
}
// If there is no subsystem, ignore the mount. Note we may want
// to change this, so that unprivileged users can use a unbound
// cgroup mount to arrange their container tasks.
if (!d->subsystems) {
fclose(f);
return false;
}
while (getline(&line, &len, f) != -1) {
if (!(p = index(line, ':')))
continue; continue;
if (*visitedp && in_cgroup_list(token, *visitedp)) if (!(p2 = index(++p, ':')))
continue; continue;
oldlen = (*visitedp) ? strlen(*visitedp) : 0; *p2 = '\0';
newlen = oldlen + strlen(token) + 2; // in case of multiple mounts it may be more correct to
(*visitedp) = realloc(*visitedp, newlen); // insist all subsystems be the same
if (!(*visitedp)) if (in_subsys_list(p, d->subsystems))
return -1; goto found;
ret = snprintf((*visitedp)+oldlen, newlen, ",%s", token);
if (ret < 0 || ret >= newlen)
return -1;
} }
return 0; if (line)
free(line);
fclose(f);
return false;;
found:
fclose(f);
ret = snprintf(path, MAXPATHLEN, "%s", p2+1);
if (ret < 0 || ret >= MAXPATHLEN) {
free(line);
return false;
}
free(line);
return true;
} }
/* /*
* Make sure the 'cgroup group' exists, so that we don't have to worry about * for a given cgroup mount entry, and a to-be-created container,
* that later. * 1. Figure out full path of the cgroup we are currently in,
* * 2. Find a new free cgroup which is $path / $lxc_name with an
* @lxcgroup: the cgroup group, i.e. 'lxc' by default. * optional '-$n' where n is an ever-increasing integer.
*
* See detailed comments at lxc_cgroup_path_create for more information.
*
* Returns 0 on success, -1 on error.
*/ */
static int create_lxcgroups(const char *lxcgroup) static char *find_free_cgroup(struct cgroup_desc *d, const char *lxc_name)
{ {
FILE *file = NULL; char tail[20], cgpath[MAXPATHLEN], *cgp, path[MAXPATHLEN];
struct mntent mntent_r; int i = 0, ret;
int ret, retv = -1; size_t l;
char path[MAXPATHLEN];
char buf[LARGE_MAXPATHLEN] = {0};
file = setmntent(MTAB, "r"); if (!find_real_cgroup(d, cgpath)) {
if (!file) { ERROR("Failed to find current cgroup");
SYSERROR("failed to open %s", MTAB); return NULL;
return -1;
} }
while ((getmntent_r(file, &mntent_r, buf, sizeof(buf)))) {
if (strcmp(mntent_r.mnt_type, "cgroup"))
continue;
if (!mount_has_subsystem(&mntent_r))
continue;
/* /*
* TODO - handle case where lxcgroup has subdirs? (i.e. build/l1) * If d->mntpt is '/a/b/c/d', and the mountpoint is /x/y/z,
* We probably only want to support that for /users/joe * then look for ourselves in:
* /x/y/z/a/b/c/d/tasks
* /x/y/z/b/c/d/tasks
* /x/y/z/c/d/tasks
* /x/y/z/d/tasks
* /x/y/z/tasks
*/ */
ret = snprintf(path, MAXPATHLEN, "%s/%s", mntent_r.mnt_dir, lxcgroup); cgp = cgpath;
while (cgp[0]) {
ret = snprintf(path, MAXPATHLEN, "%s%s/tasks", d->mntpt, cgp);
if (ret < 0 || ret >= MAXPATHLEN) if (ret < 0 || ret >= MAXPATHLEN)
goto fail; return NULL;
if (access(path, F_OK)) { if (!is_in_cgroup(getpid(), path)) {
set_clone_children(mntent_r.mnt_dir); // does not exist, try the next one
ret = mkdir(path, 0755); cgp = index(cgp+1, '/');
if (ret == -1 && errno != EEXIST) { if (!cgp)
SYSERROR("failed to create '%s' directory", path); break;
goto fail; continue;
} }
break;
} }
if (!cgp || !*cgp) {
// try just the path
ret = snprintf(path, MAXPATHLEN, "%s/tasks", d->mntpt);
if (ret < 0 || ret >= MAXPATHLEN)
return NULL;
if (!is_in_cgroup(getpid(), path))
return NULL;
}
// found it
// path has '/tasks' at end, drop that
if (!(cgp = rindex(path, '/'))) {
ERROR("Got nonsensical path name %s\n", path);
return NULL;
}
*cgp = '\0';
if (strlen(path) + strlen(lxc_name) + 20 > MAXPATHLEN) {
ERROR("Error: cgroup path too long");
return NULL;
}
tail[0] = '\0';
while (1) {
struct stat sb;
int freebytes = MAXPATHLEN - (cgp - path);
if (i) {
ret = snprintf(tail, 20, "-%d", i);
if (ret < 0 || ret >= 20)
return NULL;
}
ret = snprintf(cgp, freebytes, "/%s%s", lxc_name, tail);
if (ret < 0 || ret >= freebytes)
return NULL;
if (stat(path, &sb) == -1)
break;
i++;
} }
retv = 0; l = strlen(cgpath);
fail: ret = snprintf(cgpath + l, MAXPATHLEN - l, "/%s%s", lxc_name, tail);
endmntent(file); if (ret < 0 || ret >= (MAXPATHLEN - l)) {
return retv; ERROR("Out of memory");
return NULL;
}
if ((d->realcgroup = strdup(cgpath)) == NULL) {
ERROR("Out of memory");
return NULL;
}
l = strlen(d->realcgroup);
if (l > 0 && d->realcgroup[l-1] == '\n')
d->realcgroup[l-1] = '\0';
return strdup(path);
} }
/* /*
...@@ -646,152 +912,163 @@ fail: ...@@ -646,152 +912,163 @@ fail:
* is at /sys/fs/cgroup/freezer, and this fn returns '/lxc/r1', then the * is at /sys/fs/cgroup/freezer, and this fn returns '/lxc/r1', then the
* freezer cgroup's full path will be /sys/fs/cgroup/freezer/lxc/r1/. * freezer cgroup's full path will be /sys/fs/cgroup/freezer/lxc/r1/.
* *
* XXX This should probably be locked globally
*
* Races won't be determintal, you'll just end up with leftover unused cgroups * Races won't be determintal, you'll just end up with leftover unused cgroups
*/ */
char *lxc_cgroup_path_create(const char *lxcgroup, const char *name) struct cgroup_desc *lxc_cgroup_path_create(const char *name)
{ {
int i = 0, ret; struct cgroup_desc *retdesc = NULL, *newdesc = NULL;
char *retpath, path[MAXPATHLEN];
char tail[12];
FILE *file = NULL; FILE *file = NULL;
struct mntent mntent_r; struct mntent mntent_r;
char *allcgroups = get_all_cgroups();
char *visited = NULL;
char buf[LARGE_MAXPATHLEN] = {0}; char buf[LARGE_MAXPATHLEN] = {0};
char *all_subsystems = get_all_subsystems();
char *cgroup_uselist = get_cgroup_uselist();
if (!lxcgroup || strlen(lxcgroup) == 0 || strcmp(lxcgroup, "/") == 0) if (cgroup_uselist == (char *)-ENOMEM) {
lxcgroup = "lxc"; if (all_subsystems)
if (!allcgroups) free(all_subsystems);
return NULL;
}
if (!all_subsystems) {
ERROR("failed to get a list of all cgroup subsystems");
if (cgroup_uselist)
free(cgroup_uselist);
return NULL; return NULL;
if (create_lxcgroups(lxcgroup) < 0)
goto err1;
again:
if (visited) {
/* we're checking for a new name, so start over with all cgroup
* mounts */
free(visited);
visited = NULL;
} }
file = setmntent(MTAB, "r"); file = setmntent(MTAB, "r");
if (!file) { if (!file) {
SYSERROR("failed to open %s", MTAB); SYSERROR("failed to open %s", MTAB);
goto err1; free(all_subsystems);
if (cgroup_uselist)
free(cgroup_uselist);
return NULL;
} }
if (i)
snprintf(tail, 12, "-%d", i);
else
*tail = '\0';
while ((getmntent_r(file, &mntent_r, buf, sizeof(buf)))) { while ((getmntent_r(file, &mntent_r, buf, sizeof(buf)))) {
if (strcmp(mntent_r.mnt_type, "cgroup")) if (strcmp(mntent_r.mnt_type, "cgroup"))
continue; continue;
if (!mount_has_subsystem(&mntent_r))
if (cgroup_uselist && !is_in_uselist(cgroup_uselist, &mntent_r))
continue; continue;
/* make sure we haven't checked this subsystem already */ /* make sure we haven't checked this subsystem already */
if (have_visited(mntent_r.mnt_opts, visited, allcgroups)) if (is_in_desclist(retdesc, mntent_r.mnt_opts, all_subsystems))
continue; continue;
if (record_visited(mntent_r.mnt_opts, &visited, allcgroups) < 0)
goto fail;
/* find unused mnt_dir + lxcgroup + name + -$i */ if (!(newdesc = malloc(sizeof(struct cgroup_desc)))) {
ret = snprintf(path, MAXPATHLEN, "%s/%s/%s%s", mntent_r.mnt_dir, ERROR("Out of memory reading cgroups");
lxcgroup, name, tail);
if (ret < 0 || ret >= MAXPATHLEN)
goto fail; goto fail;
}
INFO("lxcgroup %s name %s tail %s, makes path .%s.", newdesc->subsystems = record_visited(mntent_r.mnt_opts, all_subsystems);
lxcgroup, name, tail, path); if (newdesc->subsystems == (char *)-ENOMEM) {
ERROR("Out of memory recording cgroup subsystems");
if (access(path, F_OK) == 0) goto next; free(newdesc);
newdesc = NULL;
if (mkdir(path, 0755)) {
ERROR("Error creating cgroup %s", path);
goto fail; goto fail;
} }
if (!newdesc->subsystems) {
free(newdesc);
newdesc = NULL;
continue;
} }
newdesc->mntpt = strdup(mntent_r.mnt_dir);
endmntent(file); newdesc->realcgroup = NULL;
newdesc->curcgroup = find_free_cgroup(newdesc, name);
// print out the cgpath part if (!newdesc->mntpt || !newdesc->curcgroup) {
ret = snprintf(path, MAXPATHLEN, "%s/%s%s", lxcgroup, name, tail); ERROR("Out of memory reading cgroups");
if (ret < 0 || ret >= MAXPATHLEN) // can't happen
goto fail; goto fail;
}
retpath = strdup(path); set_clone_children(&mntent_r);
free(allcgroups);
if (visited)
free(visited);
return retpath; if (mkdir(newdesc->curcgroup, 0755)) {
ERROR("Error creating cgroup %s", newdesc->curcgroup);
goto fail;
}
newdesc->next = retdesc;
retdesc = newdesc;
}
next:
endmntent(file); endmntent(file);
i++; free(all_subsystems);
goto again; if (cgroup_uselist)
free(cgroup_uselist);
return retdesc;
fail: fail:
endmntent(file); endmntent(file);
err1: free(all_subsystems);
free(allcgroups); if (cgroup_uselist)
if (visited) free(cgroup_uselist);
free(visited); if (newdesc) {
if (newdesc->mntpt)
free(newdesc->mntpt);
if (newdesc->subsystems)
free(newdesc->subsystems);
if (newdesc->curcgroup)
free(newdesc->curcgroup);
if (newdesc->realcgroup)
free(newdesc->realcgroup);
free(newdesc);
}
while (retdesc) {
struct cgroup_desc *t = retdesc;
retdesc = retdesc->next;
if (t->mntpt)
free(t->mntpt);
if (t->subsystems)
free(t->subsystems);
if (t->curcgroup)
free(t->curcgroup);
if (t->realcgroup)
free(t->realcgroup);
free(t);
}
return NULL; return NULL;
} }
int lxc_cgroup_enter(const char *cgpath, pid_t pid) static bool lxc_cgroup_enter_one(const char *dir, int pid)
{ {
char path[MAXPATHLEN]; char path[MAXPATHLEN];
FILE *file = NULL, *fout; int ret;
struct mntent mntent_r; FILE *fout;
int ret, retv = -1;
char buf[LARGE_MAXPATHLEN] = {0};
file = setmntent(MTAB, "r");
if (!file) {
SYSERROR("failed to open %s", MTAB);
return -1;
}
while ((getmntent_r(file, &mntent_r, buf, sizeof(buf)))) { ret = snprintf(path, MAXPATHLEN, "%s/tasks", dir);
if (strcmp(mntent_r.mnt_type, "cgroup"))
continue;
if (!mount_has_subsystem(&mntent_r))
continue;
ret = snprintf(path, MAXPATHLEN, "%s/%s/tasks",
mntent_r.mnt_dir, cgpath);
if (ret < 0 || ret >= MAXPATHLEN) { if (ret < 0 || ret >= MAXPATHLEN) {
ERROR("Error entering cgroup"); ERROR("Error entering cgroup");
goto out; return false;
} }
fout = fopen(path, "w"); fout = fopen(path, "w");
if (!fout) { if (!fout) {
SYSERROR("Error entering cgroup"); SYSERROR("Error entering cgroup");
goto out; return false;
} }
if (fprintf(fout, "%d\n", (int)pid) < 0) { if (fprintf(fout, "%d\n", (int)pid) < 0) {
ERROR("Error writing pid to %s", path); ERROR("Error writing pid to %s to enter cgroup", path);
fclose(fout); fclose(fout);
goto out; return false;
} }
if (fclose(fout) < 0) { if (fclose(fout) < 0) {
SYSERROR("Error writing pid to %s", path); SYSERROR("Error writing pid to %s to enter cgroup", path);
goto out; return false;
}
} }
retv = 0;
out: return true;
endmntent(file); }
return retv;
int lxc_cgroup_enter(struct cgroup_desc *cgroups, pid_t pid)
{
while (cgroups) {
if (!cgroups->subsystems)
goto next;
if (!lxc_cgroup_enter_one(cgroups->curcgroup, pid))
return -1;
next:
cgroups = cgroups->next;
}
return 0;
} }
static int cgroup_rmdir(char *dirname) static int cgroup_rmdir(char *dirname)
...@@ -837,85 +1114,87 @@ static int cgroup_rmdir(char *dirname) ...@@ -837,85 +1114,87 @@ static int cgroup_rmdir(char *dirname)
return ret; return ret;
} }
static int lxc_one_cgroup_destroy(struct mntent *mntent, const char *cgpath) /*
* for each mounted cgroup, destroy the cgroup for the container
*/
void lxc_cgroup_destroy_desc(struct cgroup_desc *cgroups)
{ {
char cgname[MAXPATHLEN]; while (cgroups) {
char *cgmnt = mntent->mnt_dir; struct cgroup_desc *next = cgroups->next;
int rc; if (cgroup_rmdir(cgroups->curcgroup) < 0)
SYSERROR("Error removing cgroup directory %s", cgroups->curcgroup);
rc = snprintf(cgname, MAXPATHLEN, "%s/%s", cgmnt, cgpath); free(cgroups->mntpt);
if (rc < 0 || rc >= MAXPATHLEN) { free(cgroups->subsystems);
ERROR("name too long"); free(cgroups->curcgroup);
return -1; free(cgroups->realcgroup);
} free(cgroups);
DEBUG("destroying %s\n", cgname); cgroups = next;
if (cgroup_rmdir(cgname)) {
SYSERROR("failed to remove cgroup '%s'", cgname);
return -1;
} }
DEBUG("'%s' unlinked", cgname);
return 0;
} }
/* int lxc_cgroup_attach(pid_t pid, const char *name, const char *lxcpath)
* for each mounted cgroup, destroy the cgroup for the container
*/
int lxc_cgroup_destroy(const char *cgpath)
{ {
struct mntent mntent_r; FILE *f;
FILE *file = NULL; char *line = NULL, ret = -1;
int err, retv = 0; size_t len = 0;
int first = 1;
char *dirpath;
char buf[LARGE_MAXPATHLEN] = {0}; /* read the list of subsystems from the kernel */
f = fopen("/proc/cgroups", "r");
if (!f)
return ret;
file = setmntent(MTAB, "r"); while (getline(&line, &len, f) != -1) {
if (!file) { char *c;
SYSERROR("failed to open %s", MTAB);
return -1; /* skip the first line */
if (first) {
first=0;
continue;
} }
while ((getmntent_r(file, &mntent_r, buf, sizeof(buf)))) { c = strchr(line, '\t');
if (strcmp(mntent_r.mnt_type, "cgroup")) if (!c)
continue; continue;
if (!mount_has_subsystem(&mntent_r)) *c = '\0';
dirpath = lxc_cgroup_path_get(line, name, lxcpath);
if (!dirpath)
continue; continue;
err = lxc_one_cgroup_destroy(&mntent_r, cgpath); INFO("joining pid %d to cgroup %s", pid, dirpath);
if (err) // keep trying to clean up the others if (lxc_cgroup_enter_one(dirpath, pid)) {
retv = -1; ERROR("Failed joining %d to %s\n", pid, dirpath);
continue;
} }
endmntent(file);
return retv;
}
int lxc_cgroup_attach(pid_t pid, const char *name, const char *lxcpath)
{
int ret;
char *dirpath;
dirpath = lxc_cmd_get_cgroup_path(name, lxcpath);
if (!dirpath) {
ERROR("Error getting cgroup for container %s: %s", lxcpath, name);
return -1;
} }
INFO("joining pid %d to cgroup %s", pid, dirpath);
ret = lxc_cgroup_enter(dirpath, pid); if (line)
free(dirpath); free(line);
fclose(f);
return ret; return ret;
} }
bool is_in_subcgroup(int pid, const char *subsystem, const char *cgpath) bool is_in_subcgroup(int pid, const char *subsystem, struct cgroup_desc *d)
{ {
char filepath[MAXPATHLEN], *line = NULL, v1[MAXPATHLEN], v2[MAXPATHLEN]; char filepath[MAXPATHLEN], *line = NULL, v1[MAXPATHLEN], v2[MAXPATHLEN];
FILE *f; FILE *f;
int ret, junk; int ret, junk;
size_t sz = 0, l1 = strlen(cgpath), l2; size_t sz = 0, l1, l2;
char *end = index(subsystem, '.'); char *end = index(subsystem, '.');
int len = end ? (end - subsystem) : strlen(subsystem); int len = end ? (end - subsystem) : strlen(subsystem);
const char *cgpath = NULL;
while (d) {
if (in_subsys_list("devices", d->subsystems)) {
cgpath = d->realcgroup;
l1 = strlen(cgpath);
break;
}
d = d->next;
}
if (!d)
return false;
ret = snprintf(filepath, MAXPATHLEN, "/proc/%d/cgroup", pid); ret = snprintf(filepath, MAXPATHLEN, "/proc/%d/cgroup", pid);
if (ret < 0 || ret >= MAXPATHLEN) if (ret < 0 || ret >= MAXPATHLEN)
...@@ -928,6 +1207,7 @@ bool is_in_subcgroup(int pid, const char *subsystem, const char *cgpath) ...@@ -928,6 +1207,7 @@ bool is_in_subcgroup(int pid, const char *subsystem, const char *cgpath)
ret = sscanf(line, "%d:%[^:]:%s", &junk, v1, v2); ret = sscanf(line, "%d:%[^:]:%s", &junk, v1, v2);
if (ret != 3) { if (ret != 3) {
fclose(f); fclose(f);
free(line);
return false; return false;
} }
len = end ? end - subsystem : strlen(subsystem); len = end ? end - subsystem : strlen(subsystem);
...@@ -938,55 +1218,70 @@ bool is_in_subcgroup(int pid, const char *subsystem, const char *cgpath) ...@@ -938,55 +1218,70 @@ bool is_in_subcgroup(int pid, const char *subsystem, const char *cgpath)
l2 = strlen(v2+1); l2 = strlen(v2+1);
if (l2 > l1 && strncmp(v2+1, cgpath, l1) == 0) { if (l2 > l1 && strncmp(v2+1, cgpath, l1) == 0) {
fclose(f); fclose(f);
free(line);
return true; return true;
} }
} }
fclose(f); fclose(f);
if (line)
free(line);
return false; return false;
} }
/* char *cgroup_get_subsys_path(struct lxc_handler *handler, const char *subsys)
* Return cgroup of current task.
* This assumes that task is in the same cgroup for each controller. This
* may or may not *always* be a reasonable assumption - it generally is,
* and handling or at least checking for this condition is a TODO.
*/
int lxc_curcgroup(char *cgroup, int inlen)
{ {
FILE *f; struct cgroup_desc *d;
char *line = NULL, *p, *p2;
int ret = 0;
size_t len;
f = fopen("/proc/self/cgroup", "r"); for (d = handler->cgroup; d; d = d->next) {
if (!f) if (in_subsys_list(subsys, d->subsystems))
return -1; return d->realcgroup;
}
while (getline(&line, &len, f) != -1) { return NULL;
if (strstr(line, ":freezer:") == NULL && strstr(line, ":devices:") == NULL) }
static int _setup_cgroup(struct lxc_handler *h, struct lxc_list *cgroups,
int devices)
{
struct lxc_list *iterator;
struct lxc_cgroup *cg;
int ret = -1;
if (lxc_list_empty(cgroups))
return 0;
lxc_list_for_each(iterator, cgroups) {
cg = iterator->elem;
if (devices == !strncmp("devices", cg->subsystem, 7)) {
if (strcmp(cg->subsystem, "devices.deny") == 0 &&
cgroup_devices_has_deny(h, cg->value))
continue; continue;
p = rindex(line, ':'); if (strcmp(cg->subsystem, "devices.allow") == 0 &&
if (!p) cgroup_devices_has_allow(h, cg->value))
continue; continue;
p++; if (lxc_cgroup_set_value(h, cg->subsystem, cg->value)) {
len = strlen(p) + 1; ERROR("Error setting %s to %s for %s\n",
p2 = p + len - 2; cg->subsystem, cg->value, h->name);
while (*p2 == '\n') { len--; *p2 = '\0'; p2--; } goto out;
if (!cgroup || inlen <= 0) {
ret = len;
break;
} }
if (cgroup && len > inlen) {
ret = -1;
break;
} }
strncpy(cgroup, p, len);
ret = len; DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
cgroup[len-1] = '\0';
break;
} }
if (line) ret = 0;
free(line); INFO("cgroup has been setup");
out:
return ret; return ret;
} }
int setup_cgroup_devices(struct lxc_handler *h, struct lxc_list *cgroups)
{
return _setup_cgroup(h, cgroups, 1);
}
int setup_cgroup(struct lxc_handler *h, struct lxc_list *cgroups)
{
return _setup_cgroup(h, cgroups, 0);
}
...@@ -24,15 +24,34 @@ ...@@ -24,15 +24,34 @@
#define _cgroup_h #define _cgroup_h
#include <stdbool.h> #include <stdbool.h>
/*
* cgroup_desc: describe a container's cgroup membership
*/
struct cgroup_desc {
char *mntpt; /* where this is mounted */
char *subsystems; /* comma-separated list of subsystems, or NULL */
char *curcgroup; /* task's current cgroup, full pathanme */
char *realcgroup; /* the cgroup as known in /proc/self/cgroup */
struct cgroup_desc *next;
};
struct lxc_handler; struct lxc_handler;
extern int lxc_cgroup_destroy(const char *cgpath); extern void lxc_cgroup_destroy_desc(struct cgroup_desc *cgroups);
extern char *lxc_cgroup_path_get(const char *subsystem, const char *name, extern char *lxc_cgroup_path_get(const char *subsystem, const char *name,
const char *lxcpath); const char *lxcpath);
extern int lxc_cgroup_nrtasks(const char *cgpath); extern int lxc_cgroup_nrtasks(struct lxc_handler *handler);
extern char *lxc_cgroup_path_create(const char *lxcgroup, const char *name); struct cgroup_desc *lxc_cgroup_path_create(const char *name);
extern int lxc_cgroup_enter(const char *cgpath, pid_t pid); extern int lxc_cgroup_enter(struct cgroup_desc *cgroups, pid_t pid);
extern int lxc_cgroup_attach(pid_t pid, const char *name, const char *lxcpath); extern int lxc_cgroup_attach(pid_t pid, const char *name, const char *lxcpath);
extern char *cgroup_path_get(const char *subsystem, const char *cgpath); extern char *cgroup_path_get(const char *subsystem, const char *cgpath);
extern bool is_in_subcgroup(int pid, const char *subsystem, const char *cgpath); extern bool get_subsys_mount(char *dest, const char *subsystem);
extern int lxc_curcgroup(char *cgroup, int inlen); extern bool is_in_subcgroup(int pid, const char *subsystem, struct cgroup_desc *d);
/*
* Called by commands.c by a container's monitor to find out the
* container's cgroup path in a specific subsystem
*/
extern char *cgroup_get_subsys_path(struct lxc_handler *handler, const char *subsys);
struct lxc_list;
extern int setup_cgroup(struct lxc_handler *h, struct lxc_list *cgroups);
extern int setup_cgroup_devices(struct lxc_handler *h, struct lxc_list *cgroups);
#endif #endif
...@@ -341,6 +341,7 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req, ...@@ -341,6 +341,7 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req,
return lxc_cmd_rsp_send(fd, &rsp); return lxc_cmd_rsp_send(fd, &rsp);
} }
extern char *cgroup_get_subsys_path(struct lxc_handler *handler, const char *subsys);
/* /*
* lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
* particular subsystem. This is the cgroup path relative to the root * particular subsystem. This is the cgroup path relative to the root
...@@ -348,15 +349,21 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req, ...@@ -348,15 +349,21 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req,
* *
* @name : name of container to connect to * @name : name of container to connect to
* @lxcpath : the lxcpath in which the container is running * @lxcpath : the lxcpath in which the container is running
* @subsystem : the subsystem being asked about
* *
* Returns the path on success, NULL on failure. The caller must free() the * Returns the path on success, NULL on failure. The caller must free() the
* returned path. * returned path.
*/ */
char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath) char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
const char *subsystem)
{ {
int ret, stopped = 0; int ret, stopped = 0;
struct lxc_cmd_rr cmd = { struct lxc_cmd_rr cmd = {
.req = { .cmd = LXC_CMD_GET_CGROUP }, .req = {
.cmd = LXC_CMD_GET_CGROUP,
.datalen = strlen(subsystem)+1,
.data = subsystem,
},
}; };
ret = lxc_cmd(name, &cmd, &stopped, lxcpath); ret = lxc_cmd(name, &cmd, &stopped, lxcpath);
...@@ -381,10 +388,17 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath) ...@@ -381,10 +388,17 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath)
static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req, static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler) struct lxc_handler *handler)
{ {
struct lxc_cmd_rsp rsp = { struct lxc_cmd_rsp rsp;
.datalen = strlen(handler->cgroup) + 1, char *path;
.data = handler->cgroup,
}; if (req->datalen < 1)
return -1;
path = cgroup_get_subsys_path(handler, req->data);
if (!path)
return -1;
rsp.datalen = strlen(path) + 1,
rsp.data = path;
return lxc_cmd_rsp_send(fd, &rsp); return lxc_cmd_rsp_send(fd, &rsp);
} }
...@@ -535,7 +549,13 @@ static int lxc_cmd_stop_callback(int fd, struct lxc_cmd_req *req, ...@@ -535,7 +549,13 @@ static int lxc_cmd_stop_callback(int fd, struct lxc_cmd_req *req,
memset(&rsp, 0, sizeof(rsp)); memset(&rsp, 0, sizeof(rsp));
rsp.ret = kill(handler->pid, stopsignal); rsp.ret = kill(handler->pid, stopsignal);
if (!rsp.ret) { if (!rsp.ret) {
ret = lxc_unfreeze_bypath(handler->cgroup); char *path = cgroup_get_subsys_path(handler, "freezer");
if (!path) {
ERROR("container %s:%s is not in a freezer cgroup",
handler->lxcpath, handler->name);
return 0;
}
ret = lxc_unfreeze_bypath(path);
if (!ret) if (!ret)
return 0; return 0;
......
...@@ -69,7 +69,12 @@ struct lxc_cmd_console_rsp_data { ...@@ -69,7 +69,12 @@ struct lxc_cmd_console_rsp_data {
extern int lxc_cmd_console_winch(const char *name, const char *lxcpath); extern int lxc_cmd_console_winch(const char *name, const char *lxcpath);
extern int lxc_cmd_console(const char *name, int *ttynum, int *fd, extern int lxc_cmd_console(const char *name, int *ttynum, int *fd,
const char *lxcpath); const char *lxcpath);
extern char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath); /*
* Get the 'real' cgroup path (as seen in /proc/self/cgroup) for a container
* for a particular subsystem
*/
extern char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
const char *subsystem);
extern int lxc_cmd_get_clone_flags(const char *name, const char *lxcpath); extern int lxc_cmd_get_clone_flags(const char *name, const char *lxcpath);
extern char *lxc_cmd_get_config_item(const char *name, const char *item, const char *lxcpath); extern char *lxc_cmd_get_config_item(const char *name, const char *item, const char *lxcpath);
extern pid_t lxc_cmd_get_init_pid(const char *name, const char *lxcpath); extern pid_t lxc_cmd_get_init_pid(const char *name, const char *lxcpath);
......
...@@ -1487,47 +1487,6 @@ static int setup_kmsg(const struct lxc_rootfs *rootfs, ...@@ -1487,47 +1487,6 @@ static int setup_kmsg(const struct lxc_rootfs *rootfs,
return 0; return 0;
} }
static int _setup_cgroup(const char *cgpath, struct lxc_list *cgroups,
int devices)
{
struct lxc_list *iterator;
struct lxc_cgroup *cg;
int ret = -1;
if (lxc_list_empty(cgroups))
return 0;
lxc_list_for_each(iterator, cgroups) {
cg = iterator->elem;
if (devices == !strncmp("devices", cg->subsystem, 7)) {
if (lxc_cgroup_set_bypath(cgpath, cg->subsystem,
cg->value)) {
ERROR("Error setting %s to %s for %s\n",
cg->subsystem, cg->value, cgpath);
goto out;
}
}
DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
}
ret = 0;
INFO("cgroup has been setup");
out:
return ret;
}
int setup_cgroup_devices(const char *cgpath, struct lxc_list *cgroups)
{
return _setup_cgroup(cgpath, cgroups, 1);
}
int setup_cgroup(const char *cgpath, struct lxc_list *cgroups)
{
return _setup_cgroup(cgpath, cgroups, 0);
}
static void parse_mntopt(char *opt, unsigned long *flags, char **data) static void parse_mntopt(char *opt, unsigned long *flags, char **data)
{ {
struct mount_opt *mo; struct mount_opt *mo;
......
...@@ -301,8 +301,6 @@ struct lxc_conf { ...@@ -301,8 +301,6 @@ struct lxc_conf {
int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf, int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
const char *lxcpath, char *argv[]); const char *lxcpath, char *argv[]);
extern int setup_cgroup(const char *cgpath, struct lxc_list *cgroups);
extern int setup_cgroup_devices(const char *cgpath, struct lxc_list *cgroups);
extern int detect_shared_rootfs(void); extern int detect_shared_rootfs(void);
/* /*
......
...@@ -145,14 +145,17 @@ int lxc_unfreeze(const char *name, const char *lxcpath) ...@@ -145,14 +145,17 @@ int lxc_unfreeze(const char *name, const char *lxcpath)
int lxc_unfreeze_bypath(const char *cgrelpath) int lxc_unfreeze_bypath(const char *cgrelpath)
{ {
char *cgabspath; char cgabspath[MAXPATHLEN];
int ret; int len, ret;
cgabspath = cgroup_path_get("freezer", cgrelpath); if (!get_subsys_mount(cgabspath, "freezer"))
if (!cgabspath) return -1;
len = strlen(cgabspath);
ret = snprintf(cgabspath+len, MAXPATHLEN-len, "/%s", cgrelpath);
if (ret < 0 || ret >= MAXPATHLEN-len) {
ERROR("freezer path name too long");
return -1; return -1;
}
ret = do_unfreeze(cgabspath, 0, NULL, NULL); return do_unfreeze(cgabspath, 0, NULL, NULL);
free(cgabspath);
return ret;
} }
...@@ -136,15 +136,17 @@ extern int lxc_unfreeze_bypath(const char *cgpath); ...@@ -136,15 +136,17 @@ extern int lxc_unfreeze_bypath(const char *cgpath);
*/ */
extern lxc_state_t lxc_state(const char *name, const char *lxcpath); extern lxc_state_t lxc_state(const char *name, const char *lxcpath);
struct lxc_handler;
/* /*
* Set a specified value for a specified subsystem. The specified * Set a specified value for a specified subsystem. The specified
* subsystem must be fully specified, eg. "cpu.shares" * subsystem must be fully specified, eg. "cpu.shares"
* @cgpath : the cgroup path of the container * @d : the cgroup descriptor for the container
* @filename : the cgroup attribute filename * @filename : the cgroup attribute filename
* @value : the value to be set * @value : the value to be set
* Returns 0 on success, < 0 otherwise * Returns 0 on success, < 0 otherwise
*/ */
extern int lxc_cgroup_set_bypath(const char *cgpath, const char *filename, const char *value); extern int lxc_cgroup_set_value(struct lxc_handler *hander, const char *filename,
const char *value);
/* /*
* Set a specified value for a specified subsystem. The specified * Set a specified value for a specified subsystem. The specified
......
...@@ -283,7 +283,7 @@ static int utmp_get_ntasks(struct lxc_handler *handler) ...@@ -283,7 +283,7 @@ static int utmp_get_ntasks(struct lxc_handler *handler)
{ {
int ntasks; int ntasks;
ntasks = lxc_cgroup_nrtasks(handler->cgroup); ntasks = lxc_cgroup_nrtasks(handler);
if (ntasks < 0) { if (ntasks < 0) {
ERROR("failed to get the number of tasks"); ERROR("failed to get the number of tasks");
......
...@@ -374,8 +374,7 @@ static void lxc_fini(const char *name, struct lxc_handler *handler) ...@@ -374,8 +374,7 @@ static void lxc_fini(const char *name, struct lxc_handler *handler)
handler->conf->maincmd_fd = -1; handler->conf->maincmd_fd = -1;
free(handler->name); free(handler->name);
if (handler->cgroup) { if (handler->cgroup) {
lxc_cgroup_destroy(handler->cgroup); lxc_cgroup_destroy_desc(handler->cgroup);
free(handler->cgroup);
handler->cgroup = NULL; handler->cgroup = NULL;
} }
free(handler); free(handler);
...@@ -594,12 +593,11 @@ int save_phys_nics(struct lxc_conf *conf) ...@@ -594,12 +593,11 @@ int save_phys_nics(struct lxc_conf *conf)
return 0; return 0;
} }
extern bool is_in_subcgroup(int pid, const char *subsystem, const char *cgpath); extern bool is_in_subcgroup(int pid, const char *subsystem, struct cgroup_desc *d);
int lxc_spawn(struct lxc_handler *handler) int lxc_spawn(struct lxc_handler *handler)
{ {
int failed_before_rename = 0, len; int failed_before_rename = 0;
const char *name = handler->name; const char *name = handler->name;
char *curcgroup = NULL;
if (lxc_sync_init(handler)) if (lxc_sync_init(handler))
return -1; return -1;
...@@ -661,18 +659,10 @@ int lxc_spawn(struct lxc_handler *handler) ...@@ -661,18 +659,10 @@ int lxc_spawn(struct lxc_handler *handler)
if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE)) if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE))
failed_before_rename = 1; failed_before_rename = 1;
if ((len = lxc_curcgroup(NULL, 0)) > 1) { if ((handler->cgroup = lxc_cgroup_path_create(name)) == NULL)
curcgroup = alloca(len);
if (lxc_curcgroup(curcgroup, len) <= 1)
curcgroup = NULL;
FILE *f = fopen("/tmp/a", "a");
fprintf(f, "curcgroup is %s\n", curcgroup);
fclose(f);
}
if ((handler->cgroup = lxc_cgroup_path_create(curcgroup, name)) == NULL)
goto out_delete_net; goto out_delete_net;
if (setup_cgroup(handler->cgroup, &handler->conf->cgroup)) { if (setup_cgroup(handler, &handler->conf->cgroup)) {
ERROR("failed to setup the cgroups for '%s'", name); ERROR("failed to setup the cgroups for '%s'", name);
goto out_delete_net; goto out_delete_net;
} }
...@@ -707,16 +697,10 @@ int lxc_spawn(struct lxc_handler *handler) ...@@ -707,16 +697,10 @@ int lxc_spawn(struct lxc_handler *handler)
if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CONFIGURE)) if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CONFIGURE))
goto out_delete_net; goto out_delete_net;
if (setup_cgroup_devices(handler->cgroup, &handler->conf->cgroup)) { if (setup_cgroup_devices(handler, &handler->conf->cgroup)) {
/* an unfortunate special case: startup hooks may have already
* setup the cgroup. If a setting fails, and this is the devices
* subsystem, *and* we are already in a subset of the cgroup,
* then ignore the failure */
if (!is_in_subcgroup(handler->pid, "devices", handler->cgroup)) {
ERROR("failed to setup the devices cgroup for '%s'", name); ERROR("failed to setup the devices cgroup for '%s'", name);
goto out_delete_net; goto out_delete_net;
} }
}
/* Tell the child to complete its initialization and wait for /* Tell the child to complete its initialization and wait for
* it to exec or return an error. (the child will never * it to exec or return an error. (the child will never
......
...@@ -37,6 +37,8 @@ struct lxc_operations { ...@@ -37,6 +37,8 @@ struct lxc_operations {
int (*post_start)(struct lxc_handler *, void *); int (*post_start)(struct lxc_handler *, void *);
}; };
struct cgroup_desc;
struct lxc_handler { struct lxc_handler {
pid_t pid; pid_t pid;
char *name; char *name;
...@@ -53,7 +55,7 @@ struct lxc_handler { ...@@ -53,7 +55,7 @@ struct lxc_handler {
#endif #endif
int pinfd; int pinfd;
const char *lxcpath; const char *lxcpath;
char *cgroup; struct cgroup_desc *cgroup;
}; };
extern struct lxc_handler *lxc_init(const char *name, struct lxc_conf *, const char *); extern struct lxc_handler *lxc_init(const char *name, struct lxc_conf *, const char *);
......
...@@ -37,102 +37,6 @@ ...@@ -37,102 +37,6 @@
} while (0) } while (0)
/* /*
* test_same_name: Create the same named container within a group
*
* @group : name of the container group or NULL for default "lxc"
* @name : name of the container
*
* Note, lxc will append a -<nr> to duplicate container names. This is what
* is tested here.
*
* Returns 0 on success, < 0 on failure
*/
static int test_same_name(const char *group, const char *name)
{
int ret = -1;
char *cgrelpath1;
char *cgrelpath2;
char relpath[PATH_MAX+1];
sprintf(relpath, "%s/%s-1", group ? group : "lxc", name);
cgrelpath1 = lxc_cgroup_path_create(group, name);
if (!cgrelpath1) {
TSTERR("lxc_cgroup_path_create returned NULL");
goto err1;
}
cgrelpath2 = lxc_cgroup_path_create(group, name);
if (!cgrelpath2) {
TSTERR("lxc_cgroup_path_create returned NULL");
goto err2;
}
if (strcmp(cgrelpath2, relpath)) {
TSTERR("unexpected name for duplicate %s", cgrelpath2);
goto err3;
}
ret = 0;
err3:
lxc_cgroup_destroy(cgrelpath2);
free(cgrelpath2);
err2:
lxc_cgroup_destroy(cgrelpath1);
free(cgrelpath1);
err1:
return ret;
}
/*
* test_basic: Test cgroup functions that don't require a running container
*
* @group : name of the container group or NULL for default "lxc"
* @name : name of the container
*
* Returns 0 on success, < 0 on failure
*/
static int test_basic(const char *group, const char *name)
{
int ret = -1;
char *cgabspath;
char *cgrelpath;
char relpath[PATH_MAX+1];
sprintf(relpath, "%s/%s", group ? group : "lxc", name);
cgrelpath = lxc_cgroup_path_create(group, name);
if (!cgrelpath) {
TSTERR("lxc_cgroup_path_create returned NULL");
goto err1;
}
if (!strstr(cgrelpath, relpath)) {
TSTERR("lxc_cgroup_path_create %s not in %s", relpath, cgrelpath);
goto err2;
}
cgabspath = cgroup_path_get("freezer", cgrelpath);
if (!cgabspath) {
TSTERR("cgroup_path_get returned NULL");
goto err2;
}
if (!strstr(cgabspath, relpath)) {
TSTERR("cgroup_path_get %s not in %s", relpath, cgabspath);
goto err3;
}
ret = lxc_cgroup_destroy(cgrelpath);
if (ret < 0) {
TSTERR("lxc_cgroup_destroy failed");
goto err3;
}
err3:
free(cgabspath);
err2:
free(cgrelpath);
err1:
return ret;
}
/*
* test_running_container: test cgroup functions against a running container * test_running_container: test cgroup functions against a running container
* *
* @group : name of the container group or NULL for default "lxc" * @group : name of the container group or NULL for default "lxc"
...@@ -141,7 +45,7 @@ err1: ...@@ -141,7 +45,7 @@ err1:
static int test_running_container(const char *lxcpath, static int test_running_container(const char *lxcpath,
const char *group, const char *name) const char *group, const char *name)
{ {
int nrtasks, ret = -1; int ret = -1;
struct lxc_container *c = NULL; struct lxc_container *c = NULL;
char *cgrelpath; char *cgrelpath;
char *cgabspath; char *cgabspath;
...@@ -160,7 +64,7 @@ static int test_running_container(const char *lxcpath, ...@@ -160,7 +64,7 @@ static int test_running_container(const char *lxcpath,
goto err2; goto err2;
} }
cgrelpath = lxc_cmd_get_cgroup_path(c->name, c->config_path); cgrelpath = lxc_cmd_get_cgroup_path(c->name, c->config_path, "freezer");
if (!cgrelpath) { if (!cgrelpath) {
TSTERR("lxc_cmd_get_cgroup_path returned NULL"); TSTERR("lxc_cmd_get_cgroup_path returned NULL");
goto err2; goto err2;
...@@ -179,7 +83,7 @@ static int test_running_container(const char *lxcpath, ...@@ -179,7 +83,7 @@ static int test_running_container(const char *lxcpath,
} }
strcpy(value_save, value); strcpy(value_save, value);
ret = lxc_cgroup_set_bypath(cgrelpath, "memory.swappiness", "100"); ret = lxc_cgroup_set(c->name, "memory.swappiness", "100", c->config_path);
if (ret < 0) { if (ret < 0) {
TSTERR("lxc_cgroup_set_bypath failed"); TSTERR("lxc_cgroup_set_bypath failed");
goto err3; goto err3;
...@@ -213,12 +117,6 @@ static int test_running_container(const char *lxcpath, ...@@ -213,12 +117,6 @@ static int test_running_container(const char *lxcpath,
goto err3; goto err3;
} }
nrtasks = lxc_cgroup_nrtasks(cgrelpath);
if (nrtasks < 1) {
TSTERR("failed getting nrtasks");
goto err3;
}
cgabspath = lxc_cgroup_path_get("freezer", c->name, c->config_path); cgabspath = lxc_cgroup_path_get("freezer", c->name, c->config_path);
if (!cgabspath) { if (!cgabspath) {
TSTERR("lxc_cgroup_path_get returned NULL"); TSTERR("lxc_cgroup_path_get returned NULL");
...@@ -312,18 +210,6 @@ int main() ...@@ -312,18 +210,6 @@ int main()
exit(0); exit(0);
} }
if (test_basic(NULL, MYNAME) < 0)
goto out;
if (test_basic("ab", MYNAME) < 0)
goto out;
printf("Basic cgroup path tests...Passed\n");
if (test_same_name(NULL, MYNAME) < 0)
goto out;
if (test_same_name("ab", MYNAME) < 0)
goto out;
printf("Same name tests...Passed\n");
#if TEST_ALREADY_RUNNING_CT #if TEST_ALREADY_RUNNING_CT
/* /*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment