Commit 33ad9f1a by Christian Seiler Committed by Stéphane Graber

cgroup: Major rewrite of cgroup logic

This patch rewrites most of the cgroup logic. It creates a set of data structures to store the kernel state of the cgroup hierarchies and their mountpoints. Mainly, everything is now grouped with respect to the hierarchies of the system. Multiple controllers may be mounted together or separately to different hierarchies; the data structures reflect this. Each hierarchy may have multiple mount points (that were created previously using the bind mount method) and each of these mount points may point to a different prefix inside the cgroup tree. The current code does not make any assumptions regarding the mount points; it just parses /proc/self/mountinfo to acquire the relevant information. The only requirement is that the current cgroup of either init (if cgroup.pattern starts with '/' and the tools are executed as root) or the current process (otherwise) is accessible. The root cgroup need not be accessible. The configuration option cgroup.pattern is introduced. For root-executed containers, it specifies which format the cgroups should be in. Example values may include '/lxc/%n', 'lxc/%n', '%n' or '/machine/%n.lxc'. Any occurrence of '%n' is replaced with the name of the container (and if clashes occur in any hierarchy, -1, -2, etc. are appended globally). If the pattern starts with /, new containers' cgroups will be located relative to init's cgroup; if it doesn't, they will be located relative to the current process's cgroup. Some changes to the cgroup.h API have been made to make it more consistent, both with respect to naming and with respect to the parameters. This causes some changes in other parts of the code that are included in the patch. There has been some testing of this functionality, but there are probably still quite a few bugs in there, especially for people with different configurations. Signed-off-by: Christian Seiler <christian@iwakd.de> Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
parent bfa3f007
......@@ -727,7 +727,24 @@ int lxc_attach(const char* name, const char* lxcpath, lxc_attach_exec_t exec_fun
/* attach to cgroup, if requested */
if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) {
ret = lxc_cgroup_attach(attached_pid, name, lxcpath);
struct cgroup_meta_data *meta_data;
struct cgroup_process_info *container_info;
meta_data = lxc_cgroup_load_meta();
if (!meta_data) {
ERROR("could not move attached process %ld to cgroup of container", (long)attached_pid);
goto cleanup_error;
}
container_info = lxc_cgroup_get_container_info(name, lxcpath, meta_data);
lxc_cgroup_put_meta(meta_data);
if (!container_info) {
ERROR("could not move attached process %ld to cgroup of container", (long)attached_pid);
goto cleanup_error;
}
ret = lxc_cgroup_enter(container_info, attached_pid, false);
lxc_cgroup_process_info_free(container_info);
if (ret < 0) {
ERROR("could not move attached process %ld to cgroup of container", (long)attached_pid);
goto cleanup_error;
......
......@@ -20,38 +20,145 @@
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef _cgroup_h
#define _cgroup_h
#ifndef _ncgroup_h
#define _ncgroup_h
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>
struct cgroup_hierarchy;
struct cgroup_meta_data;
struct cgroup_mount_point;
/*
 * cgroup_meta_data: the metadata about the cgroup infrastructure on this
 * host
 *
 * Objects of this type are reference-counted: lxc_cgroup_get_meta()
 * increments the count and lxc_cgroup_put_meta() decrements it,
 * potentially destroying the object.
 */
struct cgroup_meta_data {
ptrdiff_t ref; /* simple refcount */
/* the cgroup hierarchies found on this host.
 * NOTE(review): how the array is sized/terminated is not visible here;
 * presumably related to maximum_hierarchy - confirm against cgroup.c */
struct cgroup_hierarchy **hierarchies;
/* the cgroup mount points found on this host.
 * NOTE(review): array termination convention not visible here - confirm */
struct cgroup_mount_point **mount_points;
/* NOTE(review): presumably the highest hierarchy index in use - confirm */
int maximum_hierarchy;
};
/*
 * cgroup_hierarchy: describes a single cgroup hierarchy
 * (may have multiple mount points)
 */
struct cgroup_hierarchy {
/* NOTE(review): presumably the kernel's number for this hierarchy - confirm */
int index;
bool used; /* false if the hierarchy should be ignored by lxc */
/* names of the subsystems (controllers) attached to this hierarchy.
 * NOTE(review): array termination convention not visible here - confirm */
char **subsystems;
/* NOTE(review): presumably a writable mount point of this hierarchy that
 * is not restricted to a sub-prefix of the cgroup tree - confirm */
struct cgroup_mount_point *rw_absolute_mount_point;
/* NOTE(review): presumably the read-only counterpart of the above - confirm */
struct cgroup_mount_point *ro_absolute_mount_point;
/* every mount point at which this hierarchy is mounted */
struct cgroup_mount_point **all_mount_points;
/* NOTE(review): presumably the allocated size of all_mount_points - confirm */
size_t all_mount_point_capacity;
};
/*
 * cgroup_mount_point: a mount point at which a hierarchy
 *                     is mounted
 */
struct cgroup_mount_point {
/* the hierarchy that is mounted at this mount point */
struct cgroup_hierarchy *hierarchy;
/* NOTE(review): presumably the file system path of the mount point - confirm */
char *mount_point;
/* NOTE(review): presumably the prefix inside the cgroup tree that this
 * mount point exposes (e.g. for bind mounts of a sub-tree) - confirm */
char *mount_prefix;
/* NOTE(review): presumably true if the mount is read-only - confirm */
bool read_only;
};
/*
* cgroup_desc: describe a container's cgroup membership
* cgroup_process_info: describes the membership of a
* process to the different cgroup
* hierarchies
*/
struct cgroup_desc {
char *mntpt; /* where this is mounted */
char *subsystems; /* comma-separated list of subsystems, or NULL */
char *curcgroup; /* task's current cgroup, full pathanme */
char *realcgroup; /* the cgroup as known in /proc/self/cgroup */
struct cgroup_desc *next;
struct cgroup_process_info {
struct cgroup_process_info *next;
struct cgroup_meta_data *meta_ref;
struct cgroup_hierarchy *hierarchy;
char *cgroup_path;
char *cgroup_path_sub;
char **created_paths;
size_t created_paths_capacity;
size_t created_paths_count;
struct cgroup_mount_point *designated_mount_point;
};
/* meta data management:
 *    lxc_cgroup_load_meta  loads the meta data (using subsystem
 *                          whitelist from main lxc configuration);
 *                          callers treat a NULL return as failure
 *    lxc_cgroup_load_meta2 does the same, but allows one to specify
 *                          a custom whitelist
 *    lxc_cgroup_get_meta   increments the refcount of a meta data
 *                          object
 *    lxc_cgroup_put_meta   decrements the refcount of a meta data
 *                          object, potentially destroying it
 *                          (NOTE(review): the meaning of its return
 *                          value is not visible here - confirm)
 *
 * lxc_cgroup_load_meta is declared with an explicit (void) parameter
 * list so that the compiler rejects calls that pass arguments; an empty
 * list "()" would leave the parameters unspecified in C.
 */
extern struct cgroup_meta_data *lxc_cgroup_load_meta(void);
extern struct cgroup_meta_data *lxc_cgroup_load_meta2(const char **subsystem_whitelist);
extern struct cgroup_meta_data *lxc_cgroup_get_meta(struct cgroup_meta_data *meta_data);
extern struct cgroup_meta_data *lxc_cgroup_put_meta(struct cgroup_meta_data *meta_data);
/* find the hierarchy corresponding to a given subsystem */
extern struct cgroup_hierarchy *lxc_cgroup_find_hierarchy(struct cgroup_meta_data *meta_data, const char *subsystem);
/* find a mount point for a given hierarchy that has access to the cgroup in 'group' and (if wanted) is writable */
extern struct cgroup_mount_point *lxc_cgroup_find_mount_point(struct cgroup_hierarchy *hierarchy, const char *group, bool should_be_writable);
/* all-in-one: find a mount point for a given hierarchy that has access to the cgroup and return the correct path within */
extern char *lxc_cgroup_find_abs_path(const char *subsystem, const char *group, bool should_be_writable, const char *suffix);
/* determine the cgroup membership of a given process */
extern struct cgroup_process_info *lxc_cgroup_process_info_get(pid_t pid, struct cgroup_meta_data *meta);
extern struct cgroup_process_info *lxc_cgroup_process_info_get_init(struct cgroup_meta_data *meta);
extern struct cgroup_process_info *lxc_cgroup_process_info_get_self(struct cgroup_meta_data *meta);
/* create a new cgroup */
extern struct cgroup_process_info *lxc_cgroup_create(const char *name, const char *path_pattern, struct cgroup_meta_data *meta_data, const char *sub_pattern);
/* get the cgroup membership of a given container */
extern struct cgroup_process_info *lxc_cgroup_get_container_info(const char *name, const char *lxcpath, struct cgroup_meta_data *meta_data);
/* move a process to the cgroups specified by the membership */
extern int lxc_cgroup_enter(struct cgroup_process_info *info, pid_t pid, bool enter_sub);
/* free process membership information */
extern void lxc_cgroup_process_info_free(struct cgroup_process_info *info);
extern void lxc_cgroup_process_info_free_and_remove(struct cgroup_process_info *info);
struct lxc_handler;
extern void lxc_cgroup_destroy_desc(struct cgroup_desc *cgroups);
extern char *lxc_cgroup_path_get(const char *subsystem, const char *name,
const char *lxcpath);
extern int lxc_cgroup_nrtasks(struct lxc_handler *handler);
struct cgroup_desc *lxc_cgroup_path_create(const char *name);
extern int lxc_cgroup_enter(struct cgroup_desc *cgroups, pid_t pid);
extern int lxc_cgroup_attach(pid_t pid, const char *name, const char *lxcpath);
extern char *cgroup_path_get(const char *subsystem, const char *cgpath);
extern bool get_subsys_mount(char *dest, const char *subsystem);
extern bool is_in_subcgroup(int pid, const char *subsystem, struct cgroup_desc *d);
extern char *lxc_cgroup_get_hierarchy_path_handler(const char *subsystem, struct lxc_handler *handler);
extern char *lxc_cgroup_get_hierarchy_path(const char *subsystem, const char *name, const char *lxcpath);
extern char *lxc_cgroup_get_hierarchy_abs_path_handler(const char *subsystem, struct lxc_handler *handler);
extern char *lxc_cgroup_get_hierarchy_abs_path(const char *subsystem, const char *name, const char *lxcpath);
extern int lxc_cgroup_set_handler(const char *filename, const char *value, struct lxc_handler *handler);
extern int lxc_cgroup_get_handler(const char *filename, char *value, size_t len, struct lxc_handler *handler);
extern int lxc_cgroup_set(const char *filename, const char *value, const char *name, const char *lxcpath);
extern int lxc_cgroup_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath);
/*
* Called by commands.c by a container's monitor to find out the
* container's cgroup path in a specific subsystem
* lxc_cgroup_path_get: Get the absolute pathname for a cgroup
* file for a running container.
*
* @filename : the file of interest (e.g. "freezer.state") or
* the subsystem name (e.g. "freezer") in which case
* the directory where the cgroup may be modified
* will be returned
* @name : name of container to connect to
* @lxcpath : the lxcpath in which the container is running
*
* This is the exported function, which determines cgpath from the
* lxc-start of the @name container running in @lxcpath.
*
* Returns path on success, NULL on error. The caller must free()
* the returned path.
*/
extern char *cgroup_get_subsys_path(struct lxc_handler *handler, const char *subsys);
extern char *lxc_cgroup_path_get(const char *subsystem, const char *name,
const char *lxcpath);
struct lxc_list;
extern int setup_cgroup(struct lxc_handler *h, struct lxc_list *cgroups);
extern int setup_cgroup_devices(struct lxc_handler *h, struct lxc_list *cgroups);
extern int lxc_setup_cgroup_without_devices(struct lxc_handler *h, struct lxc_list *cgroup_settings);
extern int lxc_setup_cgroup_devices(struct lxc_handler *h, struct lxc_list *cgroup_settings);
extern int lxc_cgroup_nrtasks_handler(struct lxc_handler *handler);
#endif
......@@ -38,6 +38,7 @@
#include <lxc/conf.h>
#include <lxc/start.h> /* for struct lxc_handler */
#include <lxc/utils.h>
#include <lxc/cgroup.h>
#include "commands.h"
#include "console.h"
......@@ -351,7 +352,6 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req,
return lxc_cmd_rsp_send(fd, &rsp);
}
extern char *cgroup_get_subsys_path(struct lxc_handler *handler, const char *subsys);
/*
* lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
* particular subsystem. This is the cgroup path relative to the root
......@@ -404,7 +404,7 @@ static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
if (req->datalen < 1)
return -1;
path = cgroup_get_subsys_path(handler, req->data);
path = lxc_cgroup_get_hierarchy_path_handler(req->data, handler);
if (!path)
return -1;
rsp.datalen = strlen(path) + 1,
......@@ -560,7 +560,7 @@ static int lxc_cmd_stop_callback(int fd, struct lxc_cmd_req *req,
memset(&rsp, 0, sizeof(rsp));
rsp.ret = kill(handler->pid, stopsignal);
if (!rsp.ret) {
char *path = cgroup_get_subsys_path(handler, "freezer");
char *path = lxc_cgroup_get_hierarchy_path_handler("freezer", handler);
if (!path) {
ERROR("container %s:%s is not in a freezer cgroup",
handler->lxcpath, handler->name);
......
......@@ -123,7 +123,7 @@ static int freeze_unfreeze(const char *name, int freeze, const char *lxcpath)
char *cgabspath;
int ret;
cgabspath = lxc_cgroup_path_get("freezer", name, lxcpath);
cgabspath = lxc_cgroup_get_hierarchy_abs_path("freezer", name, lxcpath);
if (!cgabspath)
return -1;
......@@ -145,17 +145,14 @@ int lxc_unfreeze(const char *name, const char *lxcpath)
int lxc_unfreeze_bypath(const char *cgrelpath)
{
char cgabspath[MAXPATHLEN];
int len, ret;
char *cgabspath;
int ret;
if (!get_subsys_mount(cgabspath, "freezer"))
return -1;
len = strlen(cgabspath);
ret = snprintf(cgabspath+len, MAXPATHLEN-len, "/%s", cgrelpath);
if (ret < 0 || ret >= MAXPATHLEN-len) {
ERROR("freezer path name too long");
cgabspath = lxc_cgroup_find_abs_path("freezer", cgrelpath, true, NULL);
if (!cgabspath)
return -1;
}
return do_unfreeze(cgabspath, 0, NULL, NULL);
ret = do_unfreeze(cgabspath, 0, NULL, NULL);
free(cgabspath);
return ret;
}
......@@ -141,37 +141,35 @@ struct lxc_handler;
/*
* Set a specified value for a specified subsystem. The specified
* subsystem must be fully specified, eg. "cpu.shares"
* @d : the cgroup descriptor for the container
* @filename : the cgroup attribute filename
* @value : the value to be set
* @handler : the lxc_handler structure of the container
* Returns 0 on success, < 0 otherwise
*/
extern int lxc_cgroup_set_value(struct lxc_handler *hander, const char *filename,
const char *value);
extern int lxc_cgroup_set_handler(const char *filename, const char *value, struct lxc_handler *handler);
/*
* Set a specified value for a specified subsystem. The specified
* subsystem must be fully specified, eg. "cpu.shares"
* @name : the name of the container
* @filename : the cgroup attribute filename
* @value : the value to be set
* @name : the name of the container
* @lxcpath : lxc config path for container
* Returns 0 on success, < 0 otherwise
*/
extern int lxc_cgroup_set(const char *name, const char *filename, const char *value, const char *lxcpath);
extern int lxc_cgroup_set(const char *filename, const char *value, const char *name, const char *lxcpath);
/*
* Get a specified value for a specified subsystem. The specified
* subsystem must be fully specified, eg. "cpu.shares"
* @name : the name of the container
* @filename : the cgroup attribute filename
* @value : the value to be set
* @len : the len of the value variable
* @name : the name of the container
* @lxcpath : lxc config path for container
* Returns the number of bytes read, < 0 on error
*/
extern int lxc_cgroup_get(const char *name, const char *filename,
char *value, size_t len, const char *lxcpath);
extern int lxc_cgroup_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath);
/*
* Retrieve the error string associated with the error returned by
......
......@@ -1673,7 +1673,7 @@ static bool lxcapi_set_cgroup_item(struct lxc_container *c, const char *subsys,
if (container_disk_lock(c))
return false;
ret = lxc_cgroup_set(c->name, subsys, value, c->config_path);
ret = lxc_cgroup_set(subsys, value, c->name, c->config_path);
container_disk_unlock(c);
return ret == 0;
......@@ -1692,7 +1692,7 @@ static int lxcapi_get_cgroup_item(struct lxc_container *c, const char *subsys, c
if (container_disk_lock(c))
return -1;
ret = lxc_cgroup_get(c->name, subsys, retv, inlen, c->config_path);
ret = lxc_cgroup_get(subsys, retv, inlen, c->name, c->config_path);
container_disk_unlock(c);
return ret;
......
......@@ -283,7 +283,7 @@ static int utmp_get_ntasks(struct lxc_handler *handler)
{
int ntasks;
ntasks = lxc_cgroup_nrtasks(handler);
ntasks = lxc_cgroup_nrtasks_handler(handler);
if (ntasks < 0) {
ERROR("failed to get the number of tasks");
......
......@@ -384,7 +384,7 @@ static void lxc_fini(const char *name, struct lxc_handler *handler)
handler->conf->maincmd_fd = -1;
free(handler->name);
if (handler->cgroup) {
lxc_cgroup_destroy_desc(handler->cgroup);
lxc_cgroup_process_info_free_and_remove(handler->cgroup);
handler->cgroup = NULL;
}
free(handler);
......@@ -603,11 +603,12 @@ int save_phys_nics(struct lxc_conf *conf)
return 0;
}
extern bool is_in_subcgroup(int pid, const char *subsystem, struct cgroup_desc *d);
int lxc_spawn(struct lxc_handler *handler)
{
int failed_before_rename = 0;
const char *name = handler->name;
struct cgroup_meta_data *cgroup_meta = NULL;
const char *cgroup_pattern = NULL;
if (lxc_sync_init(handler))
return -1;
......@@ -646,6 +647,22 @@ int lxc_spawn(struct lxc_handler *handler)
goto out_abort;
}
cgroup_meta = lxc_cgroup_load_meta();
if (!cgroup_meta) {
ERROR("failed to detect cgroup metadata");
goto out_delete_net;
}
/* if we are running as root, use system cgroup pattern, otherwise
* just create a cgroup under the current one. But also fall back to
* that if for some reason reading the configuration fails and no
* default value is available
*/
if (getuid() == 0)
cgroup_pattern = lxc_global_config_value("cgroup.pattern");
if (!cgroup_pattern)
cgroup_pattern = "%n";
/*
* if the rootfs is not a blockdev, prevent the container from
* marking it readonly.
......@@ -669,15 +686,17 @@ int lxc_spawn(struct lxc_handler *handler)
if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE))
failed_before_rename = 1;
if ((handler->cgroup = lxc_cgroup_path_create(name)) == NULL)
if ((handler->cgroup = lxc_cgroup_create(name, cgroup_pattern, cgroup_meta, NULL)) == NULL) {
ERROR("failed to create cgroups for '%s'", name);
goto out_delete_net;
}
if (setup_cgroup(handler, &handler->conf->cgroup)) {
if (lxc_setup_cgroup_without_devices(handler, &handler->conf->cgroup)) {
ERROR("failed to setup the cgroups for '%s'", name);
goto out_delete_net;
}
if (lxc_cgroup_enter(handler->cgroup, handler->pid) < 0)
if (lxc_cgroup_enter(handler->cgroup, handler->pid, false) < 0)
goto out_delete_net;
if (failed_before_rename)
......@@ -707,7 +726,7 @@ int lxc_spawn(struct lxc_handler *handler)
if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CONFIGURE))
goto out_delete_net;
if (setup_cgroup_devices(handler, &handler->conf->cgroup)) {
if (lxc_setup_cgroup_devices(handler, &handler->conf->cgroup)) {
ERROR("failed to setup the devices cgroup for '%s'", name);
goto out_delete_net;
}
......@@ -739,6 +758,7 @@ int lxc_spawn(struct lxc_handler *handler)
goto out_abort;
}
lxc_cgroup_put_meta(cgroup_meta);
lxc_sync_fini(handler);
return 0;
......@@ -747,6 +767,7 @@ out_delete_net:
if (handler->clone_flags & CLONE_NEWNET)
lxc_delete_network(handler);
out_abort:
lxc_cgroup_put_meta(cgroup_meta);
lxc_abort(name, handler);
lxc_sync_fini(handler);
if (handler->pinfd >= 0) {
......
......@@ -55,7 +55,7 @@ struct lxc_handler {
#endif
int pinfd;
const char *lxcpath;
struct cgroup_desc *cgroup;
struct cgroup_process_info *cgroup;
};
extern struct lxc_handler *lxc_init(const char *name, struct lxc_conf *, const char *);
......
......@@ -75,7 +75,7 @@ static lxc_state_t freezer_state(const char *name, const char *lxcpath)
FILE *file;
int ret;
cgabspath = lxc_cgroup_path_get("freezer", name, lxcpath);
cgabspath = lxc_cgroup_get_hierarchy_abs_path("freezer", name, lxcpath);
if (!cgabspath)
return -1;
......
......@@ -233,6 +233,7 @@ const char *lxc_global_config_value(const char *option_name)
{ "zfsroot", DEFAULT_ZFSROOT },
{ "lxcpath", LXCPATH },
{ "cgroup.pattern", DEFAULT_CGROUP_PATTERN },
{ "cgroup.use", NULL },
{ NULL, NULL },
};
static const char *values[sizeof(options) / sizeof(options[0])] = { 0 };
......
......@@ -75,21 +75,21 @@ static int test_running_container(const char *lxcpath,
}
/* test get/set value using memory.swappiness file */
ret = lxc_cgroup_get(c->name, "memory.swappiness", value,
sizeof(value), c->config_path);
ret = lxc_cgroup_get("memory.swappiness", value, sizeof(value),
c->name, c->config_path);
if (ret < 0) {
TSTERR("lxc_cgroup_get failed");
goto err3;
}
strcpy(value_save, value);
ret = lxc_cgroup_set(c->name, "memory.swappiness", "100", c->config_path);
ret = lxc_cgroup_set("memory.swappiness", "100", c->name, c->config_path);
if (ret < 0) {
TSTERR("lxc_cgroup_set_bypath failed");
goto err3;
}
ret = lxc_cgroup_get(c->name, "memory.swappiness", value,
sizeof(value), c->config_path);
ret = lxc_cgroup_get("memory.swappiness", value, sizeof(value),
c->name, c->config_path);
if (ret < 0) {
TSTERR("lxc_cgroup_get failed");
goto err3;
......@@ -100,14 +100,14 @@ static int test_running_container(const char *lxcpath,
}
/* restore original value */
ret = lxc_cgroup_set(c->name, "memory.swappiness", value_save,
c->config_path);
ret = lxc_cgroup_set("memory.swappiness", value_save,
c->name, c->config_path);
if (ret < 0) {
TSTERR("lxc_cgroup_set failed");
goto err3;
}
ret = lxc_cgroup_get(c->name, "memory.swappiness", value,
sizeof(value), c->config_path);
ret = lxc_cgroup_get("memory.swappiness", value, sizeof(value),
c->name, c->config_path);
if (ret < 0) {
TSTERR("lxc_cgroup_get failed");
goto err3;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment