cgfsng: backport new cgroup handling logic

BugLink: https://bugs.launchpad.net/bugs/1825155

This will allow us to better support cgroup2 layouts on stable-3.0.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
parent bccf738c
......@@ -489,7 +489,7 @@ AC_ARG_WITH([cgroup-pattern],
[AC_HELP_STRING(
[--with-cgroup-pattern=pattern],
[pattern for container cgroups]
)], [], [with_cgroup_pattern=['lxc/%n']])
)], [], [with_cgroup_pattern=['lxc.payload/%n']])
# Container log path. By default, use $lxcpath.
AC_MSG_CHECKING([Whether to place logfiles in container config path])
......@@ -721,8 +721,8 @@ AX_CHECK_COMPILE_FLAG([-fasynchronous-unwind-tables], [CFLAGS="$CFLAGS -fasynchr
AX_CHECK_COMPILE_FLAG([-pipe], [CFLAGS="$CFLAGS -pipe"],,[-Werror])
AX_CHECK_COMPILE_FLAG([-fexceptions], [CFLAGS="$CFLAGS -fexceptions"],,[-Werror])
AX_CHECK_LINK_FLAG([-z relro], [LDLAGS="$LDLAGS -z relro"],,[])
AX_CHECK_LINK_FLAG([-z now], [LDLAGS="$LDLAGS -z now"],,[])
AX_CHECK_LINK_FLAG([-z relro], [LDFLAGS="$LDFLAGS -z relro"],,[])
AX_CHECK_LINK_FLAG([-z now], [LDFLAGS="$LDFLAGS -z now"],,[])
CFLAGS="$CFLAGS -Wvla -std=gnu11"
if test "x$enable_werror" = "xyes"; then
......
......@@ -126,7 +126,7 @@ static struct lxc_proc_context_info *lxc_proc_get_context_info(pid_t pid)
static inline void lxc_proc_close_ns_fd(struct lxc_proc_context_info *ctx)
{
for (int i = 0; i < LXC_NS_MAX; i++) {
__do_close_prot_errno int fd = move_fd(ctx->ns_fd[i]);
__do_close_prot_errno int fd ATTR_UNUSED = move_fd(ctx->ns_fd[i]);
}
}
......@@ -689,8 +689,8 @@ struct attach_clone_payload {
static void lxc_put_attach_clone_payload(struct attach_clone_payload *p)
{
__do_close_prot_errno int ipc_socket = p->ipc_socket;
__do_close_prot_errno int terminal_slave_fd = p->terminal_slave_fd;
__do_close_prot_errno int ipc_socket ATTR_UNUSED = p->ipc_socket;
__do_close_prot_errno int terminal_slave_fd ATTR_UNUSED = p->terminal_slave_fd;
if (p->init_ctx) {
lxc_proc_put_context_info(p->init_ctx);
......@@ -700,7 +700,7 @@ static void lxc_put_attach_clone_payload(struct attach_clone_payload *p)
static int attach_child_main(struct attach_clone_payload *payload)
{
int fd, lsm_fd, ret;
int lsm_fd, ret;
uid_t new_uid;
gid_t new_gid;
uid_t ns_root_uid = 0;
......@@ -893,10 +893,11 @@ static int attach_child_main(struct attach_clone_payload *payload)
if (options->stderr_fd > STDERR_FILENO)
close(options->stderr_fd);
/* Try to remove FD_CLOEXEC flag from stdin/stdout/stderr, but also
/*
* Try to remove FD_CLOEXEC flag from stdin/stdout/stderr, but also
* here, ignore errors.
*/
for (fd = STDIN_FILENO; fd <= STDERR_FILENO; fd++) {
for (int fd = STDIN_FILENO; fd <= STDERR_FILENO; fd++) {
ret = fd_cloexec(fd, false);
if (ret < 0) {
SYSERROR("Failed to clear FD_CLOEXEC from file descriptor %d", fd);
......@@ -1217,7 +1218,7 @@ int lxc_attach(const char *name, const char *lxcpath,
if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) {
struct cgroup_ops *cgroup_ops;
cgroup_ops = cgroup_init(NULL);
cgroup_ops = cgroup_init(conf);
if (!cgroup_ops)
goto on_error;
......
......@@ -38,13 +38,13 @@
lxc_log_define(cgroup, lxc);
extern struct cgroup_ops *cgfsng_ops_init(void);
extern struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf);
struct cgroup_ops *cgroup_init(struct lxc_handler *handler)
struct cgroup_ops *cgroup_init(struct lxc_conf *conf)
{
struct cgroup_ops *cgroup_ops;
cgroup_ops = cgfsng_ops_init();
cgroup_ops = cgfsng_ops_init(conf);
if (!cgroup_ops) {
ERROR("Failed to initialize cgroup driver");
return NULL;
......@@ -82,15 +82,20 @@ void cgroup_exit(struct cgroup_ops *ops)
free(ops->container_cgroup);
for (it = ops->hierarchies; it && *it; it++) {
char **ctrlr;
char **p;
for (ctrlr = (*it)->controllers; ctrlr && *ctrlr; ctrlr++)
free(*ctrlr);
for (p = (*it)->controllers; p && *p; p++)
free(*p);
free((*it)->controllers);
for (p = (*it)->cgroup2_chown; p && *p; p++)
free(*p);
free((*it)->cgroup2_chown);
free((*it)->mountpoint);
free((*it)->container_base_path);
free((*it)->container_full_path);
free((*it)->monitor_full_path);
free(*it);
}
free(ops->hierarchies);
......
......@@ -28,6 +28,10 @@
#include <stddef.h>
#include <sys/types.h>
#define PAYLOAD_CGROUP "lxc.payload"
#define MONITOR_CGROUP "lxc.monitor"
#define PIVOT_CGROUP "lxc.pivot"
struct lxc_handler;
struct lxc_conf;
struct lxc_list;
......@@ -65,6 +69,9 @@ typedef enum {
* @container_full_path
* - The full path to the container's cgroup.
*
* @monitor_full_path
* - The full path to the monitor's cgroup.
*
* @version
* - legacy hierarchy
* If the hierarchy is a legacy hierarchy this will be set to
......@@ -74,10 +81,16 @@ typedef enum {
* CGROUP2_SUPER_MAGIC.
*/
struct hierarchy {
/* Describes one cgroup hierarchy; every pointer member below is released with free() by cgroup_exit(). */
/*
* cgroup2 only: what files need to be chowned to delegate a cgroup to
* an unprivileged user.
*/
/* cgroup_exit() walks this until a NULL entry, freeing each string and then the array itself. */
char **cgroup2_chown;
/* Controller list; walked until a NULL entry and freed the same way as cgroup2_chown. */
char **controllers;
/* NOTE(review): presumably the path where this hierarchy is mounted - confirm against the initialisation code. */
char *mountpoint;
/* NOTE(review): presumably the cgroup the container's cgroup is created under - confirm. */
char *container_base_path;
/* The full path to the container's cgroup. */
char *container_full_path;
/* The full path to the monitor's cgroup. */
char *monitor_full_path;
/* Distinguishes a legacy hierarchy from a cgroup2 one (the latter is tagged CGROUP2_SUPER_MAGIC). */
int version;
};
......@@ -93,6 +106,9 @@ struct cgroup_ops {
char *cgroup_pattern;
char *container_cgroup;
/* Static memory, do not free.*/
const char *monitor_pattern;
/* @hierarchies
* - A NULL-terminated array of struct hierarchy, one per legacy
* hierarchy. No duplicates. First sufficient, writeable mounted
......@@ -124,11 +140,14 @@ struct cgroup_ops {
cgroup_layout_t cgroup_layout;
bool (*data_init)(struct cgroup_ops *ops);
void (*destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
void (*payload_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
void (*monitor_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
bool (*monitor_create)(struct cgroup_ops *ops, struct lxc_handler *handler);
bool (*monitor_enter)(struct cgroup_ops *ops, pid_t pid);
bool (*payload_create)(struct cgroup_ops *ops, struct lxc_handler *handler);
bool (*payload_enter)(struct cgroup_ops *ops, pid_t pid);
const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller);
bool (*escape)(const struct cgroup_ops *ops);
bool (*escape)(const struct cgroup_ops *ops, struct lxc_conf *conf);
int (*num_hierarchies)(struct cgroup_ops *ops);
bool (*get_hierarchies)(struct cgroup_ops *ops, int n, char ***out);
int (*set)(struct cgroup_ops *ops, const char *filename,
......@@ -146,7 +165,7 @@ struct cgroup_ops {
int (*nrtasks)(struct cgroup_ops *ops);
};
extern struct cgroup_ops *cgroup_init(struct lxc_handler *handler);
extern struct cgroup_ops *cgroup_init(struct lxc_conf *conf);
extern void cgroup_exit(struct cgroup_ops *ops);
extern void prune_init_scope(char *cg);
......
......@@ -194,7 +194,7 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf,
* /actual/ root cgroup so that lxcfs thinks criu has enough rights to
* see all cgroups.
*/
if (!cgroup_ops->escape(cgroup_ops)) {
if (!cgroup_ops->escape(cgroup_ops, conf)) {
ERROR("failed to escape cgroups");
return;
}
......@@ -969,7 +969,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_
if (lxc_init(c->name, handler) < 0)
goto out;
cgroup_ops = cgroup_init(NULL);
cgroup_ops = cgroup_init(c->lxc_conf);
if (!cgroup_ops)
goto out_fini_handler;
handler->cgroup_ops = cgroup_ops;
......@@ -1268,7 +1268,7 @@ static bool do_dump(struct lxc_container *c, char *mode, struct migrate_opts *op
close(criuout[0]);
cgroup_ops = cgroup_init(NULL);
cgroup_ops = cgroup_init(c->lxc_conf);
if (!cgroup_ops) {
ERROR("failed to cgroup_init()");
_exit(EXIT_FAILURE);
......
......@@ -45,35 +45,41 @@
lxc_log_define(freezer, lxc);
static int do_freeze_thaw(bool freeze, const char *name, const char *lxcpath)
static int do_freeze_thaw(bool freeze, struct lxc_conf *conf, const char *name,
const char *lxcpath)
{
int ret;
char v[100];
struct cgroup_ops *cgroup_ops;
const char *state = freeze ? "FROZEN" : "THAWED";
size_t state_len = 6;
const char *state;
size_t state_len;
lxc_state_t new_state = freeze ? FROZEN : THAWED;
cgroup_ops = cgroup_init(NULL);
state = lxc_state2str(new_state);
state_len = strlen(state);
cgroup_ops = cgroup_init(conf);
if (!cgroup_ops)
return -1;
ret = cgroup_ops->set(cgroup_ops, "freezer.state", state, name, lxcpath);
if (ret < 0) {
cgroup_exit(cgroup_ops);
ERROR("Failed to freeze %s", name);
ERROR("Failed to %s %s",
(new_state == FROZEN ? "freeze" : "unfreeze"), name);
return -1;
}
for (;;) {
ret = cgroup_ops->get(cgroup_ops, "freezer.state", v, sizeof(v), name, lxcpath);
ret = cgroup_ops->get(cgroup_ops, "freezer.state", v, sizeof(v),
name, lxcpath);
if (ret < 0) {
cgroup_exit(cgroup_ops);
ERROR("Failed to get freezer state of %s", name);
return -1;
}
v[99] = '\0';
v[sizeof(v) - 1] = '\0';
v[lxc_char_right_gc(v, strlen(v))] = '\0';
ret = strncmp(v, state, state_len);
......@@ -88,14 +94,14 @@ static int do_freeze_thaw(bool freeze, const char *name, const char *lxcpath)
}
}
int lxc_freeze(const char *name, const char *lxcpath)
int lxc_freeze(struct lxc_conf *conf, const char *name, const char *lxcpath)
{
lxc_cmd_serve_state_clients(name, lxcpath, FREEZING);
lxc_monitor_send_state(name, FREEZING, lxcpath);
return do_freeze_thaw(true, name, lxcpath);
return do_freeze_thaw(true, conf, name, lxcpath);
}
int lxc_unfreeze(const char *name, const char *lxcpath)
int lxc_unfreeze(struct lxc_conf *conf, const char *name, const char *lxcpath)
{
return do_freeze_thaw(false, name, lxcpath);
return do_freeze_thaw(false, conf, name, lxcpath);
}
......@@ -105,7 +105,7 @@ const char *lxc_global_config_value(const char *option_name)
sprintf(user_config_path, "%s/.config/lxc/lxc.conf", user_home);
sprintf(user_default_config_path, "%s/.config/lxc/default.conf", user_home);
sprintf(user_lxc_path, "%s/.local/share/lxc/", user_home);
user_cgroup_pattern = strdup("lxc/%n");
user_cgroup_pattern = strdup("lxc.payload/%n");
}
else {
user_config_path = strdup(LXC_GLOBAL_CONF);
......
......@@ -82,14 +82,16 @@ extern int lxc_monitor_close(int fd);
* @name : the container name
* Returns 0 on success, < 0 otherwise
*/
extern int lxc_freeze(const char *name, const char *lxcpath);
extern int lxc_freeze(struct lxc_conf *conf, const char *name,
const char *lxcpath);
/*
* Unfreeze all previously frozen tasks.
* @name : the name of the container
* Return 0 on success, < 0 otherwise
*/
extern int lxc_unfreeze(const char *name, const char *lxcpath);
extern int lxc_unfreeze(struct lxc_conf *conf, const char *name,
const char *lxcpath);
/*
* Retrieve the container state
......
......@@ -503,14 +503,14 @@ WRAP_API(bool, lxcapi_is_running)
static bool do_lxcapi_freeze(struct lxc_container *c)
{
int ret;
lxc_state_t s;
if (!c)
if (!c || !c->lxc_conf)
return false;
ret = lxc_freeze(c->name, c->config_path);
if (ret < 0)
return false;
s = lxc_getstate(c->name, c->config_path);
if (s != FROZEN)
return lxc_freeze(c->lxc_conf, c->name, c->config_path) == 0;
return true;
}
......@@ -519,14 +519,14 @@ WRAP_API(bool, lxcapi_freeze)
static bool do_lxcapi_unfreeze(struct lxc_container *c)
{
int ret;
lxc_state_t s;
if (!c)
if (!c || !c->lxc_conf)
return false;
ret = lxc_unfreeze(c->name, c->config_path);
if (ret < 0)
return false;
s = lxc_getstate(c->name, c->config_path);
if (s == FROZEN)
return lxc_unfreeze(c->lxc_conf, c->name, c->config_path) == 0;
return true;
}
......@@ -2192,6 +2192,9 @@ static inline bool enter_net_ns(struct lxc_container *c)
{
pid_t pid = do_lxcapi_init_pid(c);
if (pid < 0)
return false;
if ((geteuid() != 0 || (c->lxc_conf && !lxc_list_empty(&c->lxc_conf->id_map))) &&
(access("/proc/self/ns/user", F_OK) == 0))
if (!switch_to_ns(pid, "user"))
......@@ -3241,7 +3244,7 @@ static bool do_lxcapi_set_cgroup_item(struct lxc_container *c, const char *subsy
if (is_stopped(c))
return false;
cgroup_ops = cgroup_init(NULL);
cgroup_ops = cgroup_init(c->lxc_conf);
if (!cgroup_ops)
return false;
......@@ -3265,7 +3268,7 @@ static int do_lxcapi_get_cgroup_item(struct lxc_container *c, const char *subsys
if (is_stopped(c))
return -1;
cgroup_ops = cgroup_init(NULL);
cgroup_ops = cgroup_init(c->lxc_conf);
if (!cgroup_ops)
return -1;
......@@ -4601,6 +4604,7 @@ static bool add_remove_device_node(struct lxc_container *c, const char *src_path
struct stat st;
char value[LXC_MAX_BUFFER];
const char *p;
pid_t init_pid;
/* make sure container is running */
if (!do_lxcapi_is_running(c)) {
......@@ -4627,7 +4631,13 @@ static bool add_remove_device_node(struct lxc_container *c, const char *src_path
if (ret < 0 || ret >= LXC_MAX_BUFFER)
return false;
if (!do_add_remove_node(do_lxcapi_init_pid(c), p, add, &st))
init_pid = do_lxcapi_init_pid(c);
if (init_pid < 0) {
ERROR("Failed to get init pid");
return false;
}
if (!do_add_remove_node(init_pid, p, add, &st))
return false;
/* add or remove device to/from cgroup access list */
......@@ -4697,6 +4707,11 @@ static bool do_lxcapi_attach_interface(struct lxc_container *c,
}
init_pid = do_lxcapi_init_pid(c);
if (init_pid < 0) {
ERROR("Failed to get init pid");
goto err;
}
ret = lxc_netdev_move_by_name(ifname, init_pid, dst_ifname);
if (ret)
goto err;
......@@ -4742,6 +4757,10 @@ static bool do_lxcapi_detach_interface(struct lxc_container *c,
pid_t init_pid;
init_pid = do_lxcapi_init_pid(c);
if (init_pid < 0) {
ERROR("Failed to get init pid");
_exit(EXIT_FAILURE);
}
if (!switch_to_ns(init_pid, "net")) {
ERROR("Failed to enter network namespace");
_exit(EXIT_FAILURE);
......
......@@ -885,7 +885,7 @@ int lxc_init(const char *name, struct lxc_handler *handler)
}
TRACE("Chowned console");
handler->cgroup_ops = cgroup_init(handler);
handler->cgroup_ops = cgroup_init(handler->conf);
if (!handler->cgroup_ops) {
ERROR("Failed to initialize cgroup driver");
goto out_delete_terminal;
......@@ -895,6 +895,10 @@ int lxc_init(const char *name, struct lxc_handler *handler)
INFO("Container \"%s\" is initialized", name);
return 0;
out_destroy_cgroups:
handler->cgroup_ops->payload_destroy(handler->cgroup_ops, handler);
handler->cgroup_ops->monitor_destroy(handler->cgroup_ops, handler);
out_delete_terminal:
lxc_terminal_delete(&handler->conf->console);
......@@ -985,7 +989,8 @@ void lxc_fini(const char *name, struct lxc_handler *handler)
while (namespace_count--)
free(namespaces[namespace_count]);
cgroup_ops->destroy(cgroup_ops, handler);
cgroup_ops->payload_destroy(cgroup_ops, handler);
cgroup_ops->monitor_destroy(cgroup_ops, handler);
if (handler->conf->reboot == REBOOT_NONE) {
/* For all new state clients simply close the command socket.
......@@ -1599,16 +1604,12 @@ static int lxc_spawn(struct lxc_handler *handler)
ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0,
handler->data_sock);
if (ret < 0) {
lxc_sync_fini(handler);
return -1;
}
if (ret < 0)
goto out_sync_fini;
ret = resolve_clone_flags(handler);
if (ret < 0) {
lxc_sync_fini(handler);
return -1;
}
if (ret < 0)
goto out_sync_fini;
if (handler->ns_clone_flags & CLONE_NEWNET) {
if (!lxc_list_empty(&conf->network)) {
......@@ -1621,8 +1622,7 @@ static int lxc_spawn(struct lxc_handler *handler)
ret = lxc_find_gateway_addresses(handler);
if (ret < 0) {
ERROR("Failed to find gateway addresses");
lxc_sync_fini(handler);
return -1;
goto out_sync_fini;
}
/* That should be done before the clone because we will
......@@ -1631,8 +1631,7 @@ static int lxc_spawn(struct lxc_handler *handler)
ret = lxc_create_network_priv(handler);
if (ret < 0) {
ERROR("Failed to create the network");
lxc_sync_fini(handler);
return -1;
goto out_delete_net;
}
}
}
......@@ -1891,6 +1890,8 @@ out_delete_net:
out_abort:
lxc_abort(name, handler);
out_sync_fini:
lxc_sync_fini(handler);
if (handler->pinfd >= 0) {
close(handler->pinfd);
......@@ -1906,6 +1907,7 @@ int __lxc_start(const char *name, struct lxc_handler *handler,
{
int ret, status;
struct lxc_conf *conf = handler->conf;
struct cgroup_ops *cgroup_ops;
ret = lxc_init(name, handler);
if (ret < 0) {
......@@ -1915,9 +1917,23 @@ int __lxc_start(const char *name, struct lxc_handler *handler,
handler->ops = ops;
handler->data = data;
handler->daemonize = daemonize;
cgroup_ops = handler->cgroup_ops;
if (!attach_block_device(handler->conf)) {
ERROR("Failed to attach block device");
ret = -1;
goto out_fini_nonet;
}
if (!cgroup_ops->monitor_create(cgroup_ops, handler)) {
ERROR("Failed to create monitor cgroup");
ret = -1;
goto out_fini_nonet;
}
if (!cgroup_ops->monitor_enter(cgroup_ops, handler->monitor_pid)) {
ERROR("Failed to enter monitor cgroup");
ret = -1;
goto out_fini_nonet;
}
......@@ -1962,6 +1978,7 @@ int __lxc_start(const char *name, struct lxc_handler *handler,
if (!handler->init_died && handler->pid > 0) {
ERROR("Child process is not killed");
ret = -1;
goto out_abort;
}
......
......@@ -59,7 +59,7 @@ static int test_running_container(const char *lxcpath,
char value[NAME_MAX], value_save[NAME_MAX];
struct cgroup_ops *cgroup_ops;
sprintf(relpath, "%s/%s", group ? group : "lxc", name);
sprintf(relpath, "%s/%s", group ? group : "lxc.payload", name);
if ((c = lxc_container_new(name, lxcpath)) == NULL) {
TSTERR("container %s couldn't instantiate", name);
......@@ -80,7 +80,7 @@ static int test_running_container(const char *lxcpath,
goto err3;
}
cgroup_ops = cgroup_init(NULL);
cgroup_ops = cgroup_init(c->lxc_conf);
if (!cgroup_ops)
goto err3;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment