Commit a900cbaf by Wolfgang Bumiller

introduce lxc.cgroup.dir.{monitor,container,container.inner}

This is a new approach to #1302 with a container-side configuration instead of a global boolean flag. Contrary to the previous PR using an optional additional parameter for the get-cgroup command, this introduces two new additional commands to get the limiting cgroup path and cgroup2 file descriptor. If the limiting option is not in use, these behave identical to their full-path counterparts. If these variables are used the payload will end up in the concatenation of lxc.cgroup.dir.container and lxc.cgroup.dir.container.inner (which may be empty), and the monitor will end up in lxc.cgruop.dir.monitor. The directories are fixed, no retry count logic is applied, failing to create these directories will simply be a hard error. Signed-off-by: 's avatarWolfgang Bumiller <w.bumiller@proxmox.com>
parent 514b0270
......@@ -1573,6 +1573,53 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
</varlistentry>
<varlistentry>
<term>
<option>lxc.cgroup.dir.container</option>
</term>
<listitem>
<para>
This is similar to <option>lxc.cgroup.dir</option>, but must be
used together with <option>lxc.cgroup.dir.monitor</option> and
affects only the container's cgroup path. This option is mutually
exclusive with <option>lxc.cgroup.dir</option>.
Note that the final path the container attaches to may be
extended further by the
<option>lxc.cgroup.dir.container.namespace</option> option.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term>
<option>lxc.cgroup.dir.monitor</option>
</term>
<listitem>
<para>
This is the monitor process counterpart to
<option>lxc.cgroup.dir.container</option>.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term>
<option>lxc.cgroup.dir.container.namespace</option>
</term>
<listitem>
<para>
Specify an additional subdirectory where the cgroup namespace
will be created. With this option, the cgroup limits will be
applied to the outer path specified in
<option>lxc.cgroup.dir.container</option>, which is not accessible
from within the container, making it possible to better enforce
limits for privileged containers in a way they cannot override
them.
This only works in conjunction with the
<option>lxc.cgroup.dir.container</option> and
<option>lxc.cgroup.dir.monitor</option> options and has otherwise
no effect.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term>
<option>lxc.cgroup.relative</option>
</term>
<listitem>
......
......@@ -54,7 +54,11 @@ typedef enum {
* init's cgroup (if root).
*
* @container_full_path
* - The full path to the containers cgroup.
* - The full path to the container's cgroup.
*
* @container_limit_path
* - The full path to the container's limiting cgroup. May simply point to
* container_full_path.
*
* @monitor_full_path
* - The full path to the monitor's cgroup.
......@@ -77,15 +81,18 @@ struct hierarchy {
char *mountpoint;
char *container_base_path;
char *container_full_path;
char *container_limit_path;
char *monitor_full_path;
int version;
/* cgroup2 only */
unsigned int bpf_device_controller:1;
/* monitor cgroup fd */
int cgfd_con;
/* container cgroup fd */
int cgfd_con;
/* limiting cgroup fd (may be equal to cgfd_con if not separated) */
int cgfd_limit;
/* monitor cgroup fd */
int cgfd_mon;
};
......@@ -169,6 +176,7 @@ struct cgroup_ops {
bool (*monitor_delegate_controllers)(struct cgroup_ops *ops);
bool (*payload_delegate_controllers)(struct cgroup_ops *ops);
void (*payload_finalize)(struct cgroup_ops *ops);
const char *(*get_limiting_cgroup)(struct cgroup_ops *ops, const char *controller);
};
extern struct cgroup_ops *cgroup_init(struct lxc_conf *conf);
......
......@@ -84,6 +84,8 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd)
[LXC_CMD_UNFREEZE] = "unfreeze",
[LXC_CMD_GET_CGROUP2_FD] = "get_cgroup2_fd",
[LXC_CMD_GET_INIT_PIDFD] = "get_init_pidfd",
[LXC_CMD_GET_LIMITING_CGROUP] = "get_limiting_cgroup",
[LXC_CMD_GET_LIMITING_CGROUP2_FD] = "get_limiting_cgroup2_fd",
};
if (cmd >= LXC_CMD_MAX)
......@@ -142,7 +144,9 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd)
rsp->data = rspdata;
}
if (cmd->req.cmd == LXC_CMD_GET_CGROUP2_FD) {
if (cmd->req.cmd == LXC_CMD_GET_CGROUP2_FD ||
cmd->req.cmd == LXC_CMD_GET_LIMITING_CGROUP2_FD)
{
int cgroup2_fd = move_fd(fd_rsp);
rsp->data = INT_TO_PTR(cgroup2_fd);
}
......@@ -483,25 +487,14 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req,
return 0;
}
/*
* lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
* particular subsystem. This is the cgroup path relative to the root
* of the cgroup filesystem.
*
* @name : name of container to connect to
* @lxcpath : the lxcpath in which the container is running
* @subsystem : the subsystem being asked about
*
* Returns the path on success, NULL on failure. The caller must free() the
* returned path.
*/
char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
const char *subsystem)
static char *lxc_cmd_get_cgroup_path_do(const char *name, const char *lxcpath,
const char *subsystem,
lxc_cmd_t command)
{
int ret, stopped;
struct lxc_cmd_rr cmd = {
.req = {
.cmd = LXC_CMD_GET_CGROUP,
.cmd = command,
.data = subsystem,
.datalen = 0,
},
......@@ -525,24 +518,72 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
return cmd.rsp.data;
}
static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler,
struct lxc_epoll_descr *descr)
/*
* lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
* particular subsystem. This is the cgroup path relative to the root
* of the cgroup filesystem.
*
* @name : name of container to connect to
* @lxcpath : the lxcpath in which the container is running
* @subsystem : the subsystem being asked about
*
* Returns the path on success, NULL on failure. The caller must free() the
* returned path.
*/
char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
const char *subsystem)
{
return lxc_cmd_get_cgroup_path_do(name, lxcpath, subsystem,
LXC_CMD_GET_CGROUP);
}
/*
* lxc_cmd_get_limiting_cgroup_path: Calculate a container's limiting cgroup
* path for a particular subsystem. This is the cgroup path relative to the
* root of the cgroup filesystem. This may be the same as the path returned by
* lxc_cmd_get_cgroup_path if the container doesn't have a limiting path prefix
* set.
*
* @name : name of container to connect to
* @lxcpath : the lxcpath in which the container is running
* @subsystem : the subsystem being asked about
*
* Returns the path on success, NULL on failure. The caller must free() the
* returned path.
*/
char *lxc_cmd_get_limiting_cgroup_path(const char *name, const char *lxcpath,
const char *subsystem)
{
return lxc_cmd_get_cgroup_path_do(name, lxcpath, subsystem,
LXC_CMD_GET_LIMITING_CGROUP);
}
static int lxc_cmd_get_cgroup_callback_do(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler,
struct lxc_epoll_descr *descr,
bool limiting_cgroup)
{
int ret;
const char *path;
const void *reqdata;
struct lxc_cmd_rsp rsp;
struct cgroup_ops *cgroup_ops = handler->cgroup_ops;
const char *(*get_fn)(struct cgroup_ops *ops, const char *controller);
if (req->datalen > 0) {
ret = validate_string_request(fd, req);
if (ret != 0)
return ret;
path = cgroup_ops->get_cgroup(cgroup_ops, req->data);
reqdata = req->data;
} else {
path = cgroup_ops->get_cgroup(cgroup_ops, NULL);
reqdata = NULL;
}
get_fn = (limiting_cgroup ? cgroup_ops->get_cgroup
: cgroup_ops->get_limiting_cgroup);
path = get_fn(cgroup_ops, reqdata);
if (!path)
return -1;
......@@ -557,6 +598,20 @@ static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
return 0;
}
static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler,
struct lxc_epoll_descr *descr)
{
return lxc_cmd_get_cgroup_callback_do(fd, req, handler, descr, false);
}
static int lxc_cmd_get_limiting_cgroup_callback(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler,
struct lxc_epoll_descr *descr)
{
return lxc_cmd_get_cgroup_callback_do(fd, req, handler, descr, true);
}
/*
* lxc_cmd_get_config_item: Get config item the running container
*
......@@ -1366,28 +1421,48 @@ int lxc_cmd_get_cgroup2_fd(const char *name, const char *lxcpath)
return PTR_TO_INT(cmd.rsp.data);
}
static int lxc_cmd_get_cgroup2_fd_callback(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler,
struct lxc_epoll_descr *descr)
static int lxc_cmd_get_cgroup2_fd_callback_do(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler,
struct lxc_epoll_descr *descr,
bool limiting_cgroup)
{
struct lxc_cmd_rsp rsp = {
.ret = -EINVAL,
};
struct cgroup_ops *ops = handler->cgroup_ops;
int ret;
int ret, send_fd;
if (!pure_unified_layout(ops) || !ops->unified)
return lxc_cmd_rsp_send(fd, &rsp);
send_fd = limiting_cgroup ? ops->unified->cgfd_limit
: ops->unified->cgfd_con;
rsp.ret = 0;
ret = lxc_abstract_unix_send_fds(fd, &ops->unified->cgfd_con, 1, &rsp,
sizeof(rsp));
ret = lxc_abstract_unix_send_fds(fd, &send_fd, 1, &rsp, sizeof(rsp));
if (ret < 0)
return log_error(LXC_CMD_REAP_CLIENT_FD, "Failed to send cgroup2 fd");
return 0;
}
static int lxc_cmd_get_cgroup2_fd_callback(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler,
struct lxc_epoll_descr *descr)
{
return lxc_cmd_get_cgroup2_fd_callback_do(fd, req, handler, descr,
false);
}
static int lxc_cmd_get_limiting_cgroup2_fd_callback(int fd,
struct lxc_cmd_req *req,
struct lxc_handler *handler,
struct lxc_epoll_descr *descr)
{
return lxc_cmd_get_cgroup2_fd_callback_do(fd, req, handler, descr,
true);
}
static int lxc_cmd_process(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler,
struct lxc_epoll_descr *descr)
......@@ -1415,6 +1490,8 @@ static int lxc_cmd_process(int fd, struct lxc_cmd_req *req,
[LXC_CMD_UNFREEZE] = lxc_cmd_unfreeze_callback,
[LXC_CMD_GET_CGROUP2_FD] = lxc_cmd_get_cgroup2_fd_callback,
[LXC_CMD_GET_INIT_PIDFD] = lxc_cmd_get_init_pidfd_callback,
[LXC_CMD_GET_LIMITING_CGROUP] = lxc_cmd_get_limiting_cgroup_callback,
[LXC_CMD_GET_LIMITING_CGROUP2_FD] = lxc_cmd_get_limiting_cgroup2_fd_callback,
};
if (req->cmd >= LXC_CMD_MAX)
......
......@@ -38,6 +38,8 @@ typedef enum {
LXC_CMD_UNFREEZE,
LXC_CMD_GET_CGROUP2_FD,
LXC_CMD_GET_INIT_PIDFD,
LXC_CMD_GET_LIMITING_CGROUP,
LXC_CMD_GET_LIMITING_CGROUP2_FD,
LXC_CMD_MAX,
} lxc_cmd_t;
......@@ -129,5 +131,9 @@ extern int lxc_cmd_add_bpf_device_cgroup(const char *name, const char *lxcpath,
extern int lxc_cmd_freeze(const char *name, const char *lxcpath, int timeout);
extern int lxc_cmd_unfreeze(const char *name, const char *lxcpath, int timeout);
extern int lxc_cmd_get_cgroup2_fd(const char *name, const char *lxcpath);
extern char *lxc_cmd_get_limiting_cgroup_path(const char *name,
const char *lxcpath,
const char *subsystem);
extern int lxc_cmd_get_limiting_cgroup2_fd(const char *name, const char *lxcpath);
#endif /* __commands_h */
......@@ -3832,6 +3832,9 @@ void lxc_conf_free(struct lxc_conf *conf)
lxc_clear_apparmor_raw(conf);
lxc_clear_namespace(conf);
free(conf->cgroup_meta.dir);
free(conf->cgroup_meta.monitor_dir);
free(conf->cgroup_meta.container_dir);
free(conf->cgroup_meta.namespace_dir);
free(conf->cgroup_meta.controllers);
free(conf->shmount.path_host);
free(conf->shmount.path_cont);
......
......@@ -60,6 +60,9 @@ struct lxc_cgroup {
struct /* meta */ {
char *controllers;
char *dir;
char *monitor_dir;
char *container_dir;
char *namespace_dir;
bool relative;
};
};
......
......@@ -71,6 +71,9 @@ lxc_config_define(cap_keep);
lxc_config_define(cgroup_controller);
lxc_config_define(cgroup2_controller);
lxc_config_define(cgroup_dir);
lxc_config_define(cgroup_monitor_dir);
lxc_config_define(cgroup_container_dir);
lxc_config_define(cgroup_container_inner_dir);
lxc_config_define(cgroup_relative);
lxc_config_define(console_buffer_size);
lxc_config_define(console_logfile);
......@@ -170,6 +173,9 @@ static struct lxc_config_t config_jump_table[] = {
{ "lxc.cap.drop", set_config_cap_drop, get_config_cap_drop, clr_config_cap_drop, },
{ "lxc.cap.keep", set_config_cap_keep, get_config_cap_keep, clr_config_cap_keep, },
{ "lxc.cgroup2", set_config_cgroup2_controller, get_config_cgroup2_controller, clr_config_cgroup2_controller, },
{ "lxc.cgroup.dir.monitor", set_config_cgroup_monitor_dir, get_config_cgroup_monitor_dir, clr_config_cgroup_monitor_dir, },
{ "lxc.cgroup.dir.container", set_config_cgroup_container_dir, get_config_cgroup_container_dir, clr_config_cgroup_container_dir, },
{ "lxc.cgroup.dir.container.inner",set_config_cgroup_container_inner_dir, get_config_cgroup_container_inner_dir, clr_config_cgroup_container_inner_dir,},
{ "lxc.cgroup.dir", set_config_cgroup_dir, get_config_cgroup_dir, clr_config_cgroup_dir, },
{ "lxc.cgroup.relative", set_config_cgroup_relative, get_config_cgroup_relative, clr_config_cgroup_relative, },
{ "lxc.cgroup", set_config_cgroup_controller, get_config_cgroup_controller, clr_config_cgroup_controller, },
......@@ -1721,6 +1727,48 @@ static int set_config_cgroup_dir(const char *key, const char *value,
return set_config_string_item(&lxc_conf->cgroup_meta.dir, value);
}
static int set_config_cgroup_monitor_dir(const char *key, const char *value,
struct lxc_conf *lxc_conf, void *data)
{
if (lxc_config_value_empty(value))
return clr_config_cgroup_monitor_dir(key, lxc_conf, NULL);
return set_config_string_item(&lxc_conf->cgroup_meta.monitor_dir,
value);
}
static int set_config_cgroup_container_dir(const char *key, const char *value,
struct lxc_conf *lxc_conf,
void *data)
{
if (lxc_config_value_empty(value))
return clr_config_cgroup_container_dir(key, lxc_conf, NULL);
return set_config_string_item(&lxc_conf->cgroup_meta.container_dir,
value);
}
static int set_config_cgroup_container_inner_dir(const char *key,
const char *value,
struct lxc_conf *lxc_conf,
void *data)
{
if (lxc_config_value_empty(value))
return clr_config_cgroup_container_inner_dir(key, lxc_conf,
NULL);
if (strchr(value, '/') ||
strcmp(value, ".") == 0 ||
strcmp(value, "..") == 0)
{
ERROR("lxc.cgroup.dir.container.inner must be a single directory name");
return -1;
}
return set_config_string_item(&lxc_conf->cgroup_meta.namespace_dir,
value);
}
static int set_config_cgroup_relative(const char *key, const char *value,
struct lxc_conf *lxc_conf, void *data)
{
......@@ -3644,6 +3692,58 @@ static int get_config_cgroup_dir(const char *key, char *retv, int inlen,
return fulllen;
}
static int get_config_cgroup_monitor_dir(const char *key, char *retv, int inlen,
struct lxc_conf *lxc_conf, void *data)
{
int len;
int fulllen = 0;
if (!retv)
inlen = 0;
else
memset(retv, 0, inlen);
strprint(retv, inlen, "%s", lxc_conf->cgroup_meta.monitor_dir);
return fulllen;
}
static int get_config_cgroup_container_dir(const char *key, char *retv,
int inlen,
struct lxc_conf *lxc_conf,
void *data)
{
int len;
int fulllen = 0;
if (!retv)
inlen = 0;
else
memset(retv, 0, inlen);
strprint(retv, inlen, "%s", lxc_conf->cgroup_meta.container_dir);
return fulllen;
}
static int get_config_cgroup_container_inner_dir(const char *key, char *retv,
int inlen,
struct lxc_conf *lxc_conf,
void *data)
{
int len;
int fulllen = 0;
if (!retv)
inlen = 0;
else
memset(retv, 0, inlen);
strprint(retv, inlen, "%s", lxc_conf->cgroup_meta.namespace_dir);
return fulllen;
}
static inline int get_config_cgroup_relative(const char *key, char *retv,
int inlen, struct lxc_conf *lxc_conf,
void *data)
......@@ -4458,6 +4558,30 @@ static int clr_config_cgroup_dir(const char *key, struct lxc_conf *lxc_conf,
return 0;
}
static int clr_config_cgroup_monitor_dir(const char *key,
struct lxc_conf *lxc_conf,
void *data)
{
free_disarm(lxc_conf->cgroup_meta.monitor_dir);
return 0;
}
static int clr_config_cgroup_container_dir(const char *key,
struct lxc_conf *lxc_conf,
void *data)
{
free_disarm(lxc_conf->cgroup_meta.container_dir);
return 0;
}
static int clr_config_cgroup_container_inner_dir(const char *key,
struct lxc_conf *lxc_conf,
void *data)
{
free_disarm(lxc_conf->cgroup_meta.namespace_dir);
return 0;
}
static inline int clr_config_cgroup_relative(const char *key,
struct lxc_conf *lxc_conf,
void *data)
......
......@@ -303,7 +303,7 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf,
* the handler the restore task created.
*/
if (!strcmp(opts->action, "dump") || !strcmp(opts->action, "pre-dump")) {
path = lxc_cmd_get_cgroup_path(opts->c->name, opts->c->config_path, controllers[0]);
path = lxc_cmd_get_limiting_cgroup_path(opts->c->name, opts->c->config_path, controllers[0]);
if (!path) {
ERROR("failed to get cgroup path for %s", controllers[0]);
goto err;
......@@ -311,7 +311,7 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf,
} else {
const char *p;
p = cgroup_ops->get_cgroup(cgroup_ops, controllers[0]);
p = cgroup_ops->get_limiting_cgroup(cgroup_ops, controllers[0]);
if (!p) {
ERROR("failed to get cgroup path for %s", controllers[0]);
goto err;
......@@ -406,9 +406,9 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf,
DECLARE_ARG("-t");
DECLARE_ARG(pid);
freezer_relative = lxc_cmd_get_cgroup_path(opts->c->name,
opts->c->config_path,
"freezer");
freezer_relative = lxc_cmd_get_limiting_cgroup_path(opts->c->name,
opts->c->config_path,
"freezer");
if (!freezer_relative) {
ERROR("failed getting freezer path");
goto err;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment