cgroups/cgfsng: implement cgroup2 device controller live update

parent 629b430e
......@@ -92,6 +92,7 @@ struct hierarchy {
char *container_full_path;
char *monitor_full_path;
int version;
int bpf_device_controller:1;
};
struct cgroup_ops {
......
......@@ -195,33 +195,38 @@ int bpf_program_init(struct bpf_program *prog)
return bpf_program_add_instructions(prog, pre_insn, ARRAY_SIZE(pre_insn));
}
int bpf_program_append_device(struct bpf_program *prog, char type, int major,
int minor, const char *access, int allow)
int bpf_program_append_device(struct bpf_program *prog, struct device_item *device)
{
int ret;
int jump_nr = 1;
struct bpf_insn bpf_access_decision[] = {
BPF_MOV64_IMM(BPF_REG_0, allow),
BPF_MOV64_IMM(BPF_REG_0, device->allow),
BPF_EXIT_INSN(),
};
int access_mask;
int device_type;
device_type = bpf_device_type(type);
/* This is a global rule so no need to append anything. */
if (device->global_rule >= 0) {
prog->blacklist = device->global_rule;
return 0;
}
device_type = bpf_device_type(device->type);
if (device_type < 0)
return error_log_errno(EINVAL, "Invalid bpf cgroup device type %c", type);
return error_log_errno(EINVAL, "Invalid bpf cgroup device type %c", device->type);
if (device_type > 0)
jump_nr++;
access_mask = bpf_access_mask(access);
access_mask = bpf_access_mask(device->access);
if (!bpf_device_all_access(access_mask))
jump_nr += 3;
if (major >= 0)
if (device->major != -1)
jump_nr++;
if (minor >= 0)
if (device->minor != -1)
jump_nr++;
if (device_type > 0) {
......@@ -247,9 +252,9 @@ int bpf_program_append_device(struct bpf_program *prog, char type, int major,
return error_log_errno(errno, "Failed to add instructions to bpf cgroup program");
}
if (major >= 0) {
if (device->major >= 0) {
struct bpf_insn ins[] = {
BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, jump_nr--),
BPF_JMP_IMM(BPF_JNE, BPF_REG_4, device->major, jump_nr--),
};
ret = bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins));
......@@ -257,9 +262,9 @@ int bpf_program_append_device(struct bpf_program *prog, char type, int major,
return error_log_errno(errno, "Failed to add instructions to bpf cgroup program");
}
if (minor >= 0) {
if (device->minor >= 0) {
struct bpf_insn ins[] = {
BPF_JMP_IMM(BPF_JNE, BPF_REG_5, minor, jump_nr--),
BPF_JMP_IMM(BPF_JNE, BPF_REG_5, device->minor, jump_nr--),
};
ret = bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins));
......@@ -411,4 +416,94 @@ void lxc_clear_cgroup2_devices(struct lxc_conf *conf)
(void)bpf_program_free(conf->cgroup2_devices);
}
}
/*
 * Record @device in the device rule list kept in @conf->devices.
 *
 * The list is scanned first so that an already known rule is updated in place
 * instead of being duplicated:
 *   - if both the stored entry and @device carry a global rule, the stored
 *     global rule is overwritten and 1 is returned;
 *   - if an entry with the same type, major, minor and access string exists
 *     and only its allow flag differs, the flag is flipped (log_trace() is
 *     invoked with 0);
 *   - if such an entry exists with the same allow flag, nothing changes
 *     (log_trace() is invoked with 1).
 * Otherwise a copy of @device is appended to the tail of the list and 0 is
 * returned. Allocation failures are reported through error_log_errno() with
 * ENOMEM.
 */
int bpf_list_add_device(struct lxc_conf *conf, struct device_item *device)
{
	__do_free struct lxc_list *list_elem = NULL;
	__do_free struct device_item *new_device = NULL;
	struct lxc_list *iter;

	lxc_list_for_each(iter, &conf->devices) {
		struct device_item *known = iter->elem;

		/* Two global rules: the new one simply replaces the old one. */
		if (known->global_rule != -1 && device->global_rule != -1) {
			TRACE("Switched from %s to %s",
			      known->global_rule == 0 ? "whitelist" : "blacklist",
			      device->global_rule == 0 ? "whitelist"
						       : "blacklist");
			known->global_rule = device->global_rule;
			return 1;
		}

		/* Skip entries that describe a different device or access set. */
		if (known->type != device->type ||
		    known->major != device->major ||
		    known->minor != device->minor ||
		    strcmp(known->access, device->access) != 0)
			continue;

		if (known->allow == device->allow)
			return log_trace(1, "Reusing existing rule of bpf device program: type %c, major %d, minor %d, access %s, allow %d, global_rule %d",
					 known->type, known->major, known->minor,
					 known->access, known->allow,
					 known->global_rule);

		/*
		 * The rule is switched from allow to deny or vice versa, so
		 * don't bother allocating; just flip the existing one.
		 */
		known->allow = device->allow;
		return log_trace(0, "Switched existing rule of bpf device program: type %c, major %d, minor %d, access %s, allow %d, global_rule %d",
				 known->type, known->major, known->minor,
				 known->access, known->allow,
				 known->global_rule);
	}

	/* No matching entry: append a private copy of the rule. */
	list_elem = malloc(sizeof(*list_elem));
	if (!list_elem)
		return error_log_errno(ENOMEM, "Failed to allocate new device list");

	new_device = memdup(device, sizeof(struct device_item));
	if (!new_device)
		return error_log_errno(ENOMEM, "Failed to allocate new device item");

	lxc_list_add_elem(list_elem, move_ptr(new_device));
	lxc_list_add_tail(&conf->devices, move_ptr(list_elem));

	return 0;
}
/*
 * Probe whether a bpf device cgroup program can be used.
 *
 * The probe requires an effective uid of 0. It then builds a minimal program
 * (set r0 to 1 and exit) of type BPF_PROG_TYPE_CGROUP_DEVICE and tries to load
 * it via bpf_program_load_kernel().
 *
 * Returns true if the program could be loaded, false otherwise; every failure
 * is logged through log_error(), which also sets errno.
 */
bool bpf_devices_cgroup_supported(void)
{
	const struct bpf_insn dummy[] = {
		BPF_MOV64_IMM(BPF_REG_0, 1),
		BPF_EXIT_INSN(),
	};
	__do_bpf_program_free struct bpf_program *prog = NULL;
	int ret;

	if (geteuid() != 0)
		return log_error(false, EINVAL,
				 "The bpf device cgroup requires real root");

	prog = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE);
	/*
	 * bpf_program_new() hands back a pointer; a failed allocation must be
	 * detected as NULL, an ordered comparison against 0 never catches it.
	 */
	if (!prog)
		return log_error(false,
				 errno, "Failed to allocate new bpf device cgroup program");

	ret = bpf_program_add_instructions(prog, dummy, ARRAY_SIZE(dummy));
	if (ret < 0)
		return log_error(false,
				 errno, "Failed to add new instructions to bpf device cgroup program");

	ret = bpf_program_load_kernel(prog, NULL, 0);
	if (ret < 0)
		return log_error(false,
				 errno, "Failed to load new bpf device cgroup program");

	return log_trace(true, "The bpf device cgroup is supported");
}
#endif
......@@ -5,6 +5,7 @@
#ifndef __LXC_CGROUP2_DEVICES_H
#define __LXC_CGROUP2_DEVICES_H
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stddef.h>
......@@ -79,53 +80,61 @@ struct bpf_program {
#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
struct bpf_program *bpf_program_new(uint32_t prog_type);
int bpf_program_init(struct bpf_program *prog);
int bpf_program_append_device(struct bpf_program *prog, char type, int major,
int minor, const char *access, int allow);
int bpf_program_append_device(struct bpf_program *prog,
struct device_item *device);
int bpf_program_finalize(struct bpf_program *prog);
int bpf_program_cgroup_attach(struct bpf_program *prog, int type,
const char *path, uint32_t flags);
int bpf_program_cgroup_detach(struct bpf_program *prog);
void bpf_program_free(struct bpf_program *prog);
void lxc_clear_cgroup2_devices(struct lxc_conf *conf);
static inline void __do_bpf_program_free(struct bpf_program **prog)
bool bpf_devices_cgroup_supported(void);
/*
 * Release the bpf program referenced by @prog, if any, and reset the caller's
 * pointer to NULL. Takes a pointer-to-pointer so it can serve as the handler
 * named in the __do_bpf_program_free cleanup attribute.
 */
static inline void __auto_bpf_program_free__(struct bpf_program **prog)
{
	struct bpf_program *owned = *prog;

	if (!owned)
		return;

	bpf_program_free(owned);
	*prog = NULL;
}
int bpf_list_add_device(struct lxc_conf *conf, struct device_item *device);
#else
/*
 * Fallback in the #else branch of HAVE_STRUCT_BPF_CGROUP_DEV_CTX: no program
 * is created. Sets errno to ENOSYS and returns NULL.
 */
static inline struct bpf_program *bpf_program_new(uint32_t prog_type)
{
errno = ENOSYS;
return NULL;
}
static inline int bpf_program_init(struct bpf_program *prog)
{
return -ENOSYS;
errno = ENOSYS;
return -1;
}
static inline int bpf_program_append_device(struct bpf_program *prog, char type,
int major, int minor,
const char *access, int allow)
{
return -ENOSYS;
errno = ENOSYS;
return -1;
}
static inline int bpf_program_finalize(struct bpf_program *prog)
{
return -ENOSYS;
errno = ENOSYS;
return -1;
}
static inline int bpf_program_cgroup_attach(struct bpf_program *prog, int type,
const char *path, uint32_t flags)
{
return -ENOSYS;
errno = ENOSYS;
return -1;
}
static inline int bpf_program_cgroup_detach(struct bpf_program *prog)
{
return -ENOSYS;
errno = ENOSYS;
return -1;
}
static inline void bpf_program_free(struct bpf_program *prog)
......@@ -136,9 +145,24 @@ static inline void lxc_clear_cgroup2_devices(struct lxc_conf *conf)
{
}
static inline void __do_bpf_program_free(struct bpf_program **prog)
/* Fallback stub: always reports the bpf device cgroup as unsupported. */
static inline bool bpf_devices_cgroup_supported(void)
{
return false;
}
/* Fallback stub: does nothing with @prog. */
static inline void __auto_bpf_program_free__(struct bpf_program **prog)
{
}
/*
 * Fallback stub: the device list is left untouched. Sets errno to ENOSYS and
 * returns -1.
 */
static inline int bpf_list_add_device(struct lxc_conf *conf,
struct device_item *device)
{
errno = ENOSYS;
return -1;
}
#endif
#define __do_bpf_program_free \
__attribute__((__cleanup__(__auto_bpf_program_free__)))
#endif /* __LXC_CGROUP2_DEVICES_H */
......@@ -39,6 +39,7 @@
#include "af_unix.h"
#include "cgroup.h"
#include "cgroups/cgroup2_devices.h"
#include "commands.h"
#include "commands_utils.h"
#include "conf.h"
......@@ -85,20 +86,21 @@ lxc_log_define(commands, lxc);
static const char *lxc_cmd_str(lxc_cmd_t cmd)
{
static const char *const cmdname[LXC_CMD_MAX] = {
[LXC_CMD_CONSOLE] = "console",
[LXC_CMD_TERMINAL_WINCH] = "terminal_winch",
[LXC_CMD_STOP] = "stop",
[LXC_CMD_GET_STATE] = "get_state",
[LXC_CMD_GET_INIT_PID] = "get_init_pid",
[LXC_CMD_GET_CLONE_FLAGS] = "get_clone_flags",
[LXC_CMD_GET_CGROUP] = "get_cgroup",
[LXC_CMD_GET_CONFIG_ITEM] = "get_config_item",
[LXC_CMD_GET_NAME] = "get_name",
[LXC_CMD_GET_LXCPATH] = "get_lxcpath",
[LXC_CMD_ADD_STATE_CLIENT] = "add_state_client",
[LXC_CMD_CONSOLE_LOG] = "console_log",
[LXC_CMD_SERVE_STATE_CLIENTS] = "serve_state_clients",
[LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER] = "seccomp_notify_add_listener",
[LXC_CMD_CONSOLE] = "console",
[LXC_CMD_TERMINAL_WINCH] = "terminal_winch",
[LXC_CMD_STOP] = "stop",
[LXC_CMD_GET_STATE] = "get_state",
[LXC_CMD_GET_INIT_PID] = "get_init_pid",
[LXC_CMD_GET_CLONE_FLAGS] = "get_clone_flags",
[LXC_CMD_GET_CGROUP] = "get_cgroup",
[LXC_CMD_GET_CONFIG_ITEM] = "get_config_item",
[LXC_CMD_GET_NAME] = "get_name",
[LXC_CMD_GET_LXCPATH] = "get_lxcpath",
[LXC_CMD_ADD_STATE_CLIENT] = "add_state_client",
[LXC_CMD_CONSOLE_LOG] = "console_log",
[LXC_CMD_SERVE_STATE_CLIENTS] = "serve_state_clients",
[LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER] = "seccomp_notify_add_listener",
[LXC_CMD_ADD_BPF_DEVICE_CGROUP] = "add_bpf_device_cgroup",
};
if (cmd >= LXC_CMD_MAX)
......@@ -925,6 +927,118 @@ reap_client_fd:
return 1;
}
/*
 * Ask the command server of container @name (under @lxcpath) to add @device
 * to its bpf device cgroup rules.
 *
 * The whole struct device_item is sent as the request payload of an
 * LXC_CMD_ADD_BPF_DEVICE_CGROUP command.
 *
 * Returns 0 on success. If the access string is malformed, or the command
 * fails or is answered with a negative result, the value produced by
 * error_log_errno() is returned. Without HAVE_STRUCT_BPF_CGROUP_DEV_CTX the
 * result of minus_one_set_errno(ENOSYS) is returned.
 */
int lxc_cmd_add_bpf_device_cgroup(const char *name, const char *lxcpath,
				  struct device_item *device)
{
#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
	int stopped = 0;
	struct lxc_cmd_rr cmd = {
		.req = {
			.cmd     = LXC_CMD_ADD_BPF_DEVICE_CGROUP,
			.data    = device,
			.datalen = sizeof(struct device_item),
		},
	};
	int ret;

	/*
	 * access is a fixed-size array, so an over-long mode shows up as a
	 * missing terminator inside the array. Look for the terminator with a
	 * bounded search instead of strlen(), which would read past the array,
	 * and bound the string in the log message for the same reason.
	 */
	if (!memchr(device->access, '\0', sizeof(device->access)))
		return error_log_errno(EINVAL, "Invalid access mode specified %.*s",
				       (int)sizeof(device->access), device->access);

	ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
	if (ret < 0 || cmd.rsp.ret < 0)
		return error_log_errno(errno, "Failed to add new bpf device cgroup rule");

	return 0;
#else
	return minus_one_set_errno(ENOSYS);
#endif
}
/*
 * Server-side handler for LXC_CMD_ADD_BPF_DEVICE_CGROUP.
 *
 * The request payload must be exactly one struct device_item. The rule is
 * merged into conf->devices, a fresh bpf device program is generated from the
 * complete list and attached to the container's unified cgroup with
 * BPF_F_ALLOW_MULTI. On success the new program is stored in
 * conf->cgroup2_devices and the previous one is handed to the
 * __do_bpf_program_free cleanup.
 *
 * The client receives rsp.ret == 0 on success and -1 on any failure.
 *
 * Returns 0 once a response has been sent, 1 to tell lxc_cmd_handler() to
 * close the client fd (malformed request or failed send). Without
 * HAVE_STRUCT_BPF_CGROUP_DEV_CTX the result of minus_one_set_errno(ENOSYS) is
 * returned.
 */
static int lxc_cmd_add_bpf_device_cgroup_callback(int fd, struct lxc_cmd_req *req,
						  struct lxc_handler *handler,
						  struct lxc_epoll_descr *descr)
{
#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
	__do_bpf_program_free struct bpf_program *devices = NULL;
	struct lxc_cmd_rsp rsp = {0};
	struct lxc_conf *conf = handler->conf;
	struct hierarchy *unified = handler->cgroup_ops->unified;
	int ret;
	struct lxc_list *it;
	struct device_item *device;
	struct bpf_program *devices_old;

	if (req->datalen <= 0)
		goto reap_client_fd;

	if (req->datalen != sizeof(struct device_item))
		goto reap_client_fd;

	if (!req->data)
		goto reap_client_fd;
	device = (struct device_item *)req->data;

	rsp.ret = -1;
	if (!unified)
		goto respond;

	/*
	 * The payload arrived over the command socket: make sure the access
	 * string is terminated inside its array before it is compared and
	 * logged as a C string.
	 */
	if (!memchr(device->access, '\0', sizeof(device->access)))
		goto respond;

	ret = bpf_list_add_device(conf, device);
	if (ret < 0)
		goto respond;

	devices = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE);
	if (!devices)
		goto respond;

	ret = bpf_program_init(devices);
	if (ret)
		goto respond;

	/* Rebuild the program from every rule currently in the list. */
	lxc_list_for_each(it, &conf->devices) {
		struct device_item *cur = it->elem;

		ret = bpf_program_append_device(devices, cur);
		if (ret)
			goto respond;
	}

	ret = bpf_program_finalize(devices);
	if (ret)
		goto respond;

	ret = bpf_program_cgroup_attach(devices, BPF_CGROUP_DEVICE,
					unified->container_full_path,
					BPF_F_ALLOW_MULTI);
	if (ret)
		goto respond;

	/*
	 * Replace old bpf program. The old one ends up in "devices" so that
	 * the cleanup attribute disposes of it when this function returns.
	 */
	devices_old = move_ptr(conf->cgroup2_devices);
	conf->cgroup2_devices = move_ptr(devices);
	devices = move_ptr(devices_old);

	rsp.ret = 0;

respond:
	ret = lxc_cmd_rsp_send(fd, &rsp);
	if (ret < 0)
		goto reap_client_fd;

	return 0;

reap_client_fd:
	/* Special indicator to lxc_cmd_handler() to close the fd and do related
	 * cleanup.
	 */
	return 1;
#else
	return minus_one_set_errno(ENOSYS);
#endif
}
int lxc_cmd_console_log(const char *name, const char *lxcpath,
struct lxc_console_log *log)
{
......@@ -1086,20 +1200,21 @@ static int lxc_cmd_process(int fd, struct lxc_cmd_req *req,
struct lxc_epoll_descr *);
callback cb[LXC_CMD_MAX] = {
[LXC_CMD_CONSOLE] = lxc_cmd_console_callback,
[LXC_CMD_TERMINAL_WINCH] = lxc_cmd_terminal_winch_callback,
[LXC_CMD_STOP] = lxc_cmd_stop_callback,
[LXC_CMD_GET_STATE] = lxc_cmd_get_state_callback,
[LXC_CMD_GET_INIT_PID] = lxc_cmd_get_init_pid_callback,
[LXC_CMD_GET_CLONE_FLAGS] = lxc_cmd_get_clone_flags_callback,
[LXC_CMD_GET_CGROUP] = lxc_cmd_get_cgroup_callback,
[LXC_CMD_GET_CONFIG_ITEM] = lxc_cmd_get_config_item_callback,
[LXC_CMD_GET_NAME] = lxc_cmd_get_name_callback,
[LXC_CMD_GET_LXCPATH] = lxc_cmd_get_lxcpath_callback,
[LXC_CMD_ADD_STATE_CLIENT] = lxc_cmd_add_state_client_callback,
[LXC_CMD_CONSOLE_LOG] = lxc_cmd_console_log_callback,
[LXC_CMD_SERVE_STATE_CLIENTS] = lxc_cmd_serve_state_clients_callback,
[LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER] = lxc_cmd_seccomp_notify_add_listener_callback,
[LXC_CMD_CONSOLE] = lxc_cmd_console_callback,
[LXC_CMD_TERMINAL_WINCH] = lxc_cmd_terminal_winch_callback,
[LXC_CMD_STOP] = lxc_cmd_stop_callback,
[LXC_CMD_GET_STATE] = lxc_cmd_get_state_callback,
[LXC_CMD_GET_INIT_PID] = lxc_cmd_get_init_pid_callback,
[LXC_CMD_GET_CLONE_FLAGS] = lxc_cmd_get_clone_flags_callback,
[LXC_CMD_GET_CGROUP] = lxc_cmd_get_cgroup_callback,
[LXC_CMD_GET_CONFIG_ITEM] = lxc_cmd_get_config_item_callback,
[LXC_CMD_GET_NAME] = lxc_cmd_get_name_callback,
[LXC_CMD_GET_LXCPATH] = lxc_cmd_get_lxcpath_callback,
[LXC_CMD_ADD_STATE_CLIENT] = lxc_cmd_add_state_client_callback,
[LXC_CMD_CONSOLE_LOG] = lxc_cmd_console_log_callback,
[LXC_CMD_SERVE_STATE_CLIENTS] = lxc_cmd_serve_state_clients_callback,
[LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER] = lxc_cmd_seccomp_notify_add_listener_callback,
[LXC_CMD_ADD_BPF_DEVICE_CGROUP] = lxc_cmd_add_bpf_device_cgroup_callback,
};
if (req->cmd >= LXC_CMD_MAX) {
......
......@@ -47,6 +47,7 @@ typedef enum {
LXC_CMD_CONSOLE_LOG,
LXC_CMD_SERVE_STATE_CLIENTS,
LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER,
LXC_CMD_ADD_BPF_DEVICE_CGROUP,
LXC_CMD_MAX,
} lxc_cmd_t;
......@@ -131,4 +132,8 @@ extern int lxc_cmd_seccomp_notify_add_listener(const char *name,
/* unused */ unsigned int command,
/* unused */ unsigned int flags);
struct device_item;
extern int lxc_cmd_add_bpf_device_cgroup(const char *name, const char *lxcpath,
struct device_item *device);
#endif /* __commands_h */
......@@ -229,6 +229,11 @@ struct device_item {
int minor;
char access[4];
int allow;
/* -1 -> no global rule
* 0 -> whitelist (deny all)
* 1 -> blacklist (allow all)
*/
int global_rule;
};
struct lxc_conf {
......
......@@ -518,6 +518,13 @@ ATTR_UNUSED static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo, \
__ret__; \
})
/*
 * log_error() - set errno, log an error, and yield a return value.
 *
 * Assigns __errno__ to errno, emits the formatted message through SYSERROR()
 * and evaluates to __ret__, so it can be used directly in a return statement:
 *
 *     return log_error(false, EINVAL, "message %d", value);
 */
#define log_error(__ret__, __errno__, format, ...) \
({ \
errno = __errno__; \
SYSERROR(format, ##__VA_ARGS__); \
__ret__; \
})
extern int lxc_log_fd;
extern int lxc_log_syslog(int facility);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment