Unverified Commit 3e32a626 by Stéphane Graber Committed by GitHub

Merge pull request #3195 from brauner/cgroup2_devices_fixes

cgroup2: add bpf device controller live update
parents 637de040 fda39d45
...@@ -726,6 +726,7 @@ AX_CHECK_COMPILE_FLAG([-Wlogical-op], [CFLAGS="$CFLAGS -Wlogical-op"],,[-Werror] ...@@ -726,6 +726,7 @@ AX_CHECK_COMPILE_FLAG([-Wlogical-op], [CFLAGS="$CFLAGS -Wlogical-op"],,[-Werror]
AX_CHECK_COMPILE_FLAG([-Wmissing-include-dirs], [CFLAGS="$CFLAGS -Wmissing-include-dirs"],,[-Werror]) AX_CHECK_COMPILE_FLAG([-Wmissing-include-dirs], [CFLAGS="$CFLAGS -Wmissing-include-dirs"],,[-Werror])
AX_CHECK_COMPILE_FLAG([-Wold-style-definition], [CFLAGS="$CFLAGS -Wold-style-definition"],,[-Werror]) AX_CHECK_COMPILE_FLAG([-Wold-style-definition], [CFLAGS="$CFLAGS -Wold-style-definition"],,[-Werror])
AX_CHECK_COMPILE_FLAG([-Winit-self], [CFLAGS="$CFLAGS -Winit-self"],,[-Werror]) AX_CHECK_COMPILE_FLAG([-Winit-self], [CFLAGS="$CFLAGS -Winit-self"],,[-Werror])
AX_CHECK_COMPILE_FLAG([-Wunused-but-set-variable], [CFLAGS="$CFLAGS -Wunused-but-set-variable"],,[-Werror])
AX_CHECK_COMPILE_FLAG([-Wfloat-equal], [CFLAGS="$CFLAGS -Wfloat-equal"],,[-Werror]) AX_CHECK_COMPILE_FLAG([-Wfloat-equal], [CFLAGS="$CFLAGS -Wfloat-equal"],,[-Werror])
AX_CHECK_COMPILE_FLAG([-Wsuggest-attribute=noreturn], [CFLAGS="$CFLAGS -Wsuggest-attribute=noreturn"],,[-Werror]) AX_CHECK_COMPILE_FLAG([-Wsuggest-attribute=noreturn], [CFLAGS="$CFLAGS -Wsuggest-attribute=noreturn"],,[-Werror])
AX_CHECK_COMPILE_FLAG([-Werror=return-type], [CFLAGS="$CFLAGS -Werror=return-type"],,[-Werror]) AX_CHECK_COMPILE_FLAG([-Werror=return-type], [CFLAGS="$CFLAGS -Werror=return-type"],,[-Werror])
......
...@@ -24,6 +24,8 @@ ...@@ -24,6 +24,8 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include "config.h"
/* /*
* api_extensions is the list of all API extensions in the order they were * api_extensions is the list of all API extensions in the order they were
* added. * added.
...@@ -50,6 +52,9 @@ static char *api_extensions[] = { ...@@ -50,6 +52,9 @@ static char *api_extensions[] = {
"network_gateway_device_route", "network_gateway_device_route",
"network_phys_macvlan_mtu", "network_phys_macvlan_mtu",
"network_veth_router", "network_veth_router",
#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
"cgroup2_devices",
#endif
}; };
static size_t nr_api_extensions = sizeof(api_extensions) / sizeof(*api_extensions); static size_t nr_api_extensions = sizeof(api_extensions) / sizeof(*api_extensions);
......
...@@ -176,6 +176,11 @@ static void must_append_controller(char **klist, char **nlist, char ***clist, ...@@ -176,6 +176,11 @@ static void must_append_controller(char **klist, char **nlist, char ***clist,
(*clist)[newentry] = copy; (*clist)[newentry] = copy;
} }
/*
 * Report whether the cgroup driver described by @ops runs on a pure cgroup2
 * (unified) hierarchy, i.e. its layout is CGROUP_LAYOUT_UNIFIED.
 */
static inline bool pure_unified_layout(const struct cgroup_ops *ops)
{
	const bool is_unified = (CGROUP_LAYOUT_UNIFIED == ops->cgroup_layout);

	return is_unified;
}
/* Given a handler's cgroup data, return the struct hierarchy for the controller /* Given a handler's cgroup data, return the struct hierarchy for the controller
* @c, or NULL if there is none. * @c, or NULL if there is none.
*/ */
...@@ -196,8 +201,12 @@ struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *controller) ...@@ -196,8 +201,12 @@ struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *controller)
if (ops->hierarchies[i]->controllers && if (ops->hierarchies[i]->controllers &&
!ops->hierarchies[i]->controllers[0]) !ops->hierarchies[i]->controllers[0])
return ops->hierarchies[i]; return ops->hierarchies[i];
continue; continue;
} else if (pure_unified_layout(ops) &&
strcmp(controller, "devices") == 0) {
if (ops->unified->bpf_device_controller)
return ops->unified;
break;
} }
if (string_in_list(ops->hierarchies[i]->controllers, controller)) if (string_in_list(ops->hierarchies[i]->controllers, controller))
...@@ -778,9 +787,9 @@ static char **cg_unified_make_empty_controller(void) ...@@ -778,9 +787,9 @@ static char **cg_unified_make_empty_controller(void)
static char **cg_unified_get_controllers(const char *file) static char **cg_unified_get_controllers(const char *file)
{ {
__do_free char *buf = NULL; __do_free char *buf = NULL;
char *tok;
char *sep = " \t\n"; char *sep = " \t\n";
char **aret = NULL; char **aret = NULL;
char *tok;
buf = read_file(file); buf = read_file(file);
if (!buf) if (!buf)
...@@ -2278,12 +2287,117 @@ __cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename, ...@@ -2278,12 +2287,117 @@ __cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename,
return ret; return ret;
} }
/*
 * Parse the major or minor component of a device cgroup rule.
 *
 * @cursor points at the current parse position and is advanced past the
 * consumed characters on success. The component is either '*' (wildcard,
 * stored as -1 in @number) or a run of decimal digits which is converted
 * with lxc_safe_int(). At most 49 digits are consumed; a longer run leaves
 * @cursor on a digit so that the caller's separator check rejects the rule.
 *
 * Returns 0 on success and -1 on malformed input.
 */
static int device_cgroup_parse_devnum(const char **cursor, int *number)
{
	const char *cur = *cursor;
	char digits[50] = {0};
	size_t len = 0;

	if (*cur == '*') {
		*number = -1;
		*cursor = cur + 1;
		return 0;
	}

	/* ctype functions require a value representable as unsigned char. */
	if (!isdigit((unsigned char)*cur))
		return -1;

	while (len < sizeof(digits) - 1 && isdigit((unsigned char)*cur))
		digits[len++] = *cur++;

	if (lxc_safe_int(digits, number))
		return -1;

	*cursor = cur;
	return 0;
}

/*
 * Parse a cgroup v1 style device rule into @device.
 *
 * @key selects the direction of the rule: "devices.allow" marks it as an
 * allow rule, every other key is treated as a deny rule.
 * @val is the rule string. Two forms are accepted:
 *   - "a": a global rule. With "devices.allow" the device list becomes a
 *     blacklist, otherwise it becomes a whitelist. type is set to 'a',
 *     major/minor to -1 and allow to -1.
 *   - "<a|b|c> <major|*>:<minor|*> <access>": a local rule, where <access>
 *     is up to three characters out of 'r', 'w' and 'm', optionally ended
 *     by a newline or the end of the string.
 *
 * Returns 0 on success and -1 if an argument is NULL or @val is malformed.
 * On failure @device may have been partially updated.
 */
static int device_cgroup_rule_parse(struct device_item *device, const char *key,
				    const char *val)
{
	size_t nr_access;

	if (!device || !key || !val)
		return -1;

	device->allow = (strcmp("devices.allow", key) == 0) ? 1 : 0;

	if (strcmp(val, "a") == 0) {
		/* global rule */
		device->type = 'a';
		device->major = -1;
		device->minor = -1;
		device->global_rule = device->allow
					  ? LXC_BPF_DEVICE_CGROUP_BLACKLIST
					  : LXC_BPF_DEVICE_CGROUP_WHITELIST;
		device->allow = -1;
		return 0;
	}

	device->global_rule = LXC_BPF_DEVICE_CGROUP_LOCAL_RULE;

	/* read device type */
	switch (*val) {
	case 'a':
	case 'b':
	case 'c':
		device->type = *val;
		break;
	default:
		return -1;
	}
	val++;

	if (!isspace((unsigned char)*val))
		return -1;
	val++;

	/* read major */
	if (device_cgroup_parse_devnum(&val, &device->major))
		return -1;

	if (*val != ':')
		return -1;
	val++;

	/* read minor */
	if (device_cgroup_parse_devnum(&val, &device->minor))
		return -1;

	if (!isspace((unsigned char)*val))
		return -1;
	val++;

	/* read access: at most three of 'r', 'w', 'm' */
	for (nr_access = 0; nr_access < 3; nr_access++, val++) {
		if (*val == '\n' || *val == '\0')
			break;

		if (*val != 'r' && *val != 'w' && *val != 'm')
			return -1;

		device->access[nr_access] = *val;
	}

	/*
	 * Terminate explicitly so the result does not depend on the caller
	 * having zeroed @device beforehand.
	 */
	device->access[nr_access] = '\0';

	return 0;
}
/* Called externally (i.e. from 'lxc-cgroup') to set new cgroup limits. Here we /* Called externally (i.e. from 'lxc-cgroup') to set new cgroup limits. Here we
* don't have a cgroup_data set up, so we ask the running container through the * don't have a cgroup_data set up, so we ask the running container through the
* commands API for the cgroup path. * commands API for the cgroup path.
*/ */
__cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops, __cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops,
const char *filename, const char *value, const char *key, const char *value,
const char *name, const char *lxcpath) const char *name, const char *lxcpath)
{ {
__do_free char *path = NULL; __do_free char *path = NULL;
...@@ -2292,11 +2406,26 @@ __cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops, ...@@ -2292,11 +2406,26 @@ __cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops,
struct hierarchy *h; struct hierarchy *h;
int ret = -1; int ret = -1;
controller = must_copy_string(filename); controller = must_copy_string(key);
p = strchr(controller, '.'); p = strchr(controller, '.');
if (p) if (p)
*p = '\0'; *p = '\0';
if (pure_unified_layout(ops) && strcmp(controller, "devices") == 0) {
struct device_item device = {0};
ret = device_cgroup_rule_parse(&device, key, value);
if (ret < 0)
return error_log_errno(EINVAL, "Failed to parse device string %s=%s",
key, value);
ret = lxc_cmd_add_bpf_device_cgroup(name, lxcpath, &device);
if (ret < 0)
return -1;
return 0;
}
path = lxc_cmd_get_cgroup_path(name, lxcpath, controller); path = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
/* not running */ /* not running */
if (!path) if (!path)
...@@ -2306,7 +2435,7 @@ __cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops, ...@@ -2306,7 +2435,7 @@ __cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops,
if (h) { if (h) {
__do_free char *fullpath = NULL; __do_free char *fullpath = NULL;
fullpath = build_full_cgpath_from_monitorpath(h, path, filename); fullpath = build_full_cgpath_from_monitorpath(h, path, key);
ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666); ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666);
} }
...@@ -2485,135 +2614,23 @@ out: ...@@ -2485,135 +2614,23 @@ out:
* Some of the parsing logic comes from the original cgroup device v1 * Some of the parsing logic comes from the original cgroup device v1
* implementation in the kernel. * implementation in the kernel.
*/ */
static int bpf_device_cgroup_prepare(struct lxc_conf *conf, const char *key, static int bpf_device_cgroup_prepare(struct cgroup_ops *ops,
struct lxc_conf *conf, const char *key,
const char *val) const char *val)
{ {
#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX #ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
struct device_item { struct device_item device_item = {0};
char type; int ret;
int major;
int minor;
char access[100];
int allow;
} device_item = {0};
int count, ret;
char temp[50];
struct bpf_program *device;
if (conf->cgroup2_devices) {
device = conf->cgroup2_devices;
} else {
device = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE);
if (device && bpf_program_init(device)) {
ERROR("Failed to initialize bpf program");
return -1;
}
}
if (!device) {
ERROR("Failed to create new ebpf device program");
return -1;
}
conf->cgroup2_devices = device;
if (strcmp("devices.allow", key) == 0)
device_item.allow = 1;
if (strcmp(val, "a") == 0) {
device->blacklist = (device_item.allow == 1);
return 0;
}
switch (*val) {
case 'a':
__fallthrough;
case 'b':
__fallthrough;
case 'c':
device_item.type = *val;
break;
default:
return -1;
}
val++;
if (!isspace(*val))
return -1;
val++;
if (*val == '*') {
device_item.major = ~0;
val++;
} else if (isdigit(*val)) {
memset(temp, 0, sizeof(temp));
for (count = 0; count < sizeof(temp) - 1; count++) {
temp[count] = *val;
val++;
if (!isdigit(*val))
break;
}
ret = lxc_safe_uint(temp, &device_item.major);
if (ret)
return -1;
} else {
return -1;
}
if (*val != ':')
return -1;
val++;
/* read minor */ ret = device_cgroup_rule_parse(&device_item, key, val);
if (*val == '*') { if (ret < 0)
device_item.minor = ~0; return error_log_errno(EINVAL,
val++; "Failed to parse device string %s=%s",
} else if (isdigit(*val)) { key, val);
memset(temp, 0, sizeof(temp));
for (count = 0; count < sizeof(temp) - 1; count++) {
temp[count] = *val;
val++;
if (!isdigit(*val))
break;
}
ret = lxc_safe_uint(temp, &device_item.minor);
if (ret)
return -1;
} else {
return -1;
}
if (!isspace(*val))
return -1;
for (val++, count = 0; count < 3; count++, val++) {
switch (*val) {
case 'r':
device_item.access[count] = *val;
break;
case 'w':
device_item.access[count] = *val;
break;
case 'm':
device_item.access[count] = *val;
break;
case '\n':
case '\0':
count = 3;
break;
default:
return -1;
}
}
ret = bpf_program_append_device(device, device_item.type, device_item.major, ret = bpf_list_add_device(conf, &device_item);
device_item.minor, device_item.access, if (ret < 0)
device_item.allow);
if (ret) {
ERROR("Failed to add new rule to bpf device program: type %c, major %d, minor %d, access %s, allow %d",
device_item.type, device_item.major, device_item.minor,
device_item.access, device_item.allow);
return -1; return -1;
} else {
TRACE("Added new rule to bpf device program: type %c, major %d, minor %d, access %s, allow %d",
device_item.type, device_item.major, device_item.minor,
device_item.access, device_item.allow);
}
#endif #endif
return 0; return 0;
} }
...@@ -2637,7 +2654,7 @@ static bool __cg_unified_setup_limits(struct cgroup_ops *ops, ...@@ -2637,7 +2654,7 @@ static bool __cg_unified_setup_limits(struct cgroup_ops *ops,
struct lxc_cgroup *cg = iterator->elem; struct lxc_cgroup *cg = iterator->elem;
if (strncmp("devices", cg->subsystem, 7) == 0) { if (strncmp("devices", cg->subsystem, 7) == 0) {
ret = bpf_device_cgroup_prepare(conf, cg->subsystem, ret = bpf_device_cgroup_prepare(ops, conf, cg->subsystem,
cg->value); cg->value);
} else { } else {
fullpath = must_make_path(h->container_full_path, fullpath = must_make_path(h->container_full_path,
...@@ -2661,26 +2678,59 @@ __cgfsng_ops bool cgfsng_devices_activate(struct cgroup_ops *ops, ...@@ -2661,26 +2678,59 @@ __cgfsng_ops bool cgfsng_devices_activate(struct cgroup_ops *ops,
struct lxc_handler *handler) struct lxc_handler *handler)
{ {
#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX #ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
__do_bpf_program_free struct bpf_program *devices = NULL;
struct lxc_conf *conf = handler->conf;
struct hierarchy *unified = ops->unified;
int ret; int ret;
struct hierarchy *h = ops->unified; struct lxc_list *it;
struct bpf_program *device = handler->conf->cgroup2_devices; struct bpf_program *devices_old;
if (!h) if (!unified)
return false; return false;
if (!device) if (lxc_list_empty(&conf->devices))
return true; return true;
ret = bpf_program_finalize(device); devices = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE);
if (!devices)
return log_error(false, ENOMEM,
"Failed to create new bpf program");
ret = bpf_program_init(devices);
if (ret) if (ret)
return false; return log_error(false, ENOMEM,
"Failed to initialize bpf program");
return bpf_program_cgroup_attach(device, BPF_CGROUP_DEVICE, lxc_list_for_each(it, &conf->devices) {
h->container_full_path, struct device_item *cur = it->elem;
BPF_F_ALLOW_MULTI) == 0;
#else ret = bpf_program_append_device(devices, cur);
return true; if (ret)
return log_error(false,
ENOMEM, "Failed to add new rule to bpf device program: type %c, major %d, minor %d, access %s, allow %d, global_rule %d",
cur->type, cur->major, cur->minor,
cur->access, cur->allow, cur->global_rule);
TRACE("Added rule to bpf device program: type %c, major %d, minor %d, access %s, allow %d, global_rule %d",
cur->type, cur->major, cur->minor, cur->access,
cur->allow, cur->global_rule);
}
ret = bpf_program_finalize(devices);
if (ret)
return log_error(false, ENOMEM, "Failed to finalize bpf program");
ret = bpf_program_cgroup_attach(devices, BPF_CGROUP_DEVICE,
unified->container_full_path,
BPF_F_ALLOW_MULTI);
if (ret)
return log_error(false, ENOMEM, "Failed to attach bpf program");
/* Replace old bpf program. */
devices_old = move_ptr(conf->cgroup2_devices);
conf->cgroup2_devices = move_ptr(devices);
devices = move_ptr(devices_old);
#endif #endif
return true;
} }
__cgfsng_ops static bool cgfsng_setup_limits(struct cgroup_ops *ops, __cgfsng_ops static bool cgfsng_setup_limits(struct cgroup_ops *ops,
...@@ -2989,6 +3039,9 @@ static int cg_unified_init(struct cgroup_ops *ops, bool relative, ...@@ -2989,6 +3039,9 @@ static int cg_unified_init(struct cgroup_ops *ops, bool relative,
if (!unprivileged) if (!unprivileged)
cg_unified_delegate(&new->cgroup2_chown); cg_unified_delegate(&new->cgroup2_chown);
if (bpf_devices_cgroup_supported())
new->bpf_device_controller = 1;
ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED; ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED;
ops->unified = new; ops->unified = new;
return CGROUP2_SUPER_MAGIC; return CGROUP2_SUPER_MAGIC;
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include <unistd.h> #include <unistd.h>
#include "cgroup.h" #include "cgroup.h"
#include "cgroup2_devices.h"
#include "conf.h" #include "conf.h"
#include "config.h" #include "config.h"
#include "initutils.h" #include "initutils.h"
...@@ -86,6 +87,9 @@ void cgroup_exit(struct cgroup_ops *ops) ...@@ -86,6 +87,9 @@ void cgroup_exit(struct cgroup_ops *ops)
free(ops->cgroup_pattern); free(ops->cgroup_pattern);
free(ops->container_cgroup); free(ops->container_cgroup);
if (ops->cgroup2_devices)
bpf_program_free(ops->cgroup2_devices);
for (it = ops->hierarchies; it && *it; it++) { for (it = ops->hierarchies; it && *it; it++) {
char **p; char **p;
......
...@@ -92,6 +92,7 @@ struct hierarchy { ...@@ -92,6 +92,7 @@ struct hierarchy {
char *container_full_path; char *container_full_path;
char *monitor_full_path; char *monitor_full_path;
int version; int version;
int bpf_device_controller:1;
}; };
struct cgroup_ops { struct cgroup_ops {
...@@ -119,6 +120,13 @@ struct cgroup_ops { ...@@ -119,6 +120,13 @@ struct cgroup_ops {
struct hierarchy *unified; struct hierarchy *unified;
/* /*
* @cgroup2_devices
* bpf program to limit device access; only applicable to privileged
* containers.
*/
struct bpf_program *cgroup2_devices;
/*
* @cgroup_layout * @cgroup_layout
* - What cgroup layout the container is running with. * - What cgroup layout the container is running with.
* - CGROUP_LAYOUT_UNKNOWN * - CGROUP_LAYOUT_UNKNOWN
......
...@@ -7,7 +7,6 @@ ...@@ -7,7 +7,6 @@
#endif #endif
#include <errno.h> #include <errno.h>
#include <fcntl.h> #include <fcntl.h>
#include <linux/filter.h>
#include <stdbool.h> #include <stdbool.h>
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
...@@ -24,6 +23,7 @@ ...@@ -24,6 +23,7 @@
#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX #ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
#include <linux/bpf.h> #include <linux/bpf.h>
#include <linux/filter.h>
lxc_log_define(cgroup2_devices, cgroup); lxc_log_define(cgroup2_devices, cgroup);
...@@ -51,6 +51,9 @@ static int bpf_program_add_instructions(struct bpf_program *prog, ...@@ -51,6 +51,9 @@ static int bpf_program_add_instructions(struct bpf_program *prog,
void bpf_program_free(struct bpf_program *prog) void bpf_program_free(struct bpf_program *prog)
{ {
if (!prog)
return;
(void)bpf_program_cgroup_detach(prog); (void)bpf_program_cgroup_detach(prog);
if (prog->kernel_fd >= 0) if (prog->kernel_fd >= 0)
...@@ -170,12 +173,19 @@ struct bpf_program *bpf_program_new(uint32_t prog_type) ...@@ -170,12 +173,19 @@ struct bpf_program *bpf_program_new(uint32_t prog_type)
prog->prog_type = prog_type; prog->prog_type = prog_type;
prog->kernel_fd = -EBADF; prog->kernel_fd = -EBADF;
/*
* By default a whitelist is used unless the user tells us otherwise.
*/
prog->device_list_type = LXC_BPF_DEVICE_CGROUP_WHITELIST;
return move_ptr(prog); return move_ptr(prog);
} }
int bpf_program_init(struct bpf_program *prog) int bpf_program_init(struct bpf_program *prog)
{ {
if (!prog)
return minus_one_set_errno(EINVAL);
const struct bpf_insn pre_insn[] = { const struct bpf_insn pre_insn[] = {
/* load device type to r2 */ /* load device type to r2 */
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct bpf_cgroup_dev_ctx, access_type)), BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct bpf_cgroup_dev_ctx, access_type)),
...@@ -195,33 +205,41 @@ int bpf_program_init(struct bpf_program *prog) ...@@ -195,33 +205,41 @@ int bpf_program_init(struct bpf_program *prog)
return bpf_program_add_instructions(prog, pre_insn, ARRAY_SIZE(pre_insn)); return bpf_program_add_instructions(prog, pre_insn, ARRAY_SIZE(pre_insn));
} }
int bpf_program_append_device(struct bpf_program *prog, char type, int major, int bpf_program_append_device(struct bpf_program *prog, struct device_item *device)
int minor, const char *access, int allow)
{ {
int ret; int ret;
int jump_nr = 1; int jump_nr = 1;
struct bpf_insn bpf_access_decision[] = { struct bpf_insn bpf_access_decision[] = {
BPF_MOV64_IMM(BPF_REG_0, allow), BPF_MOV64_IMM(BPF_REG_0, device->allow),
BPF_EXIT_INSN(), BPF_EXIT_INSN(),
}; };
int access_mask; int access_mask;
int device_type; int device_type;
device_type = bpf_device_type(type); if (!prog || !device)
return minus_one_set_errno(EINVAL);
/* This is a global rule so no need to append anything. */
if (device->global_rule > LXC_BPF_DEVICE_CGROUP_LOCAL_RULE) {
prog->device_list_type = device->global_rule;
return 0;
}
device_type = bpf_device_type(device->type);
if (device_type < 0) if (device_type < 0)
return error_log_errno(EINVAL, "Invalid bpf cgroup device type %c", type); return error_log_errno(EINVAL, "Invalid bpf cgroup device type %c", device->type);
if (device_type > 0) if (device_type > 0)
jump_nr++; jump_nr++;
access_mask = bpf_access_mask(access); access_mask = bpf_access_mask(device->access);
if (!bpf_device_all_access(access_mask)) if (!bpf_device_all_access(access_mask))
jump_nr += 3; jump_nr += 3;
if (major >= 0) if (device->major != -1)
jump_nr++; jump_nr++;
if (minor >= 0) if (device->minor != -1)
jump_nr++; jump_nr++;
if (device_type > 0) { if (device_type > 0) {
...@@ -247,9 +265,9 @@ int bpf_program_append_device(struct bpf_program *prog, char type, int major, ...@@ -247,9 +265,9 @@ int bpf_program_append_device(struct bpf_program *prog, char type, int major,
return error_log_errno(errno, "Failed to add instructions to bpf cgroup program"); return error_log_errno(errno, "Failed to add instructions to bpf cgroup program");
} }
if (major >= 0) { if (device->major >= 0) {
struct bpf_insn ins[] = { struct bpf_insn ins[] = {
BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, jump_nr--), BPF_JMP_IMM(BPF_JNE, BPF_REG_4, device->major, jump_nr--),
}; };
ret = bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins)); ret = bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins));
...@@ -257,9 +275,9 @@ int bpf_program_append_device(struct bpf_program *prog, char type, int major, ...@@ -257,9 +275,9 @@ int bpf_program_append_device(struct bpf_program *prog, char type, int major,
return error_log_errno(errno, "Failed to add instructions to bpf cgroup program"); return error_log_errno(errno, "Failed to add instructions to bpf cgroup program");
} }
if (minor >= 0) { if (device->minor >= 0) {
struct bpf_insn ins[] = { struct bpf_insn ins[] = {
BPF_JMP_IMM(BPF_JNE, BPF_REG_5, minor, jump_nr--), BPF_JMP_IMM(BPF_JNE, BPF_REG_5, device->minor, jump_nr--),
}; };
ret = bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins)); ret = bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins));
...@@ -278,12 +296,17 @@ int bpf_program_append_device(struct bpf_program *prog, char type, int major, ...@@ -278,12 +296,17 @@ int bpf_program_append_device(struct bpf_program *prog, char type, int major,
int bpf_program_finalize(struct bpf_program *prog) int bpf_program_finalize(struct bpf_program *prog)
{ {
struct bpf_insn ins[] = { struct bpf_insn ins[] = {
BPF_MOV64_IMM(BPF_REG_0, prog->blacklist ? 1 : 0), BPF_MOV64_IMM(BPF_REG_0, prog->device_list_type),
BPF_EXIT_INSN(), BPF_EXIT_INSN(),
}; };
if (!prog)
return minus_one_set_errno(EINVAL);
TRACE("Implementing %s bpf device cgroup program", TRACE("Implementing %s bpf device cgroup program",
prog->blacklist ? "blacklist" : "whitelist"); prog->device_list_type == LXC_BPF_DEVICE_CGROUP_BLACKLIST
? "blacklist"
: "whitelist");
return bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins)); return bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins));
} }
...@@ -322,6 +345,9 @@ int bpf_program_cgroup_attach(struct bpf_program *prog, int type, ...@@ -322,6 +345,9 @@ int bpf_program_cgroup_attach(struct bpf_program *prog, int type,
union bpf_attr attr; union bpf_attr attr;
int ret; int ret;
if (!prog)
return minus_one_set_errno(EINVAL);
if (flags & ~(BPF_F_ALLOW_OVERRIDE, BPF_F_ALLOW_MULTI)) if (flags & ~(BPF_F_ALLOW_OVERRIDE, BPF_F_ALLOW_MULTI))
return error_log_errno(EINVAL, "Invalid flags for bpf program"); return error_log_errno(EINVAL, "Invalid flags for bpf program");
...@@ -359,7 +385,7 @@ int bpf_program_cgroup_attach(struct bpf_program *prog, int type, ...@@ -359,7 +385,7 @@ int bpf_program_cgroup_attach(struct bpf_program *prog, int type,
if (ret < 0) if (ret < 0)
return error_log_errno(errno, "Failed to attach bpf program"); return error_log_errno(errno, "Failed to attach bpf program");
free_and_replace(prog->attached_path, copy); free_replace_move_ptr(prog->attached_path, copy);
prog->attached_type = type; prog->attached_type = type;
prog->attached_flags = flags; prog->attached_flags = flags;
...@@ -411,4 +437,97 @@ void lxc_clear_cgroup2_devices(struct lxc_conf *conf) ...@@ -411,4 +437,97 @@ void lxc_clear_cgroup2_devices(struct lxc_conf *conf)
(void)bpf_program_free(conf->cgroup2_devices); (void)bpf_program_free(conf->cgroup2_devices);
} }
} }
/*
 * Record @device in the device rule list of @conf.
 *
 * The list is walked first to avoid duplicates:
 *   - if both an existing entry and @device carry a global rule (i.e. are
 *     not LXC_BPF_DEVICE_CGROUP_LOCAL_RULE), the existing entry takes over
 *     the new global rule and 1 is returned;
 *   - if an entry with the same type, major, minor and access exists, its
 *     allow value is flipped when it differs (returns 0), otherwise the
 *     entry is reused unchanged (returns 1).
 * If nothing matches, a copy of @device is appended to the list and 0 is
 * returned.
 *
 * Returns a negative value on invalid arguments or allocation failure.
 */
int bpf_list_add_device(struct lxc_conf *conf, struct device_item *device)
{
	__do_free struct lxc_list *list_elem = NULL;
	__do_free struct device_item *new_device = NULL;
	struct lxc_list *it;

	if (!conf || !device)
		return minus_one_set_errno(EINVAL);

	lxc_list_for_each(it, &conf->devices) {
		struct device_item *cur = it->elem;

		/*
		 * Use the named constant rather than a bare -1 so this check
		 * stays in sync with bpf_program_append_device().
		 */
		if (cur->global_rule > LXC_BPF_DEVICE_CGROUP_LOCAL_RULE &&
		    device->global_rule > LXC_BPF_DEVICE_CGROUP_LOCAL_RULE) {
			TRACE("Switched from %s to %s",
			      cur->global_rule == LXC_BPF_DEVICE_CGROUP_WHITELIST
				  ? "whitelist"
				  : "blacklist",
			      device->global_rule == LXC_BPF_DEVICE_CGROUP_WHITELIST
				  ? "whitelist"
				  : "blacklist");
			cur->global_rule = device->global_rule;
			return 1;
		}

		if (cur->type != device->type)
			continue;

		if (cur->major != device->major)
			continue;

		if (cur->minor != device->minor)
			continue;

		if (strcmp(cur->access, device->access))
			continue;

		/*
		 * The rule is switched from allow to deny or vice versa so
		 * don't bother allocating just flip the existing one.
		 */
		if (cur->allow != device->allow) {
			cur->allow = device->allow;
			return log_trace(0, "Switched existing rule of bpf device program: type %c, major %d, minor %d, access %s, allow %d, global_rule %d",
					 cur->type, cur->major, cur->minor,
					 cur->access, cur->allow,
					 cur->global_rule);
		}

		return log_trace(1, "Reusing existing rule of bpf device program: type %c, major %d, minor %d, access %s, allow %d, global_rule %d",
				 cur->type, cur->major, cur->minor, cur->access,
				 cur->allow, cur->global_rule);
	}

	list_elem = malloc(sizeof(*list_elem));
	if (!list_elem)
		return error_log_errno(ENOMEM, "Failed to allocate new device list");

	new_device = memdup(device, sizeof(struct device_item));
	if (!new_device)
		return error_log_errno(ENOMEM, "Failed to allocate new device item");

	/* Ownership of both allocations passes to the list. */
	lxc_list_add_elem(list_elem, move_ptr(new_device));
	lxc_list_add_tail(&conf->devices, move_ptr(list_elem));

	return 0;
}
/*
 * Probe whether the bpf device cgroup controller can be used.
 *
 * The check requires an effective uid of 0. It then builds a minimal
 * BPF_PROG_TYPE_CGROUP_DEVICE program (set r0 to 1 and exit) and tries to
 * load it into the kernel. The probe program is released automatically when
 * the function returns.
 *
 * Returns true if the program could be loaded, false otherwise.
 */
bool bpf_devices_cgroup_supported(void)
{
	const struct bpf_insn dummy[] = {
		BPF_MOV64_IMM(BPF_REG_0, 1),
		BPF_EXIT_INSN(),
	};

	__do_bpf_program_free struct bpf_program *prog = NULL;
	int ret;

	if (geteuid() != 0)
		return log_error(false, EINVAL,
				 "The bpf device cgroup requires real root");

	prog = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE);
	/*
	 * bpf_program_new() hands back a pointer, so failure must be tested
	 * with a NULL check; an ordered comparison against 0 never fires and
	 * would let a NULL program reach the calls below.
	 */
	if (!prog)
		return log_error(false,
				 errno, "Failed to allocate new bpf device cgroup program");

	ret = bpf_program_add_instructions(prog, dummy, ARRAY_SIZE(dummy));
	if (ret < 0)
		return log_error(false,
				 errno, "Failed to add new instructions to bpf device cgroup program");

	ret = bpf_program_load_kernel(prog, NULL, 0);
	if (ret < 0)
		return log_error(false,
				 errno, "Failed to load new bpf device cgroup program");

	return log_trace(true, "The bpf device cgroup is supported");
}
#endif #endif
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#ifndef __LXC_CGROUP2_DEVICES_H #ifndef __LXC_CGROUP2_DEVICES_H
#define __LXC_CGROUP2_DEVICES_H #define __LXC_CGROUP2_DEVICES_H
#include <errno.h>
#include <fcntl.h> #include <fcntl.h>
#include <stdbool.h> #include <stdbool.h>
#include <stddef.h> #include <stddef.h>
...@@ -62,7 +63,7 @@ static inline int missing_bpf(int cmd, union bpf_attr *attr, size_t size) ...@@ -62,7 +63,7 @@ static inline int missing_bpf(int cmd, union bpf_attr *attr, size_t size)
#endif #endif
struct bpf_program { struct bpf_program {
bool blacklist; int device_list_type;
int kernel_fd; int kernel_fd;
uint32_t prog_type; uint32_t prog_type;
...@@ -79,53 +80,61 @@ struct bpf_program { ...@@ -79,53 +80,61 @@ struct bpf_program {
#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX #ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
struct bpf_program *bpf_program_new(uint32_t prog_type); struct bpf_program *bpf_program_new(uint32_t prog_type);
int bpf_program_init(struct bpf_program *prog); int bpf_program_init(struct bpf_program *prog);
int bpf_program_append_device(struct bpf_program *prog, char type, int major, int bpf_program_append_device(struct bpf_program *prog,
int minor, const char *access, int allow); struct device_item *device);
int bpf_program_finalize(struct bpf_program *prog); int bpf_program_finalize(struct bpf_program *prog);
int bpf_program_cgroup_attach(struct bpf_program *prog, int type, int bpf_program_cgroup_attach(struct bpf_program *prog, int type,
const char *path, uint32_t flags); const char *path, uint32_t flags);
int bpf_program_cgroup_detach(struct bpf_program *prog); int bpf_program_cgroup_detach(struct bpf_program *prog);
void bpf_program_free(struct bpf_program *prog); void bpf_program_free(struct bpf_program *prog);
void lxc_clear_cgroup2_devices(struct lxc_conf *conf); void lxc_clear_cgroup2_devices(struct lxc_conf *conf);
static inline void __do_bpf_program_free(struct bpf_program **prog) bool bpf_devices_cgroup_supported(void);
static inline void __auto_bpf_program_free__(struct bpf_program **prog)
{ {
if (*prog) { if (*prog) {
bpf_program_free(*prog); bpf_program_free(*prog);
*prog = NULL; *prog = NULL;
} }
} }
int bpf_list_add_device(struct lxc_conf *conf, struct device_item *device);
#else #else
static inline struct bpf_program *bpf_program_new(uint32_t prog_type) static inline struct bpf_program *bpf_program_new(uint32_t prog_type)
{ {
errno = ENOSYS;
return NULL; return NULL;
} }
static inline int bpf_program_init(struct bpf_program *prog) static inline int bpf_program_init(struct bpf_program *prog)
{ {
return -ENOSYS; errno = ENOSYS;
return -1;
} }
static inline int bpf_program_append_device(struct bpf_program *prog, char type, static inline int bpf_program_append_device(struct bpf_program *prog, char type,
int major, int minor, int major, int minor,
const char *access, int allow) const char *access, int allow)
{ {
return -ENOSYS; errno = ENOSYS;
return -1;
} }
static inline int bpf_program_finalize(struct bpf_program *prog) static inline int bpf_program_finalize(struct bpf_program *prog)
{ {
return -ENOSYS; errno = ENOSYS;
return -1;
} }
static inline int bpf_program_cgroup_attach(struct bpf_program *prog, int type, static inline int bpf_program_cgroup_attach(struct bpf_program *prog, int type,
const char *path, uint32_t flags) const char *path, uint32_t flags)
{ {
return -ENOSYS; errno = ENOSYS;
return -1;
} }
static inline int bpf_program_cgroup_detach(struct bpf_program *prog) static inline int bpf_program_cgroup_detach(struct bpf_program *prog)
{ {
return -ENOSYS; errno = ENOSYS;
return -1;
} }
static inline void bpf_program_free(struct bpf_program *prog) static inline void bpf_program_free(struct bpf_program *prog)
...@@ -136,9 +145,24 @@ static inline void lxc_clear_cgroup2_devices(struct lxc_conf *conf) ...@@ -136,9 +145,24 @@ static inline void lxc_clear_cgroup2_devices(struct lxc_conf *conf)
{ {
} }
static inline void __do_bpf_program_free(struct bpf_program **prog) static inline bool bpf_devices_cgroup_supported(void)
{
return false;
}
/*
 * No-op variant of the cleanup handler behind __do_bpf_program_free, used
 * when HAVE_STRUCT_BPF_CGROUP_DEV_CTX is not defined.
 */
static inline void __auto_bpf_program_free__(struct bpf_program **prog)
{
}
static inline int bpf_list_add_device(struct lxc_conf *conf,
struct device_item *device)
{ {
errno = ENOSYS;
return -1;
} }
#endif #endif
#define __do_bpf_program_free \
__attribute__((__cleanup__(__auto_bpf_program_free__)))
#endif /* __LXC_CGROUP2_DEVICES_H */ #endif /* __LXC_CGROUP2_DEVICES_H */
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include "af_unix.h" #include "af_unix.h"
#include "cgroup.h" #include "cgroup.h"
#include "cgroups/cgroup2_devices.h"
#include "commands.h" #include "commands.h"
#include "commands_utils.h" #include "commands_utils.h"
#include "conf.h" #include "conf.h"
...@@ -85,20 +86,21 @@ lxc_log_define(commands, lxc); ...@@ -85,20 +86,21 @@ lxc_log_define(commands, lxc);
static const char *lxc_cmd_str(lxc_cmd_t cmd) static const char *lxc_cmd_str(lxc_cmd_t cmd)
{ {
static const char *const cmdname[LXC_CMD_MAX] = { static const char *const cmdname[LXC_CMD_MAX] = {
[LXC_CMD_CONSOLE] = "console", [LXC_CMD_CONSOLE] = "console",
[LXC_CMD_TERMINAL_WINCH] = "terminal_winch", [LXC_CMD_TERMINAL_WINCH] = "terminal_winch",
[LXC_CMD_STOP] = "stop", [LXC_CMD_STOP] = "stop",
[LXC_CMD_GET_STATE] = "get_state", [LXC_CMD_GET_STATE] = "get_state",
[LXC_CMD_GET_INIT_PID] = "get_init_pid", [LXC_CMD_GET_INIT_PID] = "get_init_pid",
[LXC_CMD_GET_CLONE_FLAGS] = "get_clone_flags", [LXC_CMD_GET_CLONE_FLAGS] = "get_clone_flags",
[LXC_CMD_GET_CGROUP] = "get_cgroup", [LXC_CMD_GET_CGROUP] = "get_cgroup",
[LXC_CMD_GET_CONFIG_ITEM] = "get_config_item", [LXC_CMD_GET_CONFIG_ITEM] = "get_config_item",
[LXC_CMD_GET_NAME] = "get_name", [LXC_CMD_GET_NAME] = "get_name",
[LXC_CMD_GET_LXCPATH] = "get_lxcpath", [LXC_CMD_GET_LXCPATH] = "get_lxcpath",
[LXC_CMD_ADD_STATE_CLIENT] = "add_state_client", [LXC_CMD_ADD_STATE_CLIENT] = "add_state_client",
[LXC_CMD_CONSOLE_LOG] = "console_log", [LXC_CMD_CONSOLE_LOG] = "console_log",
[LXC_CMD_SERVE_STATE_CLIENTS] = "serve_state_clients", [LXC_CMD_SERVE_STATE_CLIENTS] = "serve_state_clients",
[LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER] = "seccomp_notify_add_listener", [LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER] = "seccomp_notify_add_listener",
[LXC_CMD_ADD_BPF_DEVICE_CGROUP] = "add_bpf_device_cgroup",
}; };
if (cmd >= LXC_CMD_MAX) if (cmd >= LXC_CMD_MAX)
...@@ -925,6 +927,118 @@ reap_client_fd: ...@@ -925,6 +927,118 @@ reap_client_fd:
return 1; return 1;
} }
/*
 * lxc_cmd_add_bpf_device_cgroup: ask the command server of a running container
 * to add a device rule to its bpf device cgroup program.
 *
 * @name    : name of the container to connect to
 * @lxcpath : the lxcpath in which the container is running
 * @device  : rule to add; it is sent to the server as a raw struct device_item
 *
 * Returns 0 when the server accepted the rule. On failure the value produced
 * by error_log_errno() is returned (EINVAL is used for a missing or malformed
 * @device). Without HAVE_STRUCT_BPF_CGROUP_DEV_CTX the call always fails via
 * minus_one_set_errno(ENOSYS).
 */
int lxc_cmd_add_bpf_device_cgroup(const char *name, const char *lxcpath,
				  struct device_item *device)
{
#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
	int stopped = 0;
	struct lxc_cmd_rr cmd = {
		.req = {
			.cmd		= LXC_CMD_ADD_BPF_DEVICE_CGROUP,
			.data		= device,
			.datalen	= sizeof(struct device_item),
		},
	};
	int ret;

	if (!device)
		return error_log_errno(EINVAL, "No device specified");

	/*
	 * access is a fixed-size array, so a string that is "too long" is one
	 * without a terminator inside the array. Search only within the array
	 * instead of calling strlen(), which would read past its end, and
	 * bound the log output for the same reason.
	 */
	if (!memchr(device->access, '\0', sizeof(device->access)))
		return error_log_errno(EINVAL, "Invalid access mode specified %.*s",
				       (int)sizeof(device->access), device->access);

	ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
	/*
	 * NOTE(review): when only cmd.rsp.ret reports the failure, errno was
	 * not set by this call and may hold a stale value.
	 */
	if (ret < 0 || cmd.rsp.ret < 0)
		return error_log_errno(errno, "Failed to add new bpf device cgroup rule");

	return 0;
#else
	return minus_one_set_errno(ENOSYS);
#endif
}
/*
 * Command-server handler for LXC_CMD_ADD_BPF_DEVICE_CGROUP.
 *
 * The request payload must be exactly one struct device_item. The rule is
 * added to conf->devices, a new bpf program is built from every entry of that
 * list and attached to the container's unified cgroup, and the previously
 * stored program is released.
 *
 * @fd      : client connection the response is written to
 * @req     : received request; req->data holds the device rule
 * @handler : handler of the running container (provides conf and cgroup_ops)
 * @descr   : unused
 *
 * Returns 0 after a response (rsp.ret is 0 on success, -1 on failure) has been
 * sent, and 1 to tell lxc_cmd_handler() to close the client fd when the
 * request is malformed or the response could not be sent. Without
 * HAVE_STRUCT_BPF_CGROUP_DEV_CTX it returns minus_one_set_errno(ENOSYS).
 */
static int lxc_cmd_add_bpf_device_cgroup_callback(int fd, struct lxc_cmd_req *req,
						  struct lxc_handler *handler,
						  struct lxc_epoll_descr *descr)
{
#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
	__do_bpf_program_free struct bpf_program *devices = NULL;
	struct lxc_cmd_rsp rsp = {0};
	struct lxc_conf *conf = handler->conf;
	struct hierarchy *unified = handler->cgroup_ops->unified;
	int ret;
	struct lxc_list *it;
	struct device_item *device;
	struct bpf_program *devices_old;

	/* Reject non-positive lengths before the comparison with sizeof(). */
	if (req->datalen <= 0)
		goto reap_client_fd;

	if (req->datalen != sizeof(struct device_item))
		goto reap_client_fd;

	if (!req->data)
		goto reap_client_fd;
	device = (struct device_item *)req->data;

	/*
	 * The payload comes from the client socket: make sure the access
	 * string is terminated inside its array before anything treats it as
	 * a C string.
	 */
	if (!memchr(device->access, '\0', sizeof(device->access)))
		goto reap_client_fd;

	/* From here on every failure is reported back to the client. */
	rsp.ret = -1;
	if (!unified)
		goto respond;

	ret = bpf_list_add_device(conf, device);
	if (ret < 0)
		goto respond;

	devices = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE);
	if (!devices)
		goto respond;

	ret = bpf_program_init(devices);
	if (ret)
		goto respond;

	/* Rebuild the whole program from the updated rule list. */
	lxc_list_for_each(it, &conf->devices) {
		struct device_item *cur = it->elem;

		ret = bpf_program_append_device(devices, cur);
		if (ret)
			goto respond;
	}

	ret = bpf_program_finalize(devices);
	if (ret)
		goto respond;

	ret = bpf_program_cgroup_attach(devices, BPF_CGROUP_DEVICE,
					unified->container_full_path,
					BPF_F_ALLOW_MULTI);
	if (ret)
		goto respond;

	/*
	 * Replace old bpf program: conf keeps the new one, and the old one is
	 * handed to "devices" so the cleanup attribute disposes of it on
	 * return.
	 */
	devices_old = move_ptr(conf->cgroup2_devices);
	conf->cgroup2_devices = move_ptr(devices);
	devices = move_ptr(devices_old);

	rsp.ret = 0;

respond:
	ret = lxc_cmd_rsp_send(fd, &rsp);
	if (ret < 0)
		goto reap_client_fd;

	return 0;

reap_client_fd:
	/* Special indicator to lxc_cmd_handler() to close the fd and do related
	 * cleanup.
	 */
	return 1;
#else
	return minus_one_set_errno(ENOSYS);
#endif
}
int lxc_cmd_console_log(const char *name, const char *lxcpath, int lxc_cmd_console_log(const char *name, const char *lxcpath,
struct lxc_console_log *log) struct lxc_console_log *log)
{ {
...@@ -1084,7 +1198,6 @@ static int lxc_cmd_seccomp_notify_add_listener_callback(int fd, ...@@ -1084,7 +1198,6 @@ static int lxc_cmd_seccomp_notify_add_listener_callback(int fd,
#ifdef HAVE_SECCOMP_NOTIFY #ifdef HAVE_SECCOMP_NOTIFY
int ret; int ret;
__do_close_prot_errno int recv_fd = -EBADF; __do_close_prot_errno int recv_fd = -EBADF;
int notify_fd = -EBADF;
ret = lxc_abstract_unix_recv_fds(fd, &recv_fd, 1, NULL, 0); ret = lxc_abstract_unix_recv_fds(fd, &recv_fd, 1, NULL, 0);
if (ret <= 0) { if (ret <= 0) {
...@@ -1105,7 +1218,7 @@ static int lxc_cmd_seccomp_notify_add_listener_callback(int fd, ...@@ -1105,7 +1218,7 @@ static int lxc_cmd_seccomp_notify_add_listener_callback(int fd,
rsp.ret = -errno; rsp.ret = -errno;
goto out; goto out;
} }
notify_fd = move_fd(recv_fd); move_fd(recv_fd);
out: out:
#else #else
...@@ -1123,20 +1236,21 @@ static int lxc_cmd_process(int fd, struct lxc_cmd_req *req, ...@@ -1123,20 +1236,21 @@ static int lxc_cmd_process(int fd, struct lxc_cmd_req *req,
struct lxc_epoll_descr *); struct lxc_epoll_descr *);
callback cb[LXC_CMD_MAX] = { callback cb[LXC_CMD_MAX] = {
[LXC_CMD_CONSOLE] = lxc_cmd_console_callback, [LXC_CMD_CONSOLE] = lxc_cmd_console_callback,
[LXC_CMD_TERMINAL_WINCH] = lxc_cmd_terminal_winch_callback, [LXC_CMD_TERMINAL_WINCH] = lxc_cmd_terminal_winch_callback,
[LXC_CMD_STOP] = lxc_cmd_stop_callback, [LXC_CMD_STOP] = lxc_cmd_stop_callback,
[LXC_CMD_GET_STATE] = lxc_cmd_get_state_callback, [LXC_CMD_GET_STATE] = lxc_cmd_get_state_callback,
[LXC_CMD_GET_INIT_PID] = lxc_cmd_get_init_pid_callback, [LXC_CMD_GET_INIT_PID] = lxc_cmd_get_init_pid_callback,
[LXC_CMD_GET_CLONE_FLAGS] = lxc_cmd_get_clone_flags_callback, [LXC_CMD_GET_CLONE_FLAGS] = lxc_cmd_get_clone_flags_callback,
[LXC_CMD_GET_CGROUP] = lxc_cmd_get_cgroup_callback, [LXC_CMD_GET_CGROUP] = lxc_cmd_get_cgroup_callback,
[LXC_CMD_GET_CONFIG_ITEM] = lxc_cmd_get_config_item_callback, [LXC_CMD_GET_CONFIG_ITEM] = lxc_cmd_get_config_item_callback,
[LXC_CMD_GET_NAME] = lxc_cmd_get_name_callback, [LXC_CMD_GET_NAME] = lxc_cmd_get_name_callback,
[LXC_CMD_GET_LXCPATH] = lxc_cmd_get_lxcpath_callback, [LXC_CMD_GET_LXCPATH] = lxc_cmd_get_lxcpath_callback,
[LXC_CMD_ADD_STATE_CLIENT] = lxc_cmd_add_state_client_callback, [LXC_CMD_ADD_STATE_CLIENT] = lxc_cmd_add_state_client_callback,
[LXC_CMD_CONSOLE_LOG] = lxc_cmd_console_log_callback, [LXC_CMD_CONSOLE_LOG] = lxc_cmd_console_log_callback,
[LXC_CMD_SERVE_STATE_CLIENTS] = lxc_cmd_serve_state_clients_callback, [LXC_CMD_SERVE_STATE_CLIENTS] = lxc_cmd_serve_state_clients_callback,
[LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER] = lxc_cmd_seccomp_notify_add_listener_callback, [LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER] = lxc_cmd_seccomp_notify_add_listener_callback,
[LXC_CMD_ADD_BPF_DEVICE_CGROUP] = lxc_cmd_add_bpf_device_cgroup_callback,
}; };
if (req->cmd >= LXC_CMD_MAX) { if (req->cmd >= LXC_CMD_MAX) {
......
...@@ -47,6 +47,7 @@ typedef enum { ...@@ -47,6 +47,7 @@ typedef enum {
LXC_CMD_CONSOLE_LOG, LXC_CMD_CONSOLE_LOG,
LXC_CMD_SERVE_STATE_CLIENTS, LXC_CMD_SERVE_STATE_CLIENTS,
LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER, LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER,
LXC_CMD_ADD_BPF_DEVICE_CGROUP,
LXC_CMD_MAX, LXC_CMD_MAX,
} lxc_cmd_t; } lxc_cmd_t;
...@@ -131,4 +132,8 @@ extern int lxc_cmd_seccomp_notify_add_listener(const char *name, ...@@ -131,4 +132,8 @@ extern int lxc_cmd_seccomp_notify_add_listener(const char *name,
/* unused */ unsigned int command, /* unused */ unsigned int command,
/* unused */ unsigned int flags); /* unused */ unsigned int flags);
struct device_item;
extern int lxc_cmd_add_bpf_device_cgroup(const char *name, const char *lxcpath,
struct device_item *device);
#endif /* __commands_h */ #endif /* __commands_h */
...@@ -2736,6 +2736,7 @@ struct lxc_conf *lxc_conf_init(void) ...@@ -2736,6 +2736,7 @@ struct lxc_conf *lxc_conf_init(void)
new->logfd = -1; new->logfd = -1;
lxc_list_init(&new->cgroup); lxc_list_init(&new->cgroup);
lxc_list_init(&new->cgroup2); lxc_list_init(&new->cgroup2);
lxc_list_init(&new->devices);
lxc_list_init(&new->network); lxc_list_init(&new->network);
lxc_list_init(&new->mount_list); lxc_list_init(&new->mount_list);
lxc_list_init(&new->caps); lxc_list_init(&new->caps);
...@@ -3883,6 +3884,17 @@ int lxc_clear_cgroups(struct lxc_conf *c, const char *key, int version) ...@@ -3883,6 +3884,17 @@ int lxc_clear_cgroups(struct lxc_conf *c, const char *key, int version)
return 0; return 0;
} }
/*
 * Remove and free every entry of conf->devices, leaving the list empty.
 *
 * Both the list node and the element it points to are released; freeing only
 * the node would leak the element.
 * NOTE(review): this assumes each node's elem is a heap-allocated
 * struct device_item owned exclusively by this list - confirm against
 * bpf_list_add_device() before merging.
 */
static void lxc_clear_devices(struct lxc_conf *conf)
{
	struct lxc_list *list = &conf->devices;
	struct lxc_list *it, *next;

	lxc_list_for_each_safe(it, list, next) {
		lxc_list_del(it);
		free(it->elem);
		free(it);
	}
}
int lxc_clear_limits(struct lxc_conf *c, const char *key) int lxc_clear_limits(struct lxc_conf *c, const char *key)
{ {
struct lxc_list *it, *next; struct lxc_list *it, *next;
...@@ -4119,6 +4131,7 @@ void lxc_conf_free(struct lxc_conf *conf) ...@@ -4119,6 +4131,7 @@ void lxc_conf_free(struct lxc_conf *conf)
lxc_clear_config_keepcaps(conf); lxc_clear_config_keepcaps(conf);
lxc_clear_cgroups(conf, "lxc.cgroup", CGROUP_SUPER_MAGIC); lxc_clear_cgroups(conf, "lxc.cgroup", CGROUP_SUPER_MAGIC);
lxc_clear_cgroups(conf, "lxc.cgroup2", CGROUP2_SUPER_MAGIC); lxc_clear_cgroups(conf, "lxc.cgroup2", CGROUP2_SUPER_MAGIC);
lxc_clear_devices(conf);
lxc_clear_cgroup2_devices(conf); lxc_clear_cgroup2_devices(conf);
lxc_clear_hooks(conf, "lxc.hook"); lxc_clear_hooks(conf, "lxc.hook");
lxc_clear_mount_entries(conf); lxc_clear_mount_entries(conf);
......
...@@ -230,6 +230,26 @@ struct lxc_state_client { ...@@ -230,6 +230,26 @@ struct lxc_state_client {
lxc_state_t states[MAX_STATE]; lxc_state_t states[MAX_STATE];
}; };
/* Kinds of bpf device cgroup rule, listed in ascending numeric order. */
enum {
	LXC_BPF_DEVICE_CGROUP_LOCAL_RULE	= -1,
	LXC_BPF_DEVICE_CGROUP_WHITELIST		= 0,
	LXC_BPF_DEVICE_CGROUP_BLACKLIST		= 1,
};
struct device_item {
char type;
int major;
int minor;
char access[4];
int allow;
/*
* LXC_BPF_DEVICE_CGROUP_LOCAL_RULE -> no global rule
* LXC_BPF_DEVICE_CGROUP_WHITELIST -> whitelist (deny all)
* LXC_BPF_DEVICE_CGROUP_BLACKLIST -> blacklist (allow all)
*/
int global_rule;
};
struct lxc_conf { struct lxc_conf {
/* Pointer to the name of the container. Do not free! */ /* Pointer to the name of the container. Do not free! */
const char *name; const char *name;
...@@ -242,6 +262,8 @@ struct lxc_conf { ...@@ -242,6 +262,8 @@ struct lxc_conf {
struct lxc_list cgroup; struct lxc_list cgroup;
struct lxc_list cgroup2; struct lxc_list cgroup2;
struct bpf_program *cgroup2_devices; struct bpf_program *cgroup2_devices;
/* This should be reimplemented as a hashmap. */
struct lxc_list devices;
}; };
struct { struct {
......
...@@ -512,6 +512,19 @@ ATTR_UNUSED static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo, \ ...@@ -512,6 +512,19 @@ ATTR_UNUSED static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo, \
-1; \ -1; \
}) })
/*
 * Log a message through TRACE() and evaluate to @__ret__, so a caller can log
 * and return a value in one statement.
 */
#define log_trace(__ret__, format, ...)                \
	({                                             \
		TRACE(format, ##__VA_ARGS__);          \
		(__ret__);                             \
	})
/*
 * Set errno to @__errno__, log a message through SYSERROR() and evaluate to
 * @__ret__, so a caller can report a failure and return in one statement.
 */
#define log_error(__ret__, __errno__, format, ...)      \
	({                                              \
		errno = (__errno__);                    \
		SYSERROR(format, ##__VA_ARGS__);        \
		(__ret__);                              \
	})
extern int lxc_log_fd; extern int lxc_log_fd;
extern int lxc_log_syslog(int facility); extern int lxc_log_syslog(int facility);
......
...@@ -460,6 +460,14 @@ enum { ...@@ -460,6 +460,14 @@ enum {
-1; \ -1; \
}) })
/*
 * free_replace_move_ptr(a, b): free what @a points to, move the pointer held
 * in @b into @a and set @b to NULL. Evaluates to 0.
 *
 * Both arguments must be modifiable pointer lvalues. Each one is evaluated
 * exactly once (its address is captured first), so arguments with side
 * effects such as "slots[i++]" behave as written.
 */
#define free_replace_move_ptr(a, b)                          \
	({                                                   \
		__typeof__(a) *__frmp_dst = &(a);            \
		__typeof__(b) *__frmp_src = &(b);            \
		free(*__frmp_dst);                           \
		*__frmp_dst = *__frmp_src;                   \
		*__frmp_src = NULL;                          \
		0;                                           \
	})
/* Container's specific file/directory names */ /* Container's specific file/directory names */
#define LXC_CONFIG_FNAME "config" #define LXC_CONFIG_FNAME "config"
#define LXC_PARTIAL_FNAME "partial" #define LXC_PARTIAL_FNAME "partial"
...@@ -467,77 +475,6 @@ enum { ...@@ -467,77 +475,6 @@ enum {
#define LXC_TIMESTAMP_FNAME "ts" #define LXC_TIMESTAMP_FNAME "ts"
#define LXC_COMMENT_FNAME "comment" #define LXC_COMMENT_FNAME "comment"
/* Taken from systemd. */
#define free_and_replace(a, b) \
({ \
free(a); \
(a) = (b); \
(b) = NULL; \
0; \
})
#define XCONCATENATE(x, y) x##y
#define CONCATENATE(x, y) XCONCATENATE(x, y)
#define UNIQ_T(x, uniq) CONCATENATE(__unique_prefix_, CONCATENATE(x, uniq))
#define UNIQ __COUNTER__
#undef MIN
#define MIN(a, b) __MIN(UNIQ, (a), UNIQ, (b))
#define __MIN(aq, a, bq, b) \
({ \
const typeof(a) UNIQ_T(A, aq) = (a); \
const typeof(b) UNIQ_T(B, bq) = (b); \
UNIQ_T(A, aq) < UNIQ_T(B, bq) ? UNIQ_T(A, aq) : UNIQ_T(B, bq); \
})
/* Taken from the kernel. */
/*
* min()/max()/clamp() macros must accomplish three things:
*
* - avoid multiple evaluations of the arguments (so side-effects like
* "x++" happen only once) when non-constant.
* - perform strict type-checking (to generate warnings instead of
* nasty runtime surprises). See the "unnecessary" pointer comparison
* in __typecheck().
* - retain result as a constant expressions when called with only
* constant expressions (to avoid tripping VLA warnings in stack
* allocation usage).
*/
#define __typecheck(x, y) (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
/*
* This returns a constant expression while determining if an argument is
* a constant expression, most importantly without evaluating the argument.
* Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de>
*/
#define __is_constexpr(x) \
(sizeof(int) == sizeof(*(8 ? ((void *)((long)(x)*0l)) : (int *)8)))
#define __no_side_effects(x, y) (__is_constexpr(x) && __is_constexpr(y))
#define __safe_cmp(x, y) (__typecheck(x, y) && __no_side_effects(x, y))
#define __cmp(x, y, op) ((x)op(y) ? (x) : (y))
#define __cmp_once(x, y, unique_x, unique_y, op) \
({ \
typeof(x) unique_x = (x); \
typeof(y) unique_y = (y); \
__cmp(unique_x, unique_y, op); \
})
#define __careful_cmp(x, y, op) \
__builtin_choose_expr(__safe_cmp(x, y), __cmp(x, y, op), \
__cmp_once(x, y, __UNIQUE_ID(__x), \
__UNIQUE_ID(__y), op))
/**
* min - return minimum of two values of the same or compatible types
* @x: first value
* @y: second value
*/
#define min(x, y) __careful_cmp(x, y, <)
#define ARRAY_SIZE(x) \ #define ARRAY_SIZE(x) \
(__builtin_choose_expr(!__builtin_types_compatible_p(typeof(x), \ (__builtin_choose_expr(!__builtin_types_compatible_p(typeof(x), \
typeof(&*(x))), \ typeof(&*(x))), \
......
...@@ -71,4 +71,12 @@ static inline void __auto_close__(int *fd) ...@@ -71,4 +71,12 @@ static inline void __auto_close__(int *fd)
#define __do_fclose __attribute__((__cleanup__(__auto_fclose__))) #define __do_fclose __attribute__((__cleanup__(__auto_fclose__)))
#define __do_closedir __attribute__((__cleanup__(__auto_closedir__))) #define __do_closedir __attribute__((__cleanup__(__auto_closedir__)))
/*
 * memdup: return a newly allocated copy of the first @len bytes of @data.
 *
 * @data : source buffer
 * @len  : number of bytes to copy
 *
 * Returns NULL when @data is NULL, when @len is 0, or when the allocation
 * fails. The caller owns the returned buffer and releases it with free().
 */
static inline void *memdup(const void *data, size_t len)
{
	void *copy;

	/* Nothing to duplicate; also keeps a NULL source away from memcpy(). */
	if (!data || len == 0)
		return NULL;

	copy = malloc(len);
	if (!copy)
		return NULL;

	return memcpy(copy, data, len);
}
#endif /* __LXC_MEMORY_UTILS_H */ #endif /* __LXC_MEMORY_UTILS_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment