Merge pull request #3684 from brauner/2021-02-18/fixes_2

bpf: device cgroup improvements

Merge pull request #3684 from brauner/2021-02-18/fixes_2
c33840f6 · Stéphane Graber · GitHub · 02882d83 · 54dbe498 · c33840f6
Unverified Commit c33840f6 authored Feb 19, 2021 by Stéphane Graber Committed by GitHub Feb 19, 2021
7 changed files
--- a/doc/lxc.container.conf.sgml.in
+++ b/doc/lxc.container.conf.sgml.in
@@ -1518,7 +1518,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
    </refsect2>
    <refsect2>
-      <title>Control group</title>
+      <title>Control groups ("cgroups")</title>
      <para>
        The control group section contains the configuration for the
        different subsystem. <command>lxc</command> does not check the
@@ -1527,10 +1527,195 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
        started, but has the advantage of permitting any future
        subsystem.
      </para>
+      <para>
+	The kernel implementation of cgroups has changed significantly over the
+	years. With Linux 4.5, support for a new cgroup filesystem was added,
+	usually referred to as "cgroup2" or "unified hierarchy". Since then the
+	old cgroup filesystem is usually referred to as "cgroup1" or the
+	"legacy hierarchies". Please see the cgroups manual page for a detailed
+	explanation of the differences between the two versions.
+      </para>
+      <para>
+	LXC distinguishes settings for the legacy and the unified hierarchy by
+	using different configuration key prefixes. To alter settings for
+	controllers in a legacy hierarchy the key prefix
+	<option>lxc.cgroup.</option> must be used and in order to alter the
+	settings for a controller in the unified hierarchy the
+	<option>lxc.cgroup2.</option> key prefix must be used. Note that LXC will
+	ignore <option>lxc.cgroup.</option> settings on systems that only use
+	the unified hierarchy. Conversely, it will ignore
+	<option>lxc.cgroup2.</option> options on systems that only use legacy
+	hierarchies.
+      </para>
+      <para>
+	At its core a cgroup hierarchy is a way to hierarchically organize
+	processes. Usually a cgroup hierarchy will have one or more
+	"controllers" enabled. A "controller" in a cgroup hierarchy is usually
+	responsible for distributing a specific type of system resource along
+	the hierarchy. Controllers include the "pids" controller, the "cpu"
+	controller, the "memory" controller and others. Some controllers
+	however do not fall into the category of distributing a system
+	resource, instead they are often referred to as "utility" controllers.
+	One utility controller is the device controller. Instead of
+	distributing a system resource, it allows managing device access.
+      </para>
+      <para>
+	In the legacy hierarchy the device controller was implemented like most
+	other controllers as a set of files that could be written to. These
+	files were named "devices.allow" and "devices.deny". The legacy device
+	controller allowed the implementation of both "allowlists" and
+	"denylists".
+      </para>
+      <para>
+	An allowlist is a device program that by default blocks access to all
+	devices. In order to access specific devices "allow rules" for
+	particular devices or device classes must be specified. In contrast, a
+	denylist is a device program that by default allows access to all
+	devices. In order to restrict access to specific devices "deny rules"
+	for particular devices or device classes must be specified.
+      </para>
+      <para>
+	In the unified cgroup hierarchy the implementation of the device
+	controller has completely changed. Instead of files to read from and
+	write to, an eBPF program of type
+	<option>BPF_PROG_TYPE_CGROUP_DEVICE</option> can be attached to a
+	cgroup. Even though the kernel implementation has changed completely
+	LXC tries to allow for the same semantics to be followed in the legacy
+	device cgroup and the unified eBPF-based device controller. The
+	following paragraphs explain the semantics for the unified eBPF-based
+	device controller.
+      </para>
+      <para>
+	As mentioned the format for specifying device rules for the unified
+	eBPF-based device controller is the same as for the legacy cgroup
+	device controller; only the configuration key prefix has changed.
+	Specifically, device rules for the legacy cgroup device controller are
+	specified via <option>lxc.cgroup.devices.allow</option> and
+	<option>lxc.cgroup.devices.deny</option> whereas for the
+	cgroup2 eBPF-based device controller
+	<option>lxc.cgroup2.devices.allow</option> and
+	<option>lxc.cgroup2.devices.deny</option> must be used.
+      </para>
+      <para>
+        <itemizedlist>
+          <listitem>
+	    <para>
+	      An allowlist device rule
+              <programlisting>
+	        lxc.cgroup2.devices.deny = a
+              </programlisting>
+	      will cause LXC to instruct the kernel to block access to all
+	      devices by default. To grant access to devices, allow device rules
+	      must be added via the <option>lxc.cgroup2.devices.allow</option>
+	      key. This is referred to as an "allowlist" device program.
+	    </para>
+	  </listitem>
+          <listitem>
+	    <para>
+	      A denylist device rule
+              <programlisting>
+	        lxc.cgroup2.devices.allow = a
+              </programlisting>
+	      will cause LXC to instruct the kernel to allow access to all
+	      devices by default. To deny access to devices, deny device rules
+	      must be added via the <option>lxc.cgroup2.devices.deny</option> key.
+	      This is referred to as a "denylist" device program.
+	    </para>
+	  </listitem>
+          <listitem>
+	    <para>
+	      Specifying any of the aforementioned two rules will cause all
+	      previous rules to be cleared, i.e. the device list will be reset.
+	    </para>
+	  </listitem>
+          <listitem>
+	    <para>
+	    When an allowlist program is requested, i.e. access to all devices
+	    is blocked by default, specific deny rules for individual devices
+	    or device classes are ignored.
+	    </para>
+	  </listitem>
+          <listitem>
+	    <para>
+	    When a denylist program is requested, i.e. access to all devices
+	    is allowed by default, specific allow rules for individual devices
+	    or device classes are ignored.
+	    </para>
+	  </listitem>
+        </itemizedlist>
+      </para>
+      <para>
+        For example the set of rules:
+        <programlisting>
+          lxc.cgroup2.devices.deny = a
+          lxc.cgroup2.devices.allow = c *:* m
+          lxc.cgroup2.devices.allow = b *:* m
+          lxc.cgroup2.devices.allow = c 1:3 rwm
+        </programlisting>
+	implements an allowlist device program, i.e. the kernel will block
+	access to all devices not specifically allowed in this list. This
+	particular program states that all character and block devices may be
+	created but only /dev/null may be read or written.
+      </para>
+      <para>
+        If we instead switch to the following set of rules:
+        <programlisting>
+          lxc.cgroup2.devices.allow = a
+          lxc.cgroup2.devices.deny = c *:* m
+          lxc.cgroup2.devices.deny = b *:* m
+          lxc.cgroup2.devices.deny = c 1:3 rwm
+        </programlisting>
+	 then LXC would instruct the kernel to implement a denylist, i.e. the
+	 kernel will allow access to all devices not specifically denied in
+	 this list. This particular program states that no character devices or
+	 block devices may be created and that /dev/null is not allowed
+	 to be read, written, or created.
+      </para>
+      <para>
+	 Now consider the same program but followed by a "global rule"
+	 which determines the type of device program (allowlist or
+	 denylist) as explained above:
+        <programlisting>
+          lxc.cgroup2.devices.allow = a
+          lxc.cgroup2.devices.deny = c *:* m
+          lxc.cgroup2.devices.deny = b *:* m
+          lxc.cgroup2.devices.deny = c 1:3 rwm
+          lxc.cgroup2.devices.allow = a
+        </programlisting>
+	The last line will cause LXC to reset the device list without changing
+	the type of device program.
+      </para>
+      <para>
+	If we specify:
+        <programlisting>
+          lxc.cgroup2.devices.allow = a
+          lxc.cgroup2.devices.deny = c *:* m
+          lxc.cgroup2.devices.deny = b *:* m
+          lxc.cgroup2.devices.deny = c 1:3 rwm
+          lxc.cgroup2.devices.deny = a
+        </programlisting>
+	instead, then the last line will cause LXC to reset the device list and
+	switch from a denylist program to an allowlist program.
+      </para>
      <variablelist>
        <varlistentry>
          <term>
-            <option>lxc.cgroup.[controller name]</option>
+            <option>lxc.cgroup.[controller name].[controller file]</option>
          </term>
          <listitem>
            <para>
@@ -1545,7 +1730,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
        </varlistentry>
        <varlistentry>
          <term>
-            <option>lxc.cgroup2.[controller name]</option>
+            <option>lxc.cgroup2.[controller name].[controller file]</option>
          </term>
          <listitem>
            <para>

--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -504,7 +504,7 @@ static int add_hierarchy(struct cgroup_ops *ops, char **clist, char *mountpoint,
 	int idx;
 	if (abspath(container_base_path))
-		return syserrno(-errno, "Container base path must be relative to controller mount");
+		return syserrno_set(-EINVAL, "Container base path must be relative to controller mount");
 	if (!controllers && type != CGROUP2_SUPER_MAGIC)
 		return syserrno_set(-EINVAL, "Empty controller list for non-unified cgroup hierarchy passed");
@@ -2773,19 +2773,9 @@ static int device_cgroup_rule_parse(struct device_item *device, const char *key,
 		device->type = 'a';
 		device->major = -1;
 		device->minor = -1;
-		if (device->allow) /* allow all devices */
-			device->global_rule = LXC_BPF_DEVICE_CGROUP_DENYLIST;
-		else /* deny all devices */
-			device->global_rule = LXC_BPF_DEVICE_CGROUP_ALLOWLIST;
-		device->allow = -1;
 		return 0;
 	}
-	/* local rule */
-	device->global_rule = LXC_BPF_DEVICE_CGROUP_LOCAL_RULE;
 	switch (*val) {
 	case 'a':
 		__fallthrough;
@@ -2968,7 +2958,6 @@ static int device_cgroup_rule_parse_devpath(struct device_item *device,
 	device->major = MAJOR(sb.st_rdev);
 	device->minor = MINOR(sb.st_rdev);
 	device->allow = 1;
-	device->global_rule = LXC_BPF_DEVICE_CGROUP_LOCAL_RULE;
 	return 0;
 }
@@ -3099,16 +3088,22 @@ static int bpf_device_cgroup_prepare(struct cgroup_ops *ops,
 	struct device_item device_item = {};
 	int ret;
-	if (strequal("devices.allow", key) && *val == '/')
+	if (strequal("devices.allow", key) && abspath(val))
 		ret = device_cgroup_rule_parse_devpath(&device_item, val);
 	else
 		ret = device_cgroup_rule_parse(&device_item, key, val);
 	if (ret < 0)
-		return log_error_errno(-1, EINVAL, "Failed to parse device string %s=%s", key, val);
+		return syserrno_set(EINVAL, "Failed to parse device rule %s=%s", key, val);
-	ret = bpf_list_add_device(&conf->devices, &device_item);
+	/*
+	 * Note that bpf_list_add_device() returns 1 if it altered the device
+	 * list and 0 if it didn't; both return values indicate success.
+	 * Only a negative return value indicates an error.
+	 */
+	ret = bpf_list_add_device(&conf->bpf_devices, &device_item);
 	if (ret < 0)
 		return -1;
 	return 0;
 }
@@ -3180,10 +3175,11 @@ __cgfsng_ops static bool cgfsng_devices_activate(struct cgroup_ops *ops, struct 
 	unified = ops->unified;
 	if (!unified || !unified->bpf_device_controller ||
-	    !unified->container_full_path || lxc_list_empty(&conf->devices))
+	    !unified->container_full_path ||
+	    lxc_list_empty(&(conf->bpf_devices).device_item))
 		return true;
-	return bpf_cgroup_devices_attach(ops, &conf->devices);
+	return bpf_cgroup_devices_attach(ops, &conf->bpf_devices);
 }
 static bool __cgfsng_delegate_controllers(struct cgroup_ops *ops, const char *cgroup)

--- a/src/lxc/cgroups/cgroup2_devices.c
+++ b/src/lxc/cgroups/cgroup2_devices.c
@@ -211,12 +211,6 @@ int bpf_program_append_device(struct bpf_program *prog, struct device_item *devi
 	if (!prog || !device)
 		return ret_set_errno(-1, EINVAL);
-	/* This is a global rule so no need to append anything. */
-	if (device->global_rule > LXC_BPF_DEVICE_CGROUP_LOCAL_RULE) {
-		prog->device_list_type = device->global_rule;
-		return 0;
-	}
 	ret = bpf_access_mask(device->access, &access_mask);
 	if (ret < 0)
 		return log_error_errno(ret, -ret, "Invalid access mask specified %s", device->access);
@@ -296,10 +290,10 @@ int bpf_program_finalize(struct bpf_program *prog)
 	if (!prog)
 		return ret_set_errno(-1, EINVAL);
-	TRACE("Implementing %s bpf device cgroup program",
+	TRACE("Device bpf program %s all devices by default",
-	      prog->device_list_type == LXC_BPF_DEVICE_CGROUP_DENYLIST
+	      prog->device_list_type == LXC_BPF_DEVICE_CGROUP_ALLOWLIST
-		  ? "denylist"
+		  ? "blocks"
-		  : "allowlist");
+		  : "allows");
 	ins[0] = BPF_MOV64_IMM(BPF_REG_0, prog->device_list_type);
 	ins[1] = BPF_EXIT_INSN();
@@ -436,31 +430,61 @@ void bpf_device_program_free(struct cgroup_ops *ops)
 	}
 }
-int bpf_list_add_device(struct lxc_list *devices, struct device_item *device)
+static inline bool bpf_device_list_block_all(const struct bpf_devices *bpf_devices)
+{
+	/* LXC_BPF_DEVICE_CGROUP_ALLOWLIST  -> block ("allowlist") all devices. */
+	return bpf_devices->list_type == LXC_BPF_DEVICE_CGROUP_ALLOWLIST;
+}
+static inline bool bpf_device_add(const struct bpf_devices *bpf_devices,
+				  struct device_item *device)
+{
+	/* We're blocking all devices so skip individual deny rules. */
+	if (bpf_device_list_block_all(bpf_devices) && !device->allow)
+		return log_trace(false, "Device cgroup blocks all devices; skipping specific deny rules");
+	/* We're allowing all devices so skip individual allow rules. */
+	if (!bpf_device_list_block_all(bpf_devices) && device->allow)
+		return log_trace(false, "Device cgroup allows all devices; skipping specific allow rules");
+	return true;
+}
+int bpf_list_add_device(struct bpf_devices *bpf_devices,
+			struct device_item *device)
 {
 	__do_free struct lxc_list *list_elem = NULL;
 	__do_free struct device_item *new_device = NULL;
 	struct lxc_list *it;
-	if (!devices || !device)
+	if (!bpf_devices || !device)
 		return ret_errno(EINVAL);
-	lxc_list_for_each(it, devices) {
+	/* Check whether this determines the list type. */
-		struct device_item *cur = it->elem;
+	if (device->type == 'a' &&
+	    device->major < 0 &&
-		if (cur->global_rule > LXC_BPF_DEVICE_CGROUP_LOCAL_RULE &&
+	    device->minor < 0 &&
-		    device->global_rule > LXC_BPF_DEVICE_CGROUP_LOCAL_RULE) {
+	    is_empty_string(device->access)) {
-			TRACE("Switched from %s to %s",
+		if (device->allow) {
-			      cur->global_rule == LXC_BPF_DEVICE_CGROUP_ALLOWLIST
+			bpf_devices->list_type = LXC_BPF_DEVICE_CGROUP_DENYLIST;
-				  ? "allowlist"
+			TRACE("Device cgroup will allow (\"denylist\") all devices by default");
-				  : "denylist",
+		} else {
-			      device->global_rule == LXC_BPF_DEVICE_CGROUP_ALLOWLIST
+			bpf_devices->list_type = LXC_BPF_DEVICE_CGROUP_ALLOWLIST;
-				  ? "allowlist"
+			TRACE("Device cgroup will block (\"allowlist\") all devices by default");
-				  : "denylist");
-			cur->global_rule = device->global_rule;
-			return 1;
 		}
+		/* Reset the device list. */
+		lxc_clear_cgroup2_devices(bpf_devices);
+		TRACE("Resetting cgroup device list");
+		return 1; /* The device list was altered. */
+	}
+	TRACE("Processing new device rule: type %c, major %d, minor %d, access %s, allow %d",
+	      device->type, device->major, device->minor, device->access, device->allow);
+	lxc_list_for_each(it, &bpf_devices->device_item) {
+		struct device_item *cur = it->elem;
 		if (cur->type != device->type)
 			continue;
 		if (cur->major != device->major)
@@ -470,35 +494,35 @@ int bpf_list_add_device(struct lxc_list *devices, struct device_item *device)
 		if (!strequal(cur->access, device->access))
 			continue;
+		if (!bpf_device_add(bpf_devices, cur))
+			continue;
 		/*
 		 * The rule is switched from allow to deny or vica versa so
 		 * don't bother allocating just flip the existing one.
 		 */
 		if (cur->allow != device->allow) {
 			cur->allow = device->allow;
-			return log_trace(0, "Switched existing rule of bpf device program: type %c, major %d, minor %d, access %s, allow %d, global_rule %d",
-					 cur->type, cur->major, cur->minor,
+			return log_trace(1, "Switched existing device rule"); /* The device list was altered. */
-					 cur->access, cur->allow,
-					 cur->global_rule);
 		}
-		return log_trace(1, "Reusing existing rule of bpf device program: type %c, major %d, minor %d, access %s, allow %d, global_rule %d",
-				 cur->type, cur->major, cur->minor, cur->access,
+		return log_trace(0, "Reused existing device rule"); /* The device list wasn't altered. */
-				 cur->allow, cur->global_rule);
 	}
 	list_elem = malloc(sizeof(*list_elem));
 	if (!list_elem)
-		return log_error_errno(-1, ENOMEM, "Failed to allocate new device list");
+		return syserrno_set(ENOMEM, "Failed to allocate new device list");
 	new_device = memdup(device, sizeof(struct device_item));
 	if (!new_device)
-		return log_error_errno(-1, ENOMEM, "Failed to allocate new device item");
+		return syserrno_set(ENOMEM, "Failed to allocate new device item");
 	lxc_list_add_elem(list_elem, move_ptr(new_device));
-	lxc_list_add_tail(devices, move_ptr(list_elem));
+	lxc_list_add_tail(&bpf_devices->device_item, move_ptr(list_elem));
-	return 0;
+	return log_trace(1, "Added new device rule"); /* The device list was altered. */
 }
 bool bpf_devices_cgroup_supported(void)
@@ -533,7 +557,7 @@ bool bpf_devices_cgroup_supported(void)
 	return log_trace(true, "The bpf device cgroup is supported");
 }
-static struct bpf_program *__bpf_cgroup_devices(struct lxc_list *devices)
+static struct bpf_program *__bpf_cgroup_devices(struct bpf_devices *bpf_devices)
 {
 	__do_bpf_program_free struct bpf_program *prog = NULL;
 	int ret;
@@ -547,41 +571,40 @@ static struct bpf_program *__bpf_cgroup_devices(struct lxc_list *devices)
 	if (ret)
 		return syserrno(NULL, "Failed to initialize bpf program");
-	bpf_device_set_type(prog, devices);
+	prog->device_list_type = bpf_devices->list_type;
-	TRACE("Device bpf %s all devices by default",
+	TRACE("Device cgroup %s all devices by default",
-	      bpf_device_block_all(prog) ? "blocks" : "allows");
+	      bpf_device_list_block_all(bpf_devices) ? "blocks" : "allows");
-	lxc_list_for_each(it, devices) {
+	lxc_list_for_each(it, &bpf_devices->device_item) {
 		struct device_item *cur = it->elem;
-		if (!bpf_device_add(prog, cur)) {
+		TRACE("Processing device rule: type %c, major %d, minor %d, access %s, allow %d",
-			TRACE("Skipping rule: type %c, major %d, minor %d, access %s, allow %d",
+		      cur->type, cur->major, cur->minor, cur->access, cur->allow);
-			      cur->type, cur->major, cur->minor, cur->access, cur->allow);
+		if (!bpf_device_add(bpf_devices, cur))
 			continue;
-		}
 		ret = bpf_program_append_device(prog, cur);
 		if (ret)
-			return syserrno(NULL, "Failed adding rule: type %c, major %d, minor %d, access %s, allow %d",
+			return syserrno(NULL, "Failed adding new device rule");
-					cur->type, cur->major, cur->minor, cur->access, cur->allow);
-		TRACE("Added rule to bpf device program: type %c, major %d, minor %d, access %s, allow %d",
+		TRACE("Added new device rule");
-		      cur->type, cur->major, cur->minor, cur->access, cur->allow);
 	}
 	ret = bpf_program_finalize(prog);
 	if (ret)
-		return syserrno(NULL, "Failed to finalize bpf program");
+		return syserrno(NULL, "Failed to finalize device program");
 	return move_ptr(prog);
 }
-bool bpf_cgroup_devices_attach(struct cgroup_ops *ops, struct lxc_list *devices)
+bool bpf_cgroup_devices_attach(struct cgroup_ops *ops,
+			       struct bpf_devices *bpf_devices)
 {
 	__do_bpf_program_free struct bpf_program *prog = NULL;
 	int ret;
-	prog = __bpf_cgroup_devices(devices);
+	prog = __bpf_cgroup_devices(bpf_devices);
 	if (!prog)
 		return syserrno(false, "Failed to create bpf program");
@@ -597,8 +620,8 @@ bool bpf_cgroup_devices_attach(struct cgroup_ops *ops, struct lxc_list *devices)
 }
 bool bpf_cgroup_devices_update(struct cgroup_ops *ops,
-			       struct device_item *new,
+			       struct bpf_devices *bpf_devices,
-			       struct lxc_list *devices)
+			       struct device_item *new)
 {
 	__do_bpf_program_free struct bpf_program *prog = NULL;
 	static int can_use_bpf_replace = -1;
@@ -615,16 +638,24 @@ bool bpf_cgroup_devices_update(struct cgroup_ops *ops,
 	if (ops->unified->cgfd_limit < 0)
 		return ret_set_errno(false, EBADF);
-	ret = bpf_list_add_device(devices, new);
+	/*
+	 * Note that bpf_list_add_device() returns 1 if it altered the device
+	 * list and 0 if it didn't; both return values indicate success.
+	 * Only a negative return value indicates an error.
+	 */
+	ret = bpf_list_add_device(bpf_devices, new);
 	if (ret < 0)
 		return false;
+	if (ret == 0)
+		return log_trace(true, "Device bpf program unaltered");
 	/* No previous device program attached. */
 	prog_old = ops->cgroup2_devices;
 	if (!prog_old)
-		return bpf_cgroup_devices_attach(ops, devices);
+		return bpf_cgroup_devices_attach(ops, bpf_devices);
-	prog = __bpf_cgroup_devices(devices);
+	prog = __bpf_cgroup_devices(bpf_devices);
 	if (!prog)
 		return syserrno(false, "Failed to create bpf program");

--- a/src/lxc/cgroups/cgroup2_devices.h
+++ b/src/lxc/cgroups/cgroup2_devices.h
@@ -48,41 +48,6 @@ struct bpf_program {
 	__u32 attached_flags;
 };
-static inline bool bpf_device_block_all(const struct bpf_program *prog)
-{
-	/* LXC_BPF_DEVICE_CGROUP_ALLOWLIST  -> allowlist (deny all) */
-	return prog->device_list_type == LXC_BPF_DEVICE_CGROUP_ALLOWLIST;
-}
-static inline bool bpf_device_add(const struct bpf_program *prog,
-				  struct device_item *device)
-{
-	if (device->global_rule > LXC_BPF_DEVICE_CGROUP_LOCAL_RULE)
-		return false;
-	/* We're blocking all devices so skip individual deny rules. */
-	if (bpf_device_block_all(prog) && !device->allow)
-		return false;
-	/* We're allowing all devices so skip individual allow rules. */
-	if (!bpf_device_block_all(prog) && device->allow)
-		return false;
-	return true;
-}
-static inline void bpf_device_set_type(struct bpf_program *prog,
-				       struct lxc_list *devices)
-{
-	struct lxc_list *it;
-	lxc_list_for_each (it, devices) {
-		struct device_item *cur = it->elem;
-		if (cur->global_rule > LXC_BPF_DEVICE_CGROUP_LOCAL_RULE)
-			prog->device_list_type = cur->global_rule;
-	}
-}
 __hidden extern struct bpf_program *bpf_program_new(__u32 prog_type);
 __hidden extern int bpf_program_init(struct bpf_program *prog);
 __hidden extern int bpf_program_append_device(struct bpf_program *prog, struct device_item *device);
@@ -91,13 +56,18 @@ __hidden extern int bpf_program_cgroup_detach(struct bpf_program *prog);
 __hidden extern void bpf_device_program_free(struct cgroup_ops *ops);
 __hidden extern bool bpf_devices_cgroup_supported(void);
-__hidden extern int bpf_list_add_device(struct lxc_list *devices,
+/*
+ * Note that bpf_list_add_device() returns 1 if it altered the device list and
+ * 0 if it didn't; both return values indicate success. Only a negative return
+ * value indicates an error.
+ */
+__hidden extern int bpf_list_add_device(struct bpf_devices *bpf_devices,
 					struct device_item *device);
 __hidden extern bool bpf_cgroup_devices_attach(struct cgroup_ops *ops,
-					       struct lxc_list *devices);
+					       struct bpf_devices *bpf_devices);
 __hidden extern bool bpf_cgroup_devices_update(struct cgroup_ops *ops,
-					       struct device_item *new,
+					       struct bpf_devices *bpf_devices,
-					       struct lxc_list *devices);
+					       struct device_item *device);
 static inline void bpf_program_free(struct bpf_program *prog)
 {

--- a/src/lxc/commands.c
+++ b/src/lxc/commands.c
@@ -1195,7 +1195,6 @@ static int lxc_cmd_add_bpf_device_cgroup_callback(int fd, struct lxc_cmd_req *re
 {
 	int ret;
 	struct lxc_cmd_rsp rsp = {};
-	struct device_item *device;
 	struct lxc_conf *conf;
 	if (req->datalen <= 0)
@@ -1207,9 +1206,10 @@ static int lxc_cmd_add_bpf_device_cgroup_callback(int fd, struct lxc_cmd_req *re
 	if (!req->data)
 		return LXC_CMD_REAP_CLIENT_FD;
-	device = (struct device_item *)req->data;
 	conf = handler->conf;
-	if (!bpf_cgroup_devices_update(handler->cgroup_ops, device, &conf->devices))
+	if (!bpf_cgroup_devices_update(handler->cgroup_ops,
+				       &conf->bpf_devices,
+				       (struct device_item *)req->data))
 		rsp.ret = -1;
 	else
 		rsp.ret = 0;

--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -2678,7 +2678,9 @@ struct lxc_conf *lxc_conf_init(void)
 	new->logfd = -1;
 	lxc_list_init(&new->cgroup);
 	lxc_list_init(&new->cgroup2);
-	lxc_list_init(&new->devices);
+	/* Block ("allowlist") all devices by default. */
+	new->bpf_devices.list_type = LXC_BPF_DEVICE_CGROUP_ALLOWLIST;
+	lxc_list_init(&(new->bpf_devices).device_item);
 	lxc_list_init(&new->network);
 	lxc_list_init(&new->mount_list);
 	lxc_list_init(&new->caps);
@@ -3672,17 +3674,17 @@ int lxc_clear_cgroups(struct lxc_conf *c, const char *key, int version)
 	bool all = false;
 	if (version == CGROUP2_SUPER_MAGIC) {
-		global_token = "lxc.cgroup2";
+		global_token		= "lxc.cgroup2";
-		namespaced_token = "lxc.cgroup2.";
+		namespaced_token	= "lxc.cgroup2.";
-		namespaced_token_len = STRLITERALLEN("lxc.cgroup2.");
+		namespaced_token_len	= STRLITERALLEN("lxc.cgroup2.");
 		list = &c->cgroup2;
 	} else if (version == CGROUP_SUPER_MAGIC) {
-		global_token = "lxc.cgroup";
+		global_token		= "lxc.cgroup";
-		namespaced_token = "lxc.cgroup.";
+		namespaced_token	= "lxc.cgroup.";
-		namespaced_token_len = STRLITERALLEN("lxc.cgroup.");
+		namespaced_token_len	= STRLITERALLEN("lxc.cgroup.");
 		list = &c->cgroup;
 	} else {
-		return -EINVAL;
+		return ret_errno(EINVAL);
 	}
 	if (strequal(key, global_token))
@@ -3690,7 +3692,7 @@ int lxc_clear_cgroups(struct lxc_conf *c, const char *key, int version)
 	else if (strnequal(key, namespaced_token, namespaced_token_len))
 		k += namespaced_token_len;
 	else
-		return -EINVAL;
+		return ret_errno(EINVAL);
 	lxc_list_for_each_safe (it, list, next) {
 		struct lxc_cgroup *cg = it->elem;
@@ -3708,15 +3710,9 @@ int lxc_clear_cgroups(struct lxc_conf *c, const char *key, int version)
 	return 0;
 }
-static void lxc_clear_devices(struct lxc_conf *conf)
+static inline void lxc_clear_cgroups_devices(struct lxc_conf *conf)
 {
-	struct lxc_list *list = &conf->devices;
+	lxc_clear_cgroup2_devices(&conf->bpf_devices);
-	struct lxc_list *it, *next;
-	lxc_list_for_each_safe(it, list, next) {
-		lxc_list_del(it);
-		free(it);
-	}
 }
 int lxc_clear_limits(struct lxc_conf *c, const char *key)
@@ -3956,7 +3952,7 @@ void lxc_conf_free(struct lxc_conf *conf)
 	lxc_clear_config_keepcaps(conf);
 	lxc_clear_cgroups(conf, "lxc.cgroup", CGROUP_SUPER_MAGIC);
 	lxc_clear_cgroups(conf, "lxc.cgroup2", CGROUP2_SUPER_MAGIC);
-	lxc_clear_devices(conf);
+	lxc_clear_cgroups_devices(conf);
 	lxc_clear_hooks(conf, "lxc.hook");
 	lxc_clear_mount_entries(conf);
 	lxc_clear_idmaps(conf);

--- a/src/lxc/conf.h
+++ b/src/lxc/conf.h
@@ -270,7 +270,6 @@ struct lxc_state_client {
 };
 typedef enum lxc_bpf_devices_rule_t {
-	LXC_BPF_DEVICE_CGROUP_LOCAL_RULE	= -1,
 	LXC_BPF_DEVICE_CGROUP_ALLOWLIST		= 0,
 	LXC_BPF_DEVICE_CGROUP_DENYLIST		= 1,
 } lxc_bpf_devices_rule_t;
@@ -281,12 +280,11 @@ struct device_item {
 	int minor;
 	char access[4];
 	int allow;
-	/*
+};
-	 * LXC_BPF_DEVICE_CGROUP_LOCAL_RULE -> no global rule
-	 * LXC_BPF_DEVICE_CGROUP_ALLOWLIST  -> allowlist (deny all)
+struct bpf_devices {
-	 * LXC_BPF_DEVICE_CGROUP_DENYLIST   -> denylist (allow all)
+	lxc_bpf_devices_rule_t list_type;
-	 */
+	struct lxc_list device_item;
-	int global_rule;
 };
 struct timens_offsets {
@@ -310,8 +308,7 @@ struct lxc_conf {
 	struct {
 		struct lxc_list cgroup;
 		struct lxc_list cgroup2;
-		/* This should be reimplemented as a hashmap. */
+		struct bpf_devices bpf_devices;
-		struct lxc_list devices;
 	};
 	struct {
@@ -573,4 +570,17 @@ static inline void put_lxc_rootfs(struct lxc_rootfs *rootfs, bool unpin)
 	}
 }
+static inline void lxc_clear_cgroup2_devices(struct bpf_devices *bpf_devices)
+{
+	struct lxc_list *list = &bpf_devices->device_item;
+	struct lxc_list *it, *next;
+	lxc_list_for_each_safe (it, list, next) {
+		lxc_list_del(it);
+		free(it);
+	}
+	lxc_list_init(&bpf_devices->device_item);
+}
 #endif /* __LXC_CONF_H */