autodev: adapt to changes in Linux 4.18

Starting with commit 55956b59df33 ("vfs: Allow userns root to call mknod on owned filesystems.") Linux will allow mknod() in user namespaces for userns root if CAP_MKNOD is available. However, these device nodes are useless since

    static struct super_block *alloc_super(struct file_system_type *type, int flags,
                                           struct user_namespace *user_ns)
    {
            /* <snip> */
            if (s->s_user_ns != &init_user_ns)
                    s->s_iflags |= SB_I_NODEV;
            /* <snip> */
    }

will set the SB_I_NODEV flag on the filesystem. When a device node created in a non-init userns is open()ed the call chain will hit:

    bool may_open_dev(const struct path *path)
    {
            return !(path->mnt->mnt_flags & MNT_NODEV) &&
                   !(path->mnt->mnt_sb->s_iflags & SB_I_NODEV);
    }

which will cause an EPERM because the device node is located on an fs owned by a non-init userns and thus doesn't grant access to device nodes due to SB_I_NODEV.

This commit enables LXC to deal with such kernels.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
parent c414d6fd
...@@ -989,6 +989,7 @@ static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs, ...@@ -989,6 +989,7 @@ static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs,
int ret; int ret;
size_t clen; size_t clen;
char *path; char *path;
mode_t cur_mask;
INFO("Preparing \"/dev\""); INFO("Preparing \"/dev\"");
...@@ -1000,37 +1001,45 @@ static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs, ...@@ -1000,37 +1001,45 @@ static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs,
if (ret < 0 || (size_t)ret >= clen) if (ret < 0 || (size_t)ret >= clen)
return -1; return -1;
if (!dir_exists(path)) { cur_mask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
WARN("\"/dev\" directory does not exist. Proceeding without " ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
"autodev being set up"); if (ret < 0 && errno != EEXIST) {
return 0; SYSERROR("Failed to create \"/dev\" directory");
ret = -errno;
goto reset_umask;
} }
ret = safe_mount("none", path, "tmpfs", 0, "size=500000,mode=755", ret = safe_mount("none", path, "tmpfs", 0, "size=500000,mode=755",
rootfs->path ? rootfs->mount : NULL); rootfs->path ? rootfs->mount : NULL);
if (ret < 0) { if (ret < 0) {
SYSERROR("Failed to mount tmpfs on \"%s\"", path); SYSERROR("Failed to mount tmpfs on \"%s\"", path);
return -1; goto reset_umask;
} }
INFO("Mounted tmpfs on \"%s\"", path); TRACE("Mounted tmpfs on \"%s\"", path);
ret = snprintf(path, clen, "%s/dev/pts", rootfs->path ? rootfs->mount : ""); ret = snprintf(path, clen, "%s/dev/pts", rootfs->path ? rootfs->mount : "");
if (ret < 0 || (size_t)ret >= clen) if (ret < 0 || (size_t)ret >= clen) {
return -1; ret = -1;
goto reset_umask;
}
/* If we are running on a devtmpfs mapping, dev/pts may already exist. /* If we are running on a devtmpfs mapping, dev/pts may already exist.
* If not, then create it and exit if that fails... * If not, then create it and exit if that fails...
*/ */
if (!dir_exists(path)) { ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); if (ret < 0 && errno != EEXIST) {
if (ret < 0) { SYSERROR("Failed to create directory \"%s\"", path);
SYSERROR("Failed to create directory \"%s\"", path); ret = -errno;
return -1; goto reset_umask;
}
} }
ret = 0;
reset_umask:
(void)umask(cur_mask);
INFO("Prepared \"/dev\""); INFO("Prepared \"/dev\"");
return 0; return ret;
} }
struct lxc_device_node { struct lxc_device_node {
...@@ -1049,16 +1058,23 @@ static const struct lxc_device_node lxc_devices[] = { ...@@ -1049,16 +1058,23 @@ static const struct lxc_device_node lxc_devices[] = {
{ "zero", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 5 }, { "zero", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 5 },
}; };
/* States used by lxc_fill_autodev() to track how device nodes under "/dev"
 * can be provided. The numeric order matters: the caller checks
 * "use_mknod >= LXC_DEVNODE_MKNOD" to decide whether mknod() is attempted.
 */
enum {
	/* mknod() failed with EPERM: bind-mount the device from the host. */
	LXC_DEVNODE_BIND = 0,
	/* Initial state: try to create the device node with mknod(). */
	LXC_DEVNODE_MKNOD = 1,
	/* mknod() worked but open() on the node failed: the node is only
	 * partially useable and serves as a bind-mount target.
	 */
	LXC_DEVNODE_PARTIAL = 2,
	/* mknod() worked and the node could be open()ed: fully useable. */
	LXC_DEVNODE_OPEN = 3,
};
static int lxc_fill_autodev(const struct lxc_rootfs *rootfs) static int lxc_fill_autodev(const struct lxc_rootfs *rootfs)
{ {
int i, ret; int i, ret;
char path[MAXPATHLEN]; char path[PATH_MAX];
mode_t cmask; mode_t cmask;
bool can_mknod = true; int use_mknod = LXC_DEVNODE_MKNOD;
ret = snprintf(path, MAXPATHLEN, "%s/dev", ret = snprintf(path, PATH_MAX, "%s/dev",
rootfs->path ? rootfs->mount : ""); rootfs->path ? rootfs->mount : "");
if (ret < 0 || ret >= MAXPATHLEN) if (ret < 0 || ret >= PATH_MAX)
return -1; return -1;
/* ignore, just don't try to fill in */ /* ignore, just don't try to fill in */
...@@ -1069,41 +1085,65 @@ static int lxc_fill_autodev(const struct lxc_rootfs *rootfs) ...@@ -1069,41 +1085,65 @@ static int lxc_fill_autodev(const struct lxc_rootfs *rootfs)
cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH); cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
for (i = 0; i < sizeof(lxc_devices) / sizeof(lxc_devices[0]); i++) { for (i = 0; i < sizeof(lxc_devices) / sizeof(lxc_devices[0]); i++) {
char hostpath[MAXPATHLEN]; char hostpath[PATH_MAX];
const struct lxc_device_node *device = &lxc_devices[i]; const struct lxc_device_node *device = &lxc_devices[i];
ret = snprintf(path, MAXPATHLEN, "%s/dev/%s", ret = snprintf(path, PATH_MAX, "%s/dev/%s",
rootfs->path ? rootfs->mount : "", device->name); rootfs->path ? rootfs->mount : "", device->name);
if (ret < 0 || ret >= MAXPATHLEN) if (ret < 0 || ret >= PATH_MAX)
return -1; return -1;
if (can_mknod) { if (use_mknod >= LXC_DEVNODE_MKNOD) {
ret = mknod(path, device->mode, makedev(device->maj, device->min)); ret = mknod(path, device->mode, makedev(device->maj, device->min));
if (ret == 0 || (ret < 0 && errno == EEXIST)) { if (ret == 0 || (ret < 0 && errno == EEXIST)) {
DEBUG("Created device node \"%s\"", path); DEBUG("Created device node \"%s\"", path);
continue; } else if (ret < 0) {
} if (errno != EPERM) {
SYSERROR("Failed to create device node \"%s\"", path);
return -1;
}
if (errno != EPERM) { use_mknod = LXC_DEVNODE_BIND;
SYSERROR("Failed to create device node \"%s\"", path);
return -1;
} }
/* This can e.g. happen when the container is /* Device nodes are fully useable. */
* unprivileged or CAP_MKNOD has been dropped. if (use_mknod == LXC_DEVNODE_OPEN)
*/ continue;
can_mknod = false;
if (use_mknod == LXC_DEVNODE_MKNOD) {
/* See
* - https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=55956b59df336f6738da916dbb520b6e37df9fbd
* - https://lists.linuxfoundation.org/pipermail/containers/2018-June/039176.html
*/
ret = open(path, O_RDONLY | O_CLOEXEC);
if (ret >= 0) {
close(ret);
/* Device nodes are fully useable. */
use_mknod = LXC_DEVNODE_OPEN;
continue;
}
TRACE("Failed to open \"%s\" device", path);
/* Device nodes are only partially useable. */
use_mknod = LXC_DEVNODE_PARTIAL;
}
} }
ret = mknod(path, S_IFREG, 0); if (use_mknod != LXC_DEVNODE_PARTIAL) {
if (ret < 0 && errno != EEXIST) { /* If we are dealing with partially functional device
SYSERROR("Failed to create file \"%s\"", path); * nodes the prior mknod() call will have created the
return -1; * device node so we can use it as a bind-mount target.
*/
ret = mknod(path, S_IFREG | 0000, 0);
if (ret < 0 && errno != EEXIST) {
SYSERROR("Failed to create file \"%s\"", path);
return -1;
}
} }
/* Fallback to bind-mounting the device from the host. */ /* Fallback to bind-mounting the device from the host. */
ret = snprintf(hostpath, MAXPATHLEN, "/dev/%s", device->name); ret = snprintf(hostpath, PATH_MAX, "/dev/%s", device->name);
if (ret < 0 || ret >= MAXPATHLEN) if (ret < 0 || ret >= PATH_MAX)
return -1; return -1;
ret = safe_mount(hostpath, path, 0, MS_BIND, NULL, ret = safe_mount(hostpath, path, 0, MS_BIND, NULL,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment