conf: support idmapping directories

parent f3dde9c3
......@@ -488,11 +488,18 @@ int run_script(const char *name, const char *section, const char *script, ...)
*/
int lxc_rootfs_prepare(struct lxc_rootfs *rootfs, bool userns)
{
__do_close int dfd_path = -EBADF, fd_pin = -EBADF;
__do_close int dfd_path = -EBADF, fd_pin = -EBADF, fd_userns = -EBADF;
int ret;
struct stat st;
struct statfs stfs;
if (!is_empty_string(rootfs->mnt_opts.userns_path)) {
fd_userns = open_at(-EBADF, rootfs->mnt_opts.userns_path,
PROTECT_OPEN_WITH_TRAILING_SYMLINKS, 0, 0);
if (fd_userns < 0)
return syserror("Failed to open user namespace");
}
if (rootfs->path) {
if (rootfs->bdev_type &&
(strequal(rootfs->bdev_type, "overlay") ||
......@@ -504,13 +511,17 @@ int lxc_rootfs_prepare(struct lxc_rootfs *rootfs, bool userns)
dfd_path = open_at(-EBADF, "/", PROTECT_OPATH_FILE, PROTECT_LOOKUP_ABSOLUTE, 0);
}
if (dfd_path < 0)
return log_error_errno(-errno, errno, "Failed to open \"%s\"", rootfs->path);
return syserror("Failed to open \"%s\"", rootfs->path);
if (!rootfs->path)
return log_trace(0, "Not pinning because container does not have a rootfs");
if (!rootfs->path) {
TRACE("Not pinning because container does not have a rootfs");
goto out;
}
if (userns)
return log_trace(0, "Not pinning because container runs in user namespace");
if (userns) {
TRACE("Not pinning because container runs in user namespace");
goto out;
}
ret = fstat(dfd_path, &st);
if (ret < 0)
......@@ -524,7 +535,7 @@ int lxc_rootfs_prepare(struct lxc_rootfs *rootfs, bool userns)
PROTECT_LOOKUP_BENEATH,
S_IWUSR | S_IRUSR);
if (fd_pin < 0)
return log_error_errno(-errno, errno, "Failed to pin rootfs");
return syserror("Failed to pin rootfs");
TRACE("Pinned rootfs %d(.lxc_keep)", fd_pin);
......@@ -546,6 +557,7 @@ int lxc_rootfs_prepare(struct lxc_rootfs *rootfs, bool userns)
out:
rootfs->fd_path_pin = move_fd(fd_pin);
rootfs->mnt_opts.userns_fd = move_fd(fd_userns);
return 0;
}
......@@ -2105,6 +2117,7 @@ const char *lxc_mount_options_info[LXC_MOUNT_MAX] = {
/* Remove "optional", "create=dir", and "create=file" from mntopt */
int parse_lxc_mntopts(struct lxc_mount_options *opts, char *mnt_opts)
{
__do_close int fd_userns = -EBADF;
for (size_t i = LXC_MOUNT_CREATE_DIR; i < LXC_MOUNT_MAX; i++) {
const char *opt_name = lxc_mount_options_info[i];
......@@ -2140,7 +2153,12 @@ int parse_lxc_mntopts(struct lxc_mount_options *opts, char *mnt_opts)
if (is_empty_string(opts->userns_path))
return syserror_set(-EINVAL, "Missing idmap path for \"idmap=<path>\" LXC specific mount option");
TRACE("Parse LXC specific mount option \"idmap=%s\"", opts->userns_path);
close_prot_errno_disarm(fd_userns);
fd_userns = open(opts->userns_path, O_RDONLY | O_NOCTTY | O_CLOEXEC);
if (fd_userns < 0)
return syserror("Failed to open user namespace");
TRACE("Parse LXC specific mount option %d->\"idmap=%s\"", fd_userns, opts->userns_path);
break;
default:
return syserror_set(-EINVAL, "Unknown LXC specific mount option");
......@@ -2726,6 +2744,7 @@ struct lxc_conf *lxc_conf_init(void)
new->rootfs.dfd_dev = -EBADF;
new->rootfs.dfd_host = -EBADF;
new->rootfs.fd_path_pin = -EBADF;
new->rootfs.mnt_opts.userns_fd = -EBADF;
new->logfd = -1;
lxc_list_init(&new->cgroup);
lxc_list_init(&new->cgroup2);
......
......@@ -198,6 +198,7 @@ struct lxc_mount_options {
int optional : 1;
int relative : 1;
char userns_path[PATH_MAX];
int userns_fd;
};
/* Defines a structure to store the rootfs location, the
......@@ -575,12 +576,18 @@ static inline const char *get_rootfs_mnt(const struct lxc_rootfs *rootfs)
return !is_empty_string(rootfs->path) ? rootfs->mount : s;
}
/*
 * Tell whether @rootfs has a user namespace file descriptor stored in its
 * mount options. A negative userns_fd means none is set, so the function
 * returns false in that case and true otherwise.
 */
static inline bool idmapped_rootfs_mnt(const struct lxc_rootfs *rootfs)
{
	const int fd_userns = rootfs->mnt_opts.userns_fd;

	return !(fd_userns < 0);
}
static inline void put_lxc_rootfs(struct lxc_rootfs *rootfs, bool unpin)
{
if (rootfs) {
close_prot_errno_disarm(rootfs->dfd_host);
close_prot_errno_disarm(rootfs->dfd_mnt);
close_prot_errno_disarm(rootfs->dfd_dev);
close_prot_errno_disarm(rootfs->mnt_opts.userns_fd);
if (unpin)
close_prot_errno_disarm(rootfs->fd_path_pin);
}
......
......@@ -236,12 +236,15 @@ int fs_attach(int fd_fs,
return 0;
}
int fd_bind_mount(int dfd_from, const char *path_from,
__u64 o_flags_from, __u64 resolve_flags_from,
int dfd_to, const char *path_to,
__u64 o_flags_to, __u64 resolve_flags_to,
unsigned int attr_flags, bool recursive)
static int __fd_bind_mount(int dfd_from, const char *path_from,
__u64 o_flags_from, __u64 resolve_flags_from,
int dfd_to, const char *path_to, __u64 o_flags_to,
__u64 resolve_flags_to, unsigned int attr_flags,
int userns_fd, bool recursive)
{
struct lxc_mount_attr attr = {
.attr_set = attr_flags,
};
__do_close int __fd_from = -EBADF, __fd_to = -EBADF;
__do_close int fd_tree_from = -EBADF;
unsigned int open_tree_flags = AT_EMPTY_PATH | OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC;
......@@ -266,7 +269,20 @@ int fd_bind_mount(int dfd_from, const char *path_from,
fd_tree_from = open_tree(fd_from, "", open_tree_flags);
if (fd_tree_from < 0)
return log_error_errno(-errno, errno, "Failed to create detached mount");
return syserror("Failed to create detached mount");
if (userns_fd >= 0) {
attr.attr_set |= MOUNT_ATTR_IDMAP;
attr.userns_fd = userns_fd;
}
if (attr.attr_set) {
ret = mount_setattr(fd_tree_from, "",
AT_EMPTY_PATH | AT_RECURSIVE,
&attr, sizeof(attr));
if (ret < 0)
return syserror("Failed to change mount attributes");
}
if (!is_empty_string(path_to)) {
struct lxc_open_how how = {
......@@ -284,12 +300,34 @@ int fd_bind_mount(int dfd_from, const char *path_from,
ret = move_mount(fd_tree_from, "", fd_to, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH);
if (ret)
return log_error_errno(-errno, errno, "Failed to attach detached mount %d to filesystem at %d", fd_tree_from, fd_to);
return syserror("Failed to attach detached mount %d to filesystem at %d", fd_tree_from, fd_to);
TRACE("Attach detached mount %d to filesystem at %d", fd_tree_from, fd_to);
return 0;
}
/*
 * Public entry point for bind mounts that carry a user namespace fd.
 *
 * Every argument is handed to __fd_bind_mount() unchanged, including
 * @userns_fd, and the helper's return value is passed straight back to the
 * caller.
 */
int fd_mount_idmapped(int dfd_from, const char *path_from,
		      __u64 o_flags_from, __u64 resolve_flags_from,
		      int dfd_to, const char *path_to,
		      __u64 o_flags_to, __u64 resolve_flags_to,
		      unsigned int attr_flags, int userns_fd, bool recursive)
{
	int ret;

	ret = __fd_bind_mount(dfd_from, path_from,
			      o_flags_from, resolve_flags_from,
			      dfd_to, path_to,
			      o_flags_to, resolve_flags_to,
			      attr_flags, userns_fd, recursive);

	return ret;
}
/*
 * Public entry point for plain bind mounts.
 *
 * Forwards all arguments to __fd_bind_mount() and supplies -EBADF in place
 * of a user namespace fd. The helper's return value is passed straight back
 * to the caller.
 */
int fd_bind_mount(int dfd_from, const char *path_from,
		  __u64 o_flags_from, __u64 resolve_flags_from,
		  int dfd_to, const char *path_to,
		  __u64 o_flags_to, __u64 resolve_flags_to,
		  unsigned int attr_flags, bool recursive)
{
	/* No user namespace fd is available for a plain bind mount. */
	const int no_userns_fd = -EBADF;
	int ret;

	ret = __fd_bind_mount(dfd_from, path_from,
			      o_flags_from, resolve_flags_from,
			      dfd_to, path_to,
			      o_flags_to, resolve_flags_to,
			      attr_flags, no_userns_fd, recursive);

	return ret;
}
int calc_remount_flags_new(int dfd_from, const char *path_from,
__u64 o_flags_from, __u64 resolve_flags_from,
bool remount, unsigned long cur_flags,
......
......@@ -189,6 +189,13 @@ __hidden extern int fd_bind_mount(int dfd_from, const char *path_from,
__u64 o_flags_to, __u64 resolve_flags_to,
unsigned int attr_flags, bool recursive);
/*
 * fd_mount_idmapped - variant of fd_bind_mount() that additionally takes a
 * user namespace file descriptor.
 *
 * The source is described by @dfd_from/@path_from together with
 * @o_flags_from/@resolve_flags_from, the target by @dfd_to/@path_to together
 * with @o_flags_to/@resolve_flags_to. @attr_flags, @userns_fd and @recursive
 * are forwarded alongside them.
 *
 * NOTE(review): a negative @userns_fd presumably means "no idmapping", which
 * would match what fd_bind_mount() passes internally - confirm against the
 * implementation.
 */
__hidden extern int fd_mount_idmapped(int dfd_from, const char *path_from,
__u64 o_flags_from, __u64 resolve_flags_from,
int dfd_to, const char *path_to,
__u64 o_flags_to, __u64 resolve_flags_to,
unsigned int attr_flags, int userns_fd,
bool recursive);
__hidden extern int calc_remount_flags_new(int dfd_from, const char *path_from,
__u64 o_flags_from,
__u64 resolve_flags_from,
......
......@@ -1645,16 +1645,6 @@ static int lxc_spawn(struct lxc_handler *handler)
goto out_delete_net;
}
/* If the rootfs is not a blockdev, prevent the container from marking
* it readonly.
* If the container is unprivileged then skip rootfs pinning.
*/
ret = lxc_rootfs_prepare(&conf->rootfs, wants_to_map_ids);
if (ret) {
ERROR("Failed to handle rootfs pinning for container \"%s\"", handler->name);
goto out_delete_net;
}
/* Create a process in a new set of namespaces. */
if (share_ns) {
pid_t attacher_pid;
......@@ -2040,9 +2030,20 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops,
goto out_abort;
}
/* If the rootfs is not a blockdev, prevent the container from marking
* it readonly.
* If the container is unprivileged then skip rootfs pinning.
*/
ret = lxc_rootfs_prepare(&conf->rootfs, !lxc_list_empty(&conf->id_map));
if (ret) {
ERROR("Failed to handle rootfs pinning for container \"%s\"", handler->name);
ret = -1;
goto out_abort;
}
if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) {
/* If the backing store is a device, mount it here and now. */
if (rootfs_is_blockdev(conf)) {
if (idmapped_rootfs_mnt(&conf->rootfs) || rootfs_is_blockdev(conf)) {
ret = unshare(CLONE_NEWNS);
if (ret < 0) {
ERROR("Failed to unshare CLONE_NEWNS");
......
......@@ -148,23 +148,46 @@ int dir_mount(struct lxc_storage *bdev)
src = lxc_storage_get_path(bdev->src, bdev->type);
ret = mount(src, bdev->dest, "bind", MS_BIND | MS_REC | mntflags | pflags, mntdata);
if (ret < 0)
return log_error_errno(-errno, errno, "Failed to mount \"%s\" on \"%s\"", src, bdev->dest);
if (ret == 0 && (mntflags & MS_RDONLY)) {
mflags = add_required_remount_flags(src, bdev->dest, MS_BIND | MS_REC | mntflags | pflags | MS_REMOUNT);
ret = mount(src, bdev->dest, "bind", mflags, mntdata);
if (can_use_bind_mounts()) {
__do_close int fd_source = -EBADF, fd_target = -EBADF;
fd_source = open_at(-EBADF, src, PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_ABSOLUTE, 0);
if (fd_source < 0)
return syserror("Failed to open \"%s\"", src);
fd_target = open_at(-EBADF, bdev->dest, PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_ABSOLUTE, 0);
if (fd_target < 0)
return syserror("Failed to open \"%s\"", bdev->dest);
ret = fd_mount_idmapped(fd_source, "", PROTECT_OPATH_DIRECTORY,
PROTECT_LOOKUP_BENEATH, fd_target, "",
PROTECT_OPATH_DIRECTORY,
PROTECT_LOOKUP_BENEATH, 0,
bdev->rootfs->mnt_opts.userns_fd, true);
if (ret < 0)
return syserror("Failed to mount \"%s\" onto \"%s\"", src, bdev->dest);
} else {
ret = mount(src, bdev->dest, "bind", MS_BIND | MS_REC | mntflags | pflags, mntdata);
if (ret < 0)
return log_error_errno(-errno, errno, "Failed to remount \"%s\" on \"%s\" read-only with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"",
src ? src : "(none)", bdev->dest ? bdev->dest : "(none)", mntdata, mflags, pflags);
else
DEBUG("Remounted \"%s\" on \"%s\" read-only with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"",
src ? src : "(none)", bdev->dest ? bdev->dest : "(none)", mntdata, mflags, pflags);
return log_error_errno(-errno, errno, "Failed to mount \"%s\" on \"%s\"", src, bdev->dest);
if (ret == 0 && (mntflags & MS_RDONLY)) {
mflags = add_required_remount_flags(src, bdev->dest, MS_BIND | MS_REC | mntflags | pflags | MS_REMOUNT);
ret = mount(src, bdev->dest, "bind", mflags, mntdata);
if (ret < 0)
return log_error_errno(-errno, errno, "Failed to remount \"%s\" on \"%s\" read-only with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"",
src ? src : "(none)", bdev->dest ? bdev->dest : "(none)", mntdata, mflags, pflags);
else
DEBUG("Remounted \"%s\" on \"%s\" read-only with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"",
src ? src : "(none)", bdev->dest ? bdev->dest : "(none)", mntdata, mflags, pflags);
}
TRACE("Mounted \"%s\" on \"%s\" with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"",
src ? src : "(none)", bdev->dest ? bdev->dest : "(none)", mntdata, mflags, pflags);
}
TRACE("Mounted \"%s\" on \"%s\" with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"",
src ? src : "(none)", bdev->dest ? bdev->dest : "(none)", mntdata, mflags, pflags);
TRACE("Mounted \"%s\" onto \"%s\"", src, bdev->dest);
return 0;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment