conf: improve idmapped mounts support

Setting up a detached idmapped mount is a privileged operation; mounting it doesn't have to be. Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
parent 239f29c9
......@@ -545,11 +545,6 @@ int lxc_rootfs_init(struct lxc_conf *conf, bool userns)
if (rootfs->bdev_type && !strequal(rootfs->bdev_type, "dir"))
return syserror_set(-EINVAL, "Idmapped rootfs currently only supports the \"dir\" storage driver");
fd_userns = open_at(-EBADF, rootfs->mnt_opts.userns_path,
PROTECT_OPEN_WITH_TRAILING_SYMLINKS, 0, 0);
if (fd_userns < 0)
return syserror("Failed to open user namespace");
}
if (rootfs->path) {
......@@ -613,6 +608,51 @@ out:
return 0;
}
int lxc_rootfs_prepare_parent(struct lxc_handler *handler)
{
__do_close int dfd_idmapped = -EBADF, fd_userns = -EBADF;
struct lxc_rootfs *rootfs = &handler->conf->rootfs;
struct lxc_storage *storage = rootfs->storage;
int ret;
const char *path_source;
if (lxc_list_empty(&handler->conf->id_map))
return 0;
if (is_empty_string(rootfs->mnt_opts.userns_path))
return 0;
if (handler->conf->rootfs_setup)
return 0;
if (rootfs_is_blockdev(handler->conf))
return syserror_set(-EOPNOTSUPP, "Idmapped mounts on block-backed storage not yet supported");
if (!can_use_bind_mounts())
return syserror_set(-EOPNOTSUPP, "Kernel does not support the new mount api");
if (rootfs->mnt_opts.userns_self)
fd_userns = dup_cloexec(handler->nsfd[LXC_NS_USER]);
else
fd_userns = open_at(-EBADF, rootfs->mnt_opts.userns_path,
PROTECT_OPEN_WITH_TRAILING_SYMLINKS, 0, 0);
if (fd_userns < 0)
return syserror("Failed to open user namespace");
path_source = lxc_storage_get_path(storage->src, storage->type);
dfd_idmapped = create_detached_idmapped_mount(path_source, fd_userns, true);
if (dfd_idmapped < 0)
return syserror("Failed to create detached idmapped mount");
ret = lxc_abstract_unix_send_fds(handler->data_sock[0], &dfd_idmapped, 1, NULL, 0);
if (ret < 0)
return syserror("Failed to send detached idmapped mount fd");
TRACE("Created detached idmapped mount %d", dfd_idmapped);
return 0;
}
static int add_shmount_to_list(struct lxc_conf *conf)
{
char new_mount[PATH_MAX];
......@@ -2197,9 +2237,13 @@ int parse_lxc_mntopts(struct lxc_mount_options *opts, char *mnt_opts)
if (is_empty_string(opts->userns_path))
return syserror_set(-EINVAL, "Missing idmap path for \"idmap=<path>\" LXC specific mount option");
fd_userns = open(opts->userns_path, O_RDONLY | O_NOCTTY | O_CLOEXEC);
if (fd_userns < 0)
return syserror("Failed to open user namespace");
if (strequal(opts->userns_path, "container")) {
opts->userns_self = 1;
} else {
fd_userns = open(opts->userns_path, O_RDONLY | O_NOCTTY | O_CLOEXEC);
if (fd_userns < 0)
return syserror("Failed to open user namespace");
}
TRACE("Parse LXC specific mount option %d->\"idmap=%s\"", fd_userns, opts->userns_path);
break;
......@@ -2790,6 +2834,7 @@ struct lxc_conf *lxc_conf_init(void)
new->rootfs.dfd_dev = -EBADF;
new->rootfs.dfd_host = -EBADF;
new->rootfs.fd_path_pin = -EBADF;
new->rootfs.dfd_idmapped = -EBADF;
new->rootfs.mnt_opts.userns_fd = -EBADF;
new->logfd = -1;
lxc_list_init(&new->cgroup);
......@@ -3523,12 +3568,40 @@ static int lxc_setup_keyring(struct lsm_ops *lsm_ops, const struct lxc_conf *con
return ret;
}
/*
 * Receive the detached idmapped mount fd from handler->data_sock[1] and store
 * it in rootfs->dfd_idmapped.
 *
 * Returns 0 without receiving anything when the container has no id mapping,
 * when no idmap user namespace path is set in the rootfs mount options, or
 * when conf->rootfs_setup is already set. Returns 0 on success and a negative
 * value when receiving the fd fails.
 */
static int lxc_rootfs_prepare_child(struct lxc_handler *handler)
{
	struct lxc_rootfs *rootfs = &handler->conf->rootfs;
	int fd_received = -EBADF;

	/* Same skip conditions as lxc_rootfs_prepare_parent() checks before sending. */
	if (lxc_list_empty(&handler->conf->id_map) ||
	    is_empty_string(rootfs->mnt_opts.userns_path) ||
	    handler->conf->rootfs_setup)
		return 0;

	if (lxc_abstract_unix_recv_one_fd(handler->data_sock[1], &fd_received, NULL, 0) < 0)
		return syserror("Failed to receive idmapped mount fd");

	rootfs->dfd_idmapped = fd_received;
	TRACE("Received detached idmapped mount %d", rootfs->dfd_idmapped);

	return 0;
}
int lxc_setup(struct lxc_handler *handler)
{
int ret;
const char *lxcpath = handler->lxcpath, *name = handler->name;
struct lxc_conf *lxc_conf = handler->conf;
ret = lxc_rootfs_prepare_child(handler);
if (ret < 0)
return syserror("Failed to prepare rootfs");
ret = lxc_setup_rootfs_prepare_root(lxc_conf, name, lxcpath);
if (ret < 0)
return log_error(-1, "Failed to setup rootfs");
......
......@@ -198,6 +198,7 @@ struct lxc_mount_options {
int create_file : 1;
int optional : 1;
int relative : 1;
int userns_self : 1;
char userns_path[PATH_MAX];
int userns_fd;
unsigned long mnt_flags;
......@@ -221,6 +222,7 @@ struct lxc_rootfs {
char *path;
int fd_path_pin;
int dfd_idmapped;
int dfd_mnt;
char *mount;
......@@ -506,6 +508,7 @@ __hidden extern int lxc_storage_prepare(struct lxc_conf *conf);
__hidden extern int lxc_rootfs_prepare(struct lxc_conf *conf, bool userns);
__hidden extern void lxc_storage_put(struct lxc_conf *conf);
__hidden extern int lxc_rootfs_init(struct lxc_conf *conf, bool userns);
__hidden extern int lxc_rootfs_prepare_parent(struct lxc_handler *handler);
__hidden extern int lxc_map_ids(struct lxc_list *idmap, pid_t pid);
__hidden extern int lxc_create_tty(const char *name, struct lxc_conf *conf);
__hidden extern void lxc_delete_tty(struct lxc_tty_info *ttys);
......@@ -581,17 +584,13 @@ static inline const char *get_rootfs_mnt(const struct lxc_rootfs *rootfs)
return !is_empty_string(rootfs->path) ? rootfs->mount : s;
}
/* Report whether the rootfs mount options hold a non-negative userns_fd. */
static inline bool idmapped_rootfs_mnt(const struct lxc_rootfs *rootfs)
{
	const int fd_userns = rootfs->mnt_opts.userns_fd;

	return !(fd_userns < 0);
}
static inline void put_lxc_mount_options(struct lxc_mount_options *mnt_opts)
{
mnt_opts->create_dir = 0;
mnt_opts->create_file = 0;
mnt_opts->optional = 0;
mnt_opts->relative = 0;
mnt_opts->userns_self = 0;
mnt_opts->userns_path[0] = '\0';
mnt_opts->mnt_flags = 0;
mnt_opts->prop_flags = 0;
......@@ -609,6 +608,7 @@ static inline void put_lxc_rootfs(struct lxc_rootfs *rootfs, bool unpin)
close_prot_errno_disarm(rootfs->mnt_opts.userns_fd);
if (unpin)
close_prot_errno_disarm(rootfs->fd_path_pin);
close_prot_errno_disarm(rootfs->dfd_idmapped);
put_lxc_mount_options(&rootfs->mnt_opts);
storage_put(rootfs->storage);
rootfs->storage = NULL;
......
......@@ -243,6 +243,7 @@ int create_detached_idmapped_mount(const char *path, int userns_fd, bool recursi
struct lxc_mount_attr attr = {
.attr_set = MOUNT_ATTR_IDMAP,
.userns_fd = userns_fd,
.propagation = MS_SLAVE,
};
int ret;
......
......@@ -1786,6 +1786,12 @@ static int lxc_spawn(struct lxc_handler *handler)
}
}
ret = lxc_rootfs_prepare_parent(handler);
if (ret) {
ERROR("Failed to prepare rootfs");
goto out_delete_net;
}
if (!lxc_sync_wake_child(handler, START_SYNC_STARTUP))
goto out_delete_net;
......@@ -2043,21 +2049,9 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops,
if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) {
/*
* This handles two cases: mounting real block devices and
* creating idmapped mounts. The block device case should be
* obvious, i.e. no real filesystem can currently be mounted
* from inside a user namespace.
*
* Idmapped mounts can currently only be created if the caller
* is privileged with respect to the user namespace in which the
* underlying block device has been mounted. This basically
* (with few exceptions) means we need to be CAP_SYS_ADMIN in
* the initial user namespace since almost no interesting
* filesystems can be mounted inside of user namespaces. This
* is why we need to do the rootfs setup here. In the future
* this may change.
* Most filesystems can't be mounted inside a userns so handle them here.
*/
if (idmapped_rootfs_mnt(&conf->rootfs) || rootfs_is_blockdev(conf)) {
if (rootfs_is_blockdev(conf)) {
ret = unshare(CLONE_NEWNS);
if (ret < 0) {
ERROR("Failed to unshare CLONE_NEWNS");
......
......@@ -127,7 +127,8 @@ bool dir_detect(const char *path)
int dir_mount(struct lxc_storage *bdev)
{
struct lxc_mount_options *mnt_opts = &bdev->rootfs->mnt_opts;
struct lxc_rootfs *rootfs = bdev->rootfs;
struct lxc_mount_options *mnt_opts = &rootfs->mnt_opts;
__do_free char *mntdata = NULL;
unsigned long mflags = 0;
int ret;
......@@ -141,22 +142,31 @@ int dir_mount(struct lxc_storage *bdev)
src = lxc_storage_get_path(bdev->src, bdev->type);
if (rootfs->dfd_idmapped >= 0 && !can_use_bind_mounts())
return syserror_set(-EOPNOTSUPP, "Idmapped mount requested but kernel doesn't support new mount API");
if (can_use_bind_mounts()) {
__do_close int fd_source = -EBADF, fd_target = -EBADF;
fd_source = open_at(-EBADF, src, PROTECT_OPATH_DIRECTORY, 0, 0);
if (fd_source < 0)
return syserror("Failed to open \"%s\"", src);
fd_target = open_at(-EBADF, bdev->dest, PROTECT_OPATH_DIRECTORY, 0, 0);
if (fd_target < 0)
return syserror("Failed to open \"%s\"", bdev->dest);
ret = fd_mount_idmapped(fd_source, "", PROTECT_OPATH_DIRECTORY,
PROTECT_LOOKUP_BENEATH, fd_target, "",
PROTECT_OPATH_DIRECTORY,
PROTECT_LOOKUP_BENEATH, 0,
mnt_opts->userns_fd, true);
if (rootfs->dfd_idmapped >= 0) {
ret = move_detached_mount(rootfs->dfd_idmapped, fd_target, "",
PROTECT_OPATH_DIRECTORY,
PROTECT_LOOKUP_BENEATH);
} else {
fd_source = open_at(-EBADF, src, PROTECT_OPATH_DIRECTORY, 0, 0);
if (fd_source < 0)
return syserror("Failed to open \"%s\"", src);
ret = fd_bind_mount(fd_source, "",
PROTECT_OPATH_DIRECTORY,
PROTECT_LOOKUP_BENEATH, fd_target,
"", PROTECT_OPATH_DIRECTORY,
PROTECT_LOOKUP_BENEATH, 0, true);
}
if (ret < 0)
return syserror("Failed to mount \"%s\" onto \"%s\"", src, bdev->dest);
} else {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment