conf: improve idmapped mounts support

Setting up a detached idmapped mount is a privileged operation; mounting it doesn't have to be.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
parent 5f5df0dc
...@@ -545,11 +545,6 @@ int lxc_rootfs_init(struct lxc_conf *conf, bool userns) ...@@ -545,11 +545,6 @@ int lxc_rootfs_init(struct lxc_conf *conf, bool userns)
if (rootfs->bdev_type && !strequal(rootfs->bdev_type, "dir")) if (rootfs->bdev_type && !strequal(rootfs->bdev_type, "dir"))
return syserror_set(-EINVAL, "Idmapped rootfs currently only supports the \"dir\" storage driver"); return syserror_set(-EINVAL, "Idmapped rootfs currently only supports the \"dir\" storage driver");
fd_userns = open_at(-EBADF, rootfs->mnt_opts.userns_path,
PROTECT_OPEN_WITH_TRAILING_SYMLINKS, 0, 0);
if (fd_userns < 0)
return syserror("Failed to open user namespace");
} }
if (rootfs->path) { if (rootfs->path) {
...@@ -613,6 +608,51 @@ out: ...@@ -613,6 +608,51 @@ out:
return 0; return 0;
} }
int lxc_rootfs_prepare_parent(struct lxc_handler *handler)
{
__do_close int dfd_idmapped = -EBADF, fd_userns = -EBADF;
struct lxc_rootfs *rootfs = &handler->conf->rootfs;
struct lxc_storage *storage = rootfs->storage;
int ret;
const char *path_source;
if (lxc_list_empty(&handler->conf->id_map))
return 0;
if (is_empty_string(rootfs->mnt_opts.userns_path))
return 0;
if (handler->conf->rootfs_setup)
return 0;
if (rootfs_is_blockdev(handler->conf))
return syserror_set(-EOPNOTSUPP, "Idmapped mounts on block-backed storage not yet supported");
if (!can_use_bind_mounts())
return syserror_set(-EOPNOTSUPP, "Kernel does not support the new mount api");
if (rootfs->mnt_opts.userns_self)
fd_userns = dup_cloexec(handler->nsfd[LXC_NS_USER]);
else
fd_userns = open_at(-EBADF, rootfs->mnt_opts.userns_path,
PROTECT_OPEN_WITH_TRAILING_SYMLINKS, 0, 0);
if (fd_userns < 0)
return syserror("Failed to open user namespace");
path_source = lxc_storage_get_path(storage->src, storage->type);
dfd_idmapped = create_detached_idmapped_mount(path_source, fd_userns, true);
if (dfd_idmapped < 0)
return syserror("Failed to create detached idmapped mount");
ret = lxc_abstract_unix_send_fds(handler->data_sock[0], &dfd_idmapped, 1, NULL, 0);
if (ret < 0)
return syserror("Failed to send detached idmapped mount fd");
TRACE("Created detached idmapped mount %d", dfd_idmapped);
return 0;
}
static int add_shmount_to_list(struct lxc_conf *conf) static int add_shmount_to_list(struct lxc_conf *conf)
{ {
char new_mount[PATH_MAX]; char new_mount[PATH_MAX];
...@@ -2197,9 +2237,13 @@ int parse_lxc_mntopts(struct lxc_mount_options *opts, char *mnt_opts) ...@@ -2197,9 +2237,13 @@ int parse_lxc_mntopts(struct lxc_mount_options *opts, char *mnt_opts)
if (is_empty_string(opts->userns_path)) if (is_empty_string(opts->userns_path))
return syserror_set(-EINVAL, "Missing idmap path for \"idmap=<path>\" LXC specific mount option"); return syserror_set(-EINVAL, "Missing idmap path for \"idmap=<path>\" LXC specific mount option");
fd_userns = open(opts->userns_path, O_RDONLY | O_NOCTTY | O_CLOEXEC); if (strequal(opts->userns_path, "container")) {
if (fd_userns < 0) opts->userns_self = 1;
return syserror("Failed to open user namespace"); } else {
fd_userns = open(opts->userns_path, O_RDONLY | O_NOCTTY | O_CLOEXEC);
if (fd_userns < 0)
return syserror("Failed to open user namespace");
}
TRACE("Parse LXC specific mount option %d->\"idmap=%s\"", fd_userns, opts->userns_path); TRACE("Parse LXC specific mount option %d->\"idmap=%s\"", fd_userns, opts->userns_path);
break; break;
...@@ -2790,6 +2834,7 @@ struct lxc_conf *lxc_conf_init(void) ...@@ -2790,6 +2834,7 @@ struct lxc_conf *lxc_conf_init(void)
new->rootfs.dfd_dev = -EBADF; new->rootfs.dfd_dev = -EBADF;
new->rootfs.dfd_host = -EBADF; new->rootfs.dfd_host = -EBADF;
new->rootfs.fd_path_pin = -EBADF; new->rootfs.fd_path_pin = -EBADF;
new->rootfs.dfd_idmapped = -EBADF;
new->rootfs.mnt_opts.userns_fd = -EBADF; new->rootfs.mnt_opts.userns_fd = -EBADF;
new->logfd = -1; new->logfd = -1;
lxc_list_init(&new->cgroup); lxc_list_init(&new->cgroup);
...@@ -3523,12 +3568,40 @@ static int lxc_setup_keyring(struct lsm_ops *lsm_ops, const struct lxc_conf *con ...@@ -3523,12 +3568,40 @@ static int lxc_setup_keyring(struct lsm_ops *lsm_ops, const struct lxc_conf *con
return ret; return ret;
} }
static int lxc_rootfs_prepare_child(struct lxc_handler *handler)
{
struct lxc_rootfs *rootfs = &handler->conf->rootfs;
int dfd_idmapped = -EBADF;
int ret;
if (lxc_list_empty(&handler->conf->id_map))
return 0;
if (is_empty_string(rootfs->mnt_opts.userns_path))
return 0;
if (handler->conf->rootfs_setup)
return 0;
ret = lxc_abstract_unix_recv_one_fd(handler->data_sock[1], &dfd_idmapped, NULL, 0);
if (ret < 0)
return syserror("Failed to receive idmapped mount fd");
rootfs->dfd_idmapped = dfd_idmapped;
TRACE("Received detached idmapped mount %d", rootfs->dfd_idmapped);
return 0;
}
int lxc_setup(struct lxc_handler *handler) int lxc_setup(struct lxc_handler *handler)
{ {
int ret; int ret;
const char *lxcpath = handler->lxcpath, *name = handler->name; const char *lxcpath = handler->lxcpath, *name = handler->name;
struct lxc_conf *lxc_conf = handler->conf; struct lxc_conf *lxc_conf = handler->conf;
ret = lxc_rootfs_prepare_child(handler);
if (ret < 0)
return syserror("Failed to prepare rootfs");
ret = lxc_setup_rootfs_prepare_root(lxc_conf, name, lxcpath); ret = lxc_setup_rootfs_prepare_root(lxc_conf, name, lxcpath);
if (ret < 0) if (ret < 0)
return log_error(-1, "Failed to setup rootfs"); return log_error(-1, "Failed to setup rootfs");
......
...@@ -198,6 +198,7 @@ struct lxc_mount_options { ...@@ -198,6 +198,7 @@ struct lxc_mount_options {
int create_file : 1; int create_file : 1;
int optional : 1; int optional : 1;
int relative : 1; int relative : 1;
int userns_self : 1;
char userns_path[PATH_MAX]; char userns_path[PATH_MAX];
int userns_fd; int userns_fd;
unsigned long mnt_flags; unsigned long mnt_flags;
...@@ -221,6 +222,7 @@ struct lxc_rootfs { ...@@ -221,6 +222,7 @@ struct lxc_rootfs {
char *path; char *path;
int fd_path_pin; int fd_path_pin;
int dfd_idmapped;
int dfd_mnt; int dfd_mnt;
char *mount; char *mount;
...@@ -506,6 +508,7 @@ __hidden extern int lxc_storage_prepare(struct lxc_conf *conf); ...@@ -506,6 +508,7 @@ __hidden extern int lxc_storage_prepare(struct lxc_conf *conf);
__hidden extern int lxc_rootfs_prepare(struct lxc_conf *conf, bool userns); __hidden extern int lxc_rootfs_prepare(struct lxc_conf *conf, bool userns);
__hidden extern void lxc_storage_put(struct lxc_conf *conf); __hidden extern void lxc_storage_put(struct lxc_conf *conf);
__hidden extern int lxc_rootfs_init(struct lxc_conf *conf, bool userns); __hidden extern int lxc_rootfs_init(struct lxc_conf *conf, bool userns);
__hidden extern int lxc_rootfs_prepare_parent(struct lxc_handler *handler);
__hidden extern int lxc_map_ids(struct lxc_list *idmap, pid_t pid); __hidden extern int lxc_map_ids(struct lxc_list *idmap, pid_t pid);
__hidden extern int lxc_create_tty(const char *name, struct lxc_conf *conf); __hidden extern int lxc_create_tty(const char *name, struct lxc_conf *conf);
__hidden extern void lxc_delete_tty(struct lxc_tty_info *ttys); __hidden extern void lxc_delete_tty(struct lxc_tty_info *ttys);
...@@ -581,17 +584,13 @@ static inline const char *get_rootfs_mnt(const struct lxc_rootfs *rootfs) ...@@ -581,17 +584,13 @@ static inline const char *get_rootfs_mnt(const struct lxc_rootfs *rootfs)
return !is_empty_string(rootfs->path) ? rootfs->mount : s; return !is_empty_string(rootfs->path) ? rootfs->mount : s;
} }
static inline bool idmapped_rootfs_mnt(const struct lxc_rootfs *rootfs)
{
return rootfs->mnt_opts.userns_fd >= 0;
}
static inline void put_lxc_mount_options(struct lxc_mount_options *mnt_opts) static inline void put_lxc_mount_options(struct lxc_mount_options *mnt_opts)
{ {
mnt_opts->create_dir = 0; mnt_opts->create_dir = 0;
mnt_opts->create_file = 0; mnt_opts->create_file = 0;
mnt_opts->optional = 0; mnt_opts->optional = 0;
mnt_opts->relative = 0; mnt_opts->relative = 0;
mnt_opts->userns_self = 0;
mnt_opts->userns_path[0] = '\0'; mnt_opts->userns_path[0] = '\0';
mnt_opts->mnt_flags = 0; mnt_opts->mnt_flags = 0;
mnt_opts->prop_flags = 0; mnt_opts->prop_flags = 0;
...@@ -609,6 +608,7 @@ static inline void put_lxc_rootfs(struct lxc_rootfs *rootfs, bool unpin) ...@@ -609,6 +608,7 @@ static inline void put_lxc_rootfs(struct lxc_rootfs *rootfs, bool unpin)
close_prot_errno_disarm(rootfs->mnt_opts.userns_fd); close_prot_errno_disarm(rootfs->mnt_opts.userns_fd);
if (unpin) if (unpin)
close_prot_errno_disarm(rootfs->fd_path_pin); close_prot_errno_disarm(rootfs->fd_path_pin);
close_prot_errno_disarm(rootfs->dfd_idmapped);
put_lxc_mount_options(&rootfs->mnt_opts); put_lxc_mount_options(&rootfs->mnt_opts);
storage_put(rootfs->storage); storage_put(rootfs->storage);
rootfs->storage = NULL; rootfs->storage = NULL;
......
...@@ -243,6 +243,7 @@ int create_detached_idmapped_mount(const char *path, int userns_fd, bool recursi ...@@ -243,6 +243,7 @@ int create_detached_idmapped_mount(const char *path, int userns_fd, bool recursi
struct lxc_mount_attr attr = { struct lxc_mount_attr attr = {
.attr_set = MOUNT_ATTR_IDMAP, .attr_set = MOUNT_ATTR_IDMAP,
.userns_fd = userns_fd, .userns_fd = userns_fd,
.propagation = MS_SLAVE,
}; };
int ret; int ret;
......
...@@ -1786,6 +1786,12 @@ static int lxc_spawn(struct lxc_handler *handler) ...@@ -1786,6 +1786,12 @@ static int lxc_spawn(struct lxc_handler *handler)
} }
} }
ret = lxc_rootfs_prepare_parent(handler);
if (ret) {
ERROR("Failed to prepare rootfs");
goto out_delete_net;
}
if (!lxc_sync_wake_child(handler, START_SYNC_STARTUP)) if (!lxc_sync_wake_child(handler, START_SYNC_STARTUP))
goto out_delete_net; goto out_delete_net;
...@@ -2043,21 +2049,9 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops, ...@@ -2043,21 +2049,9 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops,
if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) { if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) {
/* /*
* This handles two cases: mounting real block devices and * Most filesystems can't be mounted inside a userns so handle them here.
* creating idmapped mounts. The block device case should be
* obivous, i.e. no real filesystem can currently be mounted
* from inside a user namespace.
*
* Idmapped mounts can currently only be created if the caller
* is privileged wrt to the user namespace in which the
* underlying block device has been mounted in. This basically
* (with few exceptions) means we need to be CAP_SYS_ADMIN in
* the initial user namespace since almost no interesting
* filesystems can be mounted inside of user namespaces. This
* is way we need to do the rootfs setup here. In the future
* this may change.
*/ */
if (idmapped_rootfs_mnt(&conf->rootfs) || rootfs_is_blockdev(conf)) { if (rootfs_is_blockdev(conf)) {
ret = unshare(CLONE_NEWNS); ret = unshare(CLONE_NEWNS);
if (ret < 0) { if (ret < 0) {
ERROR("Failed to unshare CLONE_NEWNS"); ERROR("Failed to unshare CLONE_NEWNS");
......
...@@ -127,7 +127,8 @@ bool dir_detect(const char *path) ...@@ -127,7 +127,8 @@ bool dir_detect(const char *path)
int dir_mount(struct lxc_storage *bdev) int dir_mount(struct lxc_storage *bdev)
{ {
struct lxc_mount_options *mnt_opts = &bdev->rootfs->mnt_opts; struct lxc_rootfs *rootfs = bdev->rootfs;
struct lxc_mount_options *mnt_opts = &rootfs->mnt_opts;
__do_free char *mntdata = NULL; __do_free char *mntdata = NULL;
unsigned long mflags = 0; unsigned long mflags = 0;
int ret; int ret;
...@@ -141,22 +142,31 @@ int dir_mount(struct lxc_storage *bdev) ...@@ -141,22 +142,31 @@ int dir_mount(struct lxc_storage *bdev)
src = lxc_storage_get_path(bdev->src, bdev->type); src = lxc_storage_get_path(bdev->src, bdev->type);
if (rootfs->dfd_idmapped >= 0 && !can_use_bind_mounts())
return syserror_set(-EOPNOTSUPP, "Idmapped mount requested but kernel doesn't support new mount API");
if (can_use_bind_mounts()) { if (can_use_bind_mounts()) {
__do_close int fd_source = -EBADF, fd_target = -EBADF; __do_close int fd_source = -EBADF, fd_target = -EBADF;
fd_source = open_at(-EBADF, src, PROTECT_OPATH_DIRECTORY, 0, 0);
if (fd_source < 0)
return syserror("Failed to open \"%s\"", src);
fd_target = open_at(-EBADF, bdev->dest, PROTECT_OPATH_DIRECTORY, 0, 0); fd_target = open_at(-EBADF, bdev->dest, PROTECT_OPATH_DIRECTORY, 0, 0);
if (fd_target < 0) if (fd_target < 0)
return syserror("Failed to open \"%s\"", bdev->dest); return syserror("Failed to open \"%s\"", bdev->dest);
ret = fd_mount_idmapped(fd_source, "", PROTECT_OPATH_DIRECTORY, if (rootfs->dfd_idmapped >= 0) {
PROTECT_LOOKUP_BENEATH, fd_target, "", ret = move_detached_mount(rootfs->dfd_idmapped, fd_target, "",
PROTECT_OPATH_DIRECTORY, PROTECT_OPATH_DIRECTORY,
PROTECT_LOOKUP_BENEATH, 0, PROTECT_LOOKUP_BENEATH);
mnt_opts->userns_fd, true); } else {
fd_source = open_at(-EBADF, src, PROTECT_OPATH_DIRECTORY, 0, 0);
if (fd_source < 0)
return syserror("Failed to open \"%s\"", src);
ret = fd_bind_mount(fd_source, "",
PROTECT_OPATH_DIRECTORY,
PROTECT_LOOKUP_BENEATH, fd_target,
"", PROTECT_OPATH_DIRECTORY,
PROTECT_LOOKUP_BENEATH, 0, true);
}
if (ret < 0) if (ret < 0)
return syserror("Failed to mount \"%s\" onto \"%s\"", src, bdev->dest); return syserror("Failed to mount \"%s\" onto \"%s\"", src, bdev->dest);
} else { } else {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment