Unverified Commit c11af973 by Stéphane Graber Committed by GitHub

Merge pull request #3709 from brauner/2021-03-17/idmapped_mounts_v2

Initial support for idmapped mounts
parents 12cf9f5a fa8e75f0
......@@ -652,8 +652,9 @@ AC_CHECK_HEADER([ifaddrs.h],
AC_HEADER_MAJOR
# Check for some syscalls functions
AC_CHECK_FUNCS([setns pivot_root sethostname unshare rand_r confstr faccessat gettid memfd_create move_mount open_tree execveat clone3 fsopen fspick fsconfig fsmount, openat2, close_range, statvfs])
# AC_CHECK_FUNCS expects a whitespace-separated list. Commas inside the quoted
# list are not separators; they would become part of the probed function names.
AC_CHECK_FUNCS([setns pivot_root sethostname unshare rand_r confstr faccessat gettid memfd_create move_mount open_tree execveat clone3 fsopen fspick fsconfig fsmount openat2 close_range statvfs mount_setattr])
AC_CHECK_TYPES([__aligned_u64], [], [], [[#include <linux/types.h>]])
AC_CHECK_TYPES([struct mount_attr], [], [], [[#include <linux/mount.h>]])
AC_CHECK_TYPES([struct open_how], [], [], [[#include <linux/openat2.h>]])
AC_CHECK_TYPES([struct clone_args], [], [], [[#include <linux/sched.h>]])
AC_CHECK_MEMBERS([struct clone_args.set_tid],[],[],[[#include <linux/sched.h>]])
......
......@@ -181,10 +181,12 @@ static struct attach_context *alloc_attach_context(void)
if (!ctx)
return ret_set_errno(NULL, ENOMEM);
ctx->init_pid = -ESRCH;
ctx->dfd_self_pid = -EBADF;
ctx->dfd_init_pid = -EBADF;
ctx->init_pidfd = -EBADF;
ctx->init_pid = -ESRCH;
ctx->setup_ns_uid = LXC_INVALID_UID;
ctx->setup_ns_gid = LXC_INVALID_GID;
ctx->target_ns_uid = LXC_INVALID_UID;
......@@ -192,7 +194,7 @@ static struct attach_context *alloc_attach_context(void)
ctx->target_host_uid = LXC_INVALID_UID;
ctx->target_host_gid = LXC_INVALID_GID;
for (int i = 0; i < LXC_NS_MAX; i++)
for (lxc_namespace_t i = 0; i < LXC_NS_MAX; i++)
ctx->ns_fd[i] = -EBADF;
return ctx;
......@@ -436,7 +438,7 @@ static int get_attach_context(struct attach_context *ctx,
if (options->namespaces == -1)
return log_error_errno(-EINVAL, EINVAL, "Failed to automatically determine the namespaces which the container uses");
for (int i = 0; i < LXC_NS_MAX; i++) {
for (lxc_namespace_t i = 0; i < LXC_NS_MAX; i++) {
if (ns_info[i].clone_flag & CLONE_NEWCGROUP)
if (!(options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) ||
!cgns_supported())
......@@ -531,7 +533,7 @@ static int same_ns(int dfd_pid1, int dfd_pid2, const char *ns_path)
static int __prepare_namespaces_pidfd(struct attach_context *ctx)
{
for (int i = 0; i < LXC_NS_MAX; i++) {
for (lxc_namespace_t i = 0; i < LXC_NS_MAX; i++) {
int ret;
ret = same_nsfd(ctx->dfd_self_pid,
......@@ -559,8 +561,8 @@ static int __prepare_namespaces_pidfd(struct attach_context *ctx)
static int __prepare_namespaces_nsfd(struct attach_context *ctx,
lxc_attach_options_t *options)
{
for (int i = 0; i < LXC_NS_MAX; i++) {
int j;
for (lxc_namespace_t i = 0; i < LXC_NS_MAX; i++) {
lxc_namespace_t j;
if (options->namespaces & ns_info[i].clone_flag)
ctx->ns_fd[i] = open_at(ctx->dfd_init_pid,
......@@ -642,7 +644,7 @@ static int __attach_namespaces_nsfd(struct attach_context *ctx,
{
int fret = 0;
for (int i = 0; i < LXC_NS_MAX; i++) {
for (lxc_namespace_t i = 0; i < LXC_NS_MAX; i++) {
int ret;
if (ctx->ns_fd[i] < 0)
......@@ -670,7 +672,7 @@ static int attach_namespaces(struct attach_context *ctx,
lxc_attach_options_t *options)
{
if (lxc_log_trace()) {
for (int i = 0; i < LXC_NS_MAX; i++) {
for (lxc_namespace_t i = 0; i < LXC_NS_MAX; i++) {
if (ns_info[i].clone_flag & options->namespaces) {
TRACE("Attaching to %s namespace", ns_info[i].proc_name);
continue;
......
......@@ -98,6 +98,10 @@
#include <../include/prlimit.h>
#endif
#ifndef HAVE_STRLCPY
#include "include/strlcpy.h"
#endif
lxc_log_define(conf, lxc);
/*
......@@ -484,11 +488,24 @@ int run_script(const char *name, const char *section, const char *script, ...)
*/
int lxc_rootfs_prepare(struct lxc_rootfs *rootfs, bool userns)
{
__do_close int dfd_path = -EBADF, fd_pin = -EBADF;
__do_close int dfd_path = -EBADF, fd_pin = -EBADF, fd_userns = -EBADF;
int ret;
struct stat st;
struct statfs stfs;
if (!is_empty_string(rootfs->mnt_opts.userns_path)) {
if (!rootfs->path)
return syserror_set(-EINVAL, "Idmapped rootfs currently only supported with separate rootfs for container");
if (rootfs->bdev_type && !strequal(rootfs->bdev_type, "dir"))
return syserror_set(-EINVAL, "Idmapped rootfs currently only supports the \"dir\" storage driver");
fd_userns = open_at(-EBADF, rootfs->mnt_opts.userns_path,
PROTECT_OPEN_WITH_TRAILING_SYMLINKS, 0, 0);
if (fd_userns < 0)
return syserror("Failed to open user namespace");
}
if (rootfs->path) {
if (rootfs->bdev_type &&
(strequal(rootfs->bdev_type, "overlay") ||
......@@ -500,13 +517,17 @@ int lxc_rootfs_prepare(struct lxc_rootfs *rootfs, bool userns)
dfd_path = open_at(-EBADF, "/", PROTECT_OPATH_FILE, PROTECT_LOOKUP_ABSOLUTE, 0);
}
if (dfd_path < 0)
return log_error_errno(-errno, errno, "Failed to open \"%s\"", rootfs->path);
return syserror("Failed to open \"%s\"", rootfs->path);
if (!rootfs->path)
return log_trace(0, "Not pinning because container does not have a rootfs");
if (!rootfs->path) {
TRACE("Not pinning because container does not have a rootfs");
goto out;
}
if (userns)
return log_trace(0, "Not pinning because container runs in user namespace");
if (userns) {
TRACE("Not pinning because container runs in user namespace");
goto out;
}
ret = fstat(dfd_path, &st);
if (ret < 0)
......@@ -520,7 +541,7 @@ int lxc_rootfs_prepare(struct lxc_rootfs *rootfs, bool userns)
PROTECT_LOOKUP_BENEATH,
S_IWUSR | S_IRUSR);
if (fd_pin < 0)
return log_error_errno(-errno, errno, "Failed to pin rootfs");
return syserror("Failed to pin rootfs");
TRACE("Pinned rootfs %d(.lxc_keep)", fd_pin);
......@@ -542,6 +563,7 @@ int lxc_rootfs_prepare(struct lxc_rootfs *rootfs, bool userns)
out:
rootfs->fd_path_pin = move_fd(fd_pin);
rootfs->mnt_opts.userns_fd = move_fd(fd_userns);
return 0;
}
......@@ -2090,34 +2112,70 @@ skipremount:
return 0;
}
/*
 * Textual names of the LXC specific mount options. parse_lxc_mntopts() indexes
 * this table with the lxc_mount_options_t values, so the order of the entries
 * must stay in sync with that enum.
 */
const char *lxc_mount_options_info[LXC_MOUNT_MAX] = {
"create=dir",
"create=file",
"optional",
"relative",
"idmap=", /* prefix only: the user namespace path follows the '=' */
};
/* Remove "optional", "create=dir", and "create=file" from mntopt */
static void cull_mntent_opt(struct mntent *mntent)
int parse_lxc_mntopts(struct lxc_mount_options *opts, char *mnt_opts)
{
int i;
char *list[] = {
"create=dir",
"create=file",
"optional",
"relative",
NULL
};
for (i = 0; list[i]; i++) {
char *p, *p2;
for (size_t i = LXC_MOUNT_CREATE_DIR; i < LXC_MOUNT_MAX; i++) {
__do_close int fd_userns = -EBADF;
const char *opt_name = lxc_mount_options_info[i];
size_t len;
char *idmap_path, *p, *p2;
p = strstr(mntent->mnt_opts, list[i]);
p = strstr(mnt_opts, opt_name);
if (!p)
continue;
p2 = strchr(p, ',');
if (!p2) {
/* no more mntopts, so just chop it here */
*p = '\0';
continue;
switch (i) {
case LXC_MOUNT_CREATE_DIR:
opts->create_dir = 1;
break;
case LXC_MOUNT_CREATE_FILE:
opts->create_file = 1;
break;
case LXC_MOUNT_OPTIONAL:
opts->optional = 1;
break;
case LXC_MOUNT_RELATIVE:
opts->relative = 1;
break;
case LXC_MOUNT_IDMAP:
p2 = p;
p2 += STRLITERALLEN("idmap=");
idmap_path = strchrnul(p2, ',');
len = strlcpy(opts->userns_path, p2, idmap_path - p2 + 1);
if (len >= sizeof(opts->userns_path))
return syserror_set(-EIO, "Excessive idmap path length for \"idmap=<path>\" LXC specific mount option");
if (is_empty_string(opts->userns_path))
return syserror_set(-EINVAL, "Missing idmap path for \"idmap=<path>\" LXC specific mount option");
fd_userns = open(opts->userns_path, O_RDONLY | O_NOCTTY | O_CLOEXEC);
if (fd_userns < 0)
return syserror("Failed to open user namespace");
TRACE("Parse LXC specific mount option %d->\"idmap=%s\"", fd_userns, opts->userns_path);
break;
default:
return syserror_set(-EINVAL, "Unknown LXC specific mount option");
}
memmove(p, p2 + 1, strlen(p2 + 1) + 1);
p2 = strchr(p, ',');
if (!p2)
*p = '\0'; /* no more mntopts, so just chop it here */
else
memmove(p, p2 + 1, strlen(p2 + 1) + 1);
}
return 0;
}
static int mount_entry_create_dir_file(const struct mntent *mntent,
......@@ -2178,6 +2236,7 @@ static inline int mount_entry_on_generic(struct mntent *mntent,
char *rootfs_path = NULL;
int ret;
bool dev, optional, relative;
struct lxc_mount_options opts = {};
optional = hasmntopt(mntent, "optional") != NULL;
dev = hasmntopt(mntent, "dev") != NULL;
......@@ -2194,7 +2253,13 @@ static inline int mount_entry_on_generic(struct mntent *mntent,
return -1;
}
cull_mntent_opt(mntent);
ret = parse_lxc_mntopts(&opts, mntent->mnt_opts);
if (ret < 0)
return ret;
if (!is_empty_string(opts.userns_path))
return syserror_set(-EINVAL, "Idmapped mount entries not yet supported");
ret = parse_propagationopts(mntent->mnt_opts, &pflags);
if (ret < 0)
......@@ -2686,6 +2751,7 @@ struct lxc_conf *lxc_conf_init(void)
new->rootfs.dfd_dev = -EBADF;
new->rootfs.dfd_host = -EBADF;
new->rootfs.fd_path_pin = -EBADF;
new->rootfs.mnt_opts.userns_fd = -EBADF;
new->logfd = -1;
lxc_list_init(&new->cgroup);
lxc_list_init(&new->cgroup2);
......
......@@ -181,6 +181,26 @@ struct lxc_tty_info {
struct lxc_terminal_info *tty;
};
/*
 * Identifiers for the LXC specific mount options. The values are consecutive
 * starting at 0; LXC_MOUNT_MAX is the number of options and is used as the
 * size of the lxc_mount_options_info table.
 */
typedef enum lxc_mount_options_t {
	LXC_MOUNT_CREATE_DIR = 0,
	LXC_MOUNT_CREATE_FILE,
	LXC_MOUNT_OPTIONAL,
	LXC_MOUNT_RELATIVE,
	LXC_MOUNT_IDMAP,
	LXC_MOUNT_MAX,
} lxc_mount_options_t;
__hidden extern const char *lxc_mount_options_info[LXC_MOUNT_MAX];
/*
 * LXC specific mount options.
 *
 * The flags are single-bit fields. They are declared unsigned because a
 * signed one-bit bit-field can only represent 0 and -1, so storing 1 in it
 * would not read back as 1.
 */
struct lxc_mount_options {
	unsigned int create_dir : 1;	/* "create=dir" was given */
	unsigned int create_file : 1;	/* "create=file" was given */
	unsigned int optional : 1;	/* "optional" was given */
	unsigned int relative : 1;	/* "relative" was given */
	char userns_path[PATH_MAX];	/* path taken from "idmap=<path>" */
	int userns_fd;			/* fd for the user namespace, negative if unset */
};
/* Defines a structure to store the rootfs location, the
* optionals pivot_root, rootfs mount paths
* @path : the rootfs source (directory or device)
......@@ -211,6 +231,7 @@ struct lxc_rootfs {
unsigned long mountflags;
char *data;
bool managed;
struct lxc_mount_options mnt_opts;
};
/*
......@@ -509,6 +530,7 @@ __hidden extern int userns_exec_full(struct lxc_conf *conf, int (*fn)(void *), v
const char *fn_name);
__hidden extern int parse_mntopts(const char *mntopts, unsigned long *mntflags, char **mntdata);
__hidden extern int parse_propagationopts(const char *mntopts, unsigned long *pflags);
__hidden extern int parse_lxc_mntopts(struct lxc_mount_options *opts, char *mnt_opts);
__hidden extern void tmp_proc_unmount(struct lxc_conf *lxc_conf);
__hidden extern void suggest_default_idmap(void);
__hidden extern FILE *make_anonymous_mount_file(struct lxc_list *mount, bool include_nesting_helpers);
......@@ -554,12 +576,18 @@ static inline const char *get_rootfs_mnt(const struct lxc_rootfs *rootfs)
return !is_empty_string(rootfs->path) ? rootfs->mount : s;
}
/*
 * Tell whether @rootfs has a user namespace fd stored in its mount options,
 * i.e. whether mnt_opts.userns_fd is a non-negative value.
 */
static inline bool idmapped_rootfs_mnt(const struct lxc_rootfs *rootfs)
{
	const int userns_fd = rootfs->mnt_opts.userns_fd;

	return !(userns_fd < 0);
}
static inline void put_lxc_rootfs(struct lxc_rootfs *rootfs, bool unpin)
{
if (rootfs) {
close_prot_errno_disarm(rootfs->dfd_host);
close_prot_errno_disarm(rootfs->dfd_mnt);
close_prot_errno_disarm(rootfs->dfd_dev);
close_prot_errno_disarm(rootfs->mnt_opts.userns_fd);
if (unpin)
close_prot_errno_disarm(rootfs->fd_path_pin);
}
......
......@@ -2790,7 +2790,7 @@ static int set_config_rootfs_mount(const char *key, const char *value,
static int set_config_rootfs_options(const char *key, const char *value,
struct lxc_conf *lxc_conf, void *data)
{
__do_free char *mdata = NULL, *opts = NULL;
__do_free char *dup = NULL, *mdata = NULL, *opts = NULL;
unsigned long mflags = 0, pflags = 0;
struct lxc_rootfs *rootfs = &lxc_conf->rootfs;
int ret;
......@@ -2799,18 +2799,30 @@ static int set_config_rootfs_options(const char *key, const char *value,
if (lxc_config_value_empty(value))
return 0;
ret = parse_mntopts(value, &mflags, &mdata);
dup = strdup(value);
if (!dup)
return -ENOMEM;
ret = parse_lxc_mntopts(&rootfs->mnt_opts, dup);
if (ret < 0)
return ret;
ret = parse_mntopts(dup, &mflags, &mdata);
if (ret < 0)
return ret_errno(EINVAL);
ret = parse_propagationopts(value, &pflags);
ret = parse_propagationopts(dup, &pflags);
if (ret < 0)
return ret_errno(EINVAL);
ret = set_config_string_item(&opts, value);
ret = set_config_string_item(&opts, dup);
if (ret < 0)
return ret_errno(ENOMEM);
if (rootfs->mnt_opts.create_dir || rootfs->mnt_opts.create_file ||
rootfs->mnt_opts.optional || rootfs->mnt_opts.relative)
return syserror_set(-EINVAL, "Invalid LXC specifc mount option for rootfs mount");
rootfs->mountflags = mflags | pflags;
rootfs->options = move_ptr(opts);
rootfs->data = move_ptr(mdata);
......
......@@ -1165,15 +1165,15 @@ static int apparmor_process_label_fd_get(struct lsm_ops *ops, pid_t pid, bool on
static int apparmor_process_label_set_at(struct lsm_ops *ops, int label_fd, const char *label, bool on_exec)
{
__do_free char *command = NULL;
int ret = -1;
size_t len;
__do_free char *command = NULL;
if (on_exec)
log_trace(0, "Changing AppArmor profile on exec not supported");
TRACE("Changing AppArmor profile on exec not supported");
len = strlen(label) + strlen("changeprofile ") + 1;
command = malloc(len);
command = zalloc(len);
if (!command)
return ret_errno(ENOMEM);
......
......@@ -236,16 +236,76 @@ int fs_attach(int fd_fs,
return 0;
}
int fd_bind_mount(int dfd_from, const char *path_from,
__u64 o_flags_from, __u64 resolve_flags_from,
int dfd_to, const char *path_to,
__u64 o_flags_to, __u64 resolve_flags_to,
unsigned int attr_flags, bool recursive)
int create_detached_idmapped_mount(const char *path, int userns_fd, bool recursive)
{
__do_close int fd_tree_from = -EBADF;
unsigned int open_tree_flags = OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC;
struct lxc_mount_attr attr = {
.attr_set = MOUNT_ATTR_IDMAP,
.userns_fd = userns_fd,
};
int ret;
TRACE("Idmapped mount \"%s\" requested with user namespace fd %d", path, userns_fd);
if (recursive)
open_tree_flags |= AT_RECURSIVE;
fd_tree_from = open_tree(-EBADF, path, open_tree_flags);
if (fd_tree_from < 0)
return syserror("Failed to create detached mount");
ret = mount_setattr(fd_tree_from, "",
AT_EMPTY_PATH | (recursive ? AT_RECURSIVE : 0),
&attr, sizeof(attr));
if (ret < 0)
return syserror("Failed to change mount attributes");
return move_fd(fd_tree_from);
}
/*
 * Attach the detached mount @dfd_from to the filesystem.
 *
 * When @path_to is a non-empty string the target is opened with openat2()
 * relative to @dfd_to using @o_flags_to and @resolve_flags_to; otherwise
 * @dfd_to itself is the target.
 *
 * Returns 0 on success, -errno if the target cannot be opened and the value
 * of syserror() if move_mount() fails.
 */
int move_detached_mount(int dfd_from, int dfd_to, const char *path_to,
			__u64 o_flags_to, __u64 resolve_flags_to)
{
	__do_close int fd_opened = -EBADF;
	int fd_target = dfd_to;

	if (!is_empty_string(path_to)) {
		struct lxc_open_how how = {
			.flags		= o_flags_to,
			.resolve	= resolve_flags_to,
		};

		fd_opened = openat2(dfd_to, path_to, &how, sizeof(how));
		if (fd_opened < 0)
			return -errno;

		fd_target = fd_opened;
	}

	if (move_mount(dfd_from, "", fd_target, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH))
		return syserror("Failed to attach detached mount %d to filesystem at %d", dfd_from, fd_target);

	TRACE("Attach detached mount %d to filesystem at %d", dfd_from, fd_target);
	return 0;
}
static int __fd_bind_mount(int dfd_from, const char *path_from,
__u64 o_flags_from, __u64 resolve_flags_from,
int dfd_to, const char *path_to, __u64 o_flags_to,
__u64 resolve_flags_to, unsigned int attr_flags,
int userns_fd, bool recursive)
{
__do_close int __fd_from = -EBADF, __fd_to = -EBADF;
struct lxc_mount_attr attr = {
.attr_set = attr_flags,
};
__do_close int __fd_from = -EBADF;
__do_close int fd_tree_from = -EBADF;
unsigned int open_tree_flags = AT_EMPTY_PATH | OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC;
int fd_from, fd_to, ret;
int fd_from, ret;
if (!is_empty_string(path_from)) {
struct lxc_open_how how = {
......@@ -266,28 +326,46 @@ int fd_bind_mount(int dfd_from, const char *path_from,
fd_tree_from = open_tree(fd_from, "", open_tree_flags);
if (fd_tree_from < 0)
return log_error_errno(-errno, errno, "Failed to create detached mount");
return syserror("Failed to create detached mount");
if (!is_empty_string(path_to)) {
struct lxc_open_how how = {
.flags = o_flags_to,
.resolve = resolve_flags_to,
};
if (userns_fd >= 0) {
attr.attr_set |= MOUNT_ATTR_IDMAP;
attr.userns_fd = userns_fd;
TRACE("Idmapped mount requested with user namespace fd %d", userns_fd);
}
__fd_to = openat2(dfd_to, path_to, &how, sizeof(how));
if (__fd_to < 0)
return -errno;
fd_to = __fd_to;
} else {
fd_to = dfd_to;
if (attr.attr_set) {
ret = mount_setattr(fd_tree_from, "",
AT_EMPTY_PATH | (recursive ? AT_RECURSIVE : 0),
&attr, sizeof(attr));
if (ret < 0)
return syserror("Failed to change mount attributes");
}
ret = move_mount(fd_tree_from, "", fd_to, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH);
if (ret)
return log_error_errno(-errno, errno, "Failed to attach detached mount %d to filesystem at %d", fd_tree_from, fd_to);
return move_detached_mount(fd_tree_from, dfd_to, path_to, o_flags_to,
resolve_flags_to);
}
TRACE("Attach detached mount %d to filesystem at %d", fd_tree_from, fd_to);
return 0;
/*
 * Bind mount with an optional idmapping: forwards all arguments, including
 * @userns_fd, unchanged to __fd_bind_mount() and returns its result.
 */
int fd_mount_idmapped(int dfd_from, const char *path_from,
		      __u64 o_flags_from, __u64 resolve_flags_from,
		      int dfd_to, const char *path_to,
		      __u64 o_flags_to, __u64 resolve_flags_to,
		      unsigned int attr_flags, int userns_fd, bool recursive)
{
	int ret;

	ret = __fd_bind_mount(dfd_from, path_from, o_flags_from,
			      resolve_flags_from, dfd_to, path_to, o_flags_to,
			      resolve_flags_to, attr_flags, userns_fd,
			      recursive);
	return ret;
}
/*
 * Bind mount without idmapping: calls __fd_bind_mount() with -EBADF in place
 * of a user namespace fd and returns its result.
 */
int fd_bind_mount(int dfd_from, const char *path_from,
		  __u64 o_flags_from, __u64 resolve_flags_from,
		  int dfd_to, const char *path_to,
		  __u64 o_flags_to, __u64 resolve_flags_to,
		  unsigned int attr_flags, bool recursive)
{
	const int no_userns_fd = -EBADF;

	return __fd_bind_mount(dfd_from, path_from, o_flags_from,
			       resolve_flags_from, dfd_to, path_to, o_flags_to,
			       resolve_flags_to, attr_flags, no_userns_fd,
			       recursive);
}
int calc_remount_flags_new(int dfd_from, const char *path_from,
......@@ -488,3 +566,28 @@ bool can_use_mount_api(void)
return supported == 1;
}
/*
 * Report whether bind mounts can be done through the new mount api.
 *
 * The answer is computed once and cached in a function-local static. It is
 * false when can_use_mount_api() is false, or when the mount_setattr() probe
 * either succeeds or fails with ENOSYS.
 */
bool can_use_bind_mounts(void)
{
	static int supported = -1;

	/* Already probed: reuse the cached answer. */
	if (supported != -1)
		return supported == 1;

	if (!can_use_mount_api()) {
		supported = 0;
		return false;
	}

	/* Probe with deliberately invalid arguments. */
	if (mount_setattr(-EBADF, NULL, 0, NULL, 0) == 0 || errno == ENOSYS) {
		supported = 0;
		return false;
	}

	supported = 1;
	TRACE("Kernel supports bind mounts in the new mount api");
	return true;
}
......@@ -152,6 +152,10 @@
#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */
#endif
/* Fallback definition for headers that do not provide MOUNT_ATTR_IDMAP. */
#ifndef MOUNT_ATTR_IDMAP
#define MOUNT_ATTR_IDMAP 0x00100000
#endif
__hidden extern int mnt_attributes_new(unsigned int old_flags, unsigned int *new_flags);
__hidden extern int mnt_attributes_old(unsigned int new_flags, unsigned int *old_flags);
......@@ -185,6 +189,18 @@ __hidden extern int fd_bind_mount(int dfd_from, const char *path_from,
__u64 o_flags_to, __u64 resolve_flags_to,
unsigned int attr_flags, bool recursive);
__hidden extern int fd_mount_idmapped(int dfd_from, const char *path_from,
__u64 o_flags_from, __u64 resolve_flags_from,
int dfd_to, const char *path_to,
__u64 o_flags_to, __u64 resolve_flags_to,
unsigned int attr_flags, int userns_fd,
bool recursive);
__hidden extern int create_detached_idmapped_mount(const char *path,
int userns_fd, bool recursive);
__hidden extern int move_detached_mount(int dfd_from, int dfd_to,
const char *path_to, __u64 o_flags_to,
__u64 resolve_flags_to);
__hidden extern int calc_remount_flags_new(int dfd_from, const char *path_from,
__u64 o_flags_from,
__u64 resolve_flags_from,
......@@ -202,5 +218,6 @@ __hidden extern unsigned long add_required_remount_flags(const char *s,
unsigned long flags);
__hidden extern bool can_use_mount_api(void);
__hidden extern bool can_use_bind_mounts(void);
#endif /* __LXC_MOUNT_UTILS_H */
......@@ -1645,16 +1645,6 @@ static int lxc_spawn(struct lxc_handler *handler)
goto out_delete_net;
}
/* If the rootfs is not a blockdev, prevent the container from marking
* it readonly.
* If the container is unprivileged then skip rootfs pinning.
*/
ret = lxc_rootfs_prepare(&conf->rootfs, wants_to_map_ids);
if (ret) {
ERROR("Failed to handle rootfs pinning for container \"%s\"", handler->name);
goto out_delete_net;
}
/* Create a process in a new set of namespaces. */
if (share_ns) {
pid_t attacher_pid;
......@@ -2040,9 +2030,34 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops,
goto out_abort;
}
/* If the rootfs is not a blockdev, prevent the container from marking
* it readonly.
* If the container is unprivileged then skip rootfs pinning.
*/
ret = lxc_rootfs_prepare(&conf->rootfs, !lxc_list_empty(&conf->id_map));
if (ret) {
ERROR("Failed to handle rootfs pinning for container \"%s\"", handler->name);
ret = -1;
goto out_abort;
}
if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) {
/* If the backing store is a device, mount it here and now. */
if (rootfs_is_blockdev(conf)) {
/*
* This handles two cases: mounting real block devices and
* creating idmapped mounts. The block device case should be
* obvious, i.e. no real filesystem can currently be mounted
* from inside a user namespace.
*
* Idmapped mounts can currently only be created if the caller
* is privileged with respect to the user namespace in which the
* underlying block device has been mounted. This basically
* (with few exceptions) means we need to be CAP_SYS_ADMIN in
* the initial user namespace since almost no interesting
* filesystems can be mounted inside of user namespaces. This
* is why we need to do the rootfs setup here. In the future
* this may change.
*/
if (idmapped_rootfs_mnt(&conf->rootfs) || rootfs_is_blockdev(conf)) {
ret = unshare(CLONE_NEWNS);
if (ret < 0) {
ERROR("Failed to unshare CLONE_NEWNS");
......
......@@ -148,23 +148,46 @@ int dir_mount(struct lxc_storage *bdev)
src = lxc_storage_get_path(bdev->src, bdev->type);
ret = mount(src, bdev->dest, "bind", MS_BIND | MS_REC | mntflags | pflags, mntdata);
if (ret < 0)
return log_error_errno(-errno, errno, "Failed to mount \"%s\" on \"%s\"", src, bdev->dest);
if (ret == 0 && (mntflags & MS_RDONLY)) {
mflags = add_required_remount_flags(src, bdev->dest, MS_BIND | MS_REC | mntflags | pflags | MS_REMOUNT);
ret = mount(src, bdev->dest, "bind", mflags, mntdata);
if (can_use_bind_mounts()) {
__do_close int fd_source = -EBADF, fd_target = -EBADF;
fd_source = open_at(-EBADF, src, PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_ABSOLUTE, 0);
if (fd_source < 0)
return syserror("Failed to open \"%s\"", src);
fd_target = open_at(-EBADF, bdev->dest, PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_ABSOLUTE, 0);
if (fd_target < 0)
return syserror("Failed to open \"%s\"", bdev->dest);
ret = fd_mount_idmapped(fd_source, "", PROTECT_OPATH_DIRECTORY,
PROTECT_LOOKUP_BENEATH, fd_target, "",
PROTECT_OPATH_DIRECTORY,
PROTECT_LOOKUP_BENEATH, 0,
bdev->rootfs->mnt_opts.userns_fd, true);
if (ret < 0)
return syserror("Failed to mount \"%s\" onto \"%s\"", src, bdev->dest);
} else {
ret = mount(src, bdev->dest, "bind", MS_BIND | MS_REC | mntflags | pflags, mntdata);
if (ret < 0)
return log_error_errno(-errno, errno, "Failed to remount \"%s\" on \"%s\" read-only with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"",
src ? src : "(none)", bdev->dest ? bdev->dest : "(none)", mntdata, mflags, pflags);
else
DEBUG("Remounted \"%s\" on \"%s\" read-only with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"",
src ? src : "(none)", bdev->dest ? bdev->dest : "(none)", mntdata, mflags, pflags);
return log_error_errno(-errno, errno, "Failed to mount \"%s\" on \"%s\"", src, bdev->dest);
if (ret == 0 && (mntflags & MS_RDONLY)) {
mflags = add_required_remount_flags(src, bdev->dest, MS_BIND | MS_REC | mntflags | pflags | MS_REMOUNT);
ret = mount(src, bdev->dest, "bind", mflags, mntdata);
if (ret < 0)
return log_error_errno(-errno, errno, "Failed to remount \"%s\" on \"%s\" read-only with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"",
src ? src : "(none)", bdev->dest ? bdev->dest : "(none)", mntdata, mflags, pflags);
else
DEBUG("Remounted \"%s\" on \"%s\" read-only with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"",
src ? src : "(none)", bdev->dest ? bdev->dest : "(none)", mntdata, mflags, pflags);
}
TRACE("Mounted \"%s\" on \"%s\" with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"",
src ? src : "(none)", bdev->dest ? bdev->dest : "(none)", mntdata, mflags, pflags);
}
TRACE("Mounted \"%s\" on \"%s\" with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"",
src ? src : "(none)", bdev->dest ? bdev->dest : "(none)", mntdata, mflags, pflags);
TRACE("Mounted \"%s\" onto \"%s\"", src, bdev->dest);
return 0;
}
......
......@@ -598,14 +598,13 @@ struct lxc_storage *storage_init(struct lxc_conf *conf)
if (!q)
return NULL;
bdev = malloc(sizeof(struct lxc_storage));
bdev = zalloc(sizeof(struct lxc_storage));
if (!bdev)
return NULL;
memset(bdev, 0, sizeof(struct lxc_storage));
bdev->ops = q->ops;
bdev->type = q->name;
bdev->ops = q->ops;
bdev->type = q->name;
bdev->rootfs = &conf->rootfs;
if (mntopts)
bdev->mntopts = strdup(mntopts);
......
......@@ -15,6 +15,7 @@
#endif
#include "compiler.h"
#include "conf.h"
#ifndef MS_DIRSYNC
#define MS_DIRSYNC 128
......@@ -87,6 +88,7 @@ struct lxc_storage {
/* index for the connected nbd device. */
int nbd_idx;
int flags;
struct lxc_rootfs *rootfs;
};
/**
......
......@@ -680,4 +680,24 @@
#endif
#endif
/*
 * Fallback syscall number for mount_setattr(), used only when the system
 * headers do not define __NR_mount_setattr. The number is 442 plus an
 * architecture specific offset on MIPS and ia64, and 552 on alpha.
 */
#ifndef __NR_mount_setattr
#if defined __alpha__
#define __NR_mount_setattr 552
#elif defined _MIPS_SIM
#if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
#define __NR_mount_setattr (442 + 4000)
#endif
#if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
#define __NR_mount_setattr (442 + 6000)
#endif
#if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
#define __NR_mount_setattr (442 + 5000)
#endif
#elif defined __ia64__
#define __NR_mount_setattr (442 + 1024)
#else
#define __NR_mount_setattr 442
#endif
#endif
#endif /* __LXC_SYSCALL_NUMBERS_H */
......@@ -209,6 +209,24 @@ extern int fsmount(int fs_fd, unsigned int flags, unsigned int attr_flags);
#endif
/*
* mount_setattr()
*/
/*
 * Argument structure passed by pointer to mount_setattr().
 * NOTE(review): presumably mirrors the kernel's struct mount_attr, so the
 * field order and sizes must not change - confirm against <linux/mount.h>.
 */
struct lxc_mount_attr {
__u64 attr_set; /* MOUNT_ATTR_* flags to set */
__u64 attr_clr; /* presumably MOUNT_ATTR_* flags to clear - confirm */
__u64 propagation; /* presumably the mount propagation type - confirm */
__u64 userns_fd; /* user namespace fd, used together with MOUNT_ATTR_IDMAP */
};
#ifndef HAVE_MOUNT_SETATTR
/*
 * Fallback for C libraries without a mount_setattr() wrapper: issues the
 * raw syscall with the given arguments and returns its result.
 */
static inline int mount_setattr(int dfd, const char *path, unsigned int flags,
				struct lxc_mount_attr *attr, size_t size)
{
	long ret;

	ret = syscall(__NR_mount_setattr, dfd, path, flags, attr, size);
	return ret;
}
#endif
/*
* Arguments for how openat2(2) should open the target path. If only @flags and
* @mode are non-zero, then openat2(2) operates very similarly to openat(2).
*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment