Unverified Commit 642429e5 by Stéphane Graber Committed by GitHub

Merge pull request #3662 from brauner/2021-02-08/fixes

conf: expand fd-only setup codepaths
parents 01149adf 58b38111
......@@ -644,7 +644,7 @@ AC_CHECK_HEADER([ifaddrs.h],
AC_HEADER_MAJOR
# Check for some syscalls functions
AC_CHECK_FUNCS([setns pivot_root sethostname unshare rand_r confstr faccessat gettid memfd_create move_mount open_tree execveat clone3 fsopen fspick fsconfig fsmount, openat2, close_range])
# AC_CHECK_FUNCS takes a whitespace-separated list; a comma would become part of the function name.
AC_CHECK_FUNCS([setns pivot_root sethostname unshare rand_r confstr faccessat gettid memfd_create move_mount open_tree execveat clone3 fsopen fspick fsconfig fsmount openat2 close_range statvfs])
AC_CHECK_TYPES([struct open_how], [], [], [[#include <linux/openat2.h>]])
AC_CHECK_TYPES([struct clone_args], [], [], [[#include <linux/sched.h>]])
AC_CHECK_MEMBERS([struct clone_args.set_tid],[],[],[[#include <linux/sched.h>]])
......@@ -684,7 +684,6 @@ fi
# Check for some functions
AC_CHECK_LIB(pthread, main)
AC_CHECK_FUNCS(statvfs)
AC_CHECK_LIB(util, openpty)
AC_CHECK_FUNCS([hasmntopt setmntent endmntent utmpxname])
AC_CHECK_FUNCS([getgrgid_r],
......
......@@ -477,105 +477,74 @@ int run_script(const char *name, const char *section, const char *script, ...)
return run_buffer(buffer);
}
/* pin_rootfs
/* lxc_rootfs_prepare
* if rootfs is a directory, then open ${rootfs}/.lxc-keep for writing for
* the duration of the container run, to prevent the container from marking
* the underlying fs readonly on shutdown. unlink the file immediately so
* no name pollution happens.
* don't unlink on NFS to avoid random named stale handles.
* return a negative errno value on error.
* return 0 if nothing needed to be pinned.
* return 0 with the open pin fd stored in rootfs->fd_path_pin if we pinned it.
*/
int pin_rootfs(const char *rootfs)
int lxc_rootfs_prepare(struct lxc_rootfs *rootfs, bool userns)
{
__do_free char *absrootfs = NULL;
int fd, ret;
char absrootfspin[PATH_MAX];
struct stat s;
struct statfs sfs;
if (rootfs == NULL || strlen(rootfs) == 0)
return -2;
absrootfs = realpath(rootfs, NULL);
if (!absrootfs)
return -2;
__do_close int dfd_path = -EBADF, fd_pin = -EBADF;
int ret;
struct stat st;
struct statfs stfs;
ret = stat(absrootfs, &s);
if (ret < 0)
return -1;
if (rootfs->path) {
if (rootfs->bdev_type &&
(!strcmp(rootfs->bdev_type, "overlay") ||
!strcmp(rootfs->bdev_type, "overlayfs")))
return log_trace_errno(0, EINVAL, "Not pinning on stacking filesystem");
if (!S_ISDIR(s.st_mode))
return -2;
dfd_path = open_at(-EBADF, rootfs->path, PROTECT_OPATH_FILE, 0, 0);
} else {
dfd_path = open_at(-EBADF, "/", PROTECT_OPATH_FILE, PROTECT_LOOKUP_ABSOLUTE, 0);
}
if (dfd_path < 0)
return log_error_errno(-errno, errno, "Failed to open \"%s\"", rootfs->path);
ret = snprintf(absrootfspin, sizeof(absrootfspin), "%s/.lxc-keep", absrootfs);
if (ret < 0 || (size_t)ret >= sizeof(absrootfspin))
return -1;
if (!rootfs->path)
return log_trace(0, "Not pinning because container does not have a rootfs");
fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR | S_IRUSR | O_CLOEXEC);
if (fd < 0)
return fd;
if (userns)
return log_trace(0, "Not pinning because container runs in user namespace");
ret = fstatfs (fd, &sfs);
ret = fstat(dfd_path, &st);
if (ret < 0)
return fd;
return log_trace_errno(-errno, errno, "Failed to retrieve file status");
if (sfs.f_type == NFS_SUPER_MAGIC)
return log_debug(fd, "Rootfs on NFS, not unlinking pin file \"%s\"", absrootfspin);
if (!S_ISDIR(st.st_mode))
return log_trace_errno(0, ENOTDIR, "Not pinning because file descriptor is not a directory");
(void)unlink(absrootfspin);
fd_pin = open_at(dfd_path, ".lxc_keep",
PROTECT_OPEN | O_CREAT,
PROTECT_LOOKUP_BENEATH,
S_IWUSR | S_IRUSR);
if (fd_pin < 0)
return log_error_errno(-errno, errno, "Failed to pin rootfs");
return fd;
}
TRACE("Pinned rootfs %d(.lxc_keep)", fd_pin);
/* If we are asking to remount something, make sure that any NOEXEC etc are
* honored.
*/
unsigned long add_required_remount_flags(const char *s, const char *d,
unsigned long flags)
{
#ifdef HAVE_STATVFS
int ret;
struct statvfs sb;
unsigned long required_flags = 0;
ret = fstatfs(fd_pin, &stfs);
if (ret < 0) {
SYSWARN("Failed to retrieve filesystem status");
goto out;
}
if (!s)
s = d;
if (stfs.f_type == NFS_SUPER_MAGIC) {
DEBUG("Not unlinking pinned file on NFS");
goto out;
}
if (!s)
return flags;
if (unlinkat(dfd_path, ".lxc_keep", 0))
SYSTRACE("Failed to unlink rootfs pinning file %d(.lxc_keep)", dfd_path);
else
TRACE("Unlinked pinned file %d(.lxc_keep)", dfd_path);
ret = statvfs(s, &sb);
if (ret < 0)
return flags;
if (flags & MS_REMOUNT) {
if (sb.f_flag & MS_NOSUID)
required_flags |= MS_NOSUID;
if (sb.f_flag & MS_NODEV)
required_flags |= MS_NODEV;
if (sb.f_flag & MS_RDONLY)
required_flags |= MS_RDONLY;
if (sb.f_flag & MS_NOEXEC)
required_flags |= MS_NOEXEC;
}
if (sb.f_flag & MS_NOATIME)
required_flags |= MS_NOATIME;
if (sb.f_flag & MS_NODIRATIME)
required_flags |= MS_NODIRATIME;
if (sb.f_flag & MS_LAZYTIME)
required_flags |= MS_LAZYTIME;
if (sb.f_flag & MS_RELATIME)
required_flags |= MS_RELATIME;
if (sb.f_flag & MS_STRICTATIME)
required_flags |= MS_STRICTATIME;
return flags | required_flags;
#else
return flags;
#endif
out:
rootfs->fd_path_pin = move_fd(fd_pin);
return 0;
}
static int add_shmount_to_list(struct lxc_conf *conf)
......@@ -837,70 +806,101 @@ static bool append_ttyname(char **pp, char *name)
static int lxc_setup_ttys(struct lxc_conf *conf)
{
int i, ret;
int ret;
struct lxc_rootfs *rootfs = &conf->rootfs;
const struct lxc_tty_info *ttys = &conf->ttys;
char *ttydir = ttys->dir;
char path[PATH_MAX], lxcpath[PATH_MAX];
if (!conf->rootfs.path)
return 0;
for (i = 0; i < ttys->max; i++) {
for (int i = 0; i < ttys->max; i++) {
__do_close int fd_to = -EBADF;
struct lxc_terminal_info *tty = &ttys->tty[i];
ret = snprintf(path, sizeof(path), "/dev/tty%d", i + 1);
if (ret < 0 || (size_t)ret >= sizeof(path))
return -1;
if (ttydir) {
/* create dev/lxc/tty%d" */
ret = snprintf(lxcpath, sizeof(lxcpath),
"/dev/%s/tty%d", ttydir, i + 1);
if (ret < 0 || (size_t)ret >= sizeof(lxcpath))
return -1;
ret = mknod(lxcpath, S_IFREG | 0000, 0);
if (ret < 0 && errno != EEXIST) {
SYSERROR("Failed to create \"%s\"", lxcpath);
return -1;
}
ret = unlink(path);
if (ret < 0 && errno != ENOENT) {
SYSERROR("Failed to unlink \"%s\"", path);
return -1;
}
char *tty_name, *tty_path;
ret = mount(tty->name, lxcpath, "none", MS_BIND, 0);
if (ret < 0) {
SYSWARN("Failed to bind mount \"%s\" onto \"%s\"", tty->name, lxcpath);
continue;
ret = snprintf(rootfs->buf, sizeof(rootfs->buf),
"/dev/%s/tty%d", ttydir, i + 1);
if (ret < 0 || (size_t)ret >= sizeof(rootfs->buf))
return ret_errno(-EIO);
tty_path = &rootfs->buf[STRLITERALLEN("/dev/")];
tty_name = tty_path + strlen(ttydir) + 1;
/* create bind-mount target */
fd_to = open_at(rootfs->dfd_dev, tty_path,
PROTECT_OPEN_W | O_CREAT,
PROTECT_LOOKUP_BENEATH, 0);
if (fd_to < 0)
return log_error_errno(-errno, errno,
"Failed to create tty mount target %d(%s)",
rootfs->dfd_dev, tty_path);
ret = unlinkat(rootfs->dfd_dev, tty_name, 0);
if (ret < 0 && errno != ENOENT)
return log_error_errno(-errno, errno,
"Failed to unlink %d(%s)",
rootfs->dfd_dev, tty_name);
if (new_mount_api()) {
ret = fd_bind_mount(tty->pty, "",
PROTECT_OPATH_FILE,
PROTECT_LOOKUP_BENEATH_XDEV,
fd_to, "",
PROTECT_OPATH_FILE,
PROTECT_LOOKUP_BENEATH_XDEV, 0,
false);
} else {
ret = mount(tty->name, rootfs->buf, "none", MS_BIND, 0);
}
DEBUG("Bind mounted \"%s\" onto \"%s\"", tty->name, lxcpath);
ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d",
ttydir, i + 1);
if (ret < 0 || (size_t)ret >= sizeof(lxcpath))
return -1;
if (ret < 0)
return log_error_errno(-errno, errno,
"Failed to bind mount \"%s\" onto \"%s\"",
tty->name, rootfs->buf);
DEBUG("Bind mounted \"%s\" onto \"%s\"", tty->name, rootfs->buf);
ret = symlink(lxcpath, path);
ret = symlinkat(tty_path, rootfs->dfd_dev, tty_name);
if (ret < 0)
return log_error_errno(-1, errno, "Failed to create symlink \"%s\" -> \"%s\"", path, lxcpath);
return log_error_errno(-errno, errno,
"Failed to create symlink \"%d(%s)\" -> \"%d(%s)\"",
rootfs->dfd_dev, tty_name,
rootfs->dfd_dev, tty_path);
} else {
/* If we populated /dev, then we need to create
* /dev/ttyN
*/
ret = mknod(path, S_IFREG | 0000, 0);
if (ret < 0) /* this isn't fatal, continue */
SYSERROR("Failed to create \"%s\"", path);
ret = mount(tty->name, path, "none", MS_BIND, 0);
if (ret < 0) {
SYSERROR("Failed to mount '%s'->'%s'", tty->name, path);
continue;
ret = snprintf(rootfs->buf, sizeof(rootfs->buf), "tty%d", i + 1);
if (ret < 0 || (size_t)ret >= sizeof(rootfs->buf))
return ret_errno(-EIO);
/* If we populated /dev, then we need to create /dev/tty<idx>. */
fd_to = open_at(rootfs->dfd_dev, rootfs->buf,
PROTECT_OPEN_W | O_CREAT,
PROTECT_LOOKUP_BENEATH, 0);
if (fd_to < 0)
return log_error_errno(-errno, errno,
"Failed to create tty mount target %d(%s)",
rootfs->dfd_dev, rootfs->buf);
if (new_mount_api()) {
ret = fd_bind_mount(tty->pty, "",
PROTECT_OPATH_FILE,
PROTECT_LOOKUP_BENEATH_XDEV,
fd_to, "",
PROTECT_OPATH_FILE,
PROTECT_LOOKUP_BENEATH, 0,
false);
} else {
ret = snprintf(rootfs->buf, sizeof(rootfs->buf), "/dev/tty%d", i + 1);
if (ret < 0 || (size_t)ret >= sizeof(rootfs->buf))
return ret_errno(-EIO);
ret = mount(tty->name, rootfs->buf, "none", MS_BIND, 0);
}
DEBUG("Bind mounted \"%s\" onto \"%s\"", tty->name, path);
if (ret < 0)
return log_error_errno(-errno, errno,
"Failed to bind mount \"%s\" onto \"%s\"",
tty->name, rootfs->buf);
DEBUG("Bind mounted \"%s\" onto \"%s\"", tty->name, rootfs->buf);
}
if (!append_ttyname(&conf->ttys.tty_names, tty->name))
......@@ -911,13 +911,11 @@ static int lxc_setup_ttys(struct lxc_conf *conf)
return 0;
}
define_cleanup_function(struct lxc_tty_info *, lxc_delete_tty);
static int lxc_allocate_ttys(struct lxc_conf *conf)
{
struct lxc_terminal_info *tty_new = NULL;
int ret;
call_cleaner(lxc_delete_tty) struct lxc_tty_info *ttys = &conf->ttys;
struct lxc_tty_info *ttys = &conf->ttys;
/* no tty in the configuration */
if (ttys->max == 0)
......@@ -926,27 +924,25 @@ static int lxc_allocate_ttys(struct lxc_conf *conf)
tty_new = malloc(sizeof(struct lxc_terminal_info) * ttys->max);
if (!tty_new)
return -ENOMEM;
ttys->tty = tty_new;
for (size_t i = 0; i < ttys->max; i++) {
struct lxc_terminal_info *tty = &ttys->tty[i];
for (size_t i = 0; i < conf->ttys.max; i++) {
struct lxc_terminal_info *tty = &tty_new[i];
tty->ptx = -EBADF;
tty->pty = -EBADF;
ret = openpty(&tty->ptx, &tty->pty, NULL, NULL, NULL);
if (ret < 0) {
ttys->max = i;
conf->ttys.max = i;
return log_error_errno(-ENOTTY, ENOTTY, "Failed to create tty %zu", i);
}
ret = ttyname_r(tty->pty, tty->name, sizeof(tty->name));
if (ret < 0) {
ttys->max = i;
conf->ttys.max = i;
return log_error_errno(-ENOTTY, ENOTTY, "Failed to retrieve name of tty %zu pty", i);
}
DEBUG("Created tty \"%s\" with ptx fd %d and pty fd %d",
tty->name, tty->ptx, tty->pty);
DEBUG("Created tty with ptx fd %d and pty fd %d", tty->ptx, tty->pty);
/* Prevent leaking the file descriptors to the container */
ret = fd_cloexec(tty->ptx, true);
......@@ -963,7 +959,7 @@ static int lxc_allocate_ttys(struct lxc_conf *conf)
}
INFO("Finished creating %zu tty devices", ttys->max);
move_ptr(ttys);
conf->ttys.tty = move_ptr(tty_new);
return 0;
}
......@@ -1155,7 +1151,7 @@ enum {
LXC_DEVNODE_OPEN,
};
static int lxc_fill_autodev(const struct lxc_rootfs *rootfs)
static int lxc_fill_autodev(struct lxc_rootfs *rootfs)
{
int i, ret;
mode_t cmask;
......@@ -1168,7 +1164,6 @@ static int lxc_fill_autodev(const struct lxc_rootfs *rootfs)
cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
for (i = 0; i < sizeof(lxc_devices) / sizeof(lxc_devices[0]); i++) {
char device_path[PATH_MAX];
const struct lxc_device_node *device = &lxc_devices[i];
if (use_mknod >= LXC_DEVNODE_MKNOD) {
......@@ -1216,12 +1211,12 @@ static int lxc_fill_autodev(const struct lxc_rootfs *rootfs)
}
/* Fallback to bind-mounting the device from the host. */
ret = snprintf(device_path, sizeof(device_path), "dev/%s", device->name);
if (ret < 0 || (size_t)ret >= sizeof(device_path))
ret = snprintf(rootfs->buf, sizeof(rootfs->buf), "dev/%s", device->name);
if (ret < 0 || (size_t)ret >= sizeof(rootfs->buf))
return ret_errno(EIO);
if (new_mount_api()) {
ret = fd_bind_mount(rootfs->dfd_host, device_path,
ret = fd_bind_mount(rootfs->dfd_host, rootfs->buf,
PROTECT_OPATH_FILE,
PROTECT_LOOKUP_BENEATH_XDEV,
rootfs->dfd_dev, device->name,
......@@ -1230,22 +1225,22 @@ static int lxc_fill_autodev(const struct lxc_rootfs *rootfs)
} else {
char path[PATH_MAX];
ret = snprintf(device_path, sizeof(device_path), "/dev/%s", device->name);
if (ret < 0 || (size_t)ret >= sizeof(device_path))
ret = snprintf(rootfs->buf, sizeof(rootfs->buf), "/dev/%s", device->name);
if (ret < 0 || (size_t)ret >= sizeof(rootfs->buf))
return ret_errno(EIO);
ret = snprintf(path, sizeof(path), "%s/dev/%s", get_rootfs_mnt(rootfs), device->name);
if (ret < 0 || ret >= sizeof(path))
return log_error(-1, "Failed to create device path for %s", device->name);
ret = safe_mount(device_path, path, 0, MS_BIND, NULL, get_rootfs_mnt(rootfs));
ret = safe_mount(rootfs->buf, path, 0, MS_BIND, NULL, get_rootfs_mnt(rootfs));
if (ret < 0)
return log_error_errno(-1, errno, "Failed to bind mount host device node \"%s\" to \"%s\"", device_path, path);
return log_error_errno(-1, errno, "Failed to bind mount host device node \"%s\" to \"%s\"", rootfs->buf, path);
DEBUG("Bind mounted host device node \"%s\" to \"%s\"", device_path, path);
DEBUG("Bind mounted host device node \"%s\" to \"%s\"", rootfs->buf, path);
continue;
}
DEBUG("Bind mounted host device %d(%s) to %d(%s)", rootfs->dfd_host, device_path, rootfs->dfd_dev, device->name);
DEBUG("Bind mounted host device %d(%s) to %d(%s)", rootfs->dfd_host, rootfs->buf, rootfs->dfd_dev, device->name);
}
(void)umask(cmask);
......@@ -1647,12 +1642,11 @@ static inline bool wants_console(const struct lxc_terminal *terminal)
return !terminal->path || strcmp(terminal->path, "none");
}
static int lxc_setup_dev_console(const struct lxc_rootfs *rootfs,
static int lxc_setup_dev_console(struct lxc_rootfs *rootfs,
const struct lxc_terminal *console,
int pty_mnt_fd)
{
int ret;
char path[PATH_MAX];
char *rootfs_path = rootfs->path ? rootfs->mount : "";
if (!wants_console(console))
......@@ -1663,15 +1657,15 @@ static int lxc_setup_dev_console(const struct lxc_rootfs *rootfs,
* /dev/console bind-mounts.
*/
if (exists_file_at(rootfs->dfd_dev, "console")) {
ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs_path);
if (ret < 0 || (size_t)ret >= sizeof(path))
ret = snprintf(rootfs->buf, sizeof(rootfs->buf), "%s/dev/console", rootfs_path);
if (ret < 0 || (size_t)ret >= sizeof(rootfs->buf))
return -1;
ret = lxc_unstack_mountpoint(path, false);
ret = lxc_unstack_mountpoint(rootfs->buf, false);
if (ret < 0)
return log_error_errno(-ret, errno, "Failed to unmount \"%s\"", path);
return log_error_errno(-ret, errno, "Failed to unmount \"%s\"", rootfs->buf);
else
DEBUG("Cleared all (%d) mounts from \"%s\"", ret, path);
DEBUG("Cleared all (%d) mounts from \"%s\"", ret, rootfs->buf);
}
/*
......@@ -1702,17 +1696,17 @@ static int lxc_setup_dev_console(const struct lxc_rootfs *rootfs,
ret = safe_mount_beneath_at(rootfs->dfd_dev, console->name, "console", NULL, MS_BIND, NULL);
if (ret < 0) {
if (errno == ENOSYS) {
ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs_path);
if (ret < 0 || (size_t)ret >= sizeof(path))
ret = snprintf(rootfs->buf, sizeof(rootfs->buf), "%s/dev/console", rootfs_path);
if (ret < 0 || (size_t)ret >= sizeof(rootfs->buf))
return -1;
ret = safe_mount(console->name, path, "none", MS_BIND, NULL, rootfs_path);
ret = safe_mount(console->name, rootfs->buf, "none", MS_BIND, NULL, rootfs_path);
if (ret < 0)
return log_error_errno(-1, errno, "Failed to mount %d(%s) on \"%s\"", pty_mnt_fd, console->name, path);
return log_error_errno(-1, errno, "Failed to mount %d(%s) on \"%s\"", pty_mnt_fd, console->name, rootfs->buf);
}
}
DEBUG("Mounted pty device %d(%s) onto \"%s\"", pty_mnt_fd, console->name, path);
DEBUG("Mounted pty device %d(%s) onto \"%s\"", pty_mnt_fd, console->name, rootfs->buf);
return 0;
}
......@@ -1795,7 +1789,7 @@ finish:
return 0;
}
static int lxc_setup_console(const struct lxc_rootfs *rootfs,
static int lxc_setup_console(struct lxc_rootfs *rootfs,
const struct lxc_terminal *console, char *ttydir,
int pty_mnt_fd)
{
......@@ -2158,33 +2152,32 @@ static inline int mount_entry_on_generic(struct mntent *mntent,
return ret;
}
static inline int mount_entry_on_systemfs(struct mntent *mntent)
static inline int mount_entry_on_systemfs(struct lxc_rootfs *rootfs,
struct mntent *mntent)
{
int ret;
char path[PATH_MAX];
/* For containers created without a rootfs all mounts are treated as
* absolute paths starting at / on the host.
*/
if (mntent->mnt_dir[0] != '/')
ret = snprintf(path, sizeof(path), "/%s", mntent->mnt_dir);
ret = snprintf(rootfs->buf, sizeof(rootfs->buf), "/%s", mntent->mnt_dir);
else
ret = snprintf(path, sizeof(path), "%s", mntent->mnt_dir);
if (ret < 0 || ret >= sizeof(path))
ret = snprintf(rootfs->buf, sizeof(rootfs->buf), "%s", mntent->mnt_dir);
if (ret < 0 || ret >= sizeof(rootfs->buf))
return -1;
return mount_entry_on_generic(mntent, path, NULL, NULL, NULL);
return mount_entry_on_generic(mntent, rootfs->buf, NULL, NULL, NULL);
}
static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
const struct lxc_rootfs *rootfs,
struct lxc_rootfs *rootfs,
const char *lxc_name,
const char *lxc_path)
{
int offset;
char *aux;
const char *lxcpath;
char path[PATH_MAX];
int ret = 0;
lxcpath = lxc_global_config_value("lxc.lxcpath");
......@@ -2194,13 +2187,13 @@ static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
/* If rootfs->path is a blockdev path, allow container fstab to use
* <lxcpath>/<name>/rootfs" as the target prefix.
*/
ret = snprintf(path, PATH_MAX, "%s/%s/rootfs", lxcpath, lxc_name);
if (ret < 0 || ret >= PATH_MAX)
ret = snprintf(rootfs->buf, sizeof(rootfs->buf), "%s/%s/rootfs", lxcpath, lxc_name);
if (ret < 0 || ret >= sizeof(rootfs->buf))
goto skipvarlib;
aux = strstr(mntent->mnt_dir, path);
aux = strstr(mntent->mnt_dir, rootfs->buf);
if (aux) {
offset = strlen(path);
offset = strlen(rootfs->buf);
goto skipabs;
}
......@@ -2211,30 +2204,29 @@ skipvarlib:
offset = strlen(rootfs->path);
skipabs:
ret = snprintf(path, PATH_MAX, "%s/%s", rootfs->mount, aux + offset);
if (ret < 0 || ret >= PATH_MAX)
ret = snprintf(rootfs->buf, sizeof(rootfs->buf), "%s/%s", rootfs->mount, aux + offset);
if (ret < 0 || ret >= sizeof(rootfs->buf))
return -1;
return mount_entry_on_generic(mntent, path, rootfs, lxc_name, lxc_path);
return mount_entry_on_generic(mntent, rootfs->buf, rootfs, lxc_name, lxc_path);
}
static int mount_entry_on_relative_rootfs(struct mntent *mntent,
const struct lxc_rootfs *rootfs,
struct lxc_rootfs *rootfs,
const char *lxc_name,
const char *lxc_path)
{
int ret;
char path[PATH_MAX];
/* relative to root mount point */
ret = snprintf(path, sizeof(path), "%s/%s", rootfs->mount, mntent->mnt_dir);
if (ret < 0 || (size_t)ret >= sizeof(path))
ret = snprintf(rootfs->buf, sizeof(rootfs->buf), "%s/%s", rootfs->mount, mntent->mnt_dir);
if (ret < 0 || (size_t)ret >= sizeof(rootfs->buf))
return -1;
return mount_entry_on_generic(mntent, path, rootfs, lxc_name, lxc_path);
return mount_entry_on_generic(mntent, rootfs->buf, rootfs, lxc_name, lxc_path);
}
static int mount_file_entries(const struct lxc_rootfs *rootfs, FILE *file,
static int mount_file_entries(struct lxc_rootfs *rootfs, FILE *file,
const char *lxc_name, const char *lxc_path)
{
char buf[PATH_MAX];
......@@ -2244,7 +2236,7 @@ static int mount_file_entries(const struct lxc_rootfs *rootfs, FILE *file,
int ret;
if (!rootfs->path)
ret = mount_entry_on_systemfs(&mntent);
ret = mount_entry_on_systemfs(rootfs, &mntent);
else if (mntent.mnt_dir[0] != '/')
ret = mount_entry_on_relative_rootfs(&mntent, rootfs,
lxc_name, lxc_path);
......@@ -2269,9 +2261,8 @@ static inline void __auto_endmntent__(FILE **f)
#define __do_endmntent __attribute__((__cleanup__(__auto_endmntent__)))
static int setup_mount(const struct lxc_conf *conf,
const struct lxc_rootfs *rootfs, const char *fstab,
const char *lxc_name, const char *lxc_path)
static int setup_mount_fstab(struct lxc_rootfs *rootfs, const char *fstab,
const char *lxc_name, const char *lxc_path)
{
__do_endmntent FILE *f = NULL;
int ret;
......@@ -2360,9 +2351,8 @@ FILE *make_anonymous_mount_file(struct lxc_list *mount,
}
static int setup_mount_entries(const struct lxc_conf *conf,
const struct lxc_rootfs *rootfs,
struct lxc_list *mount, const char *lxc_name,
const char *lxc_path)
struct lxc_rootfs *rootfs, struct lxc_list *mount,
const char *lxc_name, const char *lxc_path)
{
__do_fclose FILE *f = NULL;
......@@ -2638,6 +2628,7 @@ struct lxc_conf *lxc_conf_init(void)
new->rootfs.dfd_mnt = -EBADF;
new->rootfs.dfd_dev = -EBADF;
new->rootfs.dfd_host = -EBADF;
new->rootfs.fd_path_pin = -EBADF;
new->logfd = -1;
lxc_list_init(&new->cgroup);
lxc_list_init(&new->cgroup2);
......@@ -3431,7 +3422,7 @@ int lxc_setup(struct lxc_handler *handler)
if (ret < 0)
return log_error(-1, "Failed to setup first automatic mounts");
ret = setup_mount(lxc_conf, &lxc_conf->rootfs, lxc_conf->fstab, name, lxcpath);
ret = setup_mount_fstab(&lxc_conf->rootfs, lxc_conf->fstab, name, lxcpath);
if (ret < 0)
return log_error(-1, "Failed to setup mounts");
......@@ -3543,9 +3534,7 @@ int lxc_setup(struct lxc_handler *handler)
return log_error(-1, "Failed to drop capabilities");
}
close_prot_errno_disarm(lxc_conf->rootfs.dfd_mnt)
close_prot_errno_disarm(lxc_conf->rootfs.dfd_dev)
close_prot_errno_disarm(lxc_conf->rootfs.dfd_host)
put_lxc_rootfs(&handler->conf->rootfs, true);
NOTICE("The container \"%s\" is set up", name);
return 0;
......@@ -3909,9 +3898,7 @@ void lxc_conf_free(struct lxc_conf *conf)
free(conf->rootfs.options);
free(conf->rootfs.path);
free(conf->rootfs.data);
close_prot_errno_disarm(conf->rootfs.dfd_mnt);
close_prot_errno_disarm(conf->rootfs.dfd_dev);
close_prot_errno_disarm(conf->rootfs.dfd_host);
put_lxc_rootfs(&conf->rootfs, true);
free(conf->logfile);
if (conf->logfd != -1)
close(conf->logfd);
......
......@@ -196,10 +196,15 @@ struct lxc_tty_info {
*/
struct lxc_rootfs {
int dfd_host;
int dfd_mnt;
int dfd_dev;
char *path;
int fd_path_pin;
int dfd_mnt;
char *mount;
int dfd_dev;
char buf[PATH_MAX];
char *bdev_type;
char *options;
......@@ -481,7 +486,7 @@ extern struct lxc_conf *current_config;
__hidden extern int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf, char *argv[]);
__hidden extern struct lxc_conf *lxc_conf_init(void);
__hidden extern void lxc_conf_free(struct lxc_conf *conf);
__hidden extern int pin_rootfs(const char *rootfs);
__hidden extern int lxc_rootfs_prepare(struct lxc_rootfs *rootfs, bool userns);
__hidden extern int lxc_map_ids(struct lxc_list *idmap, pid_t pid);
__hidden extern int lxc_create_tty(const char *name, struct lxc_conf *conf);
__hidden extern void lxc_delete_tty(struct lxc_tty_info *ttys);
......@@ -516,8 +521,6 @@ __hidden extern void turn_into_dependent_mounts(void);
__hidden extern void suggest_default_idmap(void);
__hidden extern FILE *make_anonymous_mount_file(struct lxc_list *mount, bool include_nesting_helpers);
__hidden extern struct lxc_list *sort_cgroup_settings(struct lxc_list *cgroup_settings);
__hidden extern unsigned long add_required_remount_flags(const char *s, const char *d,
unsigned long flags);
__hidden extern int run_script(const char *name, const char *section, const char *script, ...);
__hidden extern int run_script_argv(const char *name, unsigned int hook_version, const char *section,
const char *script, const char *hookname, char **argsin);
......@@ -559,4 +562,15 @@ static inline const char *get_rootfs_mnt(const struct lxc_rootfs *rootfs)
return !is_empty_string(rootfs->path) ? rootfs->mount : s;
}
/*
 * put_lxc_rootfs - release the file descriptors stored in @rootfs
 *
 * Hands dfd_host, dfd_mnt and dfd_dev to close_prot_errno_disarm(). The
 * rootfs pin descriptor (fd_path_pin) is only released when @unpin is true.
 * A NULL @rootfs is a no-op.
 *
 * NOTE(review): close_prot_errno_disarm() is defined elsewhere; presumably it
 * closes the descriptor, preserves errno and resets the field - confirm.
 */
static inline void put_lxc_rootfs(struct lxc_rootfs *rootfs, bool unpin)
{
	if (!rootfs)
		return;

	close_prot_errno_disarm(rootfs->dfd_host);
	close_prot_errno_disarm(rootfs->dfd_mnt);
	close_prot_errno_disarm(rootfs->dfd_dev);

	/* Callers that still need the rootfs pinned keep fd_path_pin open. */
	if (!unpin)
		return;

	close_prot_errno_disarm(rootfs->fd_path_pin);
}
#endif /* __LXC_CONF_H */
......@@ -11,6 +11,7 @@
#include <sys/stat.h>
#include <sys/types.h>
#include "file_utils.h"
#include "log.h"
#include "macro.h"
#include "memory_utils.h"
......@@ -18,6 +19,10 @@
#include "syscall_numbers.h"
#include "syscall_wrappers.h"
#ifdef HAVE_STATVFS
#include <sys/statvfs.h>
#endif
lxc_log_define(mount_utils, lxc);
int mnt_attributes_new(unsigned int old_flags, unsigned int *new_flags)
......@@ -239,7 +244,7 @@ int fd_bind_mount(int dfd_from, const char *path_from,
{
__do_close int __fd_from = -EBADF, __fd_to = -EBADF;
__do_close int fd_tree_from = -EBADF;
unsigned int open_tree_flags = AT_EMPTY_PATH | OPEN_TREE_CLONE | OPEN_TREE_CLONE;
unsigned int open_tree_flags = AT_EMPTY_PATH | OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC;
int fd_from, fd_to, ret;
if (!is_empty_string(path_from)) {
......@@ -284,3 +289,152 @@ int fd_bind_mount(int dfd_from, const char *path_from,
TRACE("Attach detached mount %d to filesystem at %d", fd_tree_from, fd_to);
return 0;
}
/*
 * calc_remount_flags_new - compute new-mount-API attributes for a (re)mount
 *
 * Opens @path_from relative to @dfd_from (with @o_flags_from and
 * @resolve_flags_from), queries the filesystem flags of the opened file and
 * stores @cur_flags plus every attribute that has to be carried over in
 * @new_flags:
 *   - only when @remount is true: nosuid, nodev, rdonly and noexec
 *   - always: noatime, nodiratime, relatime and strictatime
 *
 * When statvfs support is not compiled in, no filesystem flags can be queried
 * and @cur_flags is passed through to @new_flags unchanged.
 *
 * Returns 0 on success. On failure the result of log_error_errno(-errno, ...)
 * is returned (presumably the negative errno value - confirm against log.h).
 */
int calc_remount_flags_new(int dfd_from, const char *path_from,
			   __u64 o_flags_from, __u64 resolve_flags_from,
			   bool remount, unsigned long cur_flags,
			   unsigned int *new_flags)
{
#ifdef HAVE_STATVFS
	__do_close int fd_from = -EBADF;
	unsigned int new_required_flags = 0;
	int ret;
	struct statvfs sb;

	fd_from = open_at(dfd_from, path_from, o_flags_from, resolve_flags_from, 0);
	if (fd_from < 0)
		return log_error_errno(-errno, errno, "Failed to open %d(%s)", dfd_from, maybe_empty(path_from));

	/* Inspect the file that was just opened, not the fd it was looked up from. */
	ret = fstatvfs(fd_from, &sb);
	if (ret < 0)
		return log_error_errno(-errno, errno, "Failed to retrieve mount information from %d(%s)", fd_from, maybe_empty(path_from));

	/* Restrictive flags only need to be preserved across a remount. */
	if (remount) {
		if (sb.f_flag & MS_NOSUID)
			new_required_flags |= MOUNT_ATTR_NOSUID;

		if (sb.f_flag & MS_NODEV)
			new_required_flags |= MOUNT_ATTR_NODEV;

		if (sb.f_flag & MS_RDONLY)
			new_required_flags |= MOUNT_ATTR_RDONLY;

		if (sb.f_flag & MS_NOEXEC)
			new_required_flags |= MOUNT_ATTR_NOEXEC;
	}

	if (sb.f_flag & MS_NOATIME)
		new_required_flags |= MOUNT_ATTR_NOATIME;

	if (sb.f_flag & MS_NODIRATIME)
		new_required_flags |= MOUNT_ATTR_NODIRATIME;

	if (sb.f_flag & MS_RELATIME)
		new_required_flags |= MOUNT_ATTR_RELATIME;

	if (sb.f_flag & MS_STRICTATIME)
		new_required_flags |= MOUNT_ATTR_STRICTATIME;

	*new_flags = (cur_flags | new_required_flags);
#else
	/* Nothing can be queried: never leave the output parameter unset. */
	*new_flags = cur_flags;
#endif
	return 0;
}
/*
 * calc_remount_flags_old - compute legacy mount(2) flags for a (re)mount
 *
 * Opens @path_from relative to @dfd_from (with @o_flags_from and
 * @resolve_flags_from), queries the filesystem flags of the opened file and
 * stores @cur_flags plus every MS_* flag that has to be carried over in
 * @old_flags:
 *   - only when @remount is true: MS_NOSUID, MS_NODEV, MS_RDONLY, MS_NOEXEC
 *   - always: MS_NOATIME, MS_NODIRATIME, MS_RELATIME, MS_STRICTATIME
 *
 * When statvfs support is not compiled in, no filesystem flags can be queried
 * and @cur_flags is passed through to @old_flags unchanged.
 *
 * Returns 0 on success. On failure the result of log_error_errno(-errno, ...)
 * is returned (presumably the negative errno value - confirm against log.h).
 */
int calc_remount_flags_old(int dfd_from, const char *path_from,
			   __u64 o_flags_from, __u64 resolve_flags_from,
			   bool remount, unsigned long cur_flags,
			   unsigned int *old_flags)
{
#ifdef HAVE_STATVFS
	__do_close int fd_from = -EBADF;
	unsigned int old_required_flags = 0;
	int ret;
	struct statvfs sb;

	fd_from = open_at(dfd_from, path_from, o_flags_from, resolve_flags_from, 0);
	if (fd_from < 0)
		return log_error_errno(-errno, errno, "Failed to open %d(%s)", dfd_from, maybe_empty(path_from));

	/* Inspect the file that was just opened, not the fd it was looked up from. */
	ret = fstatvfs(fd_from, &sb);
	if (ret < 0)
		return log_error_errno(-errno, errno, "Failed to retrieve mount information from %d(%s)", fd_from, maybe_empty(path_from));

	/* Restrictive flags only need to be preserved across a remount. */
	if (remount) {
		if (sb.f_flag & MS_NOSUID)
			old_required_flags |= MS_NOSUID;

		if (sb.f_flag & MS_NODEV)
			old_required_flags |= MS_NODEV;

		if (sb.f_flag & MS_RDONLY)
			old_required_flags |= MS_RDONLY;

		if (sb.f_flag & MS_NOEXEC)
			old_required_flags |= MS_NOEXEC;
	}

	if (sb.f_flag & MS_NOATIME)
		old_required_flags |= MS_NOATIME;

	if (sb.f_flag & MS_NODIRATIME)
		old_required_flags |= MS_NODIRATIME;

	if (sb.f_flag & MS_RELATIME)
		old_required_flags |= MS_RELATIME;

	if (sb.f_flag & MS_STRICTATIME)
		old_required_flags |= MS_STRICTATIME;

	*old_flags = (cur_flags | old_required_flags);
#else
	/* Nothing can be queried: never leave the output parameter unset. */
	*old_flags = cur_flags;
#endif
	return 0;
}
/*
 * add_required_remount_flags - keep flags already in effect on a filesystem
 *
 * Looks up the filesystem flags of @s (falling back to @d when @s is NULL)
 * with statvfs() and returns @flags with the following bits added when they
 * are set on that filesystem:
 *   - only if MS_REMOUNT is requested in @flags: MS_NOSUID, MS_NODEV,
 *     MS_RDONLY and MS_NOEXEC, so that a remount honors them
 *   - always: MS_NOATIME, MS_NODIRATIME, MS_LAZYTIME, MS_RELATIME and
 *     MS_STRICTATIME
 *
 * @flags is returned unchanged when both paths are NULL, when statvfs() fails
 * or when statvfs support is not compiled in.
 */
unsigned long add_required_remount_flags(const char *s, const char *d,
					 unsigned long flags)
{
#ifndef HAVE_STATVFS
	return flags;
#else
	const unsigned long remount_only = MS_NOSUID | MS_NODEV | MS_RDONLY | MS_NOEXEC;
	const unsigned long time_related = MS_NOATIME | MS_NODIRATIME | MS_LAZYTIME |
					   MS_RELATIME | MS_STRICTATIME;
	const char *target = s ? s : d;
	struct statvfs fsinfo;
	unsigned long inherited;

	if (!target)
		return flags;

	if (statvfs(target, &fsinfo) < 0)
		return flags;

	inherited = fsinfo.f_flag & time_related;
	if (flags & MS_REMOUNT)
		inherited |= fsinfo.f_flag & remount_only;

	return flags | inherited;
#endif
}
......@@ -207,4 +207,20 @@ static inline bool new_mount_api(void)
return supported == 1;
}
/*
 * Compute the mount flags for the new mount API: opens path_from relative to
 * dfd_from, reads filesystem flags via fstatvfs() and stores cur_flags
 * combined with the matching MOUNT_ATTR_* values in *new_flags. The
 * nosuid/nodev/rdonly/noexec attributes are only carried over when remount is
 * true. Returns 0 on success.
 */
__hidden extern int calc_remount_flags_new(int dfd_from, const char *path_from,
__u64 o_flags_from,
__u64 resolve_flags_from,
bool remount, unsigned long cur_flags,
unsigned int *new_flags);
/*
 * Same computation as calc_remount_flags_new() but expressed in legacy MS_*
 * mount flags, stored in *old_flags. Returns 0 on success.
 */
__hidden extern int calc_remount_flags_old(int dfd_from, const char *path_from,
__u64 o_flags_from,
__u64 resolve_flags_from,
bool remount, unsigned long cur_flags,
unsigned int *old_flags);
/*
 * Path-based variant: looks at the filesystem of s (or d when s is NULL) and
 * returns flags with the MS_* bits that must be preserved added. flags is
 * returned unchanged when the filesystem cannot be queried.
 */
__hidden extern unsigned long add_required_remount_flags(const char *s,
const char *d,
unsigned long flags);
#endif /* __LXC_MOUNT_UTILS_H */
......@@ -618,7 +618,6 @@ out_sigfd:
void lxc_put_handler(struct lxc_handler *handler)
{
close_prot_errno_disarm(handler->pinfd);
close_prot_errno_disarm(handler->pidfd);
close_prot_errno_disarm(handler->sigfd);
lxc_put_nsfds(handler);
......@@ -660,7 +659,6 @@ struct lxc_handler *lxc_init_handler(struct lxc_handler *old,
handler->data_sock[0] = -EBADF;
handler->data_sock[1] = -EBADF;
handler->monitor_status_fd = -EBADF;
handler->pinfd = -EBADF;
handler->pidfd = -EBADF;
handler->sigfd = -EBADF;
handler->state_socket_pair[0] = -EBADF;
......@@ -925,6 +923,8 @@ void lxc_end(struct lxc_handler *handler)
cgroup_ops->monitor_destroy(cgroup_ops, handler);
}
put_lxc_rootfs(&handler->conf->rootfs, true);
if (handler->conf->reboot == REBOOT_NONE) {
/* For all new state clients simply close the command socket.
* This will inform all state clients that the container is
......@@ -1066,9 +1066,6 @@ static int do_start(void *data)
goto out_warn_father;
}
/* Don't leak the pinfd to the container. */
close_prot_errno_disarm(handler->pinfd);
if (!lxc_sync_wait_parent(handler, START_SYNC_STARTUP))
goto out_warn_father;
......@@ -1666,10 +1663,10 @@ static int lxc_spawn(struct lxc_handler *handler)
* it readonly.
* If the container is unprivileged then skip rootfs pinning.
*/
if (!wants_to_map_ids) {
handler->pinfd = pin_rootfs(conf->rootfs.path);
if (handler->pinfd == -EBADF)
INFO("Failed to pin the rootfs for container \"%s\"", handler->name);
ret = lxc_rootfs_prepare(&conf->rootfs, wants_to_map_ids);
if (ret) {
ERROR("Failed to handle rootfs pinning for container \"%s\"", handler->name);
goto out_delete_net;
}
/* Create a process in a new set of namespaces. */
......@@ -2001,7 +1998,6 @@ out_abort:
out_sync_fini:
lxc_sync_fini(handler);
close_prot_errno_disarm(handler->pinfd);
return -1;
}
......@@ -2118,8 +2114,6 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops,
if (ret < 0)
ERROR("Failed to move physical network devices back to parent network namespace");
close_prot_errno_disarm(handler->pinfd);
lxc_monitor_send_exit_code(name, status, handler->lxcpath);
lxc_error_set_and_log(handler->pid, status);
if (error_num)
......
......@@ -43,9 +43,6 @@ struct lxc_handler {
__aligned_u64 clone_flags;
};
/* File descriptor to pin the rootfs for privileged containers. */
int pinfd;
/* Signal file descriptor. */
int sigfd;
......
......@@ -11,6 +11,7 @@
#include "log.h"
#include "macro.h"
#include "memory_utils.h"
#include "mount_utils.h"
#include "storage.h"
#include "utils.h"
......
......@@ -66,11 +66,12 @@ which newuidmap >/dev/null 2>&1 || { echo "'newuidmap' command is missing" >&2;
DONE=0
KNOWN_RELEASES="precise trusty xenial yakkety zesty"
UNPRIV_LOG=$(mktemp --dry-run)
cleanup() {
cd /
run_cmd lxc-stop -n c2 -k || true
run_cmd lxc-stop -n c1 -k || true
run_cmd lxc-stop -n c2 -k -l trace -o "${UNPRIV_LOG}" || true
run_cmd lxc-stop -n c1 -k -l trace -o "${UNPRIV_LOG}" || true
pkill -u $(id -u $TUSER) -9 || true
sed -i '/lxcunpriv/d' /run/lxc/nics /etc/lxc/lxc-usernet
......@@ -81,6 +82,8 @@ cleanup() {
deluser $TUSER
if [ $DONE -eq 0 ]; then
cat "${UNPRIV_LOG}"
rm -f "${UNPRIV_LOG}" || true
echo "FAIL"
exit 1
fi
......@@ -173,45 +176,45 @@ run_cmd mkdir -p $HDIR/.cache/lxc
cp -R /var/cache/lxc/download $HDIR/.cache/lxc && \
chown -R $TUSER: $HDIR/.cache/lxc
run_cmd lxc-create -t download -n c1 -- -d ubuntu -r $release -a $ARCH
run_cmd lxc-create -t download -n c1 -l trace -o "${UNPRIV_LOG}" -- -d ubuntu -r $release -a $ARCH
# Make sure we can start it - twice
for count in `seq 1 2`; do
run_cmd lxc-start -n c1 -d
run_cmd lxc-start -n c1 -d -l trace -o "${UNPRIV_LOG}"
p1=$(run_cmd lxc-info -n c1 -p -H)
p1=$(run_cmd lxc-info -n c1 -p -H -l trace -o "${UNPRIV_LOG}")
[ "$p1" != "-1" ] || { echo "Failed to start container c1 (run $count)"; false; }
run_cmd lxc-info -n c1
run_cmd lxc-attach -n c1 -- /bin/true
run_cmd lxc-info -n c1 -l trace -o "${UNPRIV_LOG}"
run_cmd lxc-attach -n c1 -l trace -o "${UNPRIV_LOG}" -- /bin/true
run_cmd lxc-stop -n c1 -k
run_cmd lxc-stop -n c1 -k -l trace -o "${UNPRIV_LOG}"
done
run_cmd lxc-copy -s -n c1 -N c2
run_cmd lxc-start -n c2 -d
p1=$(run_cmd lxc-info -n c2 -p -H)
run_cmd lxc-copy -s -n c1 -N c2 -l trace -o "${UNPRIV_LOG}"
run_cmd lxc-start -n c2 -d -l trace -o "${UNPRIV_LOG}"
p1=$(run_cmd lxc-info -n c2 -p -H -l trace -o "${UNPRIV_LOG}")
[ "$p1" != "-1" ] || { echo "Failed to start container c2"; false; }
run_cmd lxc-stop -n c2 -k
run_cmd lxc-stop -n c2 -k -l trace -o "${UNPRIV_LOG}"
if which cgm >/dev/null 2>&1; then
echo "Testing containers under different cgroups per subsystem"
run_cmd cgm create freezer x1/x2
cgm movepid freezer x1 $$
run_cmd lxc-start -n c1 -d
p1=$(run_cmd lxc-info -n c1 -p -H)
run_cmd lxc-start -n c1 -d -l trace -o "${UNPRIV_LOG}"
p1=$(run_cmd lxc-info -n c1 -p -H -l trace -o "${UNPRIV_LOG}")
[ "$p1" != "-1" ] || { echo "Failed to start container c1"; false; }
run_cmd lxc-info -n c1
run_cmd lxc-attach -n c1 -- /bin/true
run_cmd lxc-cgroup -n c1 freezer.state
run_cmd lxc-info -n c1 -l trace -o "${UNPRIV_LOG}"
run_cmd lxc-attach -n c1 -l trace -o "${UNPRIV_LOG}" -- /bin/true
run_cmd lxc-cgroup -n c1 freezer.state -l trace -o "${UNPRIV_LOG}"
echo "Testing lxc-attach and lxc-cgroup from different cgroup"
cgm movepid freezer x2 $$
run_cmd lxc-attach -n c1 -- /bin/true
run_cmd lxc-cgroup -n c1 freezer.state
run_cmd lxc-cgroup -n c1 memory.limit_in_bytes
run_cmd lxc-attach -n c1 -l trace -o "${UNPRIV_LOG}" -- /bin/true
run_cmd lxc-cgroup -n c1 -l trace -o "${UNPRIV_LOG}" freezer.state
run_cmd lxc-cgroup -n c1 -l trace -o "${UNPRIV_LOG}" memory.limit_in_bytes
fi
DONE=1
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment