conf: rework rootfs pinning

parent 4806d3b9
......@@ -477,56 +477,74 @@ int run_script(const char *name, const char *section, const char *script, ...)
return run_buffer(buffer);
}
/* pin_rootfs
/* lxc_rootfs_prepare
* if rootfs is a directory, then open ${rootfs}/.lxc-keep for writing for
* the duration of the container run, to prevent the container from marking
* the underlying fs readonly on shutdown. unlink the file immediately so
* no name pollution happens.
* don't unlink on NFS to avoid random named stale handles.
* return -1 on error.
* return -2 if nothing needed to be pinned.
* return an open fd (>=0) if we pinned it.
*/
int pin_rootfs(const char *rootfs)
int lxc_rootfs_prepare(struct lxc_rootfs *rootfs, bool userns)
{
__do_free char *absrootfs = NULL;
int fd, ret;
char absrootfspin[PATH_MAX];
struct stat s;
struct statfs sfs;
__do_close int dfd_path = -EBADF, fd_pin = -EBADF;
int ret;
struct stat st;
struct statfs stfs;
if (rootfs == NULL || strlen(rootfs) == 0)
return -2;
if (rootfs->path) {
if (rootfs->bdev_type &&
(!strcmp(rootfs->bdev_type, "overlay") ||
!strcmp(rootfs->bdev_type, "overlayfs")))
return log_trace_errno(0, EINVAL, "Not pinning on stacking filesystem");
absrootfs = realpath(rootfs, NULL);
if (!absrootfs)
return -2;
dfd_path = open_at(-EBADF, rootfs->path, PROTECT_OPATH_FILE, 0, 0);
} else {
dfd_path = open_at(-EBADF, "/", PROTECT_OPATH_FILE, PROTECT_LOOKUP_ABSOLUTE, 0);
}
if (dfd_path < 0)
return log_error_errno(-errno, errno, "Failed to open \"%s\"", rootfs->path);
if (!rootfs->path)
return log_trace(0, "Not pinning because container does not have a rootfs");
ret = stat(absrootfs, &s);
if (userns)
return log_trace(0, "Not pinning because container runs in user namespace");
ret = fstat(dfd_path, &st);
if (ret < 0)
return -1;
return log_trace_errno(-errno, errno, "Failed to retrieve file status");
if (!S_ISDIR(s.st_mode))
return -2;
if (!S_ISDIR(st.st_mode))
return log_trace_errno(0, ENOTDIR, "Not pinning because file descriptor is not a directory");
ret = snprintf(absrootfspin, sizeof(absrootfspin), "%s/.lxc-keep", absrootfs);
if (ret < 0 || (size_t)ret >= sizeof(absrootfspin))
return -1;
fd_pin = open_at(dfd_path, ".lxc_keep",
PROTECT_OPEN | O_CREAT,
PROTECT_LOOKUP_BENEATH,
S_IWUSR | S_IRUSR);
if (fd_pin < 0)
return log_error_errno(-errno, errno, "Failed to pin rootfs");
fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR | S_IRUSR | O_CLOEXEC);
if (fd < 0)
return fd;
TRACE("Pinned rootfs %d(.lxc_keep)", fd_pin);
ret = fstatfs (fd, &sfs);
if (ret < 0)
return fd;
ret = fstatfs(fd_pin, &stfs);
if (ret < 0) {
SYSWARN("Failed to retrieve filesystem status");
goto out;
}
if (sfs.f_type == NFS_SUPER_MAGIC)
return log_debug(fd, "Rootfs on NFS, not unlinking pin file \"%s\"", absrootfspin);
if (stfs.f_type == NFS_SUPER_MAGIC) {
DEBUG("Not unlinking pinned file on NFS");
goto out;
}
(void)unlink(absrootfspin);
if (unlinkat(dfd_path, ".lxc_keep", 0))
SYSTRACE("Failed to unlink rootfs pinning file %d(.lxc_keep)", dfd_path);
else
TRACE("Unlinked pinned file %d(.lxc_keep)", dfd_path);
return fd;
out:
rootfs->fd_path_pin = move_fd(fd_pin);
return 0;
}
static int add_shmount_to_list(struct lxc_conf *conf)
......@@ -2585,6 +2603,7 @@ struct lxc_conf *lxc_conf_init(void)
new->rootfs.dfd_mnt = -EBADF;
new->rootfs.dfd_dev = -EBADF;
new->rootfs.dfd_host = -EBADF;
new->rootfs.fd_path_pin = -EBADF;
new->logfd = -1;
lxc_list_init(&new->cgroup);
lxc_list_init(&new->cgroup2);
......@@ -3490,9 +3509,7 @@ int lxc_setup(struct lxc_handler *handler)
return log_error(-1, "Failed to drop capabilities");
}
close_prot_errno_disarm(lxc_conf->rootfs.dfd_mnt)
close_prot_errno_disarm(lxc_conf->rootfs.dfd_dev)
close_prot_errno_disarm(lxc_conf->rootfs.dfd_host)
put_lxc_rootfs(&handler->conf->rootfs, true);
NOTICE("The container \"%s\" is set up", name);
return 0;
......@@ -3856,9 +3873,7 @@ void lxc_conf_free(struct lxc_conf *conf)
free(conf->rootfs.options);
free(conf->rootfs.path);
free(conf->rootfs.data);
close_prot_errno_disarm(conf->rootfs.dfd_mnt);
close_prot_errno_disarm(conf->rootfs.dfd_dev);
close_prot_errno_disarm(conf->rootfs.dfd_host);
put_lxc_rootfs(&conf->rootfs, true);
free(conf->logfile);
if (conf->logfd != -1)
close(conf->logfd);
......
......@@ -196,10 +196,15 @@ struct lxc_tty_info {
*/
struct lxc_rootfs {
int dfd_host;
int dfd_mnt;
int dfd_dev;
char *path;
int fd_path_pin;
int dfd_mnt;
char *mount;
int dfd_dev;
char buf[PATH_MAX];
char *bdev_type;
char *options;
......@@ -481,7 +486,7 @@ extern struct lxc_conf *current_config;
__hidden extern int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf, char *argv[]);
__hidden extern struct lxc_conf *lxc_conf_init(void);
__hidden extern void lxc_conf_free(struct lxc_conf *conf);
__hidden extern int pin_rootfs(const char *rootfs);
__hidden extern int lxc_rootfs_prepare(struct lxc_rootfs *rootfs, bool userns);
__hidden extern int lxc_map_ids(struct lxc_list *idmap, pid_t pid);
__hidden extern int lxc_create_tty(const char *name, struct lxc_conf *conf);
__hidden extern void lxc_delete_tty(struct lxc_tty_info *ttys);
......@@ -557,4 +562,15 @@ static inline const char *get_rootfs_mnt(const struct lxc_rootfs *rootfs)
return !is_empty_string(rootfs->path) ? rootfs->mount : s;
}
/* put_lxc_rootfs
 * Run close_prot_errno_disarm() on the dfd_host, dfd_mnt and dfd_dev
 * descriptors stored in @rootfs. The fd_path_pin descriptor is only
 * handled when @unpin is true, so callers can keep it alive by passing
 * false.
 * A NULL @rootfs is accepted and results in a no-op.
 */
static inline void put_lxc_rootfs(struct lxc_rootfs *rootfs, bool unpin)
{
	if (!rootfs)
		return;

	close_prot_errno_disarm(rootfs->dfd_host);
	close_prot_errno_disarm(rootfs->dfd_mnt);
	close_prot_errno_disarm(rootfs->dfd_dev);

	/* The pin descriptor is released only on explicit request. */
	if (!unpin)
		return;

	close_prot_errno_disarm(rootfs->fd_path_pin);
}
#endif /* __LXC_CONF_H */
......@@ -618,7 +618,6 @@ out_sigfd:
void lxc_put_handler(struct lxc_handler *handler)
{
close_prot_errno_disarm(handler->pinfd);
close_prot_errno_disarm(handler->pidfd);
close_prot_errno_disarm(handler->sigfd);
lxc_put_nsfds(handler);
......@@ -660,7 +659,6 @@ struct lxc_handler *lxc_init_handler(struct lxc_handler *old,
handler->data_sock[0] = -EBADF;
handler->data_sock[1] = -EBADF;
handler->monitor_status_fd = -EBADF;
handler->pinfd = -EBADF;
handler->pidfd = -EBADF;
handler->sigfd = -EBADF;
handler->state_socket_pair[0] = -EBADF;
......@@ -925,6 +923,8 @@ void lxc_end(struct lxc_handler *handler)
cgroup_ops->monitor_destroy(cgroup_ops, handler);
}
put_lxc_rootfs(&handler->conf->rootfs, true);
if (handler->conf->reboot == REBOOT_NONE) {
/* For all new state clients simply close the command socket.
* This will inform all state clients that the container is
......@@ -1066,9 +1066,6 @@ static int do_start(void *data)
goto out_warn_father;
}
/* Don't leak the pinfd to the container. */
close_prot_errno_disarm(handler->pinfd);
if (!lxc_sync_wait_parent(handler, START_SYNC_STARTUP))
goto out_warn_father;
......@@ -1666,10 +1663,10 @@ static int lxc_spawn(struct lxc_handler *handler)
* it readonly.
* If the container is unprivileged then skip rootfs pinning.
*/
if (!wants_to_map_ids) {
handler->pinfd = pin_rootfs(conf->rootfs.path);
if (handler->pinfd == -EBADF)
INFO("Failed to pin the rootfs for container \"%s\"", handler->name);
ret = lxc_rootfs_prepare(&conf->rootfs, wants_to_map_ids);
if (ret) {
ERROR("Failed to handle rootfs pinning for container \"%s\"", handler->name);
goto out_delete_net;
}
/* Create a process in a new set of namespaces. */
......@@ -2001,7 +1998,6 @@ out_abort:
out_sync_fini:
lxc_sync_fini(handler);
close_prot_errno_disarm(handler->pinfd);
return -1;
}
......@@ -2118,8 +2114,6 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops,
if (ret < 0)
ERROR("Failed to move physical network devices back to parent network namespace");
close_prot_errno_disarm(handler->pinfd);
lxc_monitor_send_exit_code(name, status, handler->lxcpath);
lxc_error_set_and_log(handler->pid, status);
if (error_num)
......
......@@ -43,9 +43,6 @@ struct lxc_handler {
__aligned_u64 clone_flags;
};
/* File descriptor to pin the rootfs for privileged containers. */
int pinfd;
/* Signal file descriptor. */
int sigfd;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment