conf: rework rootfs pinning

parent 4806d3b9
...@@ -477,56 +477,74 @@ int run_script(const char *name, const char *section, const char *script, ...) ...@@ -477,56 +477,74 @@ int run_script(const char *name, const char *section, const char *script, ...)
return run_buffer(buffer); return run_buffer(buffer);
} }
/* pin_rootfs /* lxc_rootfs_prepare
* if rootfs is a directory, then open ${rootfs}/.lxc-keep for writing for * if rootfs is a directory, then open ${rootfs}/.lxc_keep for writing for
* the duration of the container run, to prevent the container from marking * the duration of the container run, to prevent the container from marking
* the underlying fs readonly on shutdown. unlink the file immediately so * the underlying fs readonly on shutdown. unlink the file immediately so
* no name pollution happens. * no name pollution happens.
* don't unlink on NFS to avoid random named stale handles. * don't unlink on NFS to avoid random named stale handles.
* return -1 on error.
* return -2 if nothing needed to be pinned.
* return an open fd (>=0) if we pinned it.
*/ */
int pin_rootfs(const char *rootfs) int lxc_rootfs_prepare(struct lxc_rootfs *rootfs, bool userns)
{ {
__do_free char *absrootfs = NULL; __do_close int dfd_path = -EBADF, fd_pin = -EBADF;
int fd, ret; int ret;
char absrootfspin[PATH_MAX]; struct stat st;
struct stat s; struct statfs stfs;
struct statfs sfs;
if (rootfs == NULL || strlen(rootfs) == 0) if (rootfs->path) {
return -2; if (rootfs->bdev_type &&
(!strcmp(rootfs->bdev_type, "overlay") ||
!strcmp(rootfs->bdev_type, "overlayfs")))
return log_trace_errno(0, EINVAL, "Not pinning on stacking filesystem");
absrootfs = realpath(rootfs, NULL); dfd_path = open_at(-EBADF, rootfs->path, PROTECT_OPATH_FILE, 0, 0);
if (!absrootfs) } else {
return -2; dfd_path = open_at(-EBADF, "/", PROTECT_OPATH_FILE, PROTECT_LOOKUP_ABSOLUTE, 0);
}
if (dfd_path < 0)
return log_error_errno(-errno, errno, "Failed to open \"%s\"", rootfs->path);
if (!rootfs->path)
return log_trace(0, "Not pinning because container does not have a rootfs");
ret = stat(absrootfs, &s); if (userns)
return log_trace(0, "Not pinning because container runs in user namespace");
ret = fstat(dfd_path, &st);
if (ret < 0) if (ret < 0)
return -1; return log_trace_errno(-errno, errno, "Failed to retrieve file status");
if (!S_ISDIR(s.st_mode)) if (!S_ISDIR(st.st_mode))
return -2; return log_trace_errno(0, ENOTDIR, "Not pinning because file descriptor is not a directory");
ret = snprintf(absrootfspin, sizeof(absrootfspin), "%s/.lxc-keep", absrootfs); fd_pin = open_at(dfd_path, ".lxc_keep",
if (ret < 0 || (size_t)ret >= sizeof(absrootfspin)) PROTECT_OPEN | O_CREAT,
return -1; PROTECT_LOOKUP_BENEATH,
S_IWUSR | S_IRUSR);
if (fd_pin < 0)
return log_error_errno(-errno, errno, "Failed to pin rootfs");
fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR | S_IRUSR | O_CLOEXEC); TRACE("Pinned rootfs %d(.lxc_keep)", fd_pin);
if (fd < 0)
return fd;
ret = fstatfs (fd, &sfs); ret = fstatfs(fd_pin, &stfs);
if (ret < 0) if (ret < 0) {
return fd; SYSWARN("Failed to retrieve filesystem status");
goto out;
}
if (sfs.f_type == NFS_SUPER_MAGIC) if (stfs.f_type == NFS_SUPER_MAGIC) {
return log_debug(fd, "Rootfs on NFS, not unlinking pin file \"%s\"", absrootfspin); DEBUG("Not unlinking pinned file on NFS");
goto out;
}
(void)unlink(absrootfspin); if (unlinkat(dfd_path, ".lxc_keep", 0))
SYSTRACE("Failed to unlink rootfs pinning file %d(.lxc_keep)", dfd_path);
else
TRACE("Unlinked pinned file %d(.lxc_keep)", dfd_path);
return fd; out:
rootfs->fd_path_pin = move_fd(fd_pin);
return 0;
} }
static int add_shmount_to_list(struct lxc_conf *conf) static int add_shmount_to_list(struct lxc_conf *conf)
...@@ -2585,6 +2603,7 @@ struct lxc_conf *lxc_conf_init(void) ...@@ -2585,6 +2603,7 @@ struct lxc_conf *lxc_conf_init(void)
new->rootfs.dfd_mnt = -EBADF; new->rootfs.dfd_mnt = -EBADF;
new->rootfs.dfd_dev = -EBADF; new->rootfs.dfd_dev = -EBADF;
new->rootfs.dfd_host = -EBADF; new->rootfs.dfd_host = -EBADF;
new->rootfs.fd_path_pin = -EBADF;
new->logfd = -1; new->logfd = -1;
lxc_list_init(&new->cgroup); lxc_list_init(&new->cgroup);
lxc_list_init(&new->cgroup2); lxc_list_init(&new->cgroup2);
...@@ -3490,9 +3509,7 @@ int lxc_setup(struct lxc_handler *handler) ...@@ -3490,9 +3509,7 @@ int lxc_setup(struct lxc_handler *handler)
return log_error(-1, "Failed to drop capabilities"); return log_error(-1, "Failed to drop capabilities");
} }
close_prot_errno_disarm(lxc_conf->rootfs.dfd_mnt) put_lxc_rootfs(&handler->conf->rootfs, true);
close_prot_errno_disarm(lxc_conf->rootfs.dfd_dev)
close_prot_errno_disarm(lxc_conf->rootfs.dfd_host)
NOTICE("The container \"%s\" is set up", name); NOTICE("The container \"%s\" is set up", name);
return 0; return 0;
...@@ -3856,9 +3873,7 @@ void lxc_conf_free(struct lxc_conf *conf) ...@@ -3856,9 +3873,7 @@ void lxc_conf_free(struct lxc_conf *conf)
free(conf->rootfs.options); free(conf->rootfs.options);
free(conf->rootfs.path); free(conf->rootfs.path);
free(conf->rootfs.data); free(conf->rootfs.data);
close_prot_errno_disarm(conf->rootfs.dfd_mnt); put_lxc_rootfs(&conf->rootfs, true);
close_prot_errno_disarm(conf->rootfs.dfd_dev);
close_prot_errno_disarm(conf->rootfs.dfd_host);
free(conf->logfile); free(conf->logfile);
if (conf->logfd != -1) if (conf->logfd != -1)
close(conf->logfd); close(conf->logfd);
......
...@@ -196,10 +196,15 @@ struct lxc_tty_info { ...@@ -196,10 +196,15 @@ struct lxc_tty_info {
*/ */
struct lxc_rootfs { struct lxc_rootfs {
int dfd_host; int dfd_host;
int dfd_mnt;
int dfd_dev;
char *path; char *path;
int fd_path_pin;
int dfd_mnt;
char *mount; char *mount;
int dfd_dev;
char buf[PATH_MAX]; char buf[PATH_MAX];
char *bdev_type; char *bdev_type;
char *options; char *options;
...@@ -481,7 +486,7 @@ extern struct lxc_conf *current_config; ...@@ -481,7 +486,7 @@ extern struct lxc_conf *current_config;
__hidden extern int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf, char *argv[]); __hidden extern int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf, char *argv[]);
__hidden extern struct lxc_conf *lxc_conf_init(void); __hidden extern struct lxc_conf *lxc_conf_init(void);
__hidden extern void lxc_conf_free(struct lxc_conf *conf); __hidden extern void lxc_conf_free(struct lxc_conf *conf);
__hidden extern int pin_rootfs(const char *rootfs); __hidden extern int lxc_rootfs_prepare(struct lxc_rootfs *rootfs, bool userns);
__hidden extern int lxc_map_ids(struct lxc_list *idmap, pid_t pid); __hidden extern int lxc_map_ids(struct lxc_list *idmap, pid_t pid);
__hidden extern int lxc_create_tty(const char *name, struct lxc_conf *conf); __hidden extern int lxc_create_tty(const char *name, struct lxc_conf *conf);
__hidden extern void lxc_delete_tty(struct lxc_tty_info *ttys); __hidden extern void lxc_delete_tty(struct lxc_tty_info *ttys);
...@@ -557,4 +562,15 @@ static inline const char *get_rootfs_mnt(const struct lxc_rootfs *rootfs) ...@@ -557,4 +562,15 @@ static inline const char *get_rootfs_mnt(const struct lxc_rootfs *rootfs)
return !is_empty_string(rootfs->path) ? rootfs->mount : s; return !is_empty_string(rootfs->path) ? rootfs->mount : s;
} }
/*
 * put_lxc_rootfs
 * Hand each file descriptor stored in @rootfs to close_prot_errno_disarm():
 * dfd_host, dfd_mnt and dfd_dev are always processed, while fd_path_pin is
 * only processed when @unpin is true.
 * A NULL @rootfs is accepted and ignored.
 */
static inline void put_lxc_rootfs(struct lxc_rootfs *rootfs, bool unpin)
{
	/* Nothing to release without a rootfs. */
	if (!rootfs)
		return;

	close_prot_errno_disarm(rootfs->dfd_host);
	close_prot_errno_disarm(rootfs->dfd_mnt);
	close_prot_errno_disarm(rootfs->dfd_dev);

	/* The pin descriptor is left untouched unless the caller asks for it. */
	if (unpin) {
		close_prot_errno_disarm(rootfs->fd_path_pin);
	}
}
#endif /* __LXC_CONF_H */ #endif /* __LXC_CONF_H */
...@@ -618,7 +618,6 @@ out_sigfd: ...@@ -618,7 +618,6 @@ out_sigfd:
void lxc_put_handler(struct lxc_handler *handler) void lxc_put_handler(struct lxc_handler *handler)
{ {
close_prot_errno_disarm(handler->pinfd);
close_prot_errno_disarm(handler->pidfd); close_prot_errno_disarm(handler->pidfd);
close_prot_errno_disarm(handler->sigfd); close_prot_errno_disarm(handler->sigfd);
lxc_put_nsfds(handler); lxc_put_nsfds(handler);
...@@ -660,7 +659,6 @@ struct lxc_handler *lxc_init_handler(struct lxc_handler *old, ...@@ -660,7 +659,6 @@ struct lxc_handler *lxc_init_handler(struct lxc_handler *old,
handler->data_sock[0] = -EBADF; handler->data_sock[0] = -EBADF;
handler->data_sock[1] = -EBADF; handler->data_sock[1] = -EBADF;
handler->monitor_status_fd = -EBADF; handler->monitor_status_fd = -EBADF;
handler->pinfd = -EBADF;
handler->pidfd = -EBADF; handler->pidfd = -EBADF;
handler->sigfd = -EBADF; handler->sigfd = -EBADF;
handler->state_socket_pair[0] = -EBADF; handler->state_socket_pair[0] = -EBADF;
...@@ -925,6 +923,8 @@ void lxc_end(struct lxc_handler *handler) ...@@ -925,6 +923,8 @@ void lxc_end(struct lxc_handler *handler)
cgroup_ops->monitor_destroy(cgroup_ops, handler); cgroup_ops->monitor_destroy(cgroup_ops, handler);
} }
put_lxc_rootfs(&handler->conf->rootfs, true);
if (handler->conf->reboot == REBOOT_NONE) { if (handler->conf->reboot == REBOOT_NONE) {
/* For all new state clients simply close the command socket. /* For all new state clients simply close the command socket.
* This will inform all state clients that the container is * This will inform all state clients that the container is
...@@ -1066,9 +1066,6 @@ static int do_start(void *data) ...@@ -1066,9 +1066,6 @@ static int do_start(void *data)
goto out_warn_father; goto out_warn_father;
} }
/* Don't leak the pinfd to the container. */
close_prot_errno_disarm(handler->pinfd);
if (!lxc_sync_wait_parent(handler, START_SYNC_STARTUP)) if (!lxc_sync_wait_parent(handler, START_SYNC_STARTUP))
goto out_warn_father; goto out_warn_father;
...@@ -1666,10 +1663,10 @@ static int lxc_spawn(struct lxc_handler *handler) ...@@ -1666,10 +1663,10 @@ static int lxc_spawn(struct lxc_handler *handler)
* it readonly. * it readonly.
* If the container is unprivileged then skip rootfs pinning. * If the container is unprivileged then skip rootfs pinning.
*/ */
if (!wants_to_map_ids) { ret = lxc_rootfs_prepare(&conf->rootfs, wants_to_map_ids);
handler->pinfd = pin_rootfs(conf->rootfs.path); if (ret) {
if (handler->pinfd == -EBADF) ERROR("Failed to handle rootfs pinning for container \"%s\"", handler->name);
INFO("Failed to pin the rootfs for container \"%s\"", handler->name); goto out_delete_net;
} }
/* Create a process in a new set of namespaces. */ /* Create a process in a new set of namespaces. */
...@@ -2001,7 +1998,6 @@ out_abort: ...@@ -2001,7 +1998,6 @@ out_abort:
out_sync_fini: out_sync_fini:
lxc_sync_fini(handler); lxc_sync_fini(handler);
close_prot_errno_disarm(handler->pinfd);
return -1; return -1;
} }
...@@ -2118,8 +2114,6 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops, ...@@ -2118,8 +2114,6 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops,
if (ret < 0) if (ret < 0)
ERROR("Failed to move physical network devices back to parent network namespace"); ERROR("Failed to move physical network devices back to parent network namespace");
close_prot_errno_disarm(handler->pinfd);
lxc_monitor_send_exit_code(name, status, handler->lxcpath); lxc_monitor_send_exit_code(name, status, handler->lxcpath);
lxc_error_set_and_log(handler->pid, status); lxc_error_set_and_log(handler->pid, status);
if (error_num) if (error_num)
......
...@@ -43,9 +43,6 @@ struct lxc_handler { ...@@ -43,9 +43,6 @@ struct lxc_handler {
__aligned_u64 clone_flags; __aligned_u64 clone_flags;
}; };
/* File descriptor to pin the rootfs for privileged containers. */
int pinfd;
/* Signal file descriptor. */ /* Signal file descriptor. */
int sigfd; int sigfd;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment