Unverified Commit b5e75029 by Stéphane Graber Committed by GitHub

Merge pull request #3649 from brauner/2021-02-03/attach_via_pidfds

attach: attach to namespaces via pidfds
parents 07f89a4f 9b31ab58
...@@ -87,6 +87,7 @@ static lxc_attach_options_t attach_static_default_options = LXC_ATTACH_OPTIONS_D ...@@ -87,6 +87,7 @@ static lxc_attach_options_t attach_static_default_options = LXC_ATTACH_OPTIONS_D
struct attach_context { struct attach_context {
unsigned int attach_flags; unsigned int attach_flags;
int init_pid; int init_pid;
int init_pidfd;
int dfd_init_pid; int dfd_init_pid;
int dfd_self_pid; int dfd_self_pid;
uid_t setup_ns_uid; uid_t setup_ns_uid;
...@@ -179,15 +180,16 @@ static struct attach_context *alloc_attach_context(void) ...@@ -179,15 +180,16 @@ static struct attach_context *alloc_attach_context(void)
if (!ctx) if (!ctx)
return ret_set_errno(NULL, ENOMEM); return ret_set_errno(NULL, ENOMEM);
ctx->dfd_self_pid = -EBADF; ctx->dfd_self_pid = -EBADF;
ctx->dfd_init_pid = -EBADF; ctx->dfd_init_pid = -EBADF;
ctx->init_pid = -ESRCH; ctx->init_pidfd = -EBADF;
ctx->setup_ns_uid = LXC_INVALID_UID; ctx->init_pid = -ESRCH;
ctx->setup_ns_gid = LXC_INVALID_GID; ctx->setup_ns_uid = LXC_INVALID_UID;
ctx->target_ns_uid = LXC_INVALID_UID; ctx->setup_ns_gid = LXC_INVALID_GID;
ctx->target_ns_gid = LXC_INVALID_GID; ctx->target_ns_uid = LXC_INVALID_UID;
ctx->target_host_uid = LXC_INVALID_UID; ctx->target_ns_gid = LXC_INVALID_GID;
ctx->target_host_gid = LXC_INVALID_GID; ctx->target_host_uid = LXC_INVALID_UID;
ctx->target_host_gid = LXC_INVALID_GID;
for (int i = 0; i < LXC_NS_MAX; i++) for (int i = 0; i < LXC_NS_MAX; i++)
ctx->ns_fd[i] = -EBADF; ctx->ns_fd[i] = -EBADF;
...@@ -366,11 +368,29 @@ static int parse_init_status(struct attach_context *ctx, lxc_attach_options_t *o ...@@ -366,11 +368,29 @@ static int parse_init_status(struct attach_context *ctx, lxc_attach_options_t *o
return 0; return 0;
} }
static bool pidfd_setns_supported(struct attach_context *ctx)
{
int ret;
/*
* The ability to attach to time namespaces came after the introduction
* of of using pidfds for attaching to namespaces. To avoid having to
* special-case both CLONE_NEWUSER and CLONE_NEWTIME handling, let's
* use CLONE_NEWTIME as gatekeeper.
*/
if (ctx->init_pidfd >= 0)
ret = setns(ctx->init_pidfd, CLONE_NEWTIME);
else
ret = -EOPNOTSUPP;
TRACE("Attaching to namespaces via pidfds %s",
ret ? "unsupported" : "supported");
return ret == 0;
}
static int get_attach_context(struct attach_context *ctx, static int get_attach_context(struct attach_context *ctx,
struct lxc_container *container, struct lxc_container *container,
lxc_attach_options_t *options) lxc_attach_options_t *options)
{ {
__do_close int init_pidfd = -EBADF;
__do_free char *lsm_label = NULL; __do_free char *lsm_label = NULL;
int ret; int ret;
char path[LXC_PROC_PID_LEN]; char path[LXC_PROC_PID_LEN];
...@@ -384,9 +404,9 @@ static int get_attach_context(struct attach_context *ctx, ...@@ -384,9 +404,9 @@ static int get_attach_context(struct attach_context *ctx,
if (ctx->dfd_self_pid < 0) if (ctx->dfd_self_pid < 0)
return log_error_errno(-errno, errno, "Failed to open /proc/self"); return log_error_errno(-errno, errno, "Failed to open /proc/self");
init_pidfd = lxc_cmd_get_init_pidfd(container->name, container->config_path); ctx->init_pidfd = lxc_cmd_get_init_pidfd(container->name, container->config_path);
if (init_pidfd >= 0) if (ctx->init_pidfd >= 0)
ctx->init_pid = pidfd_get_pid(ctx->dfd_self_pid, init_pidfd); ctx->init_pid = pidfd_get_pid(ctx->dfd_self_pid, ctx->init_pidfd);
else else
ctx->init_pid = lxc_cmd_get_init_pid(container->name, container->config_path); ctx->init_pid = lxc_cmd_get_init_pid(container->name, container->config_path);
...@@ -403,12 +423,21 @@ static int get_attach_context(struct attach_context *ctx, ...@@ -403,12 +423,21 @@ static int get_attach_context(struct attach_context *ctx,
if (ctx->dfd_init_pid < 0) if (ctx->dfd_init_pid < 0)
return log_error_errno(-errno, errno, "Failed to open /proc/%d", ctx->init_pid); return log_error_errno(-errno, errno, "Failed to open /proc/%d", ctx->init_pid);
if (init_pidfd >= 0) { if (ctx->init_pidfd >= 0) {
ret = lxc_raw_pidfd_send_signal(init_pidfd, 0, NULL, 0); ret = lxc_raw_pidfd_send_signal(ctx->init_pidfd, 0, NULL, 0);
if (ret) if (ret)
return log_error_errno(-errno, errno, "Container process exited or PID has been recycled"); return log_error_errno(-errno, errno, "Container process exited or PID has been recycled");
else else
TRACE("Container process still running and PID was not recycled"); TRACE("Container process still running and PID was not recycled");
if (!pidfd_setns_supported(ctx)) {
/* We can't risk leaking file descriptors during attach. */
if (close(ctx->init_pidfd))
return log_error_errno(-errno, errno, "Failed to close pidfd");
ctx->init_pidfd = -EBADF;
TRACE("Attaching to namespaces via pidfds not supported");
}
} }
/* Determine which namespaces the container was created with. */ /* Determine which namespaces the container was created with. */
...@@ -446,7 +475,6 @@ static int get_attach_context(struct attach_context *ctx, ...@@ -446,7 +475,6 @@ static int get_attach_context(struct attach_context *ctx,
else else
INFO("Retrieved security context %s", lsm_label); INFO("Retrieved security context %s", lsm_label);
} }
ctx->ns_inherited = 0;
ret = get_personality(container->name, container->config_path, &ctx->personality); ret = get_personality(container->name, container->config_path, &ctx->personality);
if (ret) if (ret)
...@@ -462,50 +490,77 @@ static int get_attach_context(struct attach_context *ctx, ...@@ -462,50 +490,77 @@ static int get_attach_context(struct attach_context *ctx,
return 0; return 0;
} }
static int same_ns(int ns_fd_pid1, int ns_fd_pid2, const char *ns_path) static int same_nsfd(int dfd_pid1, int dfd_pid2, const char *ns_path)
{ {
__do_close int ns_fd1 = -EBADF, ns_fd2 = -EBADF; int ret;
int ret = -1;
struct stat ns_st1, ns_st2; struct stat ns_st1, ns_st2;
ns_fd1 = open_at(ns_fd_pid1, ns_path, ret = fstatat(dfd_pid1, ns_path, &ns_st1, 0);
PROTECT_OPEN_WITH_TRAILING_SYMLINKS, if (ret)
(PROTECT_LOOKUP_BENEATH_WITH_MAGICLINKS & ~(RESOLVE_NO_XDEV | RESOLVE_BENEATH)),
0);
if (ns_fd1 < 0) {
/* The kernel does not support this namespace. This is not an error. */
if (errno == ENOENT)
return -EINVAL;
return log_error_errno(-errno, errno, "Failed to open %d(%s)", ns_fd_pid1, ns_path);
}
ns_fd2 = open_at(ns_fd_pid2, ns_path,
PROTECT_OPEN_WITH_TRAILING_SYMLINKS,
(PROTECT_LOOKUP_BENEATH_WITH_MAGICLINKS & ~(RESOLVE_NO_XDEV | RESOLVE_BENEATH)),
0);
if (ns_fd2 < 0)
return log_error_errno(-errno, errno, "Failed to open %d(%s)", ns_fd_pid2, ns_path);
ret = fstat(ns_fd1, &ns_st1);
if (ret < 0)
return -1; return -1;
ret = fstat(ns_fd2, &ns_st2); ret = fstatat(dfd_pid2, ns_path, &ns_st2, 0);
if (ret < 0) if (ret)
return -1; return -1;
/* processes are in the same namespace */ /* processes are in the same namespace */
if ((ns_st1.st_dev == ns_st2.st_dev) && if ((ns_st1.st_dev == ns_st2.st_dev) &&
(ns_st1.st_ino == ns_st2.st_ino)) (ns_st1.st_ino == ns_st2.st_ino))
return -EINVAL; return -EINVAL;
return 0;
}
static int same_ns(int dfd_pid1, int dfd_pid2, const char *ns_path)
{
__do_close int ns_fd2 = -EBADF;
int ret = -1;
ns_fd2 = open_at(dfd_pid2, ns_path, PROTECT_OPEN_WITH_TRAILING_SYMLINKS,
(PROTECT_LOOKUP_BENEATH_WITH_MAGICLINKS &
~(RESOLVE_NO_XDEV | RESOLVE_BENEATH)), 0);
if (ns_fd2 < 0) {
/* The kernel does not support this namespace. This is not an error. */
if (errno == ENOENT)
return -EINVAL;
return log_error_errno(-errno, errno, "Failed to open %d(%s)",
dfd_pid2, ns_path);
}
ret = same_nsfd(dfd_pid1, dfd_pid2, ns_path);
if (ret < 0)
return ret;
/* processes are in different namespaces */ /* processes are in different namespaces */
return move_fd(ns_fd2); return move_fd(ns_fd2);
} }
static int get_attach_context_nsfds(struct attach_context *ctx, static int __prepare_namespaces_pidfd(struct attach_context *ctx)
lxc_attach_options_t *options) {
for (int i = 0; i < LXC_NS_MAX; i++) {
int ret;
if (!(ctx->ns_inherited & ns_info[i].clone_flag))
continue;
ret = same_nsfd(ctx->dfd_self_pid,
ctx->dfd_init_pid,
ns_info[i].proc_path);
if (ret == -EINVAL)
ctx->ns_inherited &= ~ns_info[i].clone_flag;
else if (ret < 0)
return log_error_errno(-1, errno,
"Failed to determine whether %s namespace is shared",
ns_info[i].proc_name);
else
TRACE("Shared %s namespace needs attach", ns_info[i].proc_name);
}
return 0;
}
static int __prepare_namespaces_nsfd(struct attach_context *ctx,
lxc_attach_options_t *options)
{ {
for (int i = 0; i < LXC_NS_MAX; i++) { for (int i = 0; i < LXC_NS_MAX; i++) {
int j; int j;
...@@ -514,7 +569,8 @@ static int get_attach_context_nsfds(struct attach_context *ctx, ...@@ -514,7 +569,8 @@ static int get_attach_context_nsfds(struct attach_context *ctx,
ctx->ns_fd[i] = open_at(ctx->dfd_init_pid, ctx->ns_fd[i] = open_at(ctx->dfd_init_pid,
ns_info[i].proc_path, ns_info[i].proc_path,
PROTECT_OPEN_WITH_TRAILING_SYMLINKS, PROTECT_OPEN_WITH_TRAILING_SYMLINKS,
(PROTECT_LOOKUP_BENEATH_WITH_MAGICLINKS & ~(RESOLVE_NO_XDEV | RESOLVE_BENEATH)), (PROTECT_LOOKUP_BENEATH_WITH_MAGICLINKS &
~(RESOLVE_NO_XDEV | RESOLVE_BENEATH)),
0); 0);
else if (ctx->ns_inherited & ns_info[i].clone_flag) else if (ctx->ns_inherited & ns_info[i].clone_flag)
ctx->ns_fd[i] = same_ns(ctx->dfd_self_pid, ctx->ns_fd[i] = same_ns(ctx->dfd_self_pid,
...@@ -527,13 +583,13 @@ static int get_attach_context_nsfds(struct attach_context *ctx, ...@@ -527,13 +583,13 @@ static int get_attach_context_nsfds(struct attach_context *ctx,
continue; continue;
if (ctx->ns_fd[i] == -EINVAL) { if (ctx->ns_fd[i] == -EINVAL) {
DEBUG("Inheriting %s namespace", ns_info[i].proc_name);
ctx->ns_inherited &= ~ns_info[i].clone_flag; ctx->ns_inherited &= ~ns_info[i].clone_flag;
continue; continue;
} }
/* We failed to preserve the namespace. */ /* We failed to preserve the namespace. */
SYSERROR("Failed to preserve %s namespace of %d", ns_info[i].proc_name, ctx->init_pid); SYSERROR("Failed to preserve %s namespace of %d",
ns_info[i].proc_name, ctx->init_pid);
/* Close all already opened file descriptors before we return an /* Close all already opened file descriptors before we return an
* error, so we don't leak them. * error, so we don't leak them.
...@@ -547,30 +603,45 @@ static int get_attach_context_nsfds(struct attach_context *ctx, ...@@ -547,30 +603,45 @@ static int get_attach_context_nsfds(struct attach_context *ctx,
return 0; return 0;
} }
static inline void close_nsfds(struct attach_context *ctx) static int prepare_namespaces(struct attach_context *ctx,
lxc_attach_options_t *options)
{ {
for (int i = 0; i < LXC_NS_MAX; i++) if (ctx->init_pidfd < 0)
close_prot_errno_disarm(ctx->ns_fd[i]); return __prepare_namespaces_nsfd(ctx, options);
return __prepare_namespaces_pidfd(ctx);
} }
static void put_attach_context(struct attach_context *ctx) static inline void put_namespaces(struct attach_context *ctx)
{ {
if (ctx) { if (ctx->init_pidfd < 0) {
if (!(ctx->attach_flags & LXC_ATTACH_LSM_LABEL)) for (int i = 0; i < LXC_NS_MAX; i++)
free_disarm(ctx->lsm_label); close_prot_errno_disarm(ctx->ns_fd[i]);
close_prot_errno_disarm(ctx->dfd_init_pid); }
}
if (ctx->container) { static int __attach_namespaces_pidfd(struct attach_context *ctx,
lxc_container_put(ctx->container); lxc_attach_options_t *options)
ctx->container = NULL; {
} unsigned int ns_flags = options->namespaces | ctx->ns_inherited;
int ret;
close_nsfds(ctx); /* The common case is to attach to all namespaces. */
free(ctx); ret = setns(ctx->init_pidfd, ns_flags);
} if (ret)
return log_error_errno(-errno, errno,
"Failed to attach to namespaces via pidfd");
/* We can't risk leaking file descriptors into the container. */
if (close(ctx->init_pidfd))
return log_error_errno(-errno, errno, "Failed to close pidfd");
ctx->init_pidfd = -EBADF;
return log_trace(0, "Attached to container namespaces via pidfd");
} }
static int attach_context_container(struct attach_context *ctx) static int __attach_namespaces_nsfd(struct attach_context *ctx,
lxc_attach_options_t *options)
{ {
int fret = 0; int fret = 0;
...@@ -582,13 +653,15 @@ static int attach_context_container(struct attach_context *ctx) ...@@ -582,13 +653,15 @@ static int attach_context_container(struct attach_context *ctx)
ret = setns(ctx->ns_fd[i], ns_info[i].clone_flag); ret = setns(ctx->ns_fd[i], ns_info[i].clone_flag);
if (ret) if (ret)
return log_error_errno(-errno, errno, "Failed to attach to %s namespace of %d", ns_info[i].proc_name, ctx->init_pid); return log_error_errno(-errno, errno,
"Failed to attach to %s namespace of %d",
DEBUG("Attached to %s namespace of %d", ns_info[i].proc_name, ctx->init_pid); ns_info[i].proc_name,
ctx->init_pid);
if (close(ctx->ns_fd[i])) { if (close(ctx->ns_fd[i])) {
fret = -errno; fret = -errno;
SYSERROR("Failed to close file descriptor for %s namespace", ns_info[i].proc_name); SYSERROR("Failed to close file descriptor for %s namespace",
ns_info[i].proc_name);
} }
ctx->ns_fd[i] = -EBADF; ctx->ns_fd[i] = -EBADF;
} }
...@@ -596,6 +669,46 @@ static int attach_context_container(struct attach_context *ctx) ...@@ -596,6 +669,46 @@ static int attach_context_container(struct attach_context *ctx)
return fret; return fret;
} }
static int attach_namespaces(struct attach_context *ctx,
lxc_attach_options_t *options)
{
if (lxc_log_trace()) {
for (int i = 0; i < LXC_NS_MAX; i++) {
if (ns_info[i].clone_flag & options->namespaces) {
TRACE("Attaching to %s namespace", ns_info[i].proc_name);
continue;
}
if (ns_info[i].clone_flag & ctx->ns_inherited) {
TRACE("Sharing %s namespace", ns_info[i].proc_name);
continue;
}
TRACE("Inheriting %s namespace", ns_info[i].proc_name);
}
}
if (ctx->init_pidfd < 0)
return __attach_namespaces_nsfd(ctx, options);
return __attach_namespaces_pidfd(ctx, options);
}
static void put_attach_context(struct attach_context *ctx)
{
if (ctx) {
if (!(ctx->attach_flags & LXC_ATTACH_LSM_LABEL))
free_disarm(ctx->lsm_label);
close_prot_errno_disarm(ctx->dfd_init_pid);
if (ctx->container) {
lxc_container_put(ctx->container);
ctx->container = NULL;
}
put_namespaces(ctx);
free(ctx);
}
}
/* /*
* Place anything in here that needs to be get rid of before we move into the * Place anything in here that needs to be get rid of before we move into the
* container's context and fail hard if we can't. * container's context and fail hard if we can't.
...@@ -1325,7 +1438,7 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, ...@@ -1325,7 +1438,7 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function,
if (!no_new_privs(ctx->container, options)) if (!no_new_privs(ctx->container, options))
WARN("Could not determine whether PR_SET_NO_NEW_PRIVS is set"); WARN("Could not determine whether PR_SET_NO_NEW_PRIVS is set");
ret = get_attach_context_nsfds(ctx, options); ret = prepare_namespaces(ctx, options);
if (ret) { if (ret) {
put_attach_context(ctx); put_attach_context(ctx);
return log_error(-1, "Failed to get namespace file descriptors"); return log_error(-1, "Failed to get namespace file descriptors");
...@@ -1434,7 +1547,7 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, ...@@ -1434,7 +1547,7 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function,
* anything sensitive. That especially means things such as * anything sensitive. That especially means things such as
* open file descriptors! * open file descriptors!
*/ */
ret = attach_context_container(ctx); ret = attach_namespaces(ctx, options);
if (ret < 0) { if (ret < 0) {
ERROR("Failed to enter namespaces"); ERROR("Failed to enter namespaces");
shutdown(ipc_sockets[1], SHUT_RDWR); shutdown(ipc_sockets[1], SHUT_RDWR);
...@@ -1512,7 +1625,7 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, ...@@ -1512,7 +1625,7 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function,
/* close unneeded file descriptors */ /* close unneeded file descriptors */
close_prot_errno_disarm(ipc_sockets[1]); close_prot_errno_disarm(ipc_sockets[1]);
close_nsfds(ctx); put_namespaces(ctx);
if (options->attach_flags & LXC_ATTACH_TERMINAL) if (options->attach_flags & LXC_ATTACH_TERMINAL)
lxc_attach_terminal_close_pts(&terminal); lxc_attach_terminal_close_pts(&terminal);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment