cgroup2: rework controller delegation

parent 64b23e10
...@@ -65,6 +65,7 @@ void cgroup_exit(struct cgroup_ops *ops) ...@@ -65,6 +65,7 @@ void cgroup_exit(struct cgroup_ops *ops)
free(ops->cgroup_pattern); free(ops->cgroup_pattern);
free(ops->container_cgroup); free(ops->container_cgroup);
free(ops->monitor_cgroup);
if (ops->cgroup2_devices) if (ops->cgroup2_devices)
bpf_program_free(ops->cgroup2_devices); bpf_program_free(ops->cgroup2_devices);
......
...@@ -88,6 +88,7 @@ struct cgroup_ops { ...@@ -88,6 +88,7 @@ struct cgroup_ops {
char **cgroup_use; char **cgroup_use;
char *cgroup_pattern; char *cgroup_pattern;
char *container_cgroup; char *container_cgroup;
char *monitor_cgroup;
/* Static memory, do not free.*/ /* Static memory, do not free.*/
const char *monitor_pattern; const char *monitor_pattern;
...@@ -135,9 +136,9 @@ struct cgroup_ops { ...@@ -135,9 +136,9 @@ struct cgroup_ops {
void (*payload_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler); void (*payload_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
void (*monitor_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler); void (*monitor_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
bool (*monitor_create)(struct cgroup_ops *ops, struct lxc_handler *handler); bool (*monitor_create)(struct cgroup_ops *ops, struct lxc_handler *handler);
bool (*monitor_enter)(struct cgroup_ops *ops, pid_t pid); bool (*monitor_enter)(struct cgroup_ops *ops, struct lxc_handler *handler);
bool (*payload_create)(struct cgroup_ops *ops, struct lxc_handler *handler); bool (*payload_create)(struct cgroup_ops *ops, struct lxc_handler *handler);
bool (*payload_enter)(struct cgroup_ops *ops, pid_t pid); bool (*payload_enter)(struct cgroup_ops *ops, struct lxc_handler *handler);
const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller); const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller);
bool (*escape)(const struct cgroup_ops *ops, struct lxc_conf *conf); bool (*escape)(const struct cgroup_ops *ops, struct lxc_conf *conf);
int (*num_hierarchies)(struct cgroup_ops *ops); int (*num_hierarchies)(struct cgroup_ops *ops);
...@@ -148,8 +149,9 @@ struct cgroup_ops { ...@@ -148,8 +149,9 @@ struct cgroup_ops {
size_t len, const char *name, const char *lxcpath); size_t len, const char *name, const char *lxcpath);
int (*freeze)(struct cgroup_ops *ops, int timeout); int (*freeze)(struct cgroup_ops *ops, int timeout);
int (*unfreeze)(struct cgroup_ops *ops, int timeout); int (*unfreeze)(struct cgroup_ops *ops, int timeout);
bool (*setup_limits)(struct cgroup_ops *ops, struct lxc_conf *conf, bool (*setup_limits_legacy)(struct cgroup_ops *ops,
bool with_devices); struct lxc_conf *conf, bool with_devices);
bool (*setup_limits)(struct cgroup_ops *ops, struct lxc_handler *handler);
bool (*chown)(struct cgroup_ops *ops, struct lxc_conf *conf); bool (*chown)(struct cgroup_ops *ops, struct lxc_conf *conf);
bool (*attach)(struct cgroup_ops *ops, const char *name, bool (*attach)(struct cgroup_ops *ops, const char *name,
const char *lxcpath, pid_t pid); const char *lxcpath, pid_t pid);
...@@ -158,6 +160,8 @@ struct cgroup_ops { ...@@ -158,6 +160,8 @@ struct cgroup_ops {
int (*nrtasks)(struct cgroup_ops *ops); int (*nrtasks)(struct cgroup_ops *ops);
bool (*devices_activate)(struct cgroup_ops *ops, bool (*devices_activate)(struct cgroup_ops *ops,
struct lxc_handler *handler); struct lxc_handler *handler);
bool (*monitor_delegate_controllers)(struct cgroup_ops *ops);
bool (*payload_delegate_controllers)(struct cgroup_ops *ops);
}; };
extern struct cgroup_ops *cgroup_init(struct lxc_conf *conf); extern struct cgroup_ops *cgroup_init(struct lxc_conf *conf);
......
...@@ -802,6 +802,15 @@ static bool wait_on_daemonized_start(struct lxc_handler *handler, int pid) ...@@ -802,6 +802,15 @@ static bool wait_on_daemonized_start(struct lxc_handler *handler, int pid)
{ {
int ret, state; int ret, state;
/* The first child is going to fork() again and then exits. So we reap
* the first child here.
*/
ret = wait_for_pid(pid);
if (ret < 0)
DEBUG("Failed waiting on first child %d", pid);
else
DEBUG("First child %d exited", pid);
/* Close write end of the socket pair. */ /* Close write end of the socket pair. */
close(handler->state_socket_pair[1]); close(handler->state_socket_pair[1]);
handler->state_socket_pair[1] = -1; handler->state_socket_pair[1] = -1;
...@@ -812,15 +821,6 @@ static bool wait_on_daemonized_start(struct lxc_handler *handler, int pid) ...@@ -812,15 +821,6 @@ static bool wait_on_daemonized_start(struct lxc_handler *handler, int pid)
close(handler->state_socket_pair[0]); close(handler->state_socket_pair[0]);
handler->state_socket_pair[0] = -1; handler->state_socket_pair[0] = -1;
/* The first child is going to fork() again and then exits. So we reap
* the first child here.
*/
ret = wait_for_pid(pid);
if (ret < 0)
DEBUG("Failed waiting on first child %d", pid);
else
DEBUG("First child %d exited", pid);
if (state < 0) { if (state < 0) {
SYSERROR("Failed to receive the container state"); SYSERROR("Failed to receive the container state");
return false; return false;
...@@ -913,17 +913,17 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a ...@@ -913,17 +913,17 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a
if (c->daemonize) { if (c->daemonize) {
bool started; bool started;
char title[2048]; char title[2048];
pid_t pid; pid_t pid_first, pid_second;
pid = fork(); pid_first = fork();
if (pid < 0) { if (pid_first < 0) {
free_init_cmd(init_cmd); free_init_cmd(init_cmd);
lxc_free_handler(handler); lxc_free_handler(handler);
return false; return false;
} }
/* first parent */ /* first parent */
if (pid != 0) { if (pid_first != 0) {
/* Set to NULL because we don't want father unlink /* Set to NULL because we don't want father unlink
* the PID file, child will do the free and unlink. * the PID file, child will do the free and unlink.
*/ */
...@@ -932,7 +932,7 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a ...@@ -932,7 +932,7 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a
/* Wait for container to tell us whether it started /* Wait for container to tell us whether it started
* successfully. * successfully.
*/ */
started = wait_on_daemonized_start(handler, pid); started = wait_on_daemonized_start(handler, pid_first);
free_init_cmd(init_cmd); free_init_cmd(init_cmd);
lxc_free_handler(handler); lxc_free_handler(handler);
...@@ -958,14 +958,14 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a ...@@ -958,14 +958,14 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a
* POSIX's daemon() function we change to "/" and redirect * POSIX's daemon() function we change to "/" and redirect
* std{in,out,err} to /dev/null. * std{in,out,err} to /dev/null.
*/ */
pid = fork(); pid_second = fork();
if (pid < 0) { if (pid_second < 0) {
SYSERROR("Failed to fork first child process"); SYSERROR("Failed to fork first child process");
_exit(EXIT_FAILURE); _exit(EXIT_FAILURE);
} }
/* second parent */ /* second parent */
if (pid != 0) { if (pid_second != 0) {
free_init_cmd(init_cmd); free_init_cmd(init_cmd);
lxc_free_handler(handler); lxc_free_handler(handler);
_exit(EXIT_SUCCESS); _exit(EXIT_SUCCESS);
......
...@@ -408,6 +408,12 @@ enum { ...@@ -408,6 +408,12 @@ enum {
-1; \ -1; \
}) })
/*
 * ret_set_errno - set errno and yield a value in a single expression.
 *
 * @__ret__:   expression whose value becomes the value of the macro
 * @__errno__: value to store in errno
 *
 * __ret__ is evaluated exactly once and BEFORE errno is assigned. This way a
 * return expression that reads errno observes the old value, and a return
 * expression that calls something which modifies errno cannot overwrite the
 * value requested by the caller.
 */
#define ret_set_errno(__ret__, __errno__)                         \
({                                                                \
	__typeof__(__ret__) __internal_ret__ = (__ret__);         \
	errno = (__errno__);                                      \
	__internal_ret__;                                         \
})
#define free_replace_move_ptr(a, b) \ #define free_replace_move_ptr(a, b) \
({ \ ({ \
free(a); \ free(a); \
......
...@@ -731,6 +731,10 @@ struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf, ...@@ -731,6 +731,10 @@ struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf,
handler->nsfd[i] = -1; handler->nsfd[i] = -1;
handler->name = name; handler->name = name;
if (daemonize)
handler->transient_pid = lxc_raw_getpid();
else
handler->transient_pid = -1;
if (daemonize && handler->conf->reboot == REBOOT_NONE) { if (daemonize && handler->conf->reboot == REBOOT_NONE) {
/* Create socketpair() to synchronize on daemonized startup. /* Create socketpair() to synchronize on daemonized startup.
...@@ -997,8 +1001,10 @@ void lxc_fini(const char *name, struct lxc_handler *handler) ...@@ -997,8 +1001,10 @@ void lxc_fini(const char *name, struct lxc_handler *handler)
while (namespace_count--) while (namespace_count--)
free(namespaces[namespace_count]); free(namespaces[namespace_count]);
cgroup_ops->payload_destroy(cgroup_ops, handler); if (cgroup_ops) {
cgroup_ops->monitor_destroy(cgroup_ops, handler); cgroup_ops->payload_destroy(cgroup_ops, handler);
cgroup_ops->monitor_destroy(cgroup_ops, handler);
}
if (handler->conf->reboot == REBOOT_NONE) { if (handler->conf->reboot == REBOOT_NONE) {
/* For all new state clients simply close the command socket. /* For all new state clients simply close the command socket.
...@@ -1793,14 +1799,24 @@ static int lxc_spawn(struct lxc_handler *handler) ...@@ -1793,14 +1799,24 @@ static int lxc_spawn(struct lxc_handler *handler)
if (ret < 0) if (ret < 0)
goto out_delete_net; goto out_delete_net;
if (!cgroup_ops->setup_limits(cgroup_ops, handler->conf, false)) { if (!cgroup_ops->setup_limits_legacy(cgroup_ops, handler->conf, false)) {
ERROR("Failed to setup cgroup limits for container \"%s\"", name); ERROR("Failed to setup cgroup limits for container \"%s\"", name);
goto out_delete_net; goto out_delete_net;
} }
if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid)) if (!cgroup_ops->payload_enter(cgroup_ops, handler))
goto out_delete_net; goto out_delete_net;
if (!cgroup_ops->payload_delegate_controllers(cgroup_ops)) {
ERROR("Failed to delegate controllers to payload cgroup");
goto out_delete_net;
}
if (!cgroup_ops->setup_limits(cgroup_ops, handler)) {
ERROR("Failed to setup cgroup limits for container \"%s\"", name);
goto out_delete_net;
}
if (!cgroup_ops->chown(cgroup_ops, handler->conf)) if (!cgroup_ops->chown(cgroup_ops, handler->conf))
goto out_delete_net; goto out_delete_net;
...@@ -1873,7 +1889,7 @@ static int lxc_spawn(struct lxc_handler *handler) ...@@ -1873,7 +1889,7 @@ static int lxc_spawn(struct lxc_handler *handler)
if (ret < 0) if (ret < 0)
goto out_delete_net; goto out_delete_net;
if (!cgroup_ops->setup_limits(cgroup_ops, handler->conf, true)) { if (!cgroup_ops->setup_limits_legacy(cgroup_ops, handler->conf, true)) {
ERROR("Failed to setup legacy device cgroup controller limits"); ERROR("Failed to setup legacy device cgroup controller limits");
goto out_delete_net; goto out_delete_net;
} }
...@@ -1998,12 +2014,18 @@ int __lxc_start(const char *name, struct lxc_handler *handler, ...@@ -1998,12 +2014,18 @@ int __lxc_start(const char *name, struct lxc_handler *handler,
goto out_fini_nonet; goto out_fini_nonet;
} }
if (!cgroup_ops->monitor_enter(cgroup_ops, handler->monitor_pid)) { if (!cgroup_ops->monitor_enter(cgroup_ops, handler)) {
ERROR("Failed to enter monitor cgroup"); ERROR("Failed to enter monitor cgroup");
ret = -1; ret = -1;
goto out_fini_nonet; goto out_fini_nonet;
} }
if (!cgroup_ops->monitor_delegate_controllers(cgroup_ops)) {
ERROR("Failed to delegate controllers to monitor cgroup");
ret = -1;
goto out_fini_nonet;
}
if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) { if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) {
/* If the backing store is a device, mount it here and now. */ /* If the backing store is a device, mount it here and now. */
if (rootfs_is_blockdev(conf)) { if (rootfs_is_blockdev(conf)) {
......
...@@ -89,6 +89,9 @@ struct lxc_handler { ...@@ -89,6 +89,9 @@ struct lxc_handler {
*/ */
int proc_pidfd; int proc_pidfd;
/* The grandfather's pid when double-forking. */
pid_t transient_pid;
/* The monitor's pid. */ /* The monitor's pid. */
pid_t monitor_pid; pid_t monitor_pid;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment