cgroup2: rework controller delegation

parent 75001299
...@@ -1184,71 +1184,6 @@ __cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops, ...@@ -1184,71 +1184,6 @@ __cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops,
} }
} }
/*
 * Enable all detected controllers in the "cgroup.subtree_control" file of
 * every ancestor of the cgroup named by @cgname on a cgroup2 hierarchy.
 *
 * @h:      hierarchy to operate on. Hierarchies that are not cgroup2, or that
 *          carry no controllers, are left untouched.
 * @cgname: '/'-separated cgroup path that is appended component by component
 *          to h->mountpoint/h->container_base_path. The last component is
 *          skipped, only the directories leading up to it are written to.
 *
 * Returns true on success or when there is nothing to do, false when @cgname
 * cannot be split or a write to "cgroup.subtree_control" fails.
 */
static bool cg_unified_create_cgroup(struct hierarchy *h, char *cgname)
{
	__do_free char *add_controllers = NULL, *cgroup = NULL;
	size_t i, parts_len;
	char **it;
	size_t full_len = 0;
	char **parts = NULL;
	bool bret = false;

	if (h->version != CGROUP2_SUPER_MAGIC)
		return true;

	/* A missing or empty controller list means there is nothing to enable.
	 * Returning here keeps a NULL string from reaching the write and the
	 * error message below.
	 */
	if (!h->controllers || !h->controllers[0])
		return true;

	/* For now we simply enable all controllers that we have detected by
	 * creating a string like "+memory +pids +cpu +io".
	 * TODO: In the near future we might want to support "-<controller>"
	 * etc. but whether supporting semantics like this make sense will need
	 * some thinking.
	 */
	for (it = h->controllers; *it; it++) {
		/* '+' and a separating ' ' for each controller name. */
		full_len += strlen(*it) + 2;
		add_controllers = must_realloc(add_controllers, full_len + 1);

		/* Freshly allocated memory: start with an empty string. */
		if (it == h->controllers)
			add_controllers[0] = '\0';

		(void)strlcat(add_controllers, "+", full_len + 1);
		(void)strlcat(add_controllers, *it, full_len + 1);

		if (*(it + 1))
			(void)strlcat(add_controllers, " ", full_len + 1);
	}

	/* The size estimate reserves a separator after the last controller
	 * that is never appended. Only write the bytes of the string itself so
	 * the terminating NUL byte is not sent to the kernel.
	 */
	full_len = strlen(add_controllers);

	parts = lxc_string_split(cgname, '/');
	if (!parts)
		goto on_error;

	/* Skip the final path component. */
	parts_len = lxc_array_len((void **)parts);
	if (parts_len > 0)
		parts_len--;

	cgroup = must_make_path(h->mountpoint, h->container_base_path, NULL);
	for (i = 0; i < parts_len; i++) {
		int ret;
		__do_free char *target = NULL;

		cgroup = must_append_path(cgroup, parts[i], NULL);
		target = must_make_path(cgroup, "cgroup.subtree_control", NULL);
		ret = lxc_write_to_file(target, add_controllers, full_len, false, 0666);
		if (ret < 0) {
			SYSERROR("Could not enable \"%s\" controllers in the "
				 "unified cgroup \"%s\"", add_controllers, cgroup);
			goto on_error;
		}
	}

	bret = true;

on_error:
	lxc_free_array((void **)parts, free);
	return bret;
}
static int mkdir_eexist_on_last(const char *dir, mode_t mode) static int mkdir_eexist_on_last(const char *dir, mode_t mode)
{ {
const char *tmp = dir; const char *tmp = dir;
...@@ -1298,7 +1233,7 @@ static bool monitor_create_path_for_hierarchy(struct hierarchy *h, char *cgname) ...@@ -1298,7 +1233,7 @@ static bool monitor_create_path_for_hierarchy(struct hierarchy *h, char *cgname)
return false; return false;
} }
return cg_unified_create_cgroup(h, cgname); return true;
} }
static bool container_create_path_for_hierarchy(struct hierarchy *h, char *cgname) static bool container_create_path_for_hierarchy(struct hierarchy *h, char *cgname)
...@@ -1317,7 +1252,7 @@ static bool container_create_path_for_hierarchy(struct hierarchy *h, char *cgnam ...@@ -1317,7 +1252,7 @@ static bool container_create_path_for_hierarchy(struct hierarchy *h, char *cgnam
return false; return false;
} }
return cg_unified_create_cgroup(h, cgname); return true;
} }
static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname, bool monitor) static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname, bool monitor)
...@@ -1400,6 +1335,7 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops, ...@@ -1400,6 +1335,7 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
return false; return false;
INFO("The monitor process uses \"%s\" as cgroup", monitor_cgroup); INFO("The monitor process uses \"%s\" as cgroup", monitor_cgroup);
ops->monitor_cgroup = move_ptr(monitor_cgroup);
return true; return true;
} }
...@@ -1479,47 +1415,66 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops, ...@@ -1479,47 +1415,66 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
return true; return true;
} }
__cgfsng_ops static bool __do_cgroup_enter(struct cgroup_ops *ops, pid_t pid, __cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops,
bool monitor) struct lxc_handler *handler)
{ {
int len; int monitor_len, transient_len;
char pidstr[INTTYPE_TO_STRLEN(pid_t)]; char monitor[INTTYPE_TO_STRLEN(pid_t)],
transient[INTTYPE_TO_STRLEN(pid_t)];
if (!ops->hierarchies) if (!ops->hierarchies)
return true; return true;
len = snprintf(pidstr, sizeof(pidstr), "%d", pid); monitor_len = snprintf(monitor, sizeof(monitor), "%d", handler->monitor_pid);
if (len < 0 || (size_t)len >= sizeof(pidstr)) if (handler->transient_pid > 0)
return false; transient_len = snprintf(transient, sizeof(transient), "%d",
handler->transient_pid);
for (int i = 0; ops->hierarchies[i]; i++) { for (int i = 0; ops->hierarchies[i]; i++) {
int ret;
__do_free char *path = NULL; __do_free char *path = NULL;
int ret;
if (monitor) path = must_make_path(ops->hierarchies[i]->monitor_full_path,
path = must_make_path(ops->hierarchies[i]->monitor_full_path, "cgroup.procs", NULL);
"cgroup.procs", NULL); ret = lxc_writeat(-1, path, monitor, monitor_len);
else if (ret != 0)
path = must_make_path(ops->hierarchies[i]->container_full_path, return log_error_errno(false, errno, "Failed to enter cgroup \"%s\"", path);
"cgroup.procs", NULL);
ret = lxc_write_to_file(path, pidstr, len, false, 0666); if (handler->transient_pid < 0)
if (ret != 0) { return true;
SYSERROR("Failed to enter cgroup \"%s\"", path);
return false; ret = lxc_writeat(-1, path, transient, transient_len);
} if (ret != 0)
return log_error_errno(false, errno, "Failed to enter cgroup \"%s\"", path);
} }
handler->transient_pid = -1;
return true; return true;
} }
__cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops, pid_t pid) __cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops,
struct lxc_handler *handler)
{ {
return __do_cgroup_enter(ops, pid, true); int len;
} char pidstr[INTTYPE_TO_STRLEN(pid_t)];
static bool cgfsng_payload_enter(struct cgroup_ops *ops, pid_t pid) if (!ops->hierarchies)
{ return true;
return __do_cgroup_enter(ops, pid, false);
len = snprintf(pidstr, sizeof(pidstr), "%d", handler->pid);
for (int i = 0; ops->hierarchies[i]; i++) {
__do_free char *path = NULL;
int ret;
path = must_make_path(ops->hierarchies[i]->container_full_path,
"cgroup.procs", NULL);
ret = lxc_writeat(-1, path, pidstr, len);
if (ret != 0)
return log_error_errno(false, errno, "Failed to enter cgroup \"%s\"", path);
}
return true;
} }
static int chowmod(char *path, uid_t chown_uid, gid_t chown_gid, static int chowmod(char *path, uid_t chown_uid, gid_t chown_gid,
...@@ -2625,11 +2580,12 @@ static int cg_legacy_set_data(struct cgroup_ops *ops, const char *filename, ...@@ -2625,11 +2580,12 @@ static int cg_legacy_set_data(struct cgroup_ops *ops, const char *filename,
return ret; return ret;
} }
static bool __cg_legacy_setup_limits(struct cgroup_ops *ops, __cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops,
struct lxc_list *cgroup_settings, struct lxc_conf *conf,
bool do_devices) bool do_devices)
{ {
__do_free struct lxc_list *sorted_cgroup_settings = NULL; __do_free struct lxc_list *sorted_cgroup_settings = NULL;
struct lxc_list *cgroup_settings = &conf->cgroup;
struct lxc_list *iterator, *next; struct lxc_list *iterator, *next;
struct lxc_cgroup *cg; struct lxc_cgroup *cg;
bool ret = false; bool ret = false;
...@@ -2699,12 +2655,13 @@ static int bpf_device_cgroup_prepare(struct cgroup_ops *ops, ...@@ -2699,12 +2655,13 @@ static int bpf_device_cgroup_prepare(struct cgroup_ops *ops,
return 0; return 0;
} }
static bool __cg_unified_setup_limits(struct cgroup_ops *ops, __cgfsng_ops static bool cgfsng_setup_limits(struct cgroup_ops *ops,
struct lxc_list *cgroup_settings, struct lxc_handler *handler)
struct lxc_conf *conf)
{ {
struct lxc_list *iterator; struct lxc_list *iterator;
struct hierarchy *h = ops->unified; struct hierarchy *h = ops->unified;
struct lxc_conf *conf = handler->conf;
struct lxc_list *cgroup_settings = &conf->cgroup2;
if (lxc_list_empty(cgroup_settings)) if (lxc_list_empty(cgroup_settings))
return true; return true;
...@@ -2798,18 +2755,79 @@ __cgfsng_ops bool cgfsng_devices_activate(struct cgroup_ops *ops, ...@@ -2798,18 +2755,79 @@ __cgfsng_ops bool cgfsng_devices_activate(struct cgroup_ops *ops,
return true; return true;
} }
__cgfsng_ops static bool cgfsng_setup_limits(struct cgroup_ops *ops, bool __cgfsng_delegate_controllers(struct cgroup_ops *ops, const char *cgroup)
struct lxc_conf *conf,
bool do_devices)
{ {
if (!__cg_legacy_setup_limits(ops, &conf->cgroup, do_devices)) __do_free char *add_controllers = NULL, *base_path = NULL;
return false; struct hierarchy *unified = ops->unified;
ssize_t parts_len;
char **it;
size_t full_len = 0;
char **parts = NULL;
bool bret = false;
/* for v2 we will have already set up devices */ if (!ops->hierarchies || !pure_unified_layout(ops) ||
if (do_devices) !unified->controllers[0])
return true; return true;
return __cg_unified_setup_limits(ops, &conf->cgroup2, conf); /* For now we simply enable all controllers that we have detected by
* creating a string like "+memory +pids +cpu +io".
* TODO: In the near future we might want to support "-<controller>"
* etc. but whether supporting semantics like this make sense will need
* some thinking.
*/
for (it = unified->controllers; it && *it; it++) {
full_len += strlen(*it) + 2;
add_controllers = must_realloc(add_controllers, full_len + 1);
if (unified->controllers[0] == *it)
add_controllers[0] = '\0';
(void)strlcat(add_controllers, "+", full_len + 1);
(void)strlcat(add_controllers, *it, full_len + 1);
if ((it + 1) && *(it + 1))
(void)strlcat(add_controllers, " ", full_len + 1);
}
parts = lxc_string_split(cgroup, '/');
if (!parts)
goto on_error;
parts_len = lxc_array_len((void **)parts);
if (parts_len > 0)
parts_len--;
base_path = must_make_path(unified->mountpoint, unified->container_base_path, NULL);
for (ssize_t i = -1; i < parts_len; i++) {
int ret;
__do_free char *target = NULL;
if (i >= 0)
base_path = must_append_path(base_path, parts[i], NULL);
target = must_make_path(base_path, "cgroup.subtree_control", NULL);
ret = lxc_writeat(-1, target, add_controllers, full_len);
if (ret < 0) {
SYSERROR("Could not enable \"%s\" controllers in the unified cgroup \"%s\"", add_controllers, target);
goto on_error;
}
TRACE("Enable \"%s\" controllers in the unified cgroup \"%s\"", add_controllers, target);
}
bret = true;
on_error:
lxc_free_array((void **)parts, free);
return bret;
}
/* Monitor-side entry point for controller delegation: passes
 * ops->monitor_cgroup to __cgfsng_delegate_controllers() and returns that
 * call's result unchanged.
 */
__cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops)
{
return __cgfsng_delegate_controllers(ops, ops->monitor_cgroup);
}
__cgfsng_ops bool cgfsng_payload_delegate_controllers(struct cgroup_ops *ops)
{
return __cgfsng_delegate_controllers(ops, ops->container_cgroup);
} }
static bool cgroup_use_wants_controllers(const struct cgroup_ops *ops, static bool cgroup_use_wants_controllers(const struct cgroup_ops *ops,
...@@ -3062,15 +3080,15 @@ static int cg_unified_init(struct cgroup_ops *ops, bool relative, ...@@ -3062,15 +3080,15 @@ static int cg_unified_init(struct cgroup_ops *ops, bool relative,
base_cgroup = cg_unified_get_current_cgroup(relative); base_cgroup = cg_unified_get_current_cgroup(relative);
if (!base_cgroup) if (!base_cgroup)
return -EINVAL; return -EINVAL;
prune_init_scope(base_cgroup); if (!relative)
prune_init_scope(base_cgroup);
/* We assume that we have already been given controllers to delegate /* We assume that we have already been given controllers to delegate
* further down the hierarchy. If not it is up to the user to delegate * further down the hierarchy. If not it is up to the user to delegate
* them to us. * them to us.
*/ */
mountpoint = must_copy_string(DEFAULT_CGROUP_MOUNTPOINT); mountpoint = must_copy_string(DEFAULT_CGROUP_MOUNTPOINT);
subtree_path = must_make_path(mountpoint, base_cgroup, subtree_path = must_make_path(mountpoint, base_cgroup, "cgroup.controllers", NULL);
"cgroup.subtree_control", NULL);
delegatable = cg_unified_get_controllers(subtree_path); delegatable = cg_unified_get_controllers(subtree_path);
if (!delegatable) if (!delegatable)
delegatable = cg_unified_make_empty_controller(); delegatable = cg_unified_make_empty_controller();
...@@ -3162,6 +3180,8 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf) ...@@ -3162,6 +3180,8 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
cgfsng_ops->monitor_destroy = cgfsng_monitor_destroy; cgfsng_ops->monitor_destroy = cgfsng_monitor_destroy;
cgfsng_ops->monitor_create = cgfsng_monitor_create; cgfsng_ops->monitor_create = cgfsng_monitor_create;
cgfsng_ops->monitor_enter = cgfsng_monitor_enter; cgfsng_ops->monitor_enter = cgfsng_monitor_enter;
cgfsng_ops->monitor_delegate_controllers = cgfsng_monitor_delegate_controllers;
cgfsng_ops->payload_delegate_controllers = cgfsng_payload_delegate_controllers;
cgfsng_ops->payload_create = cgfsng_payload_create; cgfsng_ops->payload_create = cgfsng_payload_create;
cgfsng_ops->payload_enter = cgfsng_payload_enter; cgfsng_ops->payload_enter = cgfsng_payload_enter;
cgfsng_ops->escape = cgfsng_escape; cgfsng_ops->escape = cgfsng_escape;
...@@ -3172,6 +3192,7 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf) ...@@ -3172,6 +3192,7 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
cgfsng_ops->set = cgfsng_set; cgfsng_ops->set = cgfsng_set;
cgfsng_ops->freeze = cgfsng_freeze; cgfsng_ops->freeze = cgfsng_freeze;
cgfsng_ops->unfreeze = cgfsng_unfreeze; cgfsng_ops->unfreeze = cgfsng_unfreeze;
cgfsng_ops->setup_limits_legacy = cgfsng_setup_limits_legacy;
cgfsng_ops->setup_limits = cgfsng_setup_limits; cgfsng_ops->setup_limits = cgfsng_setup_limits;
cgfsng_ops->driver = "cgfsng"; cgfsng_ops->driver = "cgfsng";
cgfsng_ops->version = "1.0.0"; cgfsng_ops->version = "1.0.0";
......
...@@ -65,6 +65,7 @@ void cgroup_exit(struct cgroup_ops *ops) ...@@ -65,6 +65,7 @@ void cgroup_exit(struct cgroup_ops *ops)
free(ops->cgroup_pattern); free(ops->cgroup_pattern);
free(ops->container_cgroup); free(ops->container_cgroup);
free(ops->monitor_cgroup);
if (ops->cgroup2_devices) if (ops->cgroup2_devices)
bpf_program_free(ops->cgroup2_devices); bpf_program_free(ops->cgroup2_devices);
......
...@@ -88,6 +88,7 @@ struct cgroup_ops { ...@@ -88,6 +88,7 @@ struct cgroup_ops {
char **cgroup_use; char **cgroup_use;
char *cgroup_pattern; char *cgroup_pattern;
char *container_cgroup; char *container_cgroup;
char *monitor_cgroup;
/* Static memory, do not free.*/ /* Static memory, do not free.*/
const char *monitor_pattern; const char *monitor_pattern;
...@@ -135,9 +136,9 @@ struct cgroup_ops { ...@@ -135,9 +136,9 @@ struct cgroup_ops {
void (*payload_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler); void (*payload_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
void (*monitor_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler); void (*monitor_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
bool (*monitor_create)(struct cgroup_ops *ops, struct lxc_handler *handler); bool (*monitor_create)(struct cgroup_ops *ops, struct lxc_handler *handler);
bool (*monitor_enter)(struct cgroup_ops *ops, pid_t pid); bool (*monitor_enter)(struct cgroup_ops *ops, struct lxc_handler *handler);
bool (*payload_create)(struct cgroup_ops *ops, struct lxc_handler *handler); bool (*payload_create)(struct cgroup_ops *ops, struct lxc_handler *handler);
bool (*payload_enter)(struct cgroup_ops *ops, pid_t pid); bool (*payload_enter)(struct cgroup_ops *ops, struct lxc_handler *handler);
const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller); const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller);
bool (*escape)(const struct cgroup_ops *ops, struct lxc_conf *conf); bool (*escape)(const struct cgroup_ops *ops, struct lxc_conf *conf);
int (*num_hierarchies)(struct cgroup_ops *ops); int (*num_hierarchies)(struct cgroup_ops *ops);
...@@ -148,8 +149,9 @@ struct cgroup_ops { ...@@ -148,8 +149,9 @@ struct cgroup_ops {
size_t len, const char *name, const char *lxcpath); size_t len, const char *name, const char *lxcpath);
int (*freeze)(struct cgroup_ops *ops, int timeout); int (*freeze)(struct cgroup_ops *ops, int timeout);
int (*unfreeze)(struct cgroup_ops *ops, int timeout); int (*unfreeze)(struct cgroup_ops *ops, int timeout);
bool (*setup_limits)(struct cgroup_ops *ops, struct lxc_conf *conf, bool (*setup_limits_legacy)(struct cgroup_ops *ops,
bool with_devices); struct lxc_conf *conf, bool with_devices);
bool (*setup_limits)(struct cgroup_ops *ops, struct lxc_handler *handler);
bool (*chown)(struct cgroup_ops *ops, struct lxc_conf *conf); bool (*chown)(struct cgroup_ops *ops, struct lxc_conf *conf);
bool (*attach)(struct cgroup_ops *ops, const char *name, bool (*attach)(struct cgroup_ops *ops, const char *name,
const char *lxcpath, pid_t pid); const char *lxcpath, pid_t pid);
...@@ -158,6 +160,8 @@ struct cgroup_ops { ...@@ -158,6 +160,8 @@ struct cgroup_ops {
int (*nrtasks)(struct cgroup_ops *ops); int (*nrtasks)(struct cgroup_ops *ops);
bool (*devices_activate)(struct cgroup_ops *ops, bool (*devices_activate)(struct cgroup_ops *ops,
struct lxc_handler *handler); struct lxc_handler *handler);
bool (*monitor_delegate_controllers)(struct cgroup_ops *ops);
bool (*payload_delegate_controllers)(struct cgroup_ops *ops);
}; };
extern struct cgroup_ops *cgroup_init(struct lxc_conf *conf); extern struct cgroup_ops *cgroup_init(struct lxc_conf *conf);
......
...@@ -824,6 +824,15 @@ static bool wait_on_daemonized_start(struct lxc_handler *handler, int pid) ...@@ -824,6 +824,15 @@ static bool wait_on_daemonized_start(struct lxc_handler *handler, int pid)
{ {
int ret, state; int ret, state;
/* The first child is going to fork() again and then exits. So we reap
* the first child here.
*/
ret = wait_for_pid(pid);
if (ret < 0)
DEBUG("Failed waiting on first child %d", pid);
else
DEBUG("First child %d exited", pid);
/* Close write end of the socket pair. */ /* Close write end of the socket pair. */
close(handler->state_socket_pair[1]); close(handler->state_socket_pair[1]);
handler->state_socket_pair[1] = -1; handler->state_socket_pair[1] = -1;
...@@ -834,15 +843,6 @@ static bool wait_on_daemonized_start(struct lxc_handler *handler, int pid) ...@@ -834,15 +843,6 @@ static bool wait_on_daemonized_start(struct lxc_handler *handler, int pid)
close(handler->state_socket_pair[0]); close(handler->state_socket_pair[0]);
handler->state_socket_pair[0] = -1; handler->state_socket_pair[0] = -1;
/* The first child is going to fork() again and then exits. So we reap
* the first child here.
*/
ret = wait_for_pid(pid);
if (ret < 0)
DEBUG("Failed waiting on first child %d", pid);
else
DEBUG("First child %d exited", pid);
if (state < 0) { if (state < 0) {
SYSERROR("Failed to receive the container state"); SYSERROR("Failed to receive the container state");
return false; return false;
...@@ -935,17 +935,17 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a ...@@ -935,17 +935,17 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a
if (c->daemonize) { if (c->daemonize) {
bool started; bool started;
char title[2048]; char title[2048];
pid_t pid; pid_t pid_first, pid_second;
pid = fork(); pid_first = fork();
if (pid < 0) { if (pid_first < 0) {
free_init_cmd(init_cmd); free_init_cmd(init_cmd);
lxc_free_handler(handler); lxc_free_handler(handler);
return false; return false;
} }
/* first parent */ /* first parent */
if (pid != 0) { if (pid_first != 0) {
/* Set to NULL because we don't want father unlink /* Set to NULL because we don't want father unlink
* the PID file, child will do the free and unlink. * the PID file, child will do the free and unlink.
*/ */
...@@ -954,7 +954,7 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a ...@@ -954,7 +954,7 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a
/* Wait for container to tell us whether it started /* Wait for container to tell us whether it started
* successfully. * successfully.
*/ */
started = wait_on_daemonized_start(handler, pid); started = wait_on_daemonized_start(handler, pid_first);
free_init_cmd(init_cmd); free_init_cmd(init_cmd);
lxc_free_handler(handler); lxc_free_handler(handler);
...@@ -980,14 +980,14 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a ...@@ -980,14 +980,14 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a
* POSIX's daemon() function we change to "/" and redirect * POSIX's daemon() function we change to "/" and redirect
* std{in,out,err} to /dev/null. * std{in,out,err} to /dev/null.
*/ */
pid = fork(); pid_second = fork();
if (pid < 0) { if (pid_second < 0) {
SYSERROR("Failed to fork first child process"); SYSERROR("Failed to fork first child process");
_exit(EXIT_FAILURE); _exit(EXIT_FAILURE);
} }
/* second parent */ /* second parent */
if (pid != 0) { if (pid_second != 0) {
free_init_cmd(init_cmd); free_init_cmd(init_cmd);
lxc_free_handler(handler); lxc_free_handler(handler);
_exit(EXIT_SUCCESS); _exit(EXIT_SUCCESS);
......
...@@ -448,6 +448,12 @@ enum { ...@@ -448,6 +448,12 @@ enum {
-1; \ -1; \
}) })
#define ret_set_errno(__ret__, __errno__) \
({ \
errno = __errno__; \
__ret__; \
})
#define free_replace_move_ptr(a, b) \ #define free_replace_move_ptr(a, b) \
({ \ ({ \
free(a); \ free(a); \
......
...@@ -737,6 +737,10 @@ struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf, ...@@ -737,6 +737,10 @@ struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf,
handler->nsfd[i] = -1; handler->nsfd[i] = -1;
handler->name = name; handler->name = name;
if (daemonize)
handler->transient_pid = lxc_raw_getpid();
else
handler->transient_pid = -1;
if (daemonize && handler->conf->reboot == REBOOT_NONE) { if (daemonize && handler->conf->reboot == REBOOT_NONE) {
/* Create socketpair() to synchronize on daemonized startup. /* Create socketpair() to synchronize on daemonized startup.
...@@ -912,7 +916,7 @@ int lxc_init(const char *name, struct lxc_handler *handler) ...@@ -912,7 +916,7 @@ int lxc_init(const char *name, struct lxc_handler *handler)
ret = lsm_process_prepare(conf, handler->lxcpath); ret = lsm_process_prepare(conf, handler->lxcpath);
if (ret < 0) { if (ret < 0) {
ERROR("Failed to initialize LSM"); ERROR("Failed to initialize LSM");
goto out_destroy_cgroups; goto out_delete_terminal;
} }
TRACE("Initialized LSM"); TRACE("Initialized LSM");
...@@ -920,10 +924,6 @@ int lxc_init(const char *name, struct lxc_handler *handler) ...@@ -920,10 +924,6 @@ int lxc_init(const char *name, struct lxc_handler *handler)
handler->monitor_status_fd = move_fd(status_fd); handler->monitor_status_fd = move_fd(status_fd);
return 0; return 0;
out_destroy_cgroups:
handler->cgroup_ops->payload_destroy(handler->cgroup_ops, handler);
handler->cgroup_ops->monitor_destroy(handler->cgroup_ops, handler);
out_delete_terminal: out_delete_terminal:
lxc_terminal_delete(&handler->conf->console); lxc_terminal_delete(&handler->conf->console);
...@@ -1016,8 +1016,10 @@ void lxc_fini(const char *name, struct lxc_handler *handler) ...@@ -1016,8 +1016,10 @@ void lxc_fini(const char *name, struct lxc_handler *handler)
lsm_process_cleanup(handler->conf, handler->lxcpath); lsm_process_cleanup(handler->conf, handler->lxcpath);
cgroup_ops->payload_destroy(cgroup_ops, handler); if (cgroup_ops) {
cgroup_ops->monitor_destroy(cgroup_ops, handler); cgroup_ops->payload_destroy(cgroup_ops, handler);
cgroup_ops->monitor_destroy(cgroup_ops, handler);
}
if (handler->conf->reboot == REBOOT_NONE) { if (handler->conf->reboot == REBOOT_NONE) {
/* For all new state clients simply close the command socket. /* For all new state clients simply close the command socket.
...@@ -1813,14 +1815,24 @@ static int lxc_spawn(struct lxc_handler *handler) ...@@ -1813,14 +1815,24 @@ static int lxc_spawn(struct lxc_handler *handler)
if (ret < 0) if (ret < 0)
goto out_delete_net; goto out_delete_net;
if (!cgroup_ops->setup_limits(cgroup_ops, handler->conf, false)) { if (!cgroup_ops->setup_limits_legacy(cgroup_ops, handler->conf, false)) {
ERROR("Failed to setup cgroup limits for container \"%s\"", name); ERROR("Failed to setup cgroup limits for container \"%s\"", name);
goto out_delete_net; goto out_delete_net;
} }
if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid)) if (!cgroup_ops->payload_enter(cgroup_ops, handler))
goto out_delete_net; goto out_delete_net;
if (!cgroup_ops->payload_delegate_controllers(cgroup_ops)) {
ERROR("Failed to delegate controllers to payload cgroup");
goto out_delete_net;
}
if (!cgroup_ops->setup_limits(cgroup_ops, handler)) {
ERROR("Failed to setup cgroup limits for container \"%s\"", name);
goto out_delete_net;
}
if (!cgroup_ops->chown(cgroup_ops, handler->conf)) if (!cgroup_ops->chown(cgroup_ops, handler->conf))
goto out_delete_net; goto out_delete_net;
...@@ -1883,7 +1895,7 @@ static int lxc_spawn(struct lxc_handler *handler) ...@@ -1883,7 +1895,7 @@ static int lxc_spawn(struct lxc_handler *handler)
if (ret < 0) if (ret < 0)
goto out_delete_net; goto out_delete_net;
if (!cgroup_ops->setup_limits(cgroup_ops, handler->conf, true)) { if (!cgroup_ops->setup_limits_legacy(cgroup_ops, handler->conf, true)) {
ERROR("Failed to setup legacy device cgroup controller limits"); ERROR("Failed to setup legacy device cgroup controller limits");
goto out_delete_net; goto out_delete_net;
} }
...@@ -2015,12 +2027,18 @@ int __lxc_start(const char *name, struct lxc_handler *handler, ...@@ -2015,12 +2027,18 @@ int __lxc_start(const char *name, struct lxc_handler *handler,
goto out_fini_nonet; goto out_fini_nonet;
} }
if (!cgroup_ops->monitor_enter(cgroup_ops, handler->monitor_pid)) { if (!cgroup_ops->monitor_enter(cgroup_ops, handler)) {
ERROR("Failed to enter monitor cgroup"); ERROR("Failed to enter monitor cgroup");
ret = -1; ret = -1;
goto out_fini_nonet; goto out_fini_nonet;
} }
if (!cgroup_ops->monitor_delegate_controllers(cgroup_ops)) {
ERROR("Failed to delegate controllers to monitor cgroup");
ret = -1;
goto out_fini_nonet;
}
if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) { if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) {
/* If the backing store is a device, mount it here and now. */ /* If the backing store is a device, mount it here and now. */
if (rootfs_is_blockdev(conf)) { if (rootfs_is_blockdev(conf)) {
......
...@@ -89,6 +89,9 @@ struct lxc_handler { ...@@ -89,6 +89,9 @@ struct lxc_handler {
*/ */
int proc_pidfd; int proc_pidfd;
/* The grandfather's pid when double-forking. */
pid_t transient_pid;
/* The monitor's pid. */ /* The monitor's pid. */
pid_t monitor_pid; pid_t monitor_pid;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment