Unverified Commit a3f5fbb3 by Serge Hallyn Committed by GitHub

Merge pull request #2067 from brauner/2018-01-03/allow_fully_unprivileged_containers

conf: write "deny" to /proc/[pid]/setgroups
parents 4f5e5b78 bd8ef4e4
......@@ -1206,7 +1206,7 @@ out_free:
return NULL;
}
static int cgroup_rmdir(char *dirname)
static int recursive_destroy(char *dirname)
{
int ret;
struct dirent *direntp;
......@@ -1241,32 +1241,57 @@ static int cgroup_rmdir(char *dirname)
if (!S_ISDIR(mystat.st_mode))
goto next;
ret = cgroup_rmdir(pathname);
ret = recursive_destroy(pathname);
if (ret < 0)
r = -1;
next:
next:
free(pathname);
}
ret = rmdir(dirname);
if (ret < 0) {
if (!r)
WARN("Failed to delete \"%s\": %s", dirname,
strerror(errno));
WARN("%s - Failed to delete \"%s\"", strerror(errno),
dirname);
r = -1;
}
ret = closedir(dir);
if (ret < 0) {
if (!r)
WARN("Failed to delete \"%s\": %s", dirname,
strerror(errno));
WARN("%s - Failed to delete \"%s\"", strerror(errno),
dirname);
r = -1;
}
return r;
}
static int cgroup_rmdir(char *container_cgroup)
{
int i;
if (!container_cgroup || !hierarchies)
return 0;
for (i = 0; hierarchies[i]; i++) {
int ret;
struct hierarchy *h = hierarchies[i];
if (!h->fullcgpath)
continue;
ret = recursive_destroy(h->fullcgpath);
if (ret < 0)
WARN("Failed to destroy \"%s\"", h->fullcgpath);
free(h->fullcgpath);
h->fullcgpath = NULL;
}
return 0;
}
struct generic_userns_exec_data {
struct cgfsng_handler_data *d;
struct lxc_conf *conf;
......@@ -1274,7 +1299,7 @@ struct generic_userns_exec_data {
char *path;
};
static int rmdir_wrapper(void *data)
static int cgroup_rmdir_wrapper(void *data)
{
struct generic_userns_exec_data *arg = data;
uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid;
......@@ -1284,48 +1309,33 @@ static int rmdir_wrapper(void *data)
SYSERROR("Failed to setgid to 0");
if (setresuid(nsuid, nsuid, nsuid) < 0)
SYSERROR("Failed to setuid to 0");
if (setgroups(0, NULL) < 0)
if (setgroups(0, NULL) < 0 && errno != EPERM)
SYSERROR("Failed to clear groups");
return cgroup_rmdir(arg->path);
return cgroup_rmdir(arg->d->container_cgroup);
}
void recursive_destroy(char *path, struct lxc_conf *conf)
static void cgfsng_destroy(void *hdata, struct lxc_conf *conf)
{
int r;
int ret;
struct cgfsng_handler_data *d = hdata;
struct generic_userns_exec_data wrap;
if (!d)
return;
wrap.origuid = 0;
wrap.d = NULL;
wrap.path = path;
wrap.d = hdata;
wrap.conf = conf;
if (conf && !lxc_list_empty(&conf->id_map))
r = userns_exec_1(conf, rmdir_wrapper, &wrap, "rmdir_wrapper");
ret = userns_exec_1(conf, cgroup_rmdir_wrapper, &wrap,
"cgroup_rmdir_wrapper");
else
r = cgroup_rmdir(path);
if (r < 0)
ERROR("Error destroying %s", path);
}
static void cgfsng_destroy(void *hdata, struct lxc_conf *conf)
{
struct cgfsng_handler_data *d = hdata;
if (!d)
ret = cgroup_rmdir(d->container_cgroup);
if (ret < 0) {
WARN("Failed to destroy cgroups");
return;
if (d->container_cgroup && hierarchies) {
int i;
for (i = 0; hierarchies[i]; i++) {
struct hierarchy *h = hierarchies[i];
if (h->fullcgpath) {
recursive_destroy(h->fullcgpath, conf);
free(h->fullcgpath);
h->fullcgpath = NULL;
}
}
}
free_handler_data(d);
......@@ -1481,7 +1491,7 @@ static int chown_cgroup_wrapper(void *data)
SYSERROR("Failed to setgid to 0");
if (setresuid(nsuid, nsuid, nsuid) < 0)
SYSERROR("Failed to setuid to 0");
if (setgroups(0, NULL) < 0)
if (setgroups(0, NULL) < 0 && errno != EPERM)
SYSERROR("Failed to clear groups");
destuid = get_ns_uid(arg->origuid);
......
......@@ -1423,6 +1423,15 @@ static struct id_map *find_mapped_nsid_entry(struct lxc_conf *conf, unsigned id,
struct id_map *map;
struct id_map *retmap = NULL;
/* Shortcut for container's root mappings. */
if (id == 0) {
if (idtype == ID_TYPE_UID)
return conf->root_nsuid_map;
if (idtype == ID_TYPE_GID)
return conf->root_nsgid_map;
}
lxc_list_for_each(it, &conf->id_map) {
map = it->elem;
if (map->idtype != idtype)
......@@ -2627,28 +2636,54 @@ struct lxc_conf *lxc_conf_init(void)
}
int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
size_t buf_size)
size_t buf_size)
{
char path[MAXPATHLEN];
int fd, ret;
if (geteuid() != 0 && idtype == ID_TYPE_GID) {
size_t buflen;
ret = snprintf(path, MAXPATHLEN, "/proc/%d/setgroups", pid);
if (ret < 0 || ret >= MAXPATHLEN) {
ERROR("Failed to create string");
return -E2BIG;
}
fd = open(path, O_WRONLY);
if (fd < 0 && errno != ENOENT) {
SYSERROR("Failed to open \"%s\"", path);
return -1;
}
buflen = sizeof("deny\n") - 1;
errno = 0;
ret = lxc_write_nointr(fd, "deny\n", buflen);
if (ret != buflen) {
SYSERROR("Failed to write \"deny\" to \"/proc/%d/setgroups\"", pid);
close(fd);
return -1;
}
close(fd);
}
ret = snprintf(path, MAXPATHLEN, "/proc/%d/%cid_map", pid,
idtype == ID_TYPE_UID ? 'u' : 'g');
if (ret < 0 || ret >= MAXPATHLEN) {
ERROR("failed to create path \"%s\"", path);
ERROR("Failed to create string");
return -E2BIG;
}
fd = open(path, O_WRONLY);
if (fd < 0) {
SYSERROR("failed to open \"%s\"", path);
SYSERROR("Failed to open \"%s\"", path);
return -1;
}
errno = 0;
ret = lxc_write_nointr(fd, buf, buf_size);
if (ret != buf_size) {
SYSERROR("failed to write %cid mapping to \"%s\"",
SYSERROR("Failed to write %cid mapping to \"%s\"",
idtype == ID_TYPE_UID ? 'u' : 'g', path);
close(fd);
return -1;
......@@ -3445,7 +3480,8 @@ int lxc_clear_config_caps(struct lxc_conf *c)
return 0;
}
static int lxc_free_idmap(struct lxc_list *id_map) {
static int lxc_free_idmap(struct lxc_list *id_map)
{
struct lxc_list *it, *next;
lxc_list_for_each_safe(it, id_map, next) {
......@@ -3453,6 +3489,7 @@ static int lxc_free_idmap(struct lxc_list *id_map) {
free(it->elem);
free(it);
}
return 0;
}
......@@ -3829,86 +3866,46 @@ static struct id_map *mapped_hostid_add(struct lxc_conf *conf, uid_t id, enum id
return entry;
}
/* Run a function in a new user namespace.
* The caller's euid/egid will be mapped if it is not already.
* Afaict, userns_exec_1() is only used to operate based on privileges for the
* user's own {g,u}id on the host and for the container root's unmapped {g,u}id.
* This means we require only to establish a mapping from:
* - the container root {g,u}id as seen from the host > user's host {g,u}id
* - the container root -> some sub{g,u}id
* The former we add, if the user did not specify a mapping. The latter we
* retrieve from the container's configured {g,u}id mappings as it must have been
* there to start the container in the first place.
*/
int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data,
const char *fn_name)
struct lxc_list *get_minimal_idmap(struct lxc_conf *conf)
{
pid_t pid;
uid_t euid, egid;
struct userns_fn_data d;
int p[2];
struct lxc_list *it;
struct id_map *map;
char c = '1';
int ret = -1, status = -1;
uid_t nsuid = (conf->root_nsuid_map != NULL) ? 0 : conf->init_uid;
gid_t nsgid = (conf->root_nsgid_map != NULL) ? 0 : conf->init_gid;
struct lxc_list *idmap = NULL, *tmplist = NULL;
struct id_map *container_root_uid = NULL, *container_root_gid = NULL,
*host_uid_map = NULL, *host_gid_map = NULL;
ret = pipe(p);
if (ret < 0) {
SYSERROR("opening pipe");
return -1;
}
d.fn = fn;
d.fn_name = fn_name;
d.arg = data;
d.p[0] = p[0];
d.p[1] = p[1];
/* Clone child in new user namespace. */
pid = lxc_clone(run_userns_fn, &d, CLONE_NEWUSER);
if (pid < 0) {
ERROR("failed to clone child process in new user namespace");
goto on_error;
}
close(p[0]);
p[0] = -1;
/* Find container root mappings. */
euid = geteuid();
container_root_uid = mapped_nsid_add(conf, nsuid, ID_TYPE_UID);
if (!container_root_uid) {
DEBUG("Failed to find mapping for container root uid %d", 0);
DEBUG("Failed to find mapping for namespace uid %d", 0);
goto on_error;
}
if (euid >= container_root_uid->hostid && euid < container_root_uid->hostid + container_root_uid->range)
euid = geteuid();
if (euid >= container_root_uid->hostid &&
euid < (container_root_uid->hostid + container_root_uid->range))
host_uid_map = container_root_uid;
egid = getegid();
container_root_gid = mapped_nsid_add(conf, nsgid, ID_TYPE_GID);
if (!container_root_gid) {
DEBUG("Failed to find mapping for container root gid %d", 0);
DEBUG("Failed to find mapping for namespace gid %d", 0);
goto on_error;
}
if (egid >= container_root_gid->hostid && egid < container_root_gid->hostid + container_root_gid->range)
egid = getegid();
if (egid >= container_root_gid->hostid &&
egid < (container_root_gid->hostid + container_root_gid->range))
host_gid_map = container_root_gid;
/* Check whether the {g,u}id of the user has a mapping. */
if (!host_uid_map)
host_uid_map = mapped_hostid_add(conf, euid, ID_TYPE_UID);
if (!host_gid_map)
host_gid_map = mapped_hostid_add(conf, egid, ID_TYPE_GID);
if (!host_uid_map) {
DEBUG("Failed to find mapping for uid %d", euid);
goto on_error;
}
if (!host_gid_map)
host_gid_map = mapped_hostid_add(conf, egid, ID_TYPE_GID);
if (!host_gid_map) {
DEBUG("Failed to find mapping for gid %d", egid);
goto on_error;
......@@ -3964,29 +3961,95 @@ int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data,
/* idmap will now keep track of that memory. */
host_gid_map = NULL;
TRACE("Allocated minimal idmapping");
return idmap;
on_error:
if (idmap)
lxc_free_idmap(idmap);
if (container_root_uid)
free(container_root_uid);
if (container_root_gid)
free(container_root_gid);
if (host_uid_map && (host_uid_map != container_root_uid))
free(host_uid_map);
if (host_gid_map && (host_gid_map != container_root_gid))
free(host_gid_map);
return NULL;
}
/* Run a function in a new user namespace.
* The caller's euid/egid will be mapped if it is not already.
* Afaict, userns_exec_1() is only used to operate based on privileges for the
* user's own {g,u}id on the host and for the container root's unmapped {g,u}id.
* This means we require only to establish a mapping from:
* - the container root {g,u}id as seen from the host > user's host {g,u}id
* - the container root -> some sub{g,u}id
* The former we add, if the user did not specify a mapping. The latter we
* retrieve from the container's configured {g,u}id mappings as it must have been
* there to start the container in the first place.
*/
int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data,
const char *fn_name)
{
pid_t pid;
struct userns_fn_data d;
int p[2];
char c = '1';
int ret = -1, status = -1;
struct lxc_list *idmap;
idmap = get_minimal_idmap(conf);
if (!idmap)
return -1;
ret = pipe(p);
if (ret < 0) {
SYSERROR("Failed to create pipe");
return -1;
}
d.fn = fn;
d.fn_name = fn_name;
d.arg = data;
d.p[0] = p[0];
d.p[1] = p[1];
/* Clone child in new user namespace. */
pid = lxc_raw_clone_cb(run_userns_fn, &d, CLONE_NEWUSER);
if (pid < 0) {
ERROR("failed to clone child process in new user namespace");
goto on_error;
}
close(p[0]);
p[0] = -1;
if (lxc_log_get_level() == LXC_LOG_LEVEL_TRACE ||
conf->loglevel == LXC_LOG_LEVEL_TRACE) {
struct lxc_list *it;
struct id_map *map;
lxc_list_for_each(it, idmap) {
map = it->elem;
TRACE("establishing %cid mapping for \"%d\" in new "
TRACE("Establishing %cid mapping for \"%d\" in new "
"user namespace: nsuid %lu - hostid %lu - range "
"%lu",
(map->idtype == ID_TYPE_UID) ? 'u' : 'g', pid,
map->nsid, map->hostid, map->range);
"%lu", (map->idtype == ID_TYPE_UID) ? 'u' : 'g',
pid, map->nsid, map->hostid, map->range);
}
}
/* Set up {g,u}id mapping for user namespace of child process. */
ret = lxc_map_ids(idmap, pid);
if (ret < 0) {
ERROR("error setting up {g,u}id mappings for child process "
ERROR("Error setting up {g,u}id mappings for child process "
"\"%d\"", pid);
goto on_error;
}
/* Tell child to proceed. */
if (write(p[1], &c, 1) != 1) {
SYSERROR("failed telling child process \"%d\" to proceed", pid);
SYSERROR("Failed telling child process \"%d\" to proceed", pid);
goto on_error;
}
......@@ -3995,17 +4058,6 @@ on_error:
if (pid > 0)
status = wait_for_pid(pid);
if (idmap)
lxc_free_idmap(idmap);
if (container_root_uid)
free(container_root_uid);
if (container_root_gid)
free(container_root_gid);
if (host_uid_map && (host_uid_map != container_root_uid))
free(host_uid_map);
if (host_gid_map && (host_gid_map != container_root_gid))
free(host_gid_map);
if (p[0] != -1)
close(p[0]);
close(p[1]);
......
......@@ -284,9 +284,11 @@ struct lxc_conf {
struct lxc_list cgroup;
struct {
struct lxc_list id_map;
/* Pointer to the idmap entry for the container's root uid in
* the id_map list. Do not free! */
struct id_map *root_nsuid_map;
/* Pointer to the idmap entry for the container's root gid in
* the id_map list. Do not free! */
struct id_map *root_nsgid_map;
......@@ -410,7 +412,7 @@ struct lxc_conf {
struct lxc_list procs;
};
int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
extern int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
size_t buf_size);
#ifdef HAVE_TLS
......
......@@ -1014,7 +1014,7 @@ static int do_start(void *data)
* user namespace.
*/
ret = lxc_setgroups(0, NULL);
if (ret < 0)
if (ret < 0 && (handler->am_root || errno != EPERM))
goto out_warn_father;
if (!handler->am_root) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment