Commit f0ab9713 by Stéphane Graber Committed by GitHub

Merge pull request #1781 from brauner/stable-2.0

stable 2.0: cherry-picks + delta reduction between master and stable 2.0
parents d3e7b8ad 567d7611
...@@ -2515,14 +2515,19 @@ int lxc_map_ids(struct lxc_list *idmap, pid_t pid) ...@@ -2515,14 +2515,19 @@ int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
lxc_map_ids_exec_wrapper, lxc_map_ids_exec_wrapper,
(void *)mapbuf); (void *)mapbuf);
if (ret < 0) { if (ret < 0) {
ERROR("new%cidmap failed to write mapping: %s", ERROR("new%cidmap failed to write mapping \"%s\": %s",
u_or_g, cmd_output); u_or_g, cmd_output, mapbuf);
return -1; return -1;
} }
TRACE("new%cidmap wrote mapping \"%s\"", u_or_g, mapbuf);
} else { } else {
ret = write_id_mapping(type, pid, mapbuf, pos - mapbuf); ret = write_id_mapping(type, pid, mapbuf, pos - mapbuf);
if (ret < 0) if (ret < 0) {
ERROR("Failed to write mapping \"%s\": %s",
cmd_output, mapbuf);
return -1; return -1;
}
TRACE("Wrote mapping \"%s\"", mapbuf);
} }
memset(mapbuf, 0, sizeof(mapbuf)); memset(mapbuf, 0, sizeof(mapbuf));
...@@ -3006,41 +3011,36 @@ static bool verify_start_hooks(struct lxc_conf *conf) ...@@ -3006,41 +3011,36 @@ static bool verify_start_hooks(struct lxc_conf *conf)
static int lxc_send_ttys_to_parent(struct lxc_handler *handler) static int lxc_send_ttys_to_parent(struct lxc_handler *handler)
{ {
int i; int i;
int *ttyfds;
struct lxc_pty_info *pty_info;
struct lxc_conf *conf = handler->conf; struct lxc_conf *conf = handler->conf;
const struct lxc_tty_info *tty_info = &conf->tty_info; struct lxc_tty_info *tty_info = &conf->tty_info;
int sock = handler->ttysock[0]; int sock = handler->data_sock[0];
int ret = -1; int ret = -1;
size_t num_ttyfds = (2 * conf->tty);
ttyfds = malloc(num_ttyfds * sizeof(int)); if (!conf->tty)
if (!ttyfds) return 0;
return -1;
for (i = 0; i < num_ttyfds; i++) { for (i = 0; i < conf->tty; i++) {
pty_info = &tty_info->pty_info[i / 2]; int ttyfds[2];
ttyfds[i++] = pty_info->slave; struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
ttyfds[i] = pty_info->master;
TRACE("send pty \"%s\" with master fd %d and slave fd %d to " ttyfds[0] = pty_info->master;
"parent", ttyfds[1] = pty_info->slave;
pty_info->name, pty_info->master, pty_info->slave);
ret = lxc_abstract_unix_send_fds(sock, ttyfds, 2, NULL, 0);
if (ret < 0)
break;
TRACE("Send pty \"%s\" with master fd %d and slave fd %d to "
"parent", pty_info->name, pty_info->master, pty_info->slave);
} }
ret = lxc_abstract_unix_send_fds(sock, ttyfds, num_ttyfds, NULL, 0);
if (ret < 0) if (ret < 0)
ERROR("failed to send %d ttys to parent: %s", conf->tty, ERROR("Failed to send %d ttys to parent: %s", conf->tty,
strerror(errno)); strerror(errno));
else else
TRACE("sent %d ttys to parent", conf->tty); TRACE("Sent %d ttys to parent", conf->tty);
close(handler->ttysock[0]);
close(handler->ttysock[1]);
for (i = 0; i < num_ttyfds; i++)
close(ttyfds[i]);
free(ttyfds); lxc_delete_tty(tty_info);
return ret; return ret;
} }
...@@ -3068,6 +3068,11 @@ int lxc_setup(struct lxc_handler *handler) ...@@ -3068,6 +3068,11 @@ int lxc_setup(struct lxc_handler *handler)
return -1; return -1;
} }
if (lxc_network_send_name_and_ifindex_to_parent(handler) < 0) {
ERROR("Failed to network device names and ifindices to parent");
return -1;
}
if (lxc_conf->autodev > 0) { if (lxc_conf->autodev > 0) {
if (mount_autodev(name, &lxc_conf->rootfs, lxcpath)) { if (mount_autodev(name, &lxc_conf->rootfs, lxcpath)) {
ERROR("failed to mount /dev in the container"); ERROR("failed to mount /dev in the container");
...@@ -3243,10 +3248,6 @@ static void lxc_remove_nic(struct lxc_list *it) ...@@ -3243,10 +3248,6 @@ static void lxc_remove_nic(struct lxc_list *it)
lxc_list_del(it); lxc_list_del(it);
free(netdev->link);
free(netdev->name);
if (netdev->type == LXC_NET_VETH)
free(netdev->priv.veth_attr.pair);
free(netdev->upscript); free(netdev->upscript);
free(netdev->downscript); free(netdev->downscript);
free(netdev->hwaddr); free(netdev->hwaddr);
...@@ -3469,17 +3470,6 @@ int lxc_clear_hooks(struct lxc_conf *c, const char *key) ...@@ -3469,17 +3470,6 @@ int lxc_clear_hooks(struct lxc_conf *c, const char *key)
return 0; return 0;
} }
/* Free every saved NIC's orig_name and then the saved_nics array itself.
 * Does nothing when no array was ever stored in @conf.
 */
static void lxc_clear_saved_nics(struct lxc_conf *conf)
{
	int idx = 0;

	if (conf->saved_nics == NULL)
		return;

	/* Each entry owns its orig_name string; release those first. */
	while (idx < conf->num_savednics) {
		free(conf->saved_nics[idx].orig_name);
		idx++;
	}

	free(conf->saved_nics);
}
static inline void lxc_clear_aliens(struct lxc_conf *conf) static inline void lxc_clear_aliens(struct lxc_conf *conf)
{ {
struct lxc_list *it,*next; struct lxc_list *it,*next;
...@@ -3533,7 +3523,6 @@ void lxc_conf_free(struct lxc_conf *conf) ...@@ -3533,7 +3523,6 @@ void lxc_conf_free(struct lxc_conf *conf)
lxc_clear_cgroups(conf, "lxc.cgroup"); lxc_clear_cgroups(conf, "lxc.cgroup");
lxc_clear_hooks(conf, "lxc.hook"); lxc_clear_hooks(conf, "lxc.hook");
lxc_clear_mount_entries(conf); lxc_clear_mount_entries(conf);
lxc_clear_saved_nics(conf);
lxc_clear_idmaps(conf); lxc_clear_idmaps(conf);
lxc_clear_groups(conf); lxc_clear_groups(conf);
lxc_clear_includes(conf); lxc_clear_includes(conf);
......
...@@ -211,8 +211,6 @@ struct lxc_conf { ...@@ -211,8 +211,6 @@ struct lxc_conf {
struct lxc_list cgroup; struct lxc_list cgroup;
struct lxc_list id_map; struct lxc_list id_map;
struct lxc_list network; struct lxc_list network;
struct saved_nic *saved_nics;
int num_savednics;
int auto_mounts; int auto_mounts;
struct lxc_list mount_list; struct lxc_list mount_list;
struct lxc_list caps; struct lxc_list caps;
......
...@@ -764,11 +764,6 @@ static struct lxc_netdev *network_netdev(const char *key, const char *value, ...@@ -764,11 +764,6 @@ static struct lxc_netdev *network_netdev(const char *key, const char *value,
return netdev; return netdev;
} }
/* Store @value in *@valuep through set_config_string_item_max(), passing
 * IFNAMSIZ as the maximum. The helper's return value is handed back as is.
 */
static int network_ifname(char **valuep, const char *value)
{
	int ret;

	ret = set_config_string_item_max(valuep, value, IFNAMSIZ);

	return ret;
}
#ifndef MACVLAN_MODE_PRIVATE #ifndef MACVLAN_MODE_PRIVATE
#define MACVLAN_MODE_PRIVATE 1 #define MACVLAN_MODE_PRIVATE 1
#endif #endif
...@@ -870,7 +865,7 @@ static int set_config_network_link(const char *key, const char *value, ...@@ -870,7 +865,7 @@ static int set_config_network_link(const char *key, const char *value,
if (!netdev) if (!netdev)
return -1; return -1;
return network_ifname(&netdev->link, value); return network_ifname(netdev->link, value);
} }
static int set_config_network_name(const char *key, const char *value, static int set_config_network_name(const char *key, const char *value,
...@@ -882,7 +877,7 @@ static int set_config_network_name(const char *key, const char *value, ...@@ -882,7 +877,7 @@ static int set_config_network_name(const char *key, const char *value,
if (!netdev) if (!netdev)
return -1; return -1;
return network_ifname(&netdev->name, value); return network_ifname(netdev->name, value);
} }
static int set_config_network_veth_pair(const char *key, const char *value, static int set_config_network_veth_pair(const char *key, const char *value,
...@@ -899,7 +894,7 @@ static int set_config_network_veth_pair(const char *key, const char *value, ...@@ -899,7 +894,7 @@ static int set_config_network_veth_pair(const char *key, const char *value,
return -1; return -1;
} }
return network_ifname(&netdev->priv.veth_attr.pair, value); return network_ifname(netdev->priv.veth_attr.pair, value);
} }
static int set_config_network_macvlan_mode(const char *key, const char *value, static int set_config_network_macvlan_mode(const char *key, const char *value,
......
...@@ -260,6 +260,15 @@ void lxc_log_configured_netdevs(const struct lxc_conf *conf) ...@@ -260,6 +260,15 @@ void lxc_log_configured_netdevs(const struct lxc_conf *conf)
switch (netdev->type) { switch (netdev->type) {
case LXC_NET_VETH: case LXC_NET_VETH:
TRACE("type: veth"); TRACE("type: veth");
if (netdev->priv.veth_attr.pair)
TRACE("veth pair: %s",
netdev->priv.veth_attr.pair);
if (netdev->priv.veth_attr.veth1[0] != '\0')
TRACE("veth1 : %s",
netdev->priv.veth_attr.veth1);
if (netdev->priv.veth_attr.ifindex > 0)
TRACE("host side ifindex for veth device: %d",
netdev->priv.veth_attr.ifindex);
break; break;
case LXC_NET_MACVLAN: case LXC_NET_MACVLAN:
TRACE("type: macvlan"); TRACE("type: macvlan");
...@@ -269,6 +278,10 @@ void lxc_log_configured_netdevs(const struct lxc_conf *conf) ...@@ -269,6 +278,10 @@ void lxc_log_configured_netdevs(const struct lxc_conf *conf)
break; break;
case LXC_NET_PHYS: case LXC_NET_PHYS:
TRACE("type: phys"); TRACE("type: phys");
if (netdev->priv.phys_attr.ifindex > 0) {
TRACE("host side ifindex for phys device: %d",
netdev->priv.phys_attr.ifindex);
}
break; break;
case LXC_NET_EMPTY: case LXC_NET_EMPTY:
TRACE("type: empty"); TRACE("type: empty");
...@@ -296,3 +309,14 @@ void lxc_log_configured_netdevs(const struct lxc_conf *conf) ...@@ -296,3 +309,14 @@ void lxc_log_configured_netdevs(const struct lxc_conf *conf)
TRACE("downscript: %s", netdev->downscript); TRACE("downscript: %s", netdev->downscript);
} }
} }
/* Copy the interface name @value into the caller-provided buffer @valuep.
 *
 * @valuep : destination buffer; assumed to hold at least IFNAMSIZ bytes
 *           (TODO confirm against the struct lxc_netdev field definitions).
 * @value  : NUL-terminated interface name to store.
 *
 * Returns 0 on success and -1 when @value, including its terminating NUL
 * byte, would not fit into IFNAMSIZ bytes. Nothing is written to @valuep in
 * the failure case.
 */
int network_ifname(char *valuep, const char *value)
{
	size_t len = strlen(value);

	if (len >= IFNAMSIZ) {
		ERROR("Network devie name \"%s\" is too long (>= %zu)", value,
		      (size_t)IFNAMSIZ);
		/* Refuse the name instead of copying past the IFNAMSIZ limit. */
		return -1;
	}

	/* Length is known and bounded; copy the name plus its NUL byte. */
	memcpy(valuep, value, len + 1);
	return 0;
}
...@@ -33,5 +33,6 @@ extern struct lxc_netdev *lxc_find_netdev_by_idx(struct lxc_conf *conf, ...@@ -33,5 +33,6 @@ extern struct lxc_netdev *lxc_find_netdev_by_idx(struct lxc_conf *conf,
extern struct lxc_netdev *lxc_get_netdev_by_idx(struct lxc_conf *conf, extern struct lxc_netdev *lxc_get_netdev_by_idx(struct lxc_conf *conf,
unsigned int idx); unsigned int idx);
extern void lxc_log_configured_netdevs(const struct lxc_conf *conf); extern void lxc_log_configured_netdevs(const struct lxc_conf *conf);
extern int network_ifname(char *valuep, const char *value);
#endif /* __LXC_CONFILE_UTILS_H */ #endif /* __LXC_CONFILE_UTILS_H */
...@@ -126,6 +126,47 @@ static int load_tty_major_minor(char *directory, char *output, int len) ...@@ -126,6 +126,47 @@ static int load_tty_major_minor(char *directory, char *output, int len)
return 0; return 0;
} }
/* Compare two dotted version strings of the form "major[.minor[.patch]]".
 *
 * Components that are absent from a string count as -1, so "2.8" sorts
 * before "2.8.0".
 *
 * Returns 1 if @v1 is newer than @v2, 0 if all three components are equal,
 * and -1 if @v1 is older. -1 is also returned when either string does not
 * start with an integer.
 */
static int cmp_version(const char *v1, const char *v2)
{
	int lhs[3] = { -1, -1, -1 };
	int rhs[3] = { -1, -1, -1 };
	int i;

	if (sscanf(v1, "%d.%d.%d", &lhs[0], &lhs[1], &lhs[2]) < 1)
		return -1;

	if (sscanf(v2, "%d.%d.%d", &rhs[0], &rhs[1], &rhs[2]) < 1)
		return -1;

	/* Walk major, minor, patch; the first difference decides. */
	for (i = 0; i < 3; i++) {
		if (lhs[i] > rhs[i])
			return 1;

		if (lhs[i] < rhs[i])
			return -1;
	}

	return 0;
}
static void exec_criu(struct criu_opts *opts) static void exec_criu(struct criu_opts *opts)
{ {
char **argv, log[PATH_MAX]; char **argv, log[PATH_MAX];
...@@ -499,7 +540,7 @@ static void exec_criu(struct criu_opts *opts) ...@@ -499,7 +540,7 @@ static void exec_criu(struct criu_opts *opts)
struct lxc_netdev *n = it->elem; struct lxc_netdev *n = it->elem;
bool external_not_veth; bool external_not_veth;
if (strcmp(opts->criu_version, CRIU_EXTERNAL_NOT_VETH) >= 0) { if (cmp_version(opts->criu_version, CRIU_EXTERNAL_NOT_VETH) >= 0) {
/* Since criu version 2.8 the usage of --veth-pair /* Since criu version 2.8 the usage of --veth-pair
* has been deprecated: * has been deprecated:
* git tag --contains f2037e6d3445fc400 * git tag --contains f2037e6d3445fc400
...@@ -523,7 +564,7 @@ static void exec_criu(struct criu_opts *opts) ...@@ -523,7 +564,7 @@ static void exec_criu(struct criu_opts *opts)
case LXC_NET_VETH: case LXC_NET_VETH:
veth = n->priv.veth_attr.pair; veth = n->priv.veth_attr.pair;
if (n->link) { if (n->link[0] != '\0') {
if (external_not_veth) if (external_not_veth)
fmt = "veth[%s]:%s@%s"; fmt = "veth[%s]:%s@%s";
else else
...@@ -542,7 +583,7 @@ static void exec_criu(struct criu_opts *opts) ...@@ -542,7 +583,7 @@ static void exec_criu(struct criu_opts *opts)
goto err; goto err;
break; break;
case LXC_NET_MACVLAN: case LXC_NET_MACVLAN:
if (!n->link) { if (n->link[0] == '\0') {
ERROR("no host interface for macvlan %s", n->name); ERROR("no host interface for macvlan %s", n->name);
goto err; goto err;
} }
...@@ -764,11 +805,13 @@ static bool restore_net_info(struct lxc_container *c) ...@@ -764,11 +805,13 @@ static bool restore_net_info(struct lxc_container *c)
snprintf(template, sizeof(template), "vethXXXXXX"); snprintf(template, sizeof(template), "vethXXXXXX");
if (!netdev->priv.veth_attr.pair) if (netdev->priv.veth_attr.pair[0] == '\0' &&
netdev->priv.veth_attr.pair = lxc_mkifname(template); netdev->priv.veth_attr.veth1[0] == '\0') {
if (!lxc_mkifname(template))
goto out_unlock;
if (!netdev->priv.veth_attr.pair) strcpy(netdev->priv.veth_attr.veth1, template);
goto out_unlock; }
} }
has_error = false; has_error = false;
......
...@@ -61,8 +61,8 @@ static void usage(char *me, bool fail) ...@@ -61,8 +61,8 @@ static void usage(char *me, bool fail)
{ {
fprintf(stderr, "Usage: %s create {lxcpath} {name} {pid} {type} " fprintf(stderr, "Usage: %s create {lxcpath} {name} {pid} {type} "
"{bridge} {nicname}\n", me); "{bridge} {nicname}\n", me);
fprintf(stderr, "Usage: %s delete {lxcpath} {name} {pid} {type} " fprintf(stderr, "Usage: %s delete {lxcpath} {name} "
"{bridge} {nicname}\n", me); "{/proc/<pid>/ns/net} {type} {bridge} {nicname}\n", me);
fprintf(stderr, "{nicname} is the name to use inside the container\n"); fprintf(stderr, "{nicname} is the name to use inside the container\n");
if (fail) if (fail)
...@@ -78,7 +78,7 @@ static int open_and_lock(char *path) ...@@ -78,7 +78,7 @@ static int open_and_lock(char *path)
fd = open(path, O_RDWR | O_CREAT, S_IWUSR | S_IRUSR); fd = open(path, O_RDWR | O_CREAT, S_IWUSR | S_IRUSR);
if (fd < 0) { if (fd < 0) {
usernic_error("Failed to open %s: %s.\n", path, usernic_error("Failed to open \"%s\": %s\n", path,
strerror(errno)); strerror(errno));
return -1; return -1;
} }
...@@ -145,7 +145,7 @@ static char **get_groupnames(void) ...@@ -145,7 +145,7 @@ static char **get_groupnames(void)
group_ids = malloc(sizeof(gid_t) * ngroups); group_ids = malloc(sizeof(gid_t) * ngroups);
if (!group_ids) { if (!group_ids) {
usernic_error("Failed to allocate memory while getting groups " usernic_error("Failed to allocate memory while getting groups "
"the user belongs to: %s.\n", "the user belongs to: %s\n",
strerror(errno)); strerror(errno));
return NULL; return NULL;
} }
...@@ -153,7 +153,7 @@ static char **get_groupnames(void) ...@@ -153,7 +153,7 @@ static char **get_groupnames(void)
ret = getgroups(ngroups, group_ids); ret = getgroups(ngroups, group_ids);
if (ret < 0) { if (ret < 0) {
free(group_ids); free(group_ids);
usernic_error("Failed to get process groups: %s.\n", usernic_error("Failed to get process groups: %s\n",
strerror(errno)); strerror(errno));
return NULL; return NULL;
} }
...@@ -162,7 +162,7 @@ static char **get_groupnames(void) ...@@ -162,7 +162,7 @@ static char **get_groupnames(void)
if (!groupnames) { if (!groupnames) {
free(group_ids); free(group_ids);
usernic_error("Failed to allocate memory while getting group " usernic_error("Failed to allocate memory while getting group "
"names: %s.\n", "names: %s\n",
strerror(errno)); strerror(errno));
return NULL; return NULL;
} }
...@@ -172,7 +172,7 @@ static char **get_groupnames(void) ...@@ -172,7 +172,7 @@ static char **get_groupnames(void)
for (i = 0; i < ngroups; i++) { for (i = 0; i < ngroups; i++) {
gr = getgrgid(group_ids[i]); gr = getgrgid(group_ids[i]);
if (!gr) { if (!gr) {
usernic_error("Failed to get group name: %s.\n", usernic_error("Failed to get group name: %s\n",
strerror(errno)); strerror(errno));
free(group_ids); free(group_ids);
free_groupnames(groupnames); free_groupnames(groupnames);
...@@ -181,7 +181,7 @@ static char **get_groupnames(void) ...@@ -181,7 +181,7 @@ static char **get_groupnames(void)
groupnames[i] = strdup(gr->gr_name); groupnames[i] = strdup(gr->gr_name);
if (!groupnames[i]) { if (!groupnames[i]) {
usernic_error("Failed to copy group name \"%s\".", usernic_error("Failed to copy group name \"%s\"",
gr->gr_name); gr->gr_name);
free(group_ids); free(group_ids);
free_groupnames(groupnames); free_groupnames(groupnames);
...@@ -357,95 +357,119 @@ static char *get_eow(char *s, char *e) ...@@ -357,95 +357,119 @@ static char *get_eow(char *s, char *e)
return s; return s;
} }
static char *find_line(char *p, char *e, char *u, char *t, char *l) static char *find_line(char *buf_start, char *buf_end, char *name,
char *net_type, char *net_link, char *net_dev,
bool *owner, bool *found, bool *keep)
{ {
char *p1, *p2, *ret; char *end_of_line, *end_of_word, *line;
while ((p < e) && (p1 = get_eol(p, e)) < e) { while (buf_start < buf_end) {
ret = p; size_t len;
if (*p == '#') char netdev_name[IFNAMSIZ];
goto next;
*found = false;
*keep = true;
*owner = false;
while ((p < e) && isblank(*p)) end_of_line = get_eol(buf_start, buf_end);
p++; if (end_of_line >= buf_end)
return NULL;
p2 = get_eow(p, e); line = buf_start;
if (!p2 || ((size_t)(p2 - p)) != strlen(u) || if (*buf_start == '#')
strncmp(p, u, strlen(u)))
goto next; goto next;
p = p2 + 1; while ((buf_start < buf_end) && isblank(*buf_start))
while ((p < e) && isblank(*p)) buf_start++;
p++;
p2 = get_eow(p, e); /* Check whether the line contains the caller's name. */
if (!p2 || ((size_t)(p2 - p)) != strlen(t) || end_of_word = get_eow(buf_start, buf_end);
strncmp(p, t, strlen(t))) /* corrupt db */
goto next; if (!end_of_word)
return NULL;
p = p2 + 1; if (strncmp(buf_start, name, strlen(name)))
while ((p < e) && isblank(*p)) *found = false;
p++;
p2 = get_eow(p, e); *owner = true;
if (!p2 || ((size_t)(p2 - p)) != strlen(l) ||
strncmp(p, l, strlen(l)))
goto next;
return ret; buf_start = end_of_word + 1;
next: while ((buf_start < buf_end) && isblank(*buf_start))
p = p1 + 1; buf_start++;
}
return NULL; /* Check whether line is of the right network type. */
} end_of_word = get_eow(buf_start, buf_end);
/* corrupt db */
if (!end_of_word)
return NULL;
static bool nic_exists(char *nic) if (strncmp(buf_start, net_type, strlen(net_type)))
{ *found = false;
char path[MAXPATHLEN];
int ret;
struct stat sb;
if (!strcmp(nic, "none")) buf_start = end_of_word + 1;
return true; while ((buf_start < buf_end) && isblank(*buf_start))
buf_start++;
ret = snprintf(path, MAXPATHLEN, "/sys/class/net/%s", nic); /* Check whether line is contains the right link. */
if (ret < 0 || ret >= MAXPATHLEN) end_of_word = get_eow(buf_start, buf_end);
return false; /* corrupt db */
if (!end_of_word)
return NULL;
ret = stat(path, &sb); if (strncmp(buf_start, net_link, strlen(net_link)))
if (ret < 0) *found = false;
return false;
return true; buf_start = end_of_word + 1;
} while ((buf_start < buf_end) && isblank(*buf_start))
buf_start++;
static int instantiate_veth(char *n1, char **n2) /* Check whether line contains the right network device. */
{ end_of_word = get_eow(buf_start, buf_end);
int err; /* corrupt db */
if (!end_of_word)
return NULL;
err = snprintf(*n2, IFNAMSIZ, "%sp", n1); len = end_of_word - buf_start;
if (err < 0 || err >= IFNAMSIZ) { /* corrupt db */
usernic_error("%s\n", "Could not create nic name."); if (len >= IFNAMSIZ)
return -1; return NULL;
memcpy(netdev_name, buf_start, len);
netdev_name[len] = '\0';
*keep = lxc_nic_exists(netdev_name);
if (net_dev && !strcmp(netdev_name, net_dev))
*found = true;
return line;
next:
buf_start = end_of_line + 1;
} }
err = lxc_veth_create(n1, *n2); return NULL;
if (err) { }
usernic_error("Failed to create %s-%s : %s.\n", n1, *n2,
strerror(-err)); static int instantiate_veth(char *veth1, char *veth2)
{
int ret;
ret = lxc_veth_create(veth1, veth2);
if (ret < 0) {
usernic_error("Failed to create %s-%s : %s.\n", veth1, veth2,
strerror(-ret));
return -1; return -1;
} }
/* Changing the high byte of the mac address to 0xfe, the bridge /* Changing the high byte of the mac address to 0xfe, the bridge
* interface will always keep the host's mac address and not take the * interface will always keep the host's mac address and not take the
* mac address of a container. */ * mac address of a container. */
err = setup_private_host_hw_addr(n1); ret = setup_private_host_hw_addr(veth1);
if (err) if (ret < 0)
usernic_error("Failed to change mac address of host interface " usernic_error("Failed to change mac address of host interface "
"%s : %s\n", n1, strerror(-err)); "%s : %s\n", veth1, strerror(-ret));
return netdev_set_flag(n1, IFF_UP); return netdev_set_flag(veth1, IFF_UP);
} }
static int get_mtu(char *name) static int get_mtu(char *name)
...@@ -453,31 +477,32 @@ static int get_mtu(char *name) ...@@ -453,31 +477,32 @@ static int get_mtu(char *name)
int idx; int idx;
idx = if_nametoindex(name); idx = if_nametoindex(name);
if (idx < 0)
return -1;
return netdev_get_mtu(idx); return netdev_get_mtu(idx);
} }
static bool create_nic(char *nic, char *br, int pid, char **cnic) static int create_nic(char *nic, char *br, int pid, char **cnic)
{ {
char *veth1buf, *veth2buf; char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
int mtu, ret; int mtu, ret;
veth1buf = alloca(IFNAMSIZ);
veth2buf = alloca(IFNAMSIZ);
if (!veth1buf || !veth2buf) {
usernic_error("Failed allocate memory: %s.\n", strerror(errno));
return false;
}
ret = snprintf(veth1buf, IFNAMSIZ, "%s", nic); ret = snprintf(veth1buf, IFNAMSIZ, "%s", nic);
if (ret < 0 || ret >= IFNAMSIZ) { if (ret < 0 || ret >= IFNAMSIZ) {
usernic_error("%s", "Could not create nic name.\n"); usernic_error("%s", "Could not create nic name\n");
return false; return -1;
} }
ret = snprintf(veth2buf, IFNAMSIZ, "%sp", veth1buf);
if (ret < 0 || ret >= IFNAMSIZ) {
usernic_error("%s\n", "Could not create nic name");
return -1;
}
/* create the nics */ /* create the nics */
if (instantiate_veth(veth1buf, &veth2buf) < 0) { ret = instantiate_veth(veth1buf, veth2buf);
usernic_error("%s", "Error creating veth tunnel.\n"); if (ret < 0) {
return false; usernic_error("%s", "Error creating veth tunnel\n");
return -1;
} }
if (strcmp(br, "none")) { if (strcmp(br, "none")) {
...@@ -502,7 +527,7 @@ static bool create_nic(char *nic, char *br, int pid, char **cnic) ...@@ -502,7 +527,7 @@ static bool create_nic(char *nic, char *br, int pid, char **cnic)
/* attach veth1 to bridge */ /* attach veth1 to bridge */
ret = lxc_bridge_attach(br, veth1buf); ret = lxc_bridge_attach(br, veth1buf);
if (ret < 0) { if (ret < 0) {
usernic_error("Error attaching %s to %s.\n", veth1buf, br); usernic_error("Error attaching %s to %s\n", veth1buf, br);
goto out_del; goto out_del;
} }
} }
...@@ -517,53 +542,15 @@ static bool create_nic(char *nic, char *br, int pid, char **cnic) ...@@ -517,53 +542,15 @@ static bool create_nic(char *nic, char *br, int pid, char **cnic)
*cnic = strdup(veth2buf); *cnic = strdup(veth2buf);
if (!*cnic) { if (!*cnic) {
usernic_error("Failed to copy string \"%s\".\n", veth2buf); usernic_error("Failed to copy string \"%s\"\n", veth2buf);
return false; return -1;
} }
return true; return 0;
out_del: out_del:
lxc_netdev_delete_by_name(veth1buf); lxc_netdev_delete_by_name(veth1buf);
return false; return -1;
}
/* get_new_nicname() hands back the name (vethXXXXXX) of the nic that is
 * attached to the lxc bridge on the host. The caller owns the returned string
 * and has to free it. NULL is returned when the template cannot be built,
 * when lxc_mkifname() yields no name, or when create_nic() fails.
 */
static char *get_new_nicname(char *br, int pid, char **cnic)
{
	char pattern[IFNAMSIZ];
	char *host_nic;
	int written;

	written = snprintf(pattern, sizeof(pattern), "vethXXXXXX");
	if (written < 0 || (size_t)written >= sizeof(pattern))
		return NULL;

	host_nic = lxc_mkifname(pattern);
	if (host_nic == NULL)
		return NULL;

	if (create_nic(host_nic, br, pid, cnic))
		return host_nic;

	/* Creating the nic failed: do not leak the generated name. */
	free(host_nic);
	return NULL;
}
/* Parse four whitespace-separated words from the line at @p and store the
 * fourth one in the buffer *@nic points to.
 *
 * Each word is limited to 99 characters by the scan format, so *@nic must
 * reference at least 100 writable bytes. The first three words are scanned
 * into scratch buffers and discarded.
 *
 * Returns true only when all four words were read.
 */
static bool get_nic_from_line(char *p, char **nic)
{
	char owner[100], net_type[100], bridge[100];
	int fields;

	fields = sscanf(p, "%99[^ \t\n] %99[^ \t\n] %99[^ \t\n] %99[^ \t\n]",
			owner, net_type, bridge, *nic);

	return fields == 4;
}
struct entry_line { struct entry_line {
...@@ -572,29 +559,24 @@ struct entry_line { ...@@ -572,29 +559,24 @@ struct entry_line {
bool keep; bool keep;
}; };
static bool cull_entries(int fd, char *me, char *t, char *br, char *nicname, static bool cull_entries(int fd, char *name, char *net_type, char *net_link,
bool *found_nicname) char *net_dev, bool *found_nicname)
{ {
int i, ret; int i, ret;
off_t len; char *buf, *buf_end, *buf_start;
char *buf, *e, *nic, *p;
struct stat sb; struct stat sb;
int n = 0; int n = 0;
bool found, keep;
struct entry_line *entry_lines = NULL; struct entry_line *entry_lines = NULL;
nic = alloca(100);
if (!nic)
return false;
ret = fstat(fd, &sb); ret = fstat(fd, &sb);
if (ret < 0) { if (ret < 0) {
usernic_error("Failed to fstat: %s\n", strerror(errno)); usernic_error("Failed to fstat: %s\n", strerror(errno));
return false; return false;
} }
len = sb.st_size; if (!sb.st_size)
if (len == 0) return false;
return true;
buf = lxc_strmmap(NULL, sb.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); buf = lxc_strmmap(NULL, sb.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (buf == MAP_FAILED) { if (buf == MAP_FAILED) {
...@@ -603,51 +585,48 @@ static bool cull_entries(int fd, char *me, char *t, char *br, char *nicname, ...@@ -603,51 +585,48 @@ static bool cull_entries(int fd, char *me, char *t, char *br, char *nicname,
return false; return false;
} }
p = buf; buf_start = buf;
e = buf + len; buf_end = buf + sb.st_size;
while ((p = find_line(p, e, me, t, br))) { while ((buf_start = find_line(buf_start, buf_end, name, net_type,
net_link, net_dev, &(bool){true}, &found,
&keep))) {
struct entry_line *newe; struct entry_line *newe;
newe = realloc(entry_lines, sizeof(*entry_lines) * (n + 1)); newe = realloc(entry_lines, sizeof(*entry_lines) * (n + 1));
if (!newe) { if (!newe) {
free(entry_lines); free(entry_lines);
lxc_strmunmap(buf, sb.st_size);
return false; return false;
} }
if (found)
*found_nicname = true;
entry_lines = newe; entry_lines = newe;
entry_lines[n].start = p; entry_lines[n].start = buf_start;
entry_lines[n].len = get_eol(p, e) - entry_lines[n].start; entry_lines[n].len = get_eol(buf_start, buf_end) - entry_lines[n].start;
entry_lines[n].keep = true; entry_lines[n].keep = keep;
n++; n++;
if (!get_nic_from_line(p, &nic))
continue;
if (nic && !nic_exists(nic))
entry_lines[n - 1].keep = false;
if (nicname)
if (!strcmp(nic, nicname))
*found_nicname = true;
p += entry_lines[n - 1].len + 1; buf_start += entry_lines[n - 1].len + 1;
if (p >= e) if (buf_start >= buf_end)
break; break;
} }
p = buf; buf_start = buf;
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
if (!entry_lines[i].keep) if (!entry_lines[i].keep)
continue; continue;
memcpy(p, entry_lines[i].start, entry_lines[i].len); memcpy(buf_start, entry_lines[i].start, entry_lines[i].len);
p += entry_lines[i].len; buf_start += entry_lines[i].len;
*p = '\n'; *buf_start = '\n';
p++; buf_start++;
} }
free(entry_lines); free(entry_lines);
lxc_strmunmap(buf, sb.st_size); lxc_strmunmap(buf, sb.st_size);
ret = ftruncate(fd, p - buf); ret = ftruncate(fd, buf_start - buf);
if (ret < 0) if (ret < 0)
usernic_error("Failed to set new file size: %s\n", usernic_error("Failed to set new file size: %s\n",
strerror(errno)); strerror(errno));
...@@ -655,16 +634,19 @@ static bool cull_entries(int fd, char *me, char *t, char *br, char *nicname, ...@@ -655,16 +634,19 @@ static bool cull_entries(int fd, char *me, char *t, char *br, char *nicname,
return true; return true;
} }
static int count_entries(char *buf, off_t len, char *me, char *t, char *br) static int count_entries(char *buf, off_t len, char *name, char *net_type, char *net_link)
{ {
char *e;
int count = 0; int count = 0;
bool owner = false;;
e = &buf[len]; char *buf_end = &buf[len];
while ((buf = find_line(buf, e, me, t, br))) {
count++; buf_end = &buf[len];
buf = get_eol(buf, e) + 1; while ((buf = find_line(buf, buf_end, name, net_type, net_link, NULL,
if (buf >= e) &owner, &(bool){true}, &(bool){true}))) {
if (owner)
count++;
buf = get_eol(buf, buf_end) + 1;
if (buf >= buf_end)
break; break;
} }
...@@ -676,8 +658,9 @@ static char *get_nic_if_avail(int fd, struct alloted_s *names, int pid, ...@@ -676,8 +658,9 @@ static char *get_nic_if_avail(int fd, struct alloted_s *names, int pid,
char *intype, char *br, int allowed, char **cnic) char *intype, char *br, int allowed, char **cnic)
{ {
int ret; int ret;
off_t len, slen; size_t slen;
char *newline, *nicname, *owner; char *newline, *owner;
char nicname[IFNAMSIZ];
struct stat sb; struct stat sb;
struct alloted_s *n; struct alloted_s *n;
int count = 0; int count = 0;
...@@ -691,79 +674,110 @@ static char *get_nic_if_avail(int fd, struct alloted_s *names, int pid, ...@@ -691,79 +674,110 @@ static char *get_nic_if_avail(int fd, struct alloted_s *names, int pid,
owner = names->name; owner = names->name;
if (fstat(fd, &sb) < 0) { ret = fstat(fd, &sb);
if (ret < 0) {
usernic_error("Failed to fstat: %s\n", strerror(errno)); usernic_error("Failed to fstat: %s\n", strerror(errno));
return NULL; return NULL;
} }
len = sb.st_size; if (sb.st_size > 0) {
if (len > 0) { buf = lxc_strmmap(NULL, sb.st_size, PROT_READ | PROT_WRITE,
buf = MAP_SHARED, fd, 0);
mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (buf == MAP_FAILED) { if (buf == MAP_FAILED) {
usernic_error("Failed to establish shared memory mapping: %s\n", usernic_error("Failed to establish shared memory "
strerror(errno)); "mapping: %s\n", strerror(errno));
return NULL; return NULL;
} }
owner = NULL; owner = NULL;
for (n = names; n != NULL; n = n->next) { for (n = names; n != NULL; n = n->next) {
count = count_entries(buf, len, n->name, intype, br); count = count_entries(buf, sb.st_size, n->name, intype, br);
if (count >= n->allowed) if (count >= n->allowed)
continue; continue;
owner = n->name; owner = n->name;
break; break;
} }
lxc_strmunmap(buf, sb.st_size);
} }
if (owner == NULL) if (owner == NULL)
return NULL; return NULL;
nicname = get_new_nicname(br, pid, cnic); ret = snprintf(nicname, sizeof(nicname), "vethXXXXXX");
if (!nicname) { if (ret < 0 || (size_t)ret >= sizeof(nicname))
usernic_error("%s", "Failed to get new nic name\n"); return NULL;
if (!lxc_mkifname(nicname))
return NULL;
ret = create_nic(nicname, br, pid, cnic);
if (ret < 0) {
usernic_error("%s", "Failed to create new nic\n");
return NULL; return NULL;
} }
/* owner ' ' intype ' ' br ' ' *nicname + '\n' + '\0' */ /* strlen(owner)
slen = strlen(owner) + strlen(intype) + strlen(br) + strlen(nicname) + 5; * +
newline = alloca(slen); * " "
* +
* strlen(intype)
* +
* " "
* +
* strlen(br)
* +
* " "
* +
* strlen(nicname)
* +
* \n
* +
* \0
*/
slen = strlen(owner) + strlen(intype) + strlen(br) + strlen(nicname) + 4;
newline = malloc(slen + 1);
if (!newline) { if (!newline) {
free(nicname); free(newline);
usernic_error("Failed allocate memory: %s\n", strerror(errno)); usernic_error("Failed allocate memory: %s\n", strerror(errno));
return NULL; return NULL;
} }
ret = snprintf(newline, slen, "%s %s %s %s\n", owner, intype, br, nicname); ret = snprintf(newline, slen + 1, "%s %s %s %s\n", owner, intype, br, nicname);
if (ret < 0 || ret >= slen) { if (ret < 0 || (size_t)ret >= (slen + 1)) {
if (lxc_netdev_delete_by_name(nicname) != 0) if (lxc_netdev_delete_by_name(nicname) != 0)
usernic_error("Error unlinking %s\n", nicname); usernic_error("Error unlinking %s\n", nicname);
free(nicname); free(newline);
return NULL; return NULL;
} }
if (len)
munmap(buf, len);
if (ftruncate(fd, len + slen)) /* Note that the file needs to be truncated to the size **without** the
usernic_error("Failed to set new file size: %s\n", * \0 byte! Files are not \0-terminated!
strerror(errno)); */
ret = ftruncate(fd, sb.st_size + slen);
if (ret < 0)
usernic_error("Failed to truncate file: %s\n", strerror(errno));
buf = mmap(NULL, len + slen, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); buf = lxc_strmmap(NULL, sb.st_size + slen, PROT_READ | PROT_WRITE,
MAP_SHARED, fd, 0);
if (buf == MAP_FAILED) { if (buf == MAP_FAILED) {
usernic_error("Failed to establish shared memory mapping: %s\n", usernic_error("Failed to establish shared memory mapping: %s\n",
strerror(errno)); strerror(errno));
if (lxc_netdev_delete_by_name(nicname) != 0) if (lxc_netdev_delete_by_name(nicname) != 0)
usernic_error("Error unlinking %s\n", nicname); usernic_error("Error unlinking %s\n", nicname);
free(nicname); free(newline);
return NULL; return NULL;
} }
strcpy(buf + len, newline); /* Note that the memory needs to be moved in the buffer **without** the
munmap(buf, len + slen); * \0 byte! Files are not \0-terminated!
*/
memmove(buf + sb.st_size, newline, slen);
free(newline);
lxc_strmunmap(buf, sb.st_size + slen);
return nicname; return strdup(nicname);
} }
static bool create_db_dir(char *fnam) static bool create_db_dir(char *fnam)
...@@ -797,7 +811,7 @@ again: ...@@ -797,7 +811,7 @@ again:
} }
static char *lxc_secure_rename_in_ns(int pid, char *oldname, char *newname, static char *lxc_secure_rename_in_ns(int pid, char *oldname, char *newname,
int *ifidx) int *container_veth_ifidx)
{ {
int ret; int ret;
uid_t ruid, suid, euid; uid_t ruid, suid, euid;
...@@ -813,7 +827,7 @@ static char *lxc_secure_rename_in_ns(int pid, char *oldname, char *newname, ...@@ -813,7 +827,7 @@ static char *lxc_secure_rename_in_ns(int pid, char *oldname, char *newname,
fd = lxc_preserve_ns(pid, "net"); fd = lxc_preserve_ns(pid, "net");
if (fd < 0) { if (fd < 0) {
usernic_error("Failed opening network namespace path for '%d'.", pid); usernic_error("Failed opening network namespace path for %d", pid);
goto do_partial_cleanup; goto do_partial_cleanup;
} }
...@@ -830,7 +844,7 @@ static char *lxc_secure_rename_in_ns(int pid, char *oldname, char *newname, ...@@ -830,7 +844,7 @@ static char *lxc_secure_rename_in_ns(int pid, char *oldname, char *newname,
fd = -1; fd = -1;
if (ret < 0) { if (ret < 0) {
usernic_error("Failed to setns() to the network namespace of " usernic_error("Failed to setns() to the network namespace of "
"the container with PID %d: %s.\n", "the container with PID %d: %s\n",
pid, strerror(errno)); pid, strerror(errno));
goto do_partial_cleanup; goto do_partial_cleanup;
} }
...@@ -839,11 +853,12 @@ static char *lxc_secure_rename_in_ns(int pid, char *oldname, char *newname, ...@@ -839,11 +853,12 @@ static char *lxc_secure_rename_in_ns(int pid, char *oldname, char *newname,
if (ret < 0) { if (ret < 0) {
usernic_error("Failed to drop privilege by setting effective " usernic_error("Failed to drop privilege by setting effective "
"user id and real user id to %d, and saved user " "user id and real user id to %d, and saved user "
"ID to 0: %s.\n", "ID to 0: %s\n",
ruid, strerror(errno)); ruid, strerror(errno));
// COMMENT(brauner): It's ok to jump to do_full_cleanup here /* It's ok to jump to do_full_cleanup here since setresuid()
// since setresuid() will succeed when trying to set real, * will succeed when trying to set real, effective, and saved to
// effective, and saved to values they currently have. * values they currently have.
*/
goto do_full_cleanup; goto do_full_cleanup;
} }
...@@ -880,7 +895,7 @@ static char *lxc_secure_rename_in_ns(int pid, char *oldname, char *newname, ...@@ -880,7 +895,7 @@ static char *lxc_secure_rename_in_ns(int pid, char *oldname, char *newname,
/* Allocation failure for strdup() is checked below. */ /* Allocation failure for strdup() is checked below. */
name = strdup(ifname); name = strdup(ifname);
string_ret = name; string_ret = name;
*ifidx = ifindex; *container_veth_ifidx = ifindex;
do_full_cleanup: do_full_cleanup:
ret = setresuid(ruid, euid, suid); ret = setresuid(ruid, euid, suid);
...@@ -913,10 +928,8 @@ do_partial_cleanup: ...@@ -913,10 +928,8 @@ do_partial_cleanup:
return string_ret; return string_ret;
} }
/* /* If the caller (real uid, not effective uid) may read the /proc/[pid]/ns/net,
* If the caller (real uid, not effective uid) may read the * then it is either the caller's netns or one which it created.
* /proc/[pid]/ns/net, then it is either the caller's netns or one
* which it created.
*/ */
static bool may_access_netns(int pid) static bool may_access_netns(int pid)
{ {
...@@ -937,7 +950,7 @@ static bool may_access_netns(int pid) ...@@ -937,7 +950,7 @@ static bool may_access_netns(int pid)
if (ret < 0) { if (ret < 0) {
usernic_error("Failed to drop privilege by setting effective " usernic_error("Failed to drop privilege by setting effective "
"user id and real user id to %d, and saved user " "user id and real user id to %d, and saved user "
"ID to %d: %s.\n", "ID to %d: %s\n",
ruid, euid, strerror(errno)); ruid, euid, strerror(errno));
return false; return false;
} }
...@@ -956,7 +969,7 @@ static bool may_access_netns(int pid) ...@@ -956,7 +969,7 @@ static bool may_access_netns(int pid)
ret = setresuid(ruid, euid, suid); ret = setresuid(ruid, euid, suid);
if (ret < 0) { if (ret < 0) {
usernic_error("Failed to restore user id to %d, real user id " usernic_error("Failed to restore user id to %d, real user id "
"to %d, and saved user ID to %d: %s.\n", "to %d, and saved user ID to %d: %s\n",
ruid, euid, suid, strerror(errno)); ruid, euid, suid, strerror(errno));
may_access = false; may_access = false;
} }
...@@ -977,13 +990,89 @@ struct user_nic_args { ...@@ -977,13 +990,89 @@ struct user_nic_args {
#define LXC_USERNIC_CREATE 0 #define LXC_USERNIC_CREATE 0
#define LXC_USERNIC_DELETE 1 #define LXC_USERNIC_DELETE 1
/* Check whether the calling user is privileged over a network namespace.
 *
 * @netns_fd: file descriptor that is handed to setns() with CLONE_NEWNET to
 *            enter the network namespace under test.
 *
 * The function remembers the network namespace it was called in, enters the
 * namespace referred to by @netns_fd, sets the real and effective user id to
 * the caller's real user id (with the saved user id set to 0) and then probes
 * for privilege. Before returning it restores the original user ids and
 * switches back to the network namespace it started in.
 *
 * Returns true only if the probe reported privilege and both the original
 * credentials and the original network namespace could be restored. Returns
 * false in every other case.
 */
static bool is_privileged_over_netns(int netns_fd)
{
	int ret;
	uid_t euid, ruid, suid;
	bool bret = false;
	int ofd = -1;

	/* Keep a handle on our own network namespace so we can return to it. */
	ofd = lxc_preserve_ns(getpid(), "net");
	if (ofd < 0) {
		usernic_error("Failed opening network namespace path for %d\n",
			      getpid());
		return false;
	}

	ret = getresuid(&ruid, &euid, &suid);
	if (ret < 0) {
		usernic_error("Failed to retrieve real, effective, and saved "
			      "user IDs: %s\n",
			      strerror(errno));
		goto do_partial_cleanup;
	}

	ret = setns(netns_fd, CLONE_NEWNET);
	if (ret < 0) {
		usernic_error("Failed to setns() to network namespace: %s\n",
			      strerror(errno));
		goto do_partial_cleanup;
	}

	ret = setresuid(ruid, ruid, 0);
	if (ret < 0) {
		usernic_error("Failed to drop privilege by setting effective "
			      "user id and real user id to %d, and saved user "
			      "ID to 0: %s\n",
			      ruid, strerror(errno));
		/* It's ok to jump to do_full_cleanup here since setresuid()
		 * will succeed when trying to set real, effective, and saved to
		 * values they currently have.
		 */
		goto do_full_cleanup;
	}

	/* Test whether we are privileged over the network namespace. To do this
	 * we try to delete the loopback interface which is not possible. If we
	 * are privileged over the network namespace we will get ENOTSUP. If we
	 * are not privileged over the network namespace we will get EPERM.
	 */
	ret = lxc_netdev_delete_by_name("lo");
	if (ret == -ENOTSUP)
		bret = true;

do_full_cleanup:
	/* The arguments are passed in setresuid() order: real, effective,
	 * saved. The message below lists them in the same order.
	 */
	ret = setresuid(ruid, euid, suid);
	if (ret < 0) {
		usernic_error("Failed to restore privilege by setting "
			      "real user id to %d, effective user id to %d, "
			      "and saved user ID to %d: %s\n", ruid, euid, suid,
			      strerror(errno));
		bret = false;
	}

	ret = setns(ofd, CLONE_NEWNET);
	if (ret < 0) {
		/* ofd is a file descriptor, not a PID: report our own PID. */
		usernic_error("Failed to setns() to original network namespace "
			      "of PID %d: %s\n", getpid(), strerror(errno));
		bret = false;
	}

do_partial_cleanup:
	close(ofd);
	return bret;
}
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
int fd, ifindex, n, pid, request, ret; int fd, n, pid, request, ret;
char *me, *newname; char *me, *newname;
struct user_nic_args args;
int container_veth_ifidx = -1, host_veth_ifidx = -1, netns_fd = -1;
char *cnic = NULL, *nicname = NULL; char *cnic = NULL, *nicname = NULL;
struct alloted_s *alloted = NULL; struct alloted_s *alloted = NULL;
struct user_nic_args args;
if (argc < 7 || argc > 8) { if (argc < 7 || argc > 8) {
usage(argv[0], true); usage(argv[0], true);
...@@ -1028,26 +1117,50 @@ int main(int argc, char *argv[]) ...@@ -1028,26 +1117,50 @@ int main(int argc, char *argv[])
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
ret = lxc_safe_int(args.pid, &pid); if (request == LXC_USERNIC_CREATE) {
if (ret < 0) { ret = lxc_safe_int(args.pid, &pid);
usernic_error("Could not read pid: %s\n", args.pid); if (ret < 0) {
exit(EXIT_FAILURE); usernic_error("Could not read pid: %s\n", args.pid);
exit(EXIT_FAILURE);
}
} else if (request == LXC_USERNIC_DELETE) {
netns_fd = open(args.pid, O_RDONLY);
if (netns_fd < 0) {
usernic_error("Could not open \"%s\": %s\n", args.pid,
strerror(errno));
exit(EXIT_FAILURE);
}
} }
if (!create_db_dir(LXC_USERNIC_DB)) { if (!create_db_dir(LXC_USERNIC_DB)) {
usernic_error("%s", "Failed to create directory for db file.\n"); usernic_error("%s", "Failed to create directory for db file\n");
if (netns_fd >= 0)
close(netns_fd);
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
fd = open_and_lock(LXC_USERNIC_DB); fd = open_and_lock(LXC_USERNIC_DB);
if (fd < 0) { if (fd < 0) {
usernic_error("Failed to lock %s\n", LXC_USERNIC_DB); usernic_error("Failed to lock %s\n", LXC_USERNIC_DB);
if (netns_fd >= 0)
close(netns_fd);
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
if (!may_access_netns(pid)) { if (request == LXC_USERNIC_CREATE) {
usernic_error("User %s may not modify netns for pid %d.\n", me, pid); if (!may_access_netns(pid)) {
exit(EXIT_FAILURE); usernic_error("User %s may not modify netns for pid %d\n", me, pid);
exit(EXIT_FAILURE);
}
} else if (request == LXC_USERNIC_DELETE) {
bool has_priv;
has_priv = is_privileged_over_netns(netns_fd);
close(netns_fd);
if (!has_priv) {
usernic_error("%s", "Process is not privileged over "
"network namespace\n");
exit(EXIT_FAILURE);
}
} }
n = get_alloted(me, args.type, args.link, &alloted); n = get_alloted(me, args.type, args.link, &alloted);
...@@ -1078,8 +1191,8 @@ int main(int argc, char *argv[]) ...@@ -1078,8 +1191,8 @@ int main(int argc, char *argv[])
free_alloted(&alloted); free_alloted(&alloted);
if (!found_nicname) { if (!found_nicname) {
usernic_error("%s", "Caller is not allowed to delete " usernic_error("Caller is not allowed to delete network "
"network device\n"); "device \"%s\"\n", args.veth_name);
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
...@@ -1105,7 +1218,8 @@ int main(int argc, char *argv[]) ...@@ -1105,7 +1218,8 @@ int main(int argc, char *argv[])
} }
/* Now rename the link. */ /* Now rename the link. */
newname = lxc_secure_rename_in_ns(pid, cnic, args.veth_name, &ifindex); newname = lxc_secure_rename_in_ns(pid, cnic, args.veth_name,
&container_veth_ifidx);
if (!newname) { if (!newname) {
usernic_error("%s", "Failed to rename the link\n"); usernic_error("%s", "Failed to rename the link\n");
ret = lxc_netdev_delete_by_name(cnic); ret = lxc_netdev_delete_by_name(cnic);
...@@ -1115,8 +1229,19 @@ int main(int argc, char *argv[]) ...@@ -1115,8 +1229,19 @@ int main(int argc, char *argv[])
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
/* Write the name of the interface pair to the stdout: eth0:veth9MT2L4 */ host_veth_ifidx = if_nametoindex(nicname);
fprintf(stdout, "%s:%s:%d\n", newname, nicname, ifindex); if (!host_veth_ifidx) {
free(newname);
free(nicname);
usernic_error("Failed to get netdev index: %s\n", strerror(errno));
exit(EXIT_FAILURE);
}
/* Write names of veth pairs and their ifindices to stdout:
* (e.g. eth0:731:veth9MT2L4:730)
*/
fprintf(stdout, "%s:%d:%s:%d\n", newname, container_veth_ifidx, nicname,
host_veth_ifidx);
free(newname); free(newname);
free(nicname); free(nicname);
exit(EXIT_SUCCESS); exit(EXIT_SUCCESS);
......
...@@ -45,9 +45,9 @@ ...@@ -45,9 +45,9 @@
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/types.h> #include <sys/types.h>
#include "af_unix.h"
#include "conf.h" #include "conf.h"
#include "config.h" #include "config.h"
#include "confile_utils.h"
#include "log.h" #include "log.h"
#include "network.h" #include "network.h"
#include "nl.h" #include "nl.h"
...@@ -102,7 +102,7 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd ...@@ -102,7 +102,7 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd
char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ]; char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
unsigned int mtu = 0; unsigned int mtu = 0;
if (netdev->priv.veth_attr.pair) { if (netdev->priv.veth_attr.pair[0] != '\0') {
veth1 = netdev->priv.veth_attr.pair; veth1 = netdev->priv.veth_attr.pair;
if (handler->conf->reboot) if (handler->conf->reboot)
lxc_netdev_delete_by_name(veth1); lxc_netdev_delete_by_name(veth1);
...@@ -141,6 +141,18 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd ...@@ -141,6 +141,18 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd
goto out_delete; goto out_delete;
} }
/* Retrieve ifindex of the host's veth device. */
netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
if (!netdev->priv.veth_attr.ifindex) {
ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
goto out_delete;
}
/* Note that we're retrieving the container's ifindex in the host's
* network namespace because we need it to move the device from the
* host's network namespace to the container's network namespace later
* on.
*/
netdev->ifindex = if_nametoindex(veth2); netdev->ifindex = if_nametoindex(veth2);
if (!netdev->ifindex) { if (!netdev->ifindex) {
ERROR("Failed to retrieve ifindex for \"%s\"", veth2); ERROR("Failed to retrieve ifindex for \"%s\"", veth2);
...@@ -152,7 +164,7 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd ...@@ -152,7 +164,7 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd
WARN("Failed to parse mtu"); WARN("Failed to parse mtu");
else else
INFO("Retrieved mtu %d", mtu); INFO("Retrieved mtu %d", mtu);
} else if (netdev->link) { } else if (netdev->link[0] != '\0') {
bridge_index = if_nametoindex(netdev->link); bridge_index = if_nametoindex(netdev->link);
if (bridge_index) { if (bridge_index) {
mtu = netdev_get_mtu(bridge_index); mtu = netdev_get_mtu(bridge_index);
...@@ -175,7 +187,7 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd ...@@ -175,7 +187,7 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd
} }
} }
if (netdev->link) { if (netdev->link[0] != '\0') {
err = lxc_bridge_attach(netdev->link, veth1); err = lxc_bridge_attach(netdev->link, veth1);
if (err) { if (err) {
ERROR("Failed to attach \"%s\" to bridge \"%s\": %s", ERROR("Failed to attach \"%s\" to bridge \"%s\": %s",
...@@ -206,9 +218,6 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd ...@@ -206,9 +218,6 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd
out_delete: out_delete:
if (netdev->ifindex != 0) if (netdev->ifindex != 0)
lxc_netdev_delete_by_name(veth1); lxc_netdev_delete_by_name(veth1);
if (!netdev->priv.veth_attr.pair)
free(veth1);
free(veth2);
return -1; return -1;
} }
...@@ -217,7 +226,7 @@ static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *n ...@@ -217,7 +226,7 @@ static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *n
char peerbuf[IFNAMSIZ], *peer; char peerbuf[IFNAMSIZ], *peer;
int err; int err;
if (!netdev->link) { if (netdev->link[0] == '\0') {
ERROR("No link for macvlan network device specified"); ERROR("No link for macvlan network device specified");
return -1; return -1;
} }
...@@ -235,29 +244,29 @@ static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *n ...@@ -235,29 +244,29 @@ static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *n
if (err) { if (err) {
ERROR("Failed to create macvlan interface \"%s\" on \"%s\": %s", ERROR("Failed to create macvlan interface \"%s\" on \"%s\": %s",
peer, netdev->link, strerror(-err)); peer, netdev->link, strerror(-err));
goto out; goto on_error;
} }
netdev->ifindex = if_nametoindex(peer); netdev->ifindex = if_nametoindex(peer);
if (!netdev->ifindex) { if (!netdev->ifindex) {
ERROR("Failed to retrieve ifindex for \"%s\"", peer); ERROR("Failed to retrieve ifindex for \"%s\"", peer);
goto out; goto on_error;
} }
if (netdev->upscript) { if (netdev->upscript) {
err = run_script(handler->name, "net", netdev->upscript, "up", err = run_script(handler->name, "net", netdev->upscript, "up",
"macvlan", netdev->link, (char*) NULL); "macvlan", netdev->link, (char*) NULL);
if (err) if (err)
goto out; goto on_error;
} }
DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d", DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
peer, netdev->ifindex, netdev->priv.macvlan_attr.mode); peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
return 0; return 0;
out:
on_error:
lxc_netdev_delete_by_name(peer); lxc_netdev_delete_by_name(peer);
free(peer);
return -1; return -1;
} }
...@@ -268,7 +277,7 @@ static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netd ...@@ -268,7 +277,7 @@ static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netd
static uint16_t vlan_cntr = 0; static uint16_t vlan_cntr = 0;
unsigned int mtu = 0; unsigned int mtu = 0;
if (!netdev->link) { if (netdev->link[0] == '\0') {
ERROR("No link for vlan network device specified"); ERROR("No link for vlan network device specified");
return -1; return -1;
} }
...@@ -297,7 +306,7 @@ static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netd ...@@ -297,7 +306,7 @@ static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netd
if (lxc_safe_uint(netdev->mtu, &mtu) < 0) { if (lxc_safe_uint(netdev->mtu, &mtu) < 0) {
ERROR("Failed to retrieve mtu from \"%d\"/\"%s\".", ERROR("Failed to retrieve mtu from \"%d\"/\"%s\".",
netdev->ifindex, netdev->ifindex,
netdev->name ? netdev->name : "(null)"); netdev->name[0] != '\0' ? netdev->name : "(null)");
return -1; return -1;
} }
err = lxc_netdev_set_mtu(peer, mtu); err = lxc_netdev_set_mtu(peer, mtu);
...@@ -314,17 +323,29 @@ static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netd ...@@ -314,17 +323,29 @@ static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netd
static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev) static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
{ {
if (!netdev->link) { if (netdev->link[0] == '\0') {
ERROR("No link for physical interface specified"); ERROR("No link for physical interface specified");
return -1; return -1;
} }
/* Note that we're retrieving the container's ifindex in the host's
* network namespace because we need it to move the device from the
* host's network namespace to the container's network namespace later
* on.
* Note that netdev->link will contain the name of the physical network
* device in the host's namespace.
*/
netdev->ifindex = if_nametoindex(netdev->link); netdev->ifindex = if_nametoindex(netdev->link);
if (!netdev->ifindex) { if (!netdev->ifindex) {
ERROR("Failed to retrieve ifindex for \"%s\"", netdev->link); ERROR("Failed to retrieve ifindex for \"%s\"", netdev->link);
return -1; return -1;
} }
/* Store the ifindex of the host's network device in the host's
* namespace.
*/
netdev->priv.phys_attr.ifindex = netdev->ifindex;
if (netdev->upscript) { if (netdev->upscript) {
int err; int err;
err = run_script(handler->name, "net", netdev->upscript, err = run_script(handler->name, "net", netdev->upscript,
...@@ -369,7 +390,7 @@ static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev) ...@@ -369,7 +390,7 @@ static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
char *veth1; char *veth1;
int err; int err;
if (netdev->priv.veth_attr.pair) if (netdev->priv.veth_attr.pair[0] != '\0')
veth1 = netdev->priv.veth_attr.pair; veth1 = netdev->priv.veth_attr.pair;
else else
veth1 = netdev->priv.veth_attr.veth1; veth1 = netdev->priv.veth_attr.veth1;
...@@ -1866,43 +1887,43 @@ const char *lxc_net_type_to_str(int type) ...@@ -1866,43 +1887,43 @@ const char *lxc_net_type_to_str(int type)
return lxc_network_types[type]; return lxc_network_types[type];
} }
static const char padchar[] = static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
char *lxc_mkifname(const char *template) char *lxc_mkifname(char *template)
{ {
char *name = NULL;
size_t i = 0;
FILE *urandom;
unsigned int seed; unsigned int seed;
struct ifaddrs *ifaddr, *ifa; FILE *urandom;
int ifexists = 0; struct ifaddrs *ifa, *ifaddr;
char name[IFNAMSIZ];
bool exists = false;
size_t i = 0;
if (strlen(template) >= IFNAMSIZ)
return NULL;
/* Get all the network interfaces */ /* Get all the network interfaces. */
getifaddrs(&ifaddr); getifaddrs(&ifaddr);
/* Initialize the random number generator */ /* Initialize the random number generator. */
urandom = fopen ("/dev/urandom", "r"); urandom = fopen("/dev/urandom", "r");
if (urandom != NULL) { if (urandom != NULL) {
if (fread (&seed, sizeof(seed), 1, urandom) <= 0) if (fread(&seed, sizeof(seed), 1, urandom) <= 0)
seed = time(0); seed = time(0);
fclose(urandom); fclose(urandom);
} } else {
else
seed = time(0); seed = time(0);
}
#ifndef HAVE_RAND_R #ifndef HAVE_RAND_R
srand(seed); srand(seed);
#endif #endif
/* Generate random names until we find one that doesn't exist */ /* Generate random names until we find one that doesn't exist. */
while(1) { while (true) {
ifexists = 0; name[0] = '\0';
name = strdup(template); strcpy(name, template);
if (name == NULL)
return NULL;
exists = false;
for (i = 0; i < strlen(name); i++) { for (i = 0; i < strlen(name); i++) {
if (name[i] == 'X') { if (name[i] == 'X') {
#ifdef HAVE_RAND_R #ifdef HAVE_RAND_R
...@@ -1914,20 +1935,18 @@ char *lxc_mkifname(const char *template) ...@@ -1914,20 +1935,18 @@ char *lxc_mkifname(const char *template)
} }
for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) { for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
if (strcmp(ifa->ifa_name, name) == 0) { if (!strcmp(ifa->ifa_name, name)) {
ifexists = 1; exists = true;
break; break;
} }
} }
if (ifexists == 0) if (!exists)
break; break;
free(name);
} }
freeifaddrs(ifaddr); freeifaddrs(ifaddr);
return name; return strcpy(template, name);
} }
int setup_private_host_hw_addr(char *veth1) int setup_private_host_hw_addr(char *veth1)
...@@ -1975,7 +1994,7 @@ int lxc_find_gateway_addresses(struct lxc_handler *handler) ...@@ -1975,7 +1994,7 @@ int lxc_find_gateway_addresses(struct lxc_handler *handler)
return -1; return -1;
} }
if (!netdev->link) { if (netdev->link[0] == '\0') {
ERROR("Automatic gateway detection needs a link interface"); ERROR("Automatic gateway detection needs a link interface");
return -1; return -1;
} }
...@@ -2005,8 +2024,8 @@ int lxc_find_gateway_addresses(struct lxc_handler *handler) ...@@ -2005,8 +2024,8 @@ int lxc_find_gateway_addresses(struct lxc_handler *handler)
} }
#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic" #define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
static int lxc_create_network_unpriv(const char *lxcpath, char *lxcname, static int lxc_create_network_unpriv_exec(const char *lxcpath, char *lxcname,
struct lxc_netdev *netdev, pid_t pid) struct lxc_netdev *netdev, pid_t pid)
{ {
int ret; int ret;
pid_t child; pid_t child;
...@@ -2049,7 +2068,7 @@ static int lxc_create_network_unpriv(const char *lxcpath, char *lxcname, ...@@ -2049,7 +2068,7 @@ static int lxc_create_network_unpriv(const char *lxcpath, char *lxcname,
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
if (netdev->link) if (netdev->link[0] != '\0')
strncpy(netdev_link, netdev->link, IFNAMSIZ); strncpy(netdev_link, netdev->link, IFNAMSIZ);
else else
strncpy(netdev_link, "none", IFNAMSIZ); strncpy(netdev_link, "none", IFNAMSIZ);
...@@ -2061,8 +2080,8 @@ static int lxc_create_network_unpriv(const char *lxcpath, char *lxcname, ...@@ -2061,8 +2080,8 @@ static int lxc_create_network_unpriv(const char *lxcpath, char *lxcname,
INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath, INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
lxcname, pidstr, netdev_link, lxcname, pidstr, netdev_link,
netdev->name ? netdev->name : "(null)"); netdev->name[0] != '\0' ? netdev->name : "(null)");
if (netdev->name) if (netdev->name[0] != '\0')
execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create", execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
lxcpath, lxcname, pidstr, "veth", netdev_link, lxcpath, lxcname, pidstr, "veth", netdev_link,
netdev->name, (char *)NULL); netdev->name, (char *)NULL);
...@@ -2079,7 +2098,7 @@ static int lxc_create_network_unpriv(const char *lxcpath, char *lxcname, ...@@ -2079,7 +2098,7 @@ static int lxc_create_network_unpriv(const char *lxcpath, char *lxcname,
bytes = read(pipefd[0], &buffer, MAXPATHLEN); bytes = read(pipefd[0], &buffer, MAXPATHLEN);
if (bytes < 0) { if (bytes < 0) {
SYSERROR("Failed to read from pipe file descriptor."); SYSERROR("Failed to read from pipe file descriptor");
close(pipefd[0]); close(pipefd[0]);
return -1; return -1;
} }
...@@ -2096,44 +2115,62 @@ static int lxc_create_network_unpriv(const char *lxcpath, char *lxcname, ...@@ -2096,44 +2115,62 @@ static int lxc_create_network_unpriv(const char *lxcpath, char *lxcname,
/* netdev->name */ /* netdev->name */
token = strtok_r(buffer, ":", &saveptr); token = strtok_r(buffer, ":", &saveptr);
if (!token) if (!token) {
return -1; ERROR("Failed to parse lxc-user-nic output");
netdev->name = malloc(IFNAMSIZ + 1);
if (!netdev->name) {
SYSERROR("Failed to allocate memory.");
return -1; return -1;
} }
memset(netdev->name, 0, IFNAMSIZ + 1); memset(netdev->name, 0, IFNAMSIZ + 1);
strncpy(netdev->name, token, IFNAMSIZ); strncpy(netdev->name, token, IFNAMSIZ);
/* netdev->priv.veth_attr.pair */ /* netdev->ifindex */
token = strtok_r(NULL, ":", &saveptr); token = strtok_r(NULL, ":", &saveptr);
if (!token) if (!token) {
ERROR("Failed to parse lxc-user-nic output");
return -1; return -1;
}
netdev->priv.veth_attr.pair = strdup(token); ret = lxc_safe_int(token, &netdev->ifindex);
if (!netdev->priv.veth_attr.pair) { if (ret < 0) {
ERROR("Failed to allocate memory."); ERROR("%s - Failed to convert string \"%s\" to integer",
strerror(-ret), token);
return -1; return -1;
} }
/* netdev->ifindex */ /* netdev->priv.veth_attr.veth1 */
token = strtok_r(NULL, ":", &saveptr);
if (!token) {
ERROR("Failed to parse lxc-user-nic output");
return -1;
}
if (strlen(token) >= IFNAMSIZ) {
ERROR("Host side veth device name returned by lxc-user-nic is "
"too long");
return -E2BIG;
}
strcpy(netdev->priv.veth_attr.veth1, token);
/* netdev->priv.veth_attr.ifindex */
token = strtok_r(NULL, ":", &saveptr); token = strtok_r(NULL, ":", &saveptr);
if (!token) if (!token) {
ERROR("Failed to parse lxc-user-nic output");
return -1; return -1;
}
ret = lxc_safe_int(token, &netdev->ifindex); ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
if (ret < 0) { if (ret < 0) {
ERROR("Failed to parse ifindex for network device \"%s\"", netdev->name); ERROR("%s - Failed to convert string \"%s\" to integer",
strerror(-ret), token);
return -1; return -1;
} }
return 0; return 0;
} }
static int lxc_delete_network_unpriv(const char *lxcpath, char *lxcname, static int lxc_delete_network_unpriv_exec(const char *lxcpath, char *lxcname,
struct lxc_netdev *netdev, pid_t pid) struct lxc_netdev *netdev,
const char *netns_path)
{ {
int bytes, ret; int bytes, ret;
pid_t child; pid_t child;
...@@ -2160,8 +2197,8 @@ static int lxc_delete_network_unpriv(const char *lxcpath, char *lxcname, ...@@ -2160,8 +2197,8 @@ static int lxc_delete_network_unpriv(const char *lxcpath, char *lxcname,
} }
if (child == 0) { if (child == 0) {
char *hostveth;
int ret; int ret;
char pidstr[LXC_NUMSTRLEN64];
close(pipefd[0]); close(pipefd[0]);
...@@ -2174,20 +2211,26 @@ static int lxc_delete_network_unpriv(const char *lxcpath, char *lxcname, ...@@ -2174,20 +2211,26 @@ static int lxc_delete_network_unpriv(const char *lxcpath, char *lxcname,
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
if (!netdev->link) if (netdev->priv.veth_attr.pair[0] != '\0')
SYSERROR("Network link for network device \"%s\" is " hostveth = netdev->priv.veth_attr.pair;
"missing", netdev->priv.veth_attr.pair); else
hostveth = netdev->priv.veth_attr.veth1;
if (hostveth[0] == '\0') {
SYSERROR("Host side veth device name is missing");
exit(EXIT_FAILURE);
}
ret = snprintf(pidstr, LXC_NUMSTRLEN64, "%d", pid); if (netdev->link[0] == '\0') {
if (ret < 0 || ret >= LXC_NUMSTRLEN64) SYSERROR("Network link for network device \"%s\" is "
"missing", netdev->priv.veth_attr.veth1);
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
pidstr[LXC_NUMSTRLEN64 - 1] = '\0'; }
INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath, INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
lxcname, pidstr, netdev->link, netdev->priv.veth_attr.pair); lxcname, netns_path, netdev->link, hostveth);
execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath, execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
lxcname, pidstr, "veth", netdev->link, lxcname, netns_path, "veth", netdev->link, hostveth,
netdev->priv.veth_attr.pair, (char *)NULL); (char *)NULL);
SYSERROR("Failed to exec lxc-user-nic."); SYSERROR("Failed to exec lxc-user-nic.");
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
...@@ -2214,15 +2257,103 @@ static int lxc_delete_network_unpriv(const char *lxcpath, char *lxcname, ...@@ -2214,15 +2257,103 @@ static int lxc_delete_network_unpriv(const char *lxcpath, char *lxcname,
return 0; return 0;
} }
/* Clean up the network devices of a container that was not started as root.
 *
 * @handler: container handler; its conf->network list, am_root flag, netnsfd,
 *           lxcpath and name are read.
 *
 * Nothing is done (and true is returned) when handler->am_root is set. When no
 * network namespace file descriptor was kept in handler->netnsfd the function
 * gives up and returns false.
 *
 * For every network device with a non-zero ifindex:
 *   - LXC_NET_PHYS devices are renamed back to netdev->link;
 *   - all other devices are passed to their netdev_deconf[] callback;
 *   - LXC_NET_VETH devices for which is_ovs_bridge(netdev->link) holds are
 *     additionally handed to lxc_delete_network_unpriv_exec() together with
 *     the "/proc/<pid>/fd/<netnsfd>" path of the kept network namespace.
 *
 * Returns true if no lxc_delete_network_unpriv_exec() call failed, false
 * otherwise. Rename and deconfigure failures are only logged.
 */
bool lxc_delete_network_unpriv(struct lxc_handler *handler)
{
	int ret;
	struct lxc_list *iterator;
	struct lxc_list *network = &handler->conf->network;
	/* strlen("/proc/") = 6
	 * +
	 * LXC_NUMSTRLEN64
	 * +
	 * strlen("/fd/") = 4
	 * +
	 * LXC_NUMSTRLEN64
	 * +
	 * \0
	 */
	char netns_path[6 + LXC_NUMSTRLEN64 + 4 + LXC_NUMSTRLEN64 + 1];
	bool deleted_all = true;

	if (handler->am_root)
		return true;

	*netns_path = '\0';

	if (handler->netnsfd < 0) {
		DEBUG("Cannot guarantee safe deletion of network devices. "
		      "Manual cleanup may be needed");
		return false;
	}

	ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
		       getpid(), handler->netnsfd);
	/* ret is known to be non-negative before it is converted. */
	if (ret < 0 || (size_t)ret >= sizeof(netns_path))
		return false;

	lxc_list_for_each(iterator, network) {
		char *hostveth = NULL;
		struct lxc_netdev *netdev = iterator->elem;

		/* We can only delete devices whose ifindex we have. If we don't
		 * have the index it means that we didn't create it.
		 */
		if (!netdev->ifindex)
			continue;

		if (netdev->type == LXC_NET_PHYS) {
			ret = lxc_netdev_rename_by_index(netdev->ifindex,
							 netdev->link);
			if (ret < 0)
				WARN("Failed to rename interface with index %d "
				     "to its initial name \"%s\"",
				     netdev->ifindex, netdev->link);
			else
				TRACE("Renamed interface with index %d to its "
				      "initial name \"%s\"",
				      netdev->ifindex, netdev->link);
			continue;
		}

		/* A failing deconfigure callback is logged but does not stop
		 * the cleanup of this or the remaining devices.
		 */
		ret = netdev_deconf[netdev->type](handler, netdev);
		if (ret < 0)
			WARN("Failed to deconfigure network device");

		if (netdev->type != LXC_NET_VETH)
			continue;

		if (!is_ovs_bridge(netdev->link))
			continue;

		/* Prefer the configured pair name over the veth1 name. */
		if (netdev->priv.veth_attr.pair[0] != '\0')
			hostveth = netdev->priv.veth_attr.pair;
		else
			hostveth = netdev->priv.veth_attr.veth1;
		if (hostveth[0] == '\0')
			continue;

		ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
						     handler->name, netdev,
						     netns_path);
		if (ret < 0) {
			deleted_all = false;
			WARN("Failed to remove port \"%s\" from openvswitch "
			     "bridge \"%s\"", hostveth, netdev->link);
			continue;
		}
		INFO("Removed interface \"%s\" from \"%s\"", hostveth,
		     netdev->link);
	}

	return deleted_all;
}
int lxc_create_network_priv(struct lxc_handler *handler) int lxc_create_network_priv(struct lxc_handler *handler)
{ {
bool am_root;
struct lxc_list *iterator; struct lxc_list *iterator;
struct lxc_list *network = &handler->conf->network; struct lxc_list *network = &handler->conf->network;
/* We need to be root. */ if (!handler->am_root)
am_root = (getuid() == 0);
if (!am_root)
return 0; return 0;
lxc_list_for_each(iterator, network) { lxc_list_for_each(iterator, network) {
...@@ -2243,33 +2374,19 @@ int lxc_create_network_priv(struct lxc_handler *handler) ...@@ -2243,33 +2374,19 @@ int lxc_create_network_priv(struct lxc_handler *handler)
return 0; return 0;
} }
int lxc_create_network(const char *lxcpath, char *lxcname, int lxc_network_move_created_netdev_priv(const char *lxcpath, char *lxcname,
struct lxc_list *network, pid_t pid) struct lxc_list *network, pid_t pid)
{ {
int err; int ret;
bool am_root;
char ifname[IFNAMSIZ]; char ifname[IFNAMSIZ];
struct lxc_list *iterator; struct lxc_list *iterator;
am_root = (getuid() == 0); if (am_unpriv())
return 0;
lxc_list_for_each(iterator, network) { lxc_list_for_each(iterator, network) {
struct lxc_netdev *netdev = iterator->elem; struct lxc_netdev *netdev = iterator->elem;
if (netdev->type == LXC_NET_VETH && !am_root) {
if (netdev->mtu)
INFO("mtu ignored due to insufficient privilege");
if (lxc_create_network_unpriv(lxcpath, lxcname, netdev, pid))
return -1;
/* lxc-user-nic has moved the nic to the new ns.
* unpriv_assign_nic() fills in netdev->name.
* netdev->ifindex will be filled in at
* lxc_setup_netdev_in_child_namespaces().
*/
continue;
}
/* empty network namespace, nothing to move */
if (!netdev->ifindex) if (!netdev->ifindex)
continue; continue;
...@@ -2280,29 +2397,67 @@ int lxc_create_network(const char *lxcpath, char *lxcname, ...@@ -2280,29 +2397,67 @@ int lxc_create_network(const char *lxcpath, char *lxcname,
return -1; return -1;
} }
err = lxc_netdev_move_by_name(ifname, pid, NULL); ret = lxc_netdev_move_by_name(ifname, pid, NULL);
if (err) { if (ret) {
ERROR("Failed to move network device \"%s\" to " ERROR("Failed to move network device \"%s\" to "
"network namespace %d: %s", ifname, pid, "network namespace %d: %s", ifname, pid,
strerror(-err)); strerror(-ret));
return -1; return -1;
} }
DEBUG("Moved network device \"%s\"/\"%s\" to network namespace " DEBUG("Moved network device \"%s\"/\"%s\" to network namespace "
"of %d:", ifname, netdev->name ? netdev->name : "(null)", "of %d",
ifname, netdev->name[0] != '\0' ? netdev->name : "(null)",
pid); pid);
} }
return 0; return 0;
} }
bool lxc_delete_network(struct lxc_handler *handler) int lxc_create_network_unpriv(const char *lxcpath, char *lxcname,
struct lxc_list *network, pid_t pid)
{
struct lxc_list *iterator;
if (!am_unpriv())
return 0;
lxc_list_for_each(iterator, network) {
struct lxc_netdev *netdev = iterator->elem;
if (netdev->type == LXC_NET_EMPTY)
continue;
if (netdev->type == LXC_NET_NONE)
continue;
if (netdev->type != LXC_NET_VETH) {
ERROR("Networks of type %s are not supported by "
"unprivileged containers",
lxc_net_type_to_str(netdev->type));
return -1;
}
if (netdev->mtu)
INFO("mtu ignored due to insufficient privilege");
if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev, pid))
return -1;
}
return 0;
}
bool lxc_delete_network_priv(struct lxc_handler *handler)
{ {
int ret; int ret;
struct lxc_list *iterator; struct lxc_list *iterator;
struct lxc_list *network = &handler->conf->network; struct lxc_list *network = &handler->conf->network;
bool deleted_all = true; bool deleted_all = true;
if (!handler->am_root)
return true;
lxc_list_for_each(iterator, network) { lxc_list_for_each(iterator, network) {
char *hostveth = NULL; char *hostveth = NULL;
struct lxc_netdev *netdev = iterator->elem; struct lxc_netdev *netdev = iterator->elem;
...@@ -2317,12 +2472,13 @@ bool lxc_delete_network(struct lxc_handler *handler) ...@@ -2317,12 +2472,13 @@ bool lxc_delete_network(struct lxc_handler *handler)
ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link); ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
if (ret < 0) if (ret < 0)
WARN("Failed to rename interface with index %d " WARN("Failed to rename interface with index %d "
"to its initial name \"%s\"", "from \"%s\" to its initial name \"%s\"",
netdev->ifindex, netdev->link); netdev->ifindex, netdev->name, netdev->link);
else else
TRACE("Renamed interface with index %d to its " TRACE("Renamed interface with index %d from "
"initial name \"%s\"", "\"%s\" to its initial name \"%s\"",
netdev->ifindex, netdev->link); netdev->ifindex, netdev->name,
netdev->link);
continue; continue;
} }
...@@ -2334,53 +2490,36 @@ bool lxc_delete_network(struct lxc_handler *handler) ...@@ -2334,53 +2490,36 @@ bool lxc_delete_network(struct lxc_handler *handler)
* namespace is destroyed but in case we did not move the * namespace is destroyed but in case we did not move the
* interface to the network namespace, we have to destroy it. * interface to the network namespace, we have to destroy it.
*/ */
if (!am_unpriv()) { ret = lxc_netdev_delete_by_index(netdev->ifindex);
ret = lxc_netdev_delete_by_index(netdev->ifindex); if (-ret == ENODEV) {
if (-ret == ENODEV) { INFO("Interface \"%s\" with index %d already "
INFO("Interface \"%s\" with index %d already " "deleted or existing in different network "
"deleted or existing in different network " "namespace",
"namespace", netdev->name[0] != '\0' ? netdev->name : "(null)",
netdev->name ? netdev->name : "(null)", netdev->ifindex);
netdev->ifindex); } else if (ret < 0) {
} else if (ret < 0) { deleted_all = false;
deleted_all = false; WARN("Failed to remove interface \"%s\" with "
WARN("Failed to remove interface \"%s\" with " "index %d: %s",
"index %d: %s", netdev->name[0] != '\0' ? netdev->name : "(null)",
netdev->name ? netdev->name : "(null)", netdev->ifindex, strerror(-ret));
netdev->ifindex, strerror(-ret)); continue;
continue;
}
INFO("Removed interface \"%s\" with index %d",
netdev->name ? netdev->name : "(null)",
netdev->ifindex);
} }
INFO("Removed interface \"%s\" with index %d",
netdev->name[0] != '\0' ? netdev->name : "(null)",
netdev->ifindex);
if (netdev->type != LXC_NET_VETH) if (netdev->type != LXC_NET_VETH)
continue; continue;
if (am_unpriv()) {
if (is_ovs_bridge(netdev->link)) {
ret = lxc_delete_network_unpriv(handler->lxcpath,
handler->name,
netdev, getpid());
if (ret < 0)
WARN("Failed to remove port \"%s\" "
"from openvswitch bridge \"%s\"",
netdev->priv.veth_attr.pair,
netdev->link);
}
continue;
}
/* Explicitly delete host veth device to prevent lingering /* Explicitly delete host veth device to prevent lingering
* devices. We had issues in LXD around this. * devices. We had issues in LXD around this.
*/ */
if (netdev->priv.veth_attr.pair) if (netdev->priv.veth_attr.pair[0] != '\0')
hostveth = netdev->priv.veth_attr.pair; hostveth = netdev->priv.veth_attr.pair;
else else
hostveth = netdev->priv.veth_attr.veth1; hostveth = netdev->priv.veth_attr.veth1;
if (*hostveth == '\0') if (hostveth[0] == '\0')
continue; continue;
ret = lxc_netdev_delete_by_name(hostveth); ret = lxc_netdev_delete_by_name(hostveth);
...@@ -2435,51 +2574,69 @@ int lxc_requests_empty_network(struct lxc_handler *handler) ...@@ -2435,51 +2574,69 @@ int lxc_requests_empty_network(struct lxc_handler *handler)
} }
/* try to move physical nics to the init netns */ /* try to move physical nics to the init netns */
void lxc_restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf) int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
{ {
int ret; int ret;
int i, oldfd; int oldfd;
char ifname[IFNAMSIZ]; char ifname[IFNAMSIZ];
struct lxc_list *iterator;
int netnsfd = handler->netnsfd;
struct lxc_conf *conf = handler->conf;
if (netnsfd < 0 || conf->num_savednics == 0) /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
return; * the parent network namespace. We won't have this capability if we are
* unprivileged.
*/
if (!handler->am_root)
return 0;
INFO("Trying to restore network device names in original namespace for " TRACE("Moving physical network devices back to parent network namespace");
"%d network devices", conf->num_savednics);
oldfd = lxc_preserve_ns(getpid(), "net"); oldfd = lxc_preserve_ns(getpid(), "net");
if (oldfd < 0) { if (oldfd < 0) {
SYSERROR("Failed to preserve network namespace"); SYSERROR("Failed to preserve network namespace");
return; return -1;
} }
ret = setns(netnsfd, 0); ret = setns(netnsfd, CLONE_NEWNET);
if (ret < 0) { if (ret < 0) {
SYSERROR("Failed to enter network namespace"); SYSERROR("Failed to enter network namespace");
close(oldfd); close(oldfd);
return; return -1;
} }
for (i = 0; i < conf->num_savednics; i++) { lxc_list_for_each(iterator, &conf->network) {
struct saved_nic *s = &conf->saved_nics[i]; struct lxc_netdev *netdev = iterator->elem;
if (netdev->type != LXC_NET_PHYS)
continue;
/* retrieve the name of the interface */ /* Retrieve the name of the interface in the container's network
if (!if_indextoname(s->ifindex, ifname)) { * namespace.
*/
if (!if_indextoname(netdev->ifindex, ifname)) {
WARN("No interface corresponding to ifindex %d", WARN("No interface corresponding to ifindex %d",
s->ifindex); netdev->ifindex);
continue; continue;
} }
if (lxc_netdev_move_by_name(ifname, 1, s->orig_name))
ret = lxc_netdev_move_by_name(ifname, 1, netdev->link);
if (ret < 0)
WARN("Error moving network device \"%s\" back to " WARN("Error moving network device \"%s\" back to "
"network namespace", ifname); "network namespace", ifname);
free(s->orig_name); else
TRACE("Moved network device \"%s\" back to network "
"namespace", ifname);
} }
conf->num_savednics = 0;
ret = setns(oldfd, 0); ret = setns(oldfd, CLONE_NEWNET);
if (ret < 0)
SYSERROR("Failed to enter network namespace");
close(oldfd); close(oldfd);
if (ret < 0) {
SYSERROR("Failed to enter network namespace");
return -1;
}
return 0;
} }
static int setup_hw_addr(char *hwaddr, const char *ifname) static int setup_hw_addr(char *hwaddr, const char *ifname)
...@@ -2584,8 +2741,7 @@ static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev) ...@@ -2584,8 +2741,7 @@ static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev)
if (netdev->type != LXC_NET_VETH) { if (netdev->type != LXC_NET_VETH) {
net_type_name = lxc_net_type_to_str(netdev->type); net_type_name = lxc_net_type_to_str(netdev->type);
ERROR("%s networks are not supported for containers " ERROR("%s networks are not supported for containers "
"not setup up by privileged users", "not setup up by privileged users", net_type_name);
net_type_name);
return -1; return -1;
} }
...@@ -2613,9 +2769,12 @@ static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev) ...@@ -2613,9 +2769,12 @@ static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev)
* When the IFLA_IFNAME attribute is passed something like "<prefix>%d" * When the IFLA_IFNAME attribute is passed something like "<prefix>%d"
* netlink will replace the format specifier with an appropriate index. * netlink will replace the format specifier with an appropriate index.
*/ */
if (!netdev->name) if (netdev->name[0] == '\0') {
netdev->name = netdev->type == LXC_NET_PHYS ? if (netdev->type == LXC_NET_PHYS)
netdev->link : "eth%d"; strcpy(netdev->name, netdev->link);
else
strcpy(netdev->name, "eth%d");
}
/* rename the interface name */ /* rename the interface name */
if (strcmp(ifname, netdev->name) != 0) { if (strcmp(ifname, netdev->name) != 0) {
...@@ -2636,6 +2795,12 @@ static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev) ...@@ -2636,6 +2795,12 @@ static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev)
return -1; return -1;
} }
/* Now update the recorded name of the network device to reflect the
* name of the network device in the child's network namespace. We will
* later on send this information back to the parent.
*/
strcpy(netdev->name, current_ifname);
/* set a mac address */ /* set a mac address */
if (netdev->hwaddr) { if (netdev->hwaddr) {
if (setup_hw_addr(netdev->hwaddr, current_ifname)) { if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
...@@ -2760,7 +2925,7 @@ static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev) ...@@ -2760,7 +2925,7 @@ static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev)
} }
} }
DEBUG("Network devie \"%s\" has been setup", current_ifname); DEBUG("Network device \"%s\" has been setup", current_ifname);
return 0; return 0;
} }
...@@ -2771,8 +2936,6 @@ int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf, ...@@ -2771,8 +2936,6 @@ int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
struct lxc_list *iterator; struct lxc_list *iterator;
struct lxc_netdev *netdev; struct lxc_netdev *netdev;
lxc_log_configured_netdevs(conf);
lxc_list_for_each(iterator, network) { lxc_list_for_each(iterator, network) {
netdev = iterator->elem; netdev = iterator->elem;
...@@ -2794,3 +2957,114 @@ int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf, ...@@ -2794,3 +2957,114 @@ int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
return 0; return 0;
} }
/* Send the names of all veth devices to the child over the handler's data
 * socket.
 *
 * Nothing is sent when handler->am_root is set. Otherwise one fixed-size
 * record of IFNAMSIZ bytes is written for every LXC_NET_VETH entry of
 * handler->conf->network, in list order. Devices of any other type are
 * skipped.
 *
 * @handler : handler providing the network list and data_sock[0]
 *
 * Returns 0 on success (including when there was nothing to do) and -1 as soon
 * as a name could not be sent completely.
 */
int lxc_network_send_veth_names_to_child(struct lxc_handler *handler)
{
	struct lxc_list *iterator;
	struct lxc_list *network = &handler->conf->network;
	int data_sock = handler->data_sock[0];

	if (handler->am_root)
		return 0;

	lxc_list_for_each(iterator, network) {
		ssize_t ret;
		struct lxc_netdev *netdev = iterator->elem;

		if (netdev->type != LXC_NET_VETH)
			continue;

		/* The peer reads IFNAMSIZ bytes per device. A short write would
		 * shift every following record, so anything other than a full
		 * record is reported as a failure.
		 */
		ret = send(data_sock, netdev->name, IFNAMSIZ, 0);
		if (ret != IFNAMSIZ)
			return -1;

		TRACE("Sent network device name \"%s\" to child", netdev->name);
	}

	return 0;
}
/* Receive the names of all veth devices from the parent over the handler's
 * data socket.
 *
 * Nothing is read when handler->am_root is set. Otherwise one fixed-size
 * record of IFNAMSIZ bytes is read for every LXC_NET_VETH entry of
 * handler->conf->network, in list order, and stored in netdev->name. Devices
 * of any other type are skipped.
 *
 * @handler : handler providing the network list and data_sock[1]
 *
 * Returns 0 on success (including when there was nothing to do) and -1 if a
 * name could not be received completely, e.g. because the peer closed the
 * socket.
 */
int lxc_network_recv_veth_names_from_parent(struct lxc_handler *handler)
{
	struct lxc_list *iterator;
	struct lxc_list *network = &handler->conf->network;
	int data_sock = handler->data_sock[1];

	if (handler->am_root)
		return 0;

	lxc_list_for_each(iterator, network) {
		ssize_t ret;
		struct lxc_netdev *netdev = iterator->elem;

		if (netdev->type != LXC_NET_VETH)
			continue;

		/* recv() returns 0 when the peer has gone away and may return
		 * fewer bytes than requested on a stream socket. Ask for the
		 * whole record and reject anything shorter so we never continue
		 * with a stale or partial device name.
		 */
		ret = recv(data_sock, netdev->name, IFNAMSIZ, MSG_WAITALL);
		if (ret != IFNAMSIZ)
			return -1;

		TRACE("Received network device name \"%s\" from parent", netdev->name);
	}

	return 0;
}
/* Send the name and ifindex of every configured network device to the parent
 * over the handler's data socket.
 *
 * Nothing is sent unless handler->am_root is set. For each entry of
 * handler->conf->network, in list order and regardless of its type, two
 * fixed-size records are written: IFNAMSIZ bytes of netdev->name followed by
 * sizeof(netdev->ifindex) bytes of netdev->ifindex.
 *
 * @handler : handler providing the network list and data_sock[0]
 *
 * Returns 0 on success (including when there was nothing to do) and -1 as soon
 * as a record could not be sent completely.
 */
int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
{
	struct lxc_list *iterator, *network;
	int data_sock = handler->data_sock[0];

	if (!handler->am_root)
		return 0;

	network = &handler->conf->network;
	lxc_list_for_each(iterator, network) {
		ssize_t ret;
		struct lxc_netdev *netdev = iterator->elem;

		/* Send network device name in the child's namespace to parent.
		 * A partial record would desynchronize the peer, so it is
		 * treated like a failed send.
		 */
		ret = send(data_sock, netdev->name, IFNAMSIZ, 0);
		if (ret != IFNAMSIZ)
			return -1;

		/* Send network device ifindex in the child's namespace to
		 * parent.
		 */
		ret = send(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
		if (ret != (ssize_t)sizeof(netdev->ifindex))
			return -1;
	}

	TRACE("Sent network device names and ifindeces to parent");
	return 0;
}
/* Receive the name and ifindex of every configured network device from the
 * child over the handler's data socket.
 *
 * Nothing is read unless handler->am_root is set. For each entry of
 * handler->conf->network, in list order and regardless of its type, two
 * fixed-size records are read: IFNAMSIZ bytes into netdev->name followed by
 * sizeof(netdev->ifindex) bytes into netdev->ifindex.
 *
 * @handler : handler providing the network list and data_sock[1]
 *
 * Returns 0 on success (including when there was nothing to do) and -1 if a
 * record could not be received completely, e.g. because the peer closed the
 * socket.
 */
int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
{
	struct lxc_list *iterator, *network;
	int data_sock = handler->data_sock[1];

	if (!handler->am_root)
		return 0;

	network = &handler->conf->network;
	lxc_list_for_each(iterator, network) {
		ssize_t ret;
		struct lxc_netdev *netdev = iterator->elem;

		/* Receive the network device name as seen in the child's
		 * namespace. recv() returns 0 on EOF and may return short on a
		 * stream socket, so only a complete record counts as success.
		 */
		ret = recv(data_sock, netdev->name, IFNAMSIZ, MSG_WAITALL);
		if (ret != IFNAMSIZ)
			return -1;

		/* Receive the network device ifindex as seen in the child's
		 * namespace.
		 */
		ret = recv(data_sock, &netdev->ifindex, sizeof(netdev->ifindex),
			   MSG_WAITALL);
		if (ret != (ssize_t)sizeof(netdev->ifindex))
			return -1;
	}

	return 0;
}
...@@ -79,9 +79,21 @@ struct lxc_route6 { ...@@ -79,9 +79,21 @@ struct lxc_route6 {
struct in6_addr addr; struct in6_addr addr;
}; };
/* Contains information about the host side veth device.
* @pair : Name of the host side veth device.
* If the user requested that the host veth device be created with a
* specific name this field will be set. If this field is set @veth1
* is not set.
* @veth1 : Name of the host side veth device.
* If the user did not request that the host veth device be created
* with a specific name this field will be set. If this field is set
* @pair is not set.
* @ifindex : Ifindex of the network device.
*/
struct ifla_veth { struct ifla_veth {
char *pair; /* pair name */ char pair[IFNAMSIZ];
char veth1[IFNAMSIZ]; /* needed for deconf */ char veth1[IFNAMSIZ];
int ifindex;
}; };
struct ifla_vlan { struct ifla_vlan {
...@@ -95,60 +107,83 @@ struct ifla_macvlan { ...@@ -95,60 +107,83 @@ struct ifla_macvlan {
int mode; /* private, vepa, bridge, passthru */ int mode; /* private, vepa, bridge, passthru */
}; };
/* Contains information about the physical network device as seen from the host.
 * @ifindex : The ifindex of the physical network device in the host's network
 *            namespace.
 *
 * Stored as the phys_attr member of union netdev_p.
 * NOTE(review): presumably only meaningful for LXC_NET_PHYS devices - confirm.
 */
struct ifla_phys {
int ifindex;
};
union netdev_p { union netdev_p {
struct ifla_macvlan macvlan_attr;
struct ifla_phys phys_attr;
struct ifla_veth veth_attr; struct ifla_veth veth_attr;
struct ifla_vlan vlan_attr; struct ifla_vlan vlan_attr;
struct ifla_macvlan macvlan_attr;
}; };
/* /*
* Defines a structure to configure a network device * Defines a structure to configure a network device
* @link : lxc.net.[i].link, name of bridge or host iface to attach if any * @idx : network counter
* @name : lxc.net.[i].name, name of iface on the container side * @ifindex : ifindex of the network device
* @flags : flag of the network device (IFF_UP, ... ) * Note that this is the ifindex of the network device in
* @ipv4 : a list of ipv4 addresses to be set on the network device * the container's network namespace. If the network device
* @ipv6 : a list of ipv6 addresses to be set on the network device * consists of a pair of network devices (e.g. veth pairs
* @upscript : a script filename to be executed during interface configuration * attached to a network bridge) then this index cannot be
* @downscript : a script filename to be executed during interface destruction * used to identify or modify the host veth device. See
* @idx : network counter * struct ifla_veth for the host side information.
* @type : network type (veth, macvlan, vlan, ...)
* @flags : flag of the network device (IFF_UP, ... )
* @link : lxc.net.[i].link, name of bridge or host iface to attach
* if any
* @name : lxc.net.[i].name, name of iface on the container side
* @hwaddr : mac address
* @mtu : maximum transmission unit
* @priv : information specific to the specified network type
* Note that this is a union so whether accessing a struct
* is possible is dependent on the network type.
* @ipv4 : a list of ipv4 addresses to be set on the network device
* @ipv6 : a list of ipv6 addresses to be set on the network device
* @ipv4_gateway_auto : whether the ipv4 gateway is to be automatically gathered
* from the associated @link
* @ipv4_gateway : ipv4 gateway
* @ipv6_gateway_auto : whether the ipv6 gateway is to be automatically gathered
* from the associated @link
* @ipv6_gateway : ipv6 gateway
* @upscript : a script filename to be executed during interface
* configuration
* @downscript : a script filename to be executed during interface
* destruction
*/ */
struct lxc_netdev { struct lxc_netdev {
ssize_t idx; ssize_t idx;
int ifindex;
int type; int type;
int flags; int flags;
int ifindex; char link[IFNAMSIZ];
char *link; char name[IFNAMSIZ];
char *name;
char *hwaddr; char *hwaddr;
char *mtu; char *mtu;
union netdev_p priv; union netdev_p priv;
struct lxc_list ipv4; struct lxc_list ipv4;
struct lxc_list ipv6; struct lxc_list ipv6;
struct in_addr *ipv4_gateway;
bool ipv4_gateway_auto; bool ipv4_gateway_auto;
struct in6_addr *ipv6_gateway; struct in_addr *ipv4_gateway;
bool ipv6_gateway_auto; bool ipv6_gateway_auto;
struct in6_addr *ipv6_gateway;
char *upscript; char *upscript;
char *downscript; char *downscript;
}; };
struct saved_nic {
int ifindex;
char *orig_name;
};
/* Convert a string mac address to a socket structure. */ /* Convert a string mac address to a socket structure. */
extern int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr); extern int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr);
/* /* Move a device between namespaces. */
* Move a device between namespaces extern int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname);
*/ extern int lxc_netdev_move_by_name(const char *ifname, pid_t pid,
extern int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char* ifname); const char *newname);
extern int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname);
/* /* Delete a network device. */
* Delete a network device
*/
extern int lxc_netdev_delete_by_name(const char *name); extern int lxc_netdev_delete_by_name(const char *name);
extern int lxc_netdev_delete_by_index(int ifindex); extern int lxc_netdev_delete_by_index(int ifindex);
...@@ -251,19 +286,28 @@ extern int lxc_neigh_proxy_off(const char *name, int family); ...@@ -251,19 +286,28 @@ extern int lxc_neigh_proxy_off(const char *name, int family);
/* Generate a new unique network interface name. /* Generate a new unique network interface name.
* Allocated memory must be freed by caller. * Allocated memory must be freed by caller.
*/ */
extern char *lxc_mkifname(const char *template); extern char *lxc_mkifname(char *template);
extern const char *lxc_net_type_to_str(int type); extern const char *lxc_net_type_to_str(int type);
extern int setup_private_host_hw_addr(char *veth1); extern int setup_private_host_hw_addr(char *veth1);
extern int netdev_get_mtu(int ifindex); extern int netdev_get_mtu(int ifindex);
extern int lxc_create_network_priv(struct lxc_handler *handler); extern int lxc_create_network_priv(struct lxc_handler *handler);
extern bool lxc_delete_network(struct lxc_handler *handler); extern int lxc_network_move_created_netdev_priv(const char *lxcpath,
char *lxcname,
struct lxc_list *network,
pid_t pid);
extern bool lxc_delete_network_priv(struct lxc_handler *handler);
extern bool lxc_delete_network_unpriv(struct lxc_handler *handler);
extern int lxc_find_gateway_addresses(struct lxc_handler *handler); extern int lxc_find_gateway_addresses(struct lxc_handler *handler);
extern int lxc_create_network(const char *lxcpath, char *lxcname, extern int lxc_create_network_unpriv(const char *lxcpath, char *lxcname,
struct lxc_list *network, pid_t pid); struct lxc_list *network, pid_t pid);
extern int lxc_requests_empty_network(struct lxc_handler *handler); extern int lxc_requests_empty_network(struct lxc_handler *handler);
extern void lxc_restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf); extern int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler);
extern int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf, extern int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
struct lxc_list *network); struct lxc_list *network);
extern int lxc_network_send_veth_names_to_child(struct lxc_handler *handler);
extern int lxc_network_recv_veth_names_from_parent(struct lxc_handler *handler);
extern int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler);
extern int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler);
#endif /* __LXC_NETWORK_H */ #endif /* __LXC_NETWORK_H */
...@@ -5,6 +5,8 @@ ...@@ -5,6 +5,8 @@
* *
* Authors: * Authors:
* Daniel Lezcano <daniel.lezcano at free.fr> * Daniel Lezcano <daniel.lezcano at free.fr>
* Serge Hallyn <serge@hallyn.com>
* Christian Brauner <christian.brauner@ubuntu.com>
* *
* This library is free software; you can redistribute it and/or * This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public * modify it under the terms of the GNU Lesser General Public
...@@ -60,6 +62,7 @@ ...@@ -60,6 +62,7 @@
#include "commands.h" #include "commands.h"
#include "commands_utils.h" #include "commands_utils.h"
#include "conf.h" #include "conf.h"
#include "confile_utils.h"
#include "console.h" #include "console.h"
#include "error.h" #include "error.h"
#include "log.h" #include "log.h"
...@@ -81,7 +84,7 @@ ...@@ -81,7 +84,7 @@
lxc_log_define(lxc_start, lxc); lxc_log_define(lxc_start, lxc);
extern void mod_all_rdeps(struct lxc_container *c, bool inc); extern void mod_all_rdeps(struct lxc_container *c, bool inc);
static bool do_destroy_container(struct lxc_conf *conf); static bool do_destroy_container(struct lxc_handler *handler);
static int lxc_rmdir_onedev_wrapper(void *data); static int lxc_rmdir_onedev_wrapper(void *data);
static void lxc_destroy_container_on_signal(struct lxc_handler *handler, static void lxc_destroy_container_on_signal(struct lxc_handler *handler,
const char *name); const char *name);
...@@ -531,7 +534,12 @@ struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf, ...@@ -531,7 +534,12 @@ struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf,
memset(handler, 0, sizeof(*handler)); memset(handler, 0, sizeof(*handler));
handler->ttysock[0] = handler->ttysock[1] = -1; /* Note that am_unpriv() checks the effective uid. We probably don't
* care if we are real root only if we are running as root so this
* should be fine.
*/
handler->am_root = !am_unpriv();
handler->data_sock[0] = handler->data_sock[1] = -1;
handler->conf = conf; handler->conf = conf;
handler->lxcpath = lxcpath; handler->lxcpath = lxcpath;
handler->pinfd = -1; handler->pinfd = -1;
...@@ -755,9 +763,9 @@ void lxc_fini(const char *name, struct lxc_handler *handler) ...@@ -755,9 +763,9 @@ void lxc_fini(const char *name, struct lxc_handler *handler)
free(cur); free(cur);
} }
if (handler->ttysock[0] != -1) { if (handler->data_sock[0] != -1) {
close(handler->ttysock[0]); close(handler->data_sock[0]);
close(handler->ttysock[1]); close(handler->data_sock[1]);
} }
if (handler->conf->ephemeral == 1 && handler->conf->reboot != 1) if (handler->conf->ephemeral == 1 && handler->conf->reboot != 1)
...@@ -848,51 +856,6 @@ static int must_drop_cap_sys_boot(struct lxc_conf *conf) ...@@ -848,51 +856,6 @@ static int must_drop_cap_sys_boot(struct lxc_conf *conf)
return 0; return 0;
} }
/* netpipe is used in the unprivileged case to pass veth device names from
 * parent to child. lxc_spawn() stores the read end of its pipe here;
 * read_unpriv_netifindex() then reads one IFNAMSIZ-sized name per veth device
 * from it and closes it. A value of -1 means that no pipe is in use.
 */
static int netpipe = -1;
/* Return how many entries of @network are of type LXC_NET_VETH. */
static inline int count_veths(struct lxc_list *network)
{
	int nveths = 0;
	struct lxc_list *it;

	lxc_list_for_each(it, network) {
		const struct lxc_netdev *dev = it->elem;

		if (dev->type == LXC_NET_VETH)
			nveths++;
	}

	return nveths;
}
/* Read the name of every veth device in @network from netpipe.
 *
 * Does nothing and returns 0 when netpipe is -1. Otherwise a fresh IFNAMSIZ
 * buffer is allocated for each LXC_NET_VETH entry, assigned to netdev->name
 * and filled with exactly IFNAMSIZ bytes read from netpipe. The pipe is closed
 * before returning, whether or not reading succeeded.
 *
 * Returns 0 on success and -1 on allocation failure or on a failed or short
 * read.
 */
static int read_unpriv_netifindex(struct lxc_list *network)
{
	int ret = 0;
	struct lxc_list *it;

	if (netpipe == -1)
		return 0;

	lxc_list_for_each(it, network) {
		struct lxc_netdev *dev = it->elem;

		if (dev->type != LXC_NET_VETH)
			continue;

		dev->name = malloc(IFNAMSIZ);
		if (!dev->name) {
			ERROR("Out of memory.");
			ret = -1;
			break;
		}

		if (read(netpipe, dev->name, IFNAMSIZ) != IFNAMSIZ) {
			ret = -1;
			break;
		}
	}

	/* Single exit path: the pipe is closed on success and on failure. */
	close(netpipe);
	return ret;
}
static int do_start(void *data) static int do_start(void *data)
{ {
struct lxc_list *iterator; struct lxc_list *iterator;
...@@ -945,8 +908,10 @@ static int do_start(void *data) ...@@ -945,8 +908,10 @@ static int do_start(void *data)
if (lxc_sync_barrier_parent(handler, LXC_SYNC_CONFIGURE)) if (lxc_sync_barrier_parent(handler, LXC_SYNC_CONFIGURE))
return -1; return -1;
if (read_unpriv_netifindex(&handler->conf->network) < 0) if (lxc_network_recv_veth_names_from_parent(handler) < 0) {
ERROR("Failed to receive veth names from parent");
goto out_warn_father; goto out_warn_father;
}
/* If we are in a new user namespace, become root there to have /* If we are in a new user namespace, become root there to have
* privilege over our namespace. * privilege over our namespace.
...@@ -1025,7 +990,10 @@ static int do_start(void *data) ...@@ -1025,7 +990,10 @@ static int do_start(void *data)
} }
/* Setup the container, ip, names, utsname, ... */ /* Setup the container, ip, names, utsname, ... */
if (lxc_setup(handler)) { ret = lxc_setup(handler);
close(handler->data_sock[0]);
close(handler->data_sock[1]);
if (ret < 0) {
ERROR("Failed to setup container \"%s\".", handler->name); ERROR("Failed to setup container \"%s\".", handler->name);
goto out_warn_father; goto out_warn_father;
} }
...@@ -1159,46 +1127,14 @@ out_error: ...@@ -1159,46 +1127,14 @@ out_error:
return -1; return -1;
} }
/* Record the ifindex and link name of every physical network device listed in
 * @conf so that they can be looked up again later.
 *
 * Does nothing and returns 0 unless the real uid is 0. For each LXC_NET_PHYS
 * entry of conf->network, conf->saved_nics is grown by one element holding
 * netdev->ifindex and a heap copy of netdev->link, and conf->num_savednics is
 * incremented.
 *
 * @conf : container configuration whose saved_nics array is extended
 *
 * Returns 0 on success and -1 if memory could not be allocated. On failure
 * conf->saved_nics and conf->num_savednics still describe the entries that
 * were stored successfully.
 */
static int save_phys_nics(struct lxc_conf *conf)
{
	struct lxc_list *iterator;
	int am_root = (getuid() == 0);

	if (!am_root)
		return 0;

	lxc_list_for_each(iterator, &conf->network) {
		struct saved_nic *nics, *nic;
		struct lxc_netdev *netdev = iterator->elem;

		if (netdev->type != LXC_NET_PHYS)
			continue;

		/* Grow through a temporary: assigning realloc()'s result
		 * straight to conf->saved_nics would, on failure, leak the old
		 * array and leave num_savednics pointing into a NULL array.
		 */
		nics = realloc(conf->saved_nics,
			       (conf->num_savednics + 1) * sizeof(*nics));
		if (!nics)
			return -1;
		conf->saved_nics = nics;

		nic = &nics[conf->num_savednics];
		nic->ifindex = netdev->ifindex;
		nic->orig_name = strdup(netdev->link);
		if (!nic->orig_name)
			return -1;

		INFO("Stored saved_nic #%d idx %d name %s.", conf->num_savednics,
		     nic->ifindex, nic->orig_name);
		conf->num_savednics++;
	}

	return 0;
}
static int lxc_recv_ttys_from_child(struct lxc_handler *handler) static int lxc_recv_ttys_from_child(struct lxc_handler *handler)
{ {
int i; int i;
int *ttyfds;
struct lxc_pty_info *pty_info; struct lxc_pty_info *pty_info;
int ret = -1; int ret = -1;
int sock = handler->ttysock[1]; int sock = handler->data_sock[1];
struct lxc_conf *conf = handler->conf; struct lxc_conf *conf = handler->conf;
struct lxc_tty_info *tty_info = &conf->tty_info; struct lxc_tty_info *tty_info = &conf->tty_info;
size_t num_ttyfds = (2 * conf->tty);
if (!conf->tty) if (!conf->tty)
return 0; return 0;
...@@ -1207,29 +1143,27 @@ static int lxc_recv_ttys_from_child(struct lxc_handler *handler) ...@@ -1207,29 +1143,27 @@ static int lxc_recv_ttys_from_child(struct lxc_handler *handler)
if (!tty_info->pty_info) if (!tty_info->pty_info)
return -1; return -1;
ttyfds = malloc(num_ttyfds * sizeof(int)); for (i = 0; i < conf->tty; i++) {
if (!ttyfds) int ttyfds[2];
return -1;
ret = lxc_abstract_unix_recv_fds(sock, ttyfds, 2, NULL, 0);
if (ret < 0)
break;
ret = lxc_abstract_unix_recv_fds(sock, ttyfds, num_ttyfds, NULL, 0); pty_info = &tty_info->pty_info[i];
for (i = 0; (ret >= 0 && *ttyfds != -1) && (i < num_ttyfds); i++) {
pty_info = &tty_info->pty_info[i / 2];
pty_info->busy = 0; pty_info->busy = 0;
pty_info->slave = ttyfds[i++]; pty_info->master = ttyfds[0];
pty_info->master = ttyfds[i]; pty_info->slave = ttyfds[1];
TRACE("received pty with master fd %d and slave fd %d from " TRACE("Received pty with master fd %d and slave fd %d from "
"parent", pty_info->master, pty_info->slave); "parent", pty_info->master, pty_info->slave);
} }
tty_info->nbtty = conf->tty;
free(ttyfds);
if (ret < 0) if (ret < 0)
ERROR("failed to receive %d ttys from child: %s", conf->tty, ERROR("Failed to receive %d ttys from child: %s", conf->tty,
strerror(errno)); strerror(errno));
else else
TRACE("received %d ttys from child", conf->tty); TRACE("Received %d ttys from child", conf->tty);
tty_info->nbtty = conf->tty;
return ret; return ret;
} }
...@@ -1268,16 +1202,14 @@ void resolve_clone_flags(struct lxc_handler *handler) ...@@ -1268,16 +1202,14 @@ void resolve_clone_flags(struct lxc_handler *handler)
*/ */
static int lxc_spawn(struct lxc_handler *handler) static int lxc_spawn(struct lxc_handler *handler)
{ {
int failed_before_rename = 0; int i, flags, ret;
const char *name = handler->name; const char *name = handler->name;
bool cgroups_connected = false;
int saved_ns_fd[LXC_NS_MAX];
int preserve_mask = 0, i, flags;
int netpipepair[2], nveths;
bool wants_to_map_ids; bool wants_to_map_ids;
int saved_ns_fd[LXC_NS_MAX];
struct lxc_list *id_map; struct lxc_list *id_map;
int failed_before_rename = 0, preserve_mask = 0;
bool cgroups_connected = false;
netpipe = -1;
id_map = &handler->conf->id_map; id_map = &handler->conf->id_map;
wants_to_map_ids = !lxc_list_empty(id_map); wants_to_map_ids = !lxc_list_empty(id_map);
...@@ -1288,7 +1220,9 @@ static int lxc_spawn(struct lxc_handler *handler) ...@@ -1288,7 +1220,9 @@ static int lxc_spawn(struct lxc_handler *handler)
if (lxc_sync_init(handler)) if (lxc_sync_init(handler))
return -1; return -1;
if (socketpair(AF_UNIX, SOCK_DGRAM, 0, handler->ttysock) < 0) { ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0,
handler->data_sock);
if (ret < 0) {
lxc_sync_fini(handler); lxc_sync_fini(handler);
return -1; return -1;
} }
...@@ -1318,11 +1252,6 @@ static int lxc_spawn(struct lxc_handler *handler) ...@@ -1318,11 +1252,6 @@ static int lxc_spawn(struct lxc_handler *handler)
return -1; return -1;
} }
} }
if (save_phys_nics(handler->conf)) {
ERROR("Failed to save physical nic info.");
goto out_abort;
}
} }
if (!cgroup_init(handler)) { if (!cgroup_init(handler)) {
...@@ -1353,15 +1282,6 @@ static int lxc_spawn(struct lxc_handler *handler) ...@@ -1353,15 +1282,6 @@ static int lxc_spawn(struct lxc_handler *handler)
if (attach_ns(handler->conf->inherit_ns_fd) < 0) if (attach_ns(handler->conf->inherit_ns_fd) < 0)
goto out_delete_net; goto out_delete_net;
if (am_unpriv() && (nveths = count_veths(&handler->conf->network))) {
if (pipe(netpipepair) < 0) {
SYSERROR("Failed to create pipe.");
goto out_delete_net;
}
/* Store netpipe in the global var for do_start's use. */
netpipe = netpipepair[0];
}
/* Create a process in a new set of namespaces. */ /* Create a process in a new set of namespaces. */
flags = handler->clone_flags; flags = handler->clone_flags;
if (handler->clone_flags & CLONE_NEWUSER) { if (handler->clone_flags & CLONE_NEWUSER) {
...@@ -1377,6 +1297,7 @@ static int lxc_spawn(struct lxc_handler *handler) ...@@ -1377,6 +1297,7 @@ static int lxc_spawn(struct lxc_handler *handler)
SYSERROR("Failed to clone a new set of namespaces."); SYSERROR("Failed to clone a new set of namespaces.");
goto out_delete_net; goto out_delete_net;
} }
for (i = 0; i < LXC_NS_MAX; i++) for (i = 0; i < LXC_NS_MAX; i++)
if (flags & ns_info[i].clone_flag) if (flags & ns_info[i].clone_flag)
INFO("Cloned %s.", ns_info[i].flag_name); INFO("Cloned %s.", ns_info[i].flag_name);
...@@ -1428,30 +1349,33 @@ static int lxc_spawn(struct lxc_handler *handler) ...@@ -1428,30 +1349,33 @@ static int lxc_spawn(struct lxc_handler *handler)
if (failed_before_rename) if (failed_before_rename)
goto out_delete_net; goto out_delete_net;
handler->netnsfd = lxc_preserve_ns(handler->pid, "net");
if (handler->netnsfd < 0) {
ERROR("Failed to preserve network namespace");
goto out_delete_net;
}
/* Create the network configuration. */ /* Create the network configuration. */
if (handler->clone_flags & CLONE_NEWNET) { if (handler->clone_flags & CLONE_NEWNET) {
if (lxc_create_network(handler->lxcpath, handler->name, if (lxc_network_move_created_netdev_priv(handler->lxcpath,
&handler->conf->network, handler->pid)) { handler->name,
&handler->conf->network,
handler->pid)) {
ERROR("Failed to create the configured network."); ERROR("Failed to create the configured network.");
goto out_delete_net; goto out_delete_net;
} }
}
if (netpipe != -1) { if (lxc_create_network_unpriv(handler->lxcpath, handler->name,
struct lxc_list *iterator; &handler->conf->network,
struct lxc_netdev *netdev; handler->pid)) {
ERROR("Failed to create the configured network.");
close(netpipe); goto out_delete_net;
lxc_list_for_each(iterator, &handler->conf->network) {
netdev = iterator->elem;
if (netdev->type != LXC_NET_VETH)
continue;
if (write(netpipepair[1], netdev->name, IFNAMSIZ) != IFNAMSIZ) {
ERROR("Error writing veth name to container.");
goto out_delete_net;
}
} }
close(netpipepair[1]); }
if (lxc_network_send_veth_names_to_child(handler) < 0) {
ERROR("Failed to send veth names to child");
goto out_delete_net;
} }
/* Tell the child to continue its initialization. We'll get /* Tell the child to continue its initialization. We'll get
...@@ -1481,6 +1405,19 @@ static int lxc_spawn(struct lxc_handler *handler) ...@@ -1481,6 +1405,19 @@ static int lxc_spawn(struct lxc_handler *handler)
if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CGROUP)) if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CGROUP))
return -1; return -1;
if (lxc_network_recv_name_and_ifindex_from_child(handler) < 0) {
ERROR("Failed to receive names and ifindices for network "
"devices from child");
goto out_delete_net;
}
/* Now all networks are created, network devices are moved into place,
* and the correct names and ifindices in the respective namespaces have
* been recorded. The corresponding structs have now all been filled. So
* log them for debugging purposes.
*/
lxc_log_configured_netdevs(handler->conf);
/* Read tty fds allocated by child. */ /* Read tty fds allocated by child. */
if (lxc_recv_ttys_from_child(handler) < 0) { if (lxc_recv_ttys_from_child(handler) < 0) {
ERROR("Failed to receive tty info from child process."); ERROR("Failed to receive tty info from child process.");
...@@ -1497,15 +1434,22 @@ static int lxc_spawn(struct lxc_handler *handler) ...@@ -1497,15 +1434,22 @@ static int lxc_spawn(struct lxc_handler *handler)
} }
lxc_sync_fini(handler); lxc_sync_fini(handler);
handler->netnsfd = lxc_preserve_ns(handler->pid, "net");
return 0; return 0;
out_delete_net: out_delete_net:
if (cgroups_connected) if (cgroups_connected)
cgroup_disconnect(); cgroup_disconnect();
if (handler->clone_flags & CLONE_NEWNET)
lxc_delete_network(handler); if (handler->clone_flags & CLONE_NEWNET) {
DEBUG("Tearing down network devices");
if (!lxc_delete_network_priv(handler))
DEBUG("Failed tearing down network devices");
if (!lxc_delete_network_unpriv(handler))
DEBUG("Failed tearing down network devices");
}
out_abort: out_abort:
lxc_abort(name, handler); lxc_abort(name, handler);
lxc_sync_fini(handler); lxc_sync_fini(handler);
...@@ -1514,6 +1458,11 @@ out_abort: ...@@ -1514,6 +1458,11 @@ out_abort:
handler->pinfd = -1; handler->pinfd = -1;
} }
if (handler->netnsfd >= 0) {
close(handler->netnsfd);
handler->netnsfd = -1;
}
return -1; return -1;
} }
...@@ -1523,7 +1472,6 @@ int __lxc_start(const char *name, struct lxc_handler *handler, ...@@ -1523,7 +1472,6 @@ int __lxc_start(const char *name, struct lxc_handler *handler,
{ {
int status; int status;
int err = -1; int err = -1;
bool removed_all_netdevs = true;
struct lxc_conf *conf = handler->conf; struct lxc_conf *conf = handler->conf;
if (lxc_init(name, handler) < 0) { if (lxc_init(name, handler) < 0) {
...@@ -1580,10 +1528,6 @@ int __lxc_start(const char *name, struct lxc_handler *handler, ...@@ -1580,10 +1528,6 @@ int __lxc_start(const char *name, struct lxc_handler *handler,
err = lxc_poll(name, handler); err = lxc_poll(name, handler);
if (err) { if (err) {
ERROR("LXC mainloop exited with error: %d.", err); ERROR("LXC mainloop exited with error: %d.", err);
if (handler->netnsfd >= 0) {
close(handler->netnsfd);
handler->netnsfd = -1;
}
goto out_abort; goto out_abort;
} }
...@@ -1612,11 +1556,10 @@ int __lxc_start(const char *name, struct lxc_handler *handler, ...@@ -1612,11 +1556,10 @@ int __lxc_start(const char *name, struct lxc_handler *handler,
} }
} }
DEBUG("Pushing physical nics back to host namespace"); err = lxc_restore_phys_nics_to_netns(handler);
lxc_restore_phys_nics_to_netns(handler->netnsfd, handler->conf); if (err < 0)
ERROR("Failed to move physical network devices back to parent "
DEBUG("Tearing down virtual network devices used by container \"%s\".", name); "network namespace");
removed_all_netdevs = lxc_delete_network(handler);
if (handler->pinfd >= 0) { if (handler->pinfd >= 0) {
close(handler->pinfd); close(handler->pinfd);
...@@ -1625,12 +1568,18 @@ int __lxc_start(const char *name, struct lxc_handler *handler, ...@@ -1625,12 +1568,18 @@ int __lxc_start(const char *name, struct lxc_handler *handler,
lxc_monitor_send_exit_code(name, status, handler->lxcpath); lxc_monitor_send_exit_code(name, status, handler->lxcpath);
err = lxc_error_set_and_log(handler->pid, status); err = lxc_error_set_and_log(handler->pid, status);
out_fini: out_fini:
if (!removed_all_netdevs) { DEBUG("Tearing down network devices");
DEBUG("Failed tearing down network devices used by container. Trying again!"); if (!lxc_delete_network_priv(handler))
removed_all_netdevs = lxc_delete_network(handler); DEBUG("Failed tearing down network devices");
if (!removed_all_netdevs)
DEBUG("Failed tearing down network devices used by container. Not trying again!"); if (!lxc_delete_network_unpriv(handler))
DEBUG("Failed tearing down network devices");
if (handler->netnsfd >= 0) {
close(handler->netnsfd);
handler->netnsfd = -1;
} }
out_detach_blockdev: out_detach_blockdev:
...@@ -1692,7 +1641,7 @@ static void lxc_destroy_container_on_signal(struct lxc_handler *handler, ...@@ -1692,7 +1641,7 @@ static void lxc_destroy_container_on_signal(struct lxc_handler *handler,
int ret = 0; int ret = 0;
struct lxc_container *c; struct lxc_container *c;
if (handler->conf->rootfs.path && handler->conf->rootfs.mount) { if (handler->conf->rootfs.path && handler->conf->rootfs.mount) {
bret = do_destroy_container(handler->conf); bret = do_destroy_container(handler);
if (!bret) { if (!bret) {
ERROR("Error destroying rootfs for container \"%s\".", name); ERROR("Error destroying rootfs for container \"%s\".", name);
return; return;
...@@ -1718,7 +1667,7 @@ static void lxc_destroy_container_on_signal(struct lxc_handler *handler, ...@@ -1718,7 +1667,7 @@ static void lxc_destroy_container_on_signal(struct lxc_handler *handler,
} }
} }
if (am_unpriv()) if (!handler->am_root)
ret = userns_exec_1(handler->conf, lxc_rmdir_onedev_wrapper, ret = userns_exec_1(handler->conf, lxc_rmdir_onedev_wrapper,
destroy, "lxc_rmdir_onedev_wrapper"); destroy, "lxc_rmdir_onedev_wrapper");
else else
...@@ -1737,14 +1686,14 @@ static int lxc_rmdir_onedev_wrapper(void *data) ...@@ -1737,14 +1686,14 @@ static int lxc_rmdir_onedev_wrapper(void *data)
return lxc_rmdir_onedev(arg, NULL); return lxc_rmdir_onedev(arg, NULL);
} }
static bool do_destroy_container(struct lxc_conf *conf) { static bool do_destroy_container(struct lxc_handler *handler) {
if (am_unpriv()) { if (!handler->am_root) {
if (userns_exec_1(conf, storage_destroy_wrapper, conf, if (userns_exec_1(handler->conf, storage_destroy_wrapper,
"storage_destroy_wrapper") < 0) handler->conf, "storage_destroy_wrapper") < 0)
return false; return false;
return true; return true;
} }
return storage_destroy(conf); return storage_destroy(handler->conf);
} }
...@@ -5,6 +5,8 @@ ...@@ -5,6 +5,8 @@
* *
* Authors: * Authors:
* Daniel Lezcano <daniel.lezcano at free.fr> * Daniel Lezcano <daniel.lezcano at free.fr>
* Serge Hallyn <serge@hallyn.com>
* Christian Brauner <christian.brauner@ubuntu.com>
* *
* This library is free software; you can redistribute it and/or * This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public * modify it under the terms of the GNU Lesser General Public
...@@ -24,39 +26,86 @@ ...@@ -24,39 +26,86 @@
#define __LXC_START_H #define __LXC_START_H
#include <signal.h> #include <signal.h>
#include <stdbool.h>
#include <sys/param.h> #include <sys/param.h>
#include <sys/socket.h> #include <sys/socket.h>
#include <sys/un.h> #include <sys/un.h>
#include <stdbool.h>
#include "conf.h" #include "conf.h"
#include "config.h" #include "config.h"
#include "state.h"
#include "namespace.h" #include "namespace.h"
#include "state.h"
struct lxc_handler { struct lxc_handler {
pid_t pid; /* The clone flags that were requested. */
char *name;
lxc_state_t state;
int clone_flags; int clone_flags;
int sigfd;
sigset_t oldmask; /* File descriptors referring to the network namespace of the container. */
struct lxc_conf *conf; int netnsfd;
struct lxc_operations *ops;
void *data; /* File descriptor to pin the rootfs for privileged containers. */
int sv[2];
int pinfd; int pinfd;
const char *lxcpath;
void *cgroup_data; /* Signal file descriptor. */
int ttysock[2]; // socketpair for child->parent tty fd passing int sigfd;
bool backgrounded; // indicates whether should we close std{in,out,err} on start
/* List of file descriptors referring to the namespaces of the
* container. Note that these are not necessarily identical to
* the "clone_flags" handler field in case namespace inheritance is
* requested.
*/
int nsfd[LXC_NS_MAX]; int nsfd[LXC_NS_MAX];
int netnsfd;
/* The socketpair() fds used to wait on successful daemonized /* Abstract unix domain SOCK_STREAM socketpair to pass arbitrary data
* startup. * between child and parent.
*/ */
int data_sock[2];
/* The socketpair() fds used to wait on successful daemonized startup. */
int state_socket_pair[2]; int state_socket_pair[2];
/* Socketpair to synchronize processes during container creation. */
int sync_sock[2];
/* The name of the container. */
char *name;
/* The path the container is running in. */
const char *lxcpath;
/* Whether the container's startup process euid is 0. */
bool am_root;
/* Indicates whether we should close std{in,out,err} on start. */
bool backgrounded;
/* The child's pid. */
pid_t pid;
/* The signal mask prior to setting up the signal file descriptor. */
sigset_t oldmask;
/* The container's in-memory configuration. */
struct lxc_conf *conf;
/* A list of clients registered to be informed about a container state. */
struct lxc_list state_clients; struct lxc_list state_clients;
/* A set of operations to be performed at various stages of the
* container's life.
*/
struct lxc_operations *ops;
/* This holds the cgroup information. Note that the data here is
* specific to the cgroup driver used.
*/
void *cgroup_data;
/* Data to be passed to handler ops. */
void *data;
/* Current state of the container. */
lxc_state_t state;
}; };
struct lxc_operations { struct lxc_operations {
...@@ -89,8 +138,8 @@ extern void lxc_fini(const char *name, struct lxc_handler *handler); ...@@ -89,8 +138,8 @@ extern void lxc_fini(const char *name, struct lxc_handler *handler);
*/ */
extern int lxc_check_inherited(struct lxc_conf *conf, bool closeall, extern int lxc_check_inherited(struct lxc_conf *conf, bool closeall,
int *fds_to_ignore, size_t len_fds); int *fds_to_ignore, size_t len_fds);
int __lxc_start(const char *, struct lxc_handler *, struct lxc_operations *, extern int __lxc_start(const char *, struct lxc_handler *,
void *, const char *, bool); struct lxc_operations *, void *, const char *, bool);
extern void resolve_clone_flags(struct lxc_handler *handler); extern void resolve_clone_flags(struct lxc_handler *handler);
#endif #endif
......
...@@ -86,63 +86,63 @@ static int __sync_barrier(int fd, int sequence) ...@@ -86,63 +86,63 @@ static int __sync_barrier(int fd, int sequence)
int lxc_sync_barrier_parent(struct lxc_handler *handler, int sequence) int lxc_sync_barrier_parent(struct lxc_handler *handler, int sequence)
{ {
return __sync_barrier(handler->sv[0], sequence); return __sync_barrier(handler->sync_sock[0], sequence);
} }
int lxc_sync_barrier_child(struct lxc_handler *handler, int sequence) int lxc_sync_barrier_child(struct lxc_handler *handler, int sequence)
{ {
return __sync_barrier(handler->sv[1], sequence); return __sync_barrier(handler->sync_sock[1], sequence);
} }
int lxc_sync_wake_parent(struct lxc_handler *handler, int sequence) int lxc_sync_wake_parent(struct lxc_handler *handler, int sequence)
{ {
return __sync_wake(handler->sv[0], sequence); return __sync_wake(handler->sync_sock[0], sequence);
} }
int lxc_sync_wait_parent(struct lxc_handler *handler, int sequence) int lxc_sync_wait_parent(struct lxc_handler *handler, int sequence)
{ {
return __sync_wait(handler->sv[0], sequence); return __sync_wait(handler->sync_sock[0], sequence);
} }
int lxc_sync_wait_child(struct lxc_handler *handler, int sequence) int lxc_sync_wait_child(struct lxc_handler *handler, int sequence)
{ {
return __sync_wait(handler->sv[1], sequence); return __sync_wait(handler->sync_sock[1], sequence);
} }
int lxc_sync_wake_child(struct lxc_handler *handler, int sequence) int lxc_sync_wake_child(struct lxc_handler *handler, int sequence)
{ {
return __sync_wake(handler->sv[1], sequence); return __sync_wake(handler->sync_sock[1], sequence);
} }
int lxc_sync_init(struct lxc_handler *handler) int lxc_sync_init(struct lxc_handler *handler)
{ {
int ret; int ret;
ret = socketpair(AF_LOCAL, SOCK_STREAM, 0, handler->sv); ret = socketpair(AF_LOCAL, SOCK_STREAM, 0, handler->sync_sock);
if (ret) { if (ret) {
SYSERROR("failed to create synchronization socketpair"); SYSERROR("failed to create synchronization socketpair");
return -1; return -1;
} }
/* Be sure we don't inherit this after the exec */ /* Be sure we don't inherit this after the exec */
fcntl(handler->sv[0], F_SETFD, FD_CLOEXEC); fcntl(handler->sync_sock[0], F_SETFD, FD_CLOEXEC);
return 0; return 0;
} }
void lxc_sync_fini_child(struct lxc_handler *handler) void lxc_sync_fini_child(struct lxc_handler *handler)
{ {
if (handler->sv[0] != -1) { if (handler->sync_sock[0] != -1) {
close(handler->sv[0]); close(handler->sync_sock[0]);
handler->sv[0] = -1; handler->sync_sock[0] = -1;
} }
} }
void lxc_sync_fini_parent(struct lxc_handler *handler) void lxc_sync_fini_parent(struct lxc_handler *handler)
{ {
if (handler->sv[1] != -1) { if (handler->sync_sock[1] != -1) {
close(handler->sv[1]); close(handler->sync_sock[1]);
handler->sv[1] = -1; handler->sync_sock[1] = -1;
} }
} }
......
...@@ -2406,3 +2406,24 @@ bool has_fs_type(const char *path, fs_type_magic magic_val) ...@@ -2406,3 +2406,24 @@ bool has_fs_type(const char *path, fs_type_magic magic_val)
return has_type; return has_type;
} }
/*
 * lxc_nic_exists - check whether a network interface name is usable.
 *
 * @nic: interface name to look up. May be NULL.
 *
 * Returns true when @nic is the literal string "none", or when
 * /sys/class/net/<nic> can be stat()ed. Returns false when @nic is NULL,
 * empty, ".", "..", contains a '/', does not fit into the path buffer, or
 * when stat() fails.
 */
bool lxc_nic_exists(char *nic)
{
/* 15 == strlen("/sys/class/net/"). Parenthesized so that the expansion is
 * safe in any expression context.
 */
#define __LXC_SYS_CLASS_NET_LEN (15 + IFNAMSIZ + 1)
	char path[__LXC_SYS_CLASS_NET_LEN];
	int ret;
	struct stat sb;

	/* strcmp() on NULL is undefined, and an empty name would make the
	 * lookup below hit the /sys/class/net/ directory itself.
	 */
	if (!nic || nic[0] == '\0')
		return false;

	if (!strcmp(nic, "none"))
		return true;

	/* These would resolve to something other than an entry directly
	 * below /sys/class/net/, so they can never name an interface.
	 */
	if (strchr(nic, '/') || !strcmp(nic, ".") || !strcmp(nic, ".."))
		return false;

	ret = snprintf(path, sizeof(path), "/sys/class/net/%s", nic);
	if (ret < 0 || (size_t)ret >= sizeof(path))
		return false;

	return stat(path, &sb) == 0;
}
...@@ -390,7 +390,8 @@ void *must_realloc(void *orig, size_t sz); ...@@ -390,7 +390,8 @@ void *must_realloc(void *orig, size_t sz);
/* __typeof__ should be safe to use with all compilers. */ /* __typeof__ should be safe to use with all compilers. */
typedef __typeof__(((struct statfs *)NULL)->f_type) fs_type_magic; typedef __typeof__(((struct statfs *)NULL)->f_type) fs_type_magic;
bool has_fs_type(const char *path, fs_type_magic magic_val); extern bool has_fs_type(const char *path, fs_type_magic magic_val);
bool is_fs_type(const struct statfs *fs, fs_type_magic magic_val); extern bool is_fs_type(const struct statfs *fs, fs_type_magic magic_val);
extern bool lxc_nic_exists(char *nic);
#endif /* __LXC_UTILS_H */ #endif /* __LXC_UTILS_H */
...@@ -92,7 +92,15 @@ configure_ubuntu() ...@@ -92,7 +92,15 @@ configure_ubuntu()
password=$5 password=$5
# configure the network using the dhcp # configure the network using the dhcp
cat <<EOF > $rootfs/etc/network/interfaces if chroot $rootfs which netplan >/dev/null 2>&1; then
cat <<EOF > $rootfs/etc/netplan/10-lxc.yaml
network:
ethernets:
eth0: {dhcp4: true}
version: 2
EOF
else
cat <<EOF > $rootfs/etc/network/interfaces
# This file describes the network interfaces available on your system # This file describes the network interfaces available on your system
# and how to activate them. For more information, see interfaces(5). # and how to activate them. For more information, see interfaces(5).
...@@ -103,6 +111,7 @@ iface lo inet loopback ...@@ -103,6 +111,7 @@ iface lo inet loopback
auto eth0 auto eth0
iface eth0 inet dhcp iface eth0 inet dhcp
EOF EOF
fi
# set the hostname # set the hostname
cat <<EOF > $rootfs/etc/hostname cat <<EOF > $rootfs/etc/hostname
...@@ -366,7 +375,13 @@ download_ubuntu() ...@@ -366,7 +375,13 @@ download_ubuntu()
debootstrap_parameters="$debootstrap_parameters --variant=$variant" debootstrap_parameters="$debootstrap_parameters --variant=$variant"
fi fi
if [ "$variant" = 'minbase' ]; then if [ "$variant" = 'minbase' ]; then
packages_template="${packages_template},sudo,ifupdown,isc-dhcp-client" packages_template="${packages_template},sudo"
# Newer releases use netplan, EOL releases not supported
case $release in
trusty|xenial|zesty)
packages_template="${packages_template},ifupdown,isc-dhcp-client"
;;
esac
fi fi
echo "Installing packages in template: ${packages_template}" echo "Installing packages in template: ${packages_template}"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment