Unverified Commit 88e27cd6 by Stéphane Graber Committed by GitHub

Merge pull request #3699 from brauner/2021-02-26/network

network: fix networks with switched names
parents 32d40452 3a197a1b
......@@ -165,7 +165,7 @@ struct lxc_netdev *lxc_network_add(struct lxc_list *networks, int idx, bool tail
__do_free struct lxc_netdev *netdev = NULL;
/* network does not exist */
netdev = malloc(sizeof(*netdev));
netdev = zalloc(sizeof(*netdev));
if (!netdev)
return ret_set_errno(NULL, ENOMEM);
......@@ -177,7 +177,7 @@ struct lxc_netdev *lxc_network_add(struct lxc_list *networks, int idx, bool tail
netdev->idx = idx;
/* prepare new list */
newlist = malloc(sizeof(*newlist));
newlist = zalloc(sizeof(*newlist));
if (!newlist)
return ret_set_errno(NULL, ENOMEM);
......
......@@ -37,6 +37,7 @@
#include "network.h"
#include "nl.h"
#include "process_utils.h"
#include "string_utils.h"
#include "syscall_wrappers.h"
#include "utils.h"
......@@ -46,8 +47,74 @@
lxc_log_define(network, lxc);
typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);
typedef int (*instantiate_ns_cb)(struct lxc_netdev *);
typedef int (*netdev_configure_server_cb)(struct lxc_handler *, struct lxc_netdev *);
typedef int (*netdev_configure_container_cb)(struct lxc_netdev *);
typedef int (*netdev_shutdown_server_cb)(struct lxc_handler *, struct lxc_netdev *);
const struct lxc_network_info {
const char *name;
const char template[IFNAMSIZ];
size_t template_len;
} lxc_network_info[LXC_NET_MAXCONFTYPE + 1] = {
[LXC_NET_EMPTY] = { "empty", "emptXXXXXX", STRLITERALLEN("emptXXXXXX") },
[LXC_NET_VETH] = { "veth", "vethXXXXXX", STRLITERALLEN("vethXXXXXX") },
[LXC_NET_MACVLAN] = { "macvlan", "macvXXXXXX", STRLITERALLEN("macvXXXXXX") },
[LXC_NET_IPVLAN] = { "ipvlan", "ipvlXXXXXX", STRLITERALLEN("ipvlXXXXXX") },
[LXC_NET_PHYS] = { "phys", "physXXXXXX", STRLITERALLEN("physXXXXXX") },
[LXC_NET_VLAN] = { "vlan", "vlanXXXXXX", STRLITERALLEN("vlanXXXXXX") },
[LXC_NET_NONE] = { "none", "noneXXXXXX", STRLITERALLEN("noneXXXXXX") },
[LXC_NET_MAXCONFTYPE] = { NULL, "", 0 }
};
const char *lxc_net_type_to_str(int type)
{
if (type < 0 || type > LXC_NET_MAXCONFTYPE)
return NULL;
return lxc_network_info[type].name;
}
static const char padchar[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
char *lxc_ifname_alnum_case_sensitive(char *template)
{
char name[IFNAMSIZ];
size_t i = 0;
#ifdef HAVE_RAND_R
unsigned int seed;
seed = randseed(false);
#else
(void)randseed(true);
#endif
if (strlen(template) >= IFNAMSIZ)
return NULL;
/* Generate random names until we find one that doesn't exist. */
for (;;) {
name[0] = '\0';
(void)strlcpy(name, template, IFNAMSIZ);
for (i = 0; i < strlen(name); i++) {
if (name[i] == 'X') {
#ifdef HAVE_RAND_R
name[i] = padchar[rand_r(&seed) % strlen(padchar)];
#else
name[i] = padchar[rand() % strlen(padchar)];
#endif
}
}
if (if_nametoindex(name) == 0)
break;
}
(void)strlcpy(template, name, strlen(template) + 1);
return template;
}
static const char loop_device[] = "lo";
static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
......@@ -537,7 +604,7 @@ static int setup_veth_ovs_bridge_vlan(char *veth1, struct lxc_netdev *netdev)
return 0;
}
static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
static int netdev_configure_server_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
int err;
unsigned int mtu = 1500;
......@@ -591,11 +658,28 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd
if (err)
return log_error_errno(-1, -err, "Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
/*
* Veth devices are directly created in the container's network
* namespace so the device doesn't need to be moved into the
* container's network namespace. Make this explicit by setting the
* devices ifindex to 0.
*/
netdev->ifindex = 0;
strlcpy(netdev->created_name, veth2, IFNAMSIZ);
/* changing the high byte of the mac address to 0xfe, the bridge interface
/*
* Since the device won't be moved transient name generation won't
* happen. But the transient name is needed for the container to
* retrieve the ifindex for the device.
*/
strlcpy(netdev->transient_name, veth2, IFNAMSIZ);
/*
* Changing the high byte of the mac address to 0xfe, the bridge interface
* will always keep the host's mac address and not take the mac address
* of a container */
* of a container.
*/
err = setup_private_host_hw_addr(veth1);
if (err) {
errno = -err;
......@@ -771,7 +855,7 @@ out_delete:
return -1;
}
static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
static int netdev_configure_server_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
char peer[IFNAMSIZ];
int err;
......@@ -837,7 +921,7 @@ static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *n
goto on_error;
}
DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
DEBUG("Instantiated macvlan \"%s\" with ifindex %d and mode %d",
peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
return 0;
......@@ -921,7 +1005,7 @@ static int lxc_ipvlan_create(const char *parent, const char *name, int mode, int
return netlink_transaction(nlh_ptr, nlmsg, answer);
}
static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
static int netdev_configure_server_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
char peer[IFNAMSIZ];
int err;
......@@ -985,7 +1069,7 @@ static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *ne
goto on_error;
}
DEBUG("Instantiated ipvlan \"%s\" with ifindex is %d and mode %d", peer,
DEBUG("Instantiated ipvlan \"%s\" with ifindex %d and mode %d", peer,
netdev->ifindex, netdev->priv.macvlan_attr.mode);
return 0;
......@@ -995,7 +1079,7 @@ on_error:
return -1;
}
static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
static int netdev_configure_server_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
char peer[IFNAMSIZ];
int err;
......@@ -1059,7 +1143,7 @@ static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netd
}
}
DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\"", peer,
DEBUG("Instantiated vlan \"%s\" with ifindex \"%d\"", peer,
netdev->ifindex);
return 0;
......@@ -1069,7 +1153,7 @@ on_error:
return -1;
}
static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
static int netdev_configure_server_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
int err, mtu_orig = 0;
......@@ -1133,13 +1217,13 @@ static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netd
return -1;
}
DEBUG("Instantiated phys \"%s\" with ifindex is \"%d\"", netdev->link,
DEBUG("Instantiated phys \"%s\" with ifindex \"%d\"", netdev->link,
netdev->ifindex);
return 0;
}
static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
static int netdev_configure_server_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
int ret;
char *argv[] = {
......@@ -1159,45 +1243,44 @@ static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *net
return 0;
}
static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
static int netdev_configure_server_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
netdev->ifindex = 0;
return 0;
}
static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
[LXC_NET_VETH] = instantiate_veth,
[LXC_NET_MACVLAN] = instantiate_macvlan,
[LXC_NET_IPVLAN] = instantiate_ipvlan,
[LXC_NET_VLAN] = instantiate_vlan,
[LXC_NET_PHYS] = instantiate_phys,
[LXC_NET_EMPTY] = instantiate_empty,
[LXC_NET_NONE] = instantiate_none,
static netdev_configure_server_cb netdev_configure_server[LXC_NET_MAXCONFTYPE + 1] = {
[LXC_NET_VETH] = netdev_configure_server_veth,
[LXC_NET_MACVLAN] = netdev_configure_server_macvlan,
[LXC_NET_IPVLAN] = netdev_configure_server_ipvlan,
[LXC_NET_VLAN] = netdev_configure_server_vlan,
[LXC_NET_PHYS] = netdev_configure_server_phys,
[LXC_NET_EMPTY] = netdev_configure_server_empty,
[LXC_NET_NONE] = netdev_configure_server_none,
};
static int __instantiate_ns_common(struct lxc_netdev *netdev)
static int __netdev_configure_container_common(struct lxc_netdev *netdev)
{
char current_ifname[IFNAMSIZ];
netdev->ifindex = if_nametoindex(netdev->created_name);
netdev->ifindex = if_nametoindex(netdev->transient_name);
if (!netdev->ifindex)
return log_error_errno(-1,
errno, "Failed to retrieve ifindex for network device with name %s",
netdev->created_name);
netdev->transient_name);
if (is_empty_string(netdev->name))
(void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
if (!strequal(netdev->created_name, netdev->name)) {
if (!strequal(netdev->transient_name, netdev->name)) {
int ret;
ret = lxc_netdev_rename_by_name(netdev->created_name, netdev->name);
ret = lxc_netdev_rename_by_name(netdev->transient_name, netdev->name);
if (ret)
return log_error_errno(-1, -ret, "Failed to rename network device \"%s\" to \"%s\"",
netdev->created_name,
netdev->name);
netdev->transient_name, netdev->name);
TRACE("Renamed network device from \"%s\" to \"%s\"", netdev->created_name, netdev->name);
TRACE("Renamed network device from \"%s\" to \"%s\"", netdev->transient_name, netdev->name);
}
/*
......@@ -1213,57 +1296,58 @@ static int __instantiate_ns_common(struct lxc_netdev *netdev)
* later on send this information back to the parent.
*/
(void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
netdev->transient_name[0] = '\0';
return 0;
}
static int instantiate_ns_veth(struct lxc_netdev *netdev)
static int netdev_configure_container_veth(struct lxc_netdev *netdev)
{
return __instantiate_ns_common(netdev);
return __netdev_configure_container_common(netdev);
}
static int instantiate_ns_macvlan(struct lxc_netdev *netdev)
static int netdev_configure_container_macvlan(struct lxc_netdev *netdev)
{
return __instantiate_ns_common(netdev);
return __netdev_configure_container_common(netdev);
}
static int instantiate_ns_ipvlan(struct lxc_netdev *netdev)
static int netdev_configure_container_ipvlan(struct lxc_netdev *netdev)
{
return __instantiate_ns_common(netdev);
return __netdev_configure_container_common(netdev);
}
static int instantiate_ns_vlan(struct lxc_netdev *netdev)
static int netdev_configure_container_vlan(struct lxc_netdev *netdev)
{
return __instantiate_ns_common(netdev);
return __netdev_configure_container_common(netdev);
}
static int instantiate_ns_phys(struct lxc_netdev *netdev)
static int netdev_configure_container_phys(struct lxc_netdev *netdev)
{
return __instantiate_ns_common(netdev);
return __netdev_configure_container_common(netdev);
}
static int instantiate_ns_empty(struct lxc_netdev *netdev)
static int netdev_configure_container_empty(struct lxc_netdev *netdev)
{
return 0;
}
static int instantiate_ns_none(struct lxc_netdev *netdev)
static int netdev_configure_container_none(struct lxc_netdev *netdev)
{
return 0;
}
static instantiate_ns_cb netdev_ns_conf[LXC_NET_MAXCONFTYPE + 1] = {
[LXC_NET_VETH] = instantiate_ns_veth,
[LXC_NET_MACVLAN] = instantiate_ns_macvlan,
[LXC_NET_IPVLAN] = instantiate_ns_ipvlan,
[LXC_NET_VLAN] = instantiate_ns_vlan,
[LXC_NET_PHYS] = instantiate_ns_phys,
[LXC_NET_EMPTY] = instantiate_ns_empty,
[LXC_NET_NONE] = instantiate_ns_none,
static netdev_configure_container_cb netdev_configure_container[LXC_NET_MAXCONFTYPE + 1] = {
[LXC_NET_VETH] = netdev_configure_container_veth,
[LXC_NET_MACVLAN] = netdev_configure_container_macvlan,
[LXC_NET_IPVLAN] = netdev_configure_container_ipvlan,
[LXC_NET_VLAN] = netdev_configure_container_vlan,
[LXC_NET_PHYS] = netdev_configure_container_phys,
[LXC_NET_EMPTY] = netdev_configure_container_empty,
[LXC_NET_NONE] = netdev_configure_container_none,
};
static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
static int netdev_shutdown_server_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
int ret;
char *argv[] = {
......@@ -1290,7 +1374,7 @@ static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
return 0;
}
static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
static int netdev_shutdown_server_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
int ret;
char *argv[] = {
......@@ -1310,7 +1394,7 @@ static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netd
return 0;
}
static int shutdown_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
static int netdev_shutdown_server_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
int ret;
char *argv[] = {
......@@ -1330,7 +1414,7 @@ static int shutdown_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netde
return 0;
}
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
static int netdev_shutdown_server_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
int ret;
char *argv[] = {
......@@ -1350,7 +1434,7 @@ static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
return 0;
}
static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
static int netdev_shutdown_server_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
int ret;
char *argv[] = {
......@@ -1370,7 +1454,7 @@ static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
return 0;
}
static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
static int netdev_shutdown_server_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
int ret;
char *argv[] = {
......@@ -1389,19 +1473,19 @@ static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev
return 0;
}
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
static int netdev_shutdown_server_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
return 0;
}
static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
[LXC_NET_VETH] = shutdown_veth,
[LXC_NET_MACVLAN] = shutdown_macvlan,
[LXC_NET_IPVLAN] = shutdown_ipvlan,
[LXC_NET_VLAN] = shutdown_vlan,
[LXC_NET_PHYS] = shutdown_phys,
[LXC_NET_EMPTY] = shutdown_empty,
[LXC_NET_NONE] = shutdown_none,
static netdev_shutdown_server_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
[LXC_NET_VETH] = netdev_shutdown_server_veth,
[LXC_NET_MACVLAN] = netdev_shutdown_server_macvlan,
[LXC_NET_IPVLAN] = netdev_shutdown_server_ipvlan,
[LXC_NET_VLAN] = netdev_shutdown_server_vlan,
[LXC_NET_PHYS] = netdev_shutdown_server_phys,
[LXC_NET_EMPTY] = netdev_shutdown_server_empty,
[LXC_NET_NONE] = netdev_shutdown_server_none,
};
static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
......@@ -2734,66 +2818,6 @@ int lxc_bridge_attach(const char *bridge, const char *ifname)
return err;
}
static const char *const lxc_network_types[LXC_NET_MAXCONFTYPE + 1] = {
[LXC_NET_EMPTY] = "empty",
[LXC_NET_VETH] = "veth",
[LXC_NET_MACVLAN] = "macvlan",
[LXC_NET_IPVLAN] = "ipvlan",
[LXC_NET_PHYS] = "phys",
[LXC_NET_VLAN] = "vlan",
[LXC_NET_NONE] = "none",
};
const char *lxc_net_type_to_str(int type)
{
if (type < 0 || type > LXC_NET_MAXCONFTYPE)
return NULL;
return lxc_network_types[type];
}
static const char padchar[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
char *lxc_ifname_alnum_case_sensitive(char *template)
{
char name[IFNAMSIZ];
size_t i = 0;
#ifdef HAVE_RAND_R
unsigned int seed;
seed = randseed(false);
#else
(void)randseed(true);
#endif
if (strlen(template) >= IFNAMSIZ)
return NULL;
/* Generate random names until we find one that doesn't exist. */
for (;;) {
name[0] = '\0';
(void)strlcpy(name, template, IFNAMSIZ);
for (i = 0; i < strlen(name); i++) {
if (name[i] == 'X') {
#ifdef HAVE_RAND_R
name[i] = padchar[rand_r(&seed) % strlen(padchar)];
#else
name[i] = padchar[rand() % strlen(padchar)];
#endif
}
}
if (if_nametoindex(name) == 0)
break;
}
(void)strlcpy(template, name, strlen(template) + 1);
return template;
}
int setup_private_host_hw_addr(char *veth1)
{
__do_close int sockfd = -EBADF;
......@@ -2859,8 +2883,10 @@ int lxc_find_gateway_addresses(struct lxc_handler *handler)
}
#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcname,
struct lxc_netdev *netdev, pid_t pid, unsigned int hooks_version)
static int lxc_create_network_unpriv_exec(const char *lxcpath,
const char *lxcname,
struct lxc_netdev *netdev, pid_t pid,
unsigned int hooks_version)
{
int ret;
pid_t child;
......@@ -2871,7 +2897,9 @@ static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcna
size_t retlen;
if (netdev->type != LXC_NET_VETH)
return log_error_errno(-1, errno, "Network type %d not support for unprivileged use", netdev->type);
return log_error_errno(-1, errno,
"Network type %d not support for unprivileged use",
netdev->type);
ret = pipe(pipefd);
if (ret < 0)
......@@ -2913,8 +2941,7 @@ static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcna
pidstr[sizeof(pidstr) - 1] = '\0';
INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
lxcname, pidstr, netdev_link,
!is_empty_string(netdev->name) ? netdev->name : "(null)");
lxcname, pidstr, netdev_link, !is_empty_string(netdev->name) ? netdev->name : "(null)");
if (!is_empty_string(netdev->name))
execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
lxcpath, lxcname, pidstr, "veth", netdev_link,
......@@ -2941,7 +2968,8 @@ static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcna
ret = wait_for_pid(child);
close(pipefd[0]);
if (ret != 0 || bytes < 0)
return log_error(-1, "lxc-user-nic failed to configure requested network: %s", buffer[0] != '\0' ? buffer : "(null)");
return log_error(-1, "lxc-user-nic failed to configure requested network: %s",
buffer[0] != '\0' ? buffer : "(null)");
TRACE("Received output \"%s\" from lxc-user-nic", buffer);
/* netdev->name */
......@@ -2951,14 +2979,18 @@ static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcna
/*
* lxc-user-nic will take care of proper network device naming. So
* netdev->name and netdev->created_name need to be identical to not
* netdev->name and netdev->transient_name need to be identical to not
* trigger another rename later on.
*/
retlen = strlcpy(netdev->name, token, IFNAMSIZ);
if (retlen < IFNAMSIZ)
retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
if (retlen < IFNAMSIZ) {
retlen = strlcpy(netdev->transient_name, token, IFNAMSIZ);
if (retlen < IFNAMSIZ)
retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
}
if (retlen >= IFNAMSIZ)
return log_error_errno(-1, E2BIG, "Container side veth device name returned by lxc-user-nic is too long");
return log_error_errno(-1, E2BIG,
"Container side veth device name returned by lxc-user-nic is too long");
/* netdev->ifindex */
token = strtok_r(NULL, ":", &saveptr);
......@@ -2967,7 +2999,8 @@ static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcna
ret = lxc_safe_int(token, &netdev->ifindex);
if (ret < 0)
return log_error_errno(-1, -ret, "Failed to convert string \"%s\" to integer", token);
return log_error_errno(-1, -ret,
"Failed to convert string \"%s\" to integer", token);
/* netdev->priv.veth_attr.veth1 */
token = strtok_r(NULL, ":", &saveptr);
......@@ -2976,7 +3009,8 @@ static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcna
retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
if (retlen >= IFNAMSIZ)
return log_error_errno(-1, E2BIG, "Host side veth device name returned by lxc-user-nic is too long");
return log_error_errno(-1, E2BIG,
"Host side veth device name returned by lxc-user-nic is too long");
/* netdev->priv.veth_attr.ifindex */
token = strtok_r(NULL, ":", &saveptr);
......@@ -2985,7 +3019,8 @@ static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcna
ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
if (ret < 0)
return log_error_errno(-1, -ret, "Failed to convert string \"%s\" to integer", token);
return log_error_errno(-1, -ret,
"Failed to convert string \"%s\" to integer", token);
if (netdev->upscript) {
char *argv[] = {
......@@ -2999,7 +3034,7 @@ static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcna
netdev->upscript, "up", argv);
if (ret < 0)
return -1;
}
}
return 0;
}
......@@ -3379,13 +3414,85 @@ static int lxc_create_network_priv(struct lxc_handler *handler)
return log_error_errno(-1, errno, "Failed to setup l2proxy");
}
if (netdev_conf[netdev->type](handler, netdev))
if (netdev_configure_server[netdev->type](handler, netdev))
return log_error_errno(-1, errno, "Failed to create network device");
}
return 0;
}
/*
* LXC moves network devices into the target namespace based on their created
* name. The created name can either be randomly generated for e.g. veth
* devices or it can be the name of the existing device in the server's
* namespaces. This is e.g. the case when moving physical devices. However this
* can lead to weird clashes. Consider we have a network namespace that has the
* following devices:
* 4: eth1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
* link/ether 00:16:3e:91:d3:ae brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:e7:5d:10
* altname enp7s0
* 5: eth2: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
* link/ether 00:16:3e:e7:5d:10 brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:91:d3:ae
* altname enp8s0
*
* and the user generates the following network config for their container:
*
* lxc.net.0.type = phys
* lxc.net.0.name = eth1
* lxc.net.0.link = eth2
*
* lxc.net.1.type = phys
* lxc.net.1.name = eth2
* lxc.net.1.link = eth1
*
* This would cause LXC to move the devices eth1 and eth2 from the server's
* network namespace into the container's network namespace:
*
* 24: eth1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
* link/ether 00:16:3e:91:d3:ae brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:e7:5d:10
* altname enp7s0
* 25: eth2: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
* link/ether 00:16:3e:e7:5d:10 brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:91:d3:ae
* altname enp8s0
*
* According to the network config above we now need to rename the network
* devices in the container's network namespace. Let's say we start with
* renaming eth2 to eth1. This would immediately lead to a clash since the
* container's network namespace already contains a network device with that
* name. Renaming the other device would have the same problem.
*
* There are multiple ways to fix this but I'm concerned with keeping the logic
* somewhat reasonable which is why we simply start creating transient device
* names that are unique which we'll use to move and rename the network device
* in the container's network namespace at the same time. And then we rename
* based on those random devices names to the target name.
*
* Note that the transient name is based on the type of network device as
* specified in the LXC config. However, that doesn't mean it's correct. LXD
* passes veth devices and a range of other network devices (e.g. Infiniband
* VFs etc.) via LXC_NET_PHYS even though they're not really "physical" in the
* sense we like to think about it so you might see a veth device being
* assigned a "physXXXXXX" transient name. That's not a problem.
*/
static int create_transient_name(struct lxc_netdev *netdev)
{
const struct lxc_network_info *info;
if (!is_empty_string(netdev->transient_name))
return syserror_set(-EINVAL, "Network device already had a transient name %s",
netdev->transient_name);
info = &lxc_network_info[netdev->type];
strlcpy(netdev->transient_name, info->template, info->template_len + 1);
if (!lxc_ifname_alnum_case_sensitive(netdev->transient_name))
return syserror_set(-EINVAL, "Failed to create transient name for network device %s", netdev->created_name);
TRACE("Created transient name %s for network device", netdev->transient_name);
return 0;
}
int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
{
pid_t pid = handler->pid;
......@@ -3400,23 +3507,35 @@ int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
int ret;
struct lxc_netdev *netdev = iterator->elem;
/*
* Veth devices are directly created in the container's network
* namespace so the device doesn't need to be moved into the
* container's network namespace. The transient name will
* already have been set above when we created the veth tunnel.
*
* Other than this special case this also catches all
* LXC_NET_EMPTY and LXC_NET_NONE devices.
*/
if (!netdev->ifindex)
continue;
ret = create_transient_name(netdev);
if (ret < 0)
return ret;
if (netdev->type == LXC_NET_PHYS)
physname = is_wlan(netdev->link);
if (physname)
ret = lxc_netdev_move_wlan(physname, netdev->link, pid, NULL);
ret = lxc_netdev_move_wlan(physname, netdev->link, pid, netdev->transient_name);
else
ret = lxc_netdev_move_by_index(netdev->ifindex, pid, NULL);
ret = lxc_netdev_move_by_index(netdev->ifindex, pid, netdev->transient_name);
if (ret)
return log_error_errno(-1, -ret, "Failed to move network device \"%s\" with ifindex %d to network namespace %d",
netdev->created_name,
netdev->ifindex, pid);
return log_error_errno(-1, -ret, "Failed to move network device \"%s\" with ifindex %d to network namespace %d and rename to %s",
netdev->created_name, netdev->ifindex, pid, netdev->transient_name);
DEBUG("Moved network device \"%s\" with ifindex %d to network namespace of %d",
netdev->created_name, netdev->ifindex, pid);
DEBUG("Moved network device \"%s\" with ifindex %d to network namespace of %d and renamed to %s",
maybe_empty(netdev->created_name), netdev->ifindex, pid, netdev->transient_name);
}
return 0;
......@@ -3834,24 +3953,78 @@ static int lxc_network_setup_in_child_namespaces_common(struct lxc_netdev *netde
return 0;
}
/**
* Consider the following network layout:
*
* lxc.net.0.type = phys
* lxc.net.0.link = eth2
* lxc.net.0.name = eth%d
*
* lxc.net.1.type = phys
* lxc.net.1.link = eth1
* lxc.net.1.name = eth0
*
* If we simply follow this order and create the first network first the kernel
* will allocate eth0 for the first network but the second network requests
* that eth1 be renamed to eth0 in the container's network namespace which
* would lead to a clash.
*
* Note, we don't handle cases like:
*
* lxc.net.0.type = phys
* lxc.net.0.link = eth2
* lxc.net.0.name = eth0
*
* lxc.net.1.type = phys
* lxc.net.1.link = eth1
* lxc.net.1.name = eth0
*
* That'll brutally fail of course but there's nothing we can do about it.
*/
int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
struct lxc_list *network)
{
struct lxc_list *iterator;
bool needs_second_pass = false;
lxc_list_for_each (iterator, network) {
if (lxc_list_empty(network))
return 0;
/* Configure all devices that have a specific target name. */
lxc_list_for_each(iterator, network) {
struct lxc_netdev *netdev = iterator->elem;
int ret;
ret = netdev_ns_conf[netdev->type](netdev);
if (is_empty_string(netdev->name) || strequal(netdev->name, "eth%d")) {
needs_second_pass = true;
continue;
}
ret = netdev_configure_container[netdev->type](netdev);
if (!ret)
ret = lxc_network_setup_in_child_namespaces_common(netdev);
if (ret)
return log_error_errno(-1, errno, "Failed to setup netdev");
}
if (!lxc_list_empty(network))
INFO("Network has been setup");
INFO("Finished setting up network devices with caller assigned names");
if (needs_second_pass) {
/* Configure all devices that have a kernel assigned name. */
lxc_list_for_each(iterator, network) {
struct lxc_netdev *netdev = iterator->elem;
int ret;
if (!is_empty_string(netdev->name) && !strequal(netdev->name, "eth%d"))
continue;
ret = netdev_configure_container[netdev->type](netdev);
if (!ret)
ret = lxc_network_setup_in_child_namespaces_common(netdev);
if (ret)
return log_error_errno(-1, errno, "Failed to setup netdev");
}
INFO("Finished setting up network devices with kernel assigned names");
}
return 0;
}
......@@ -3873,11 +4046,11 @@ int lxc_network_send_to_child(struct lxc_handler *handler)
if (ret < 0)
return -1;
ret = lxc_send_nointr(data_sock, netdev->created_name, IFNAMSIZ, MSG_NOSIGNAL);
ret = lxc_send_nointr(data_sock, netdev->transient_name, IFNAMSIZ, MSG_NOSIGNAL);
if (ret < 0)
return -1;
TRACE("Sent network device name \"%s\" to child", netdev->created_name);
TRACE("Sent network device name \"%s\" to child", netdev->transient_name);
}
return 0;
......@@ -3900,11 +4073,11 @@ int lxc_network_recv_from_parent(struct lxc_handler *handler)
if (ret < 0)
return -1;
ret = lxc_recv_nointr(data_sock, netdev->created_name, IFNAMSIZ, 0);
ret = lxc_recv_nointr(data_sock, netdev->transient_name, IFNAMSIZ, 0);
if (ret < 0)
return -1;
TRACE("Received network device name \"%s\" from parent", netdev->created_name);
TRACE("Received network device name \"%s\" from parent", netdev->transient_name);
}
return 0;
......
......@@ -165,6 +165,7 @@ struct lxc_netdev {
bool l2proxy;
char name[IFNAMSIZ];
char created_name[IFNAMSIZ];
char transient_name[IFNAMSIZ];
char *hwaddr;
char *mtu;
union netdev_p priv;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment