#include "config-host.h"
-#include <signal.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/wait.h>
+#include <sys/socket.h>
#include <net/if.h>
#include "net.h"
+#include "monitor.h"
#include "sysemu.h"
#include "qemu-char.h"
#include "qemu-common.h"
+#include "qemu-error.h"
#include "net/tap-linux.h"
+#include "hw/vhost_net.h"
+
/* Maximum GSO packet size (64k) plus plenty of room for
* the ethernet and virtio_net headers
*/
#define TAP_BUFSIZE (4096 + 65536)
typedef struct TAPState {
- VLANClientState *vc;
+ VLANClientState nc;
int fd;
char down_script[1024];
char down_script_arg[128];
uint8_t buf[TAP_BUFSIZE];
unsigned int read_poll : 1;
unsigned int write_poll : 1;
- unsigned int has_vnet_hdr : 1;
unsigned int using_vnet_hdr : 1;
unsigned int has_ufo: 1;
+ VHostNetState *vhost_net;
+ unsigned host_vnet_hdr_len;
} TAPState;
static int launch_script(const char *setup_script, const char *ifname, int fd);
tap_write_poll(s, 0);
- qemu_flush_queued_packets(s->vc);
+ qemu_flush_queued_packets(&s->nc);
}
static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt)
return len;
}
-static ssize_t tap_receive_iov(VLANClientState *vc, const struct iovec *iov,
+static ssize_t tap_receive_iov(VLANClientState *nc, const struct iovec *iov,
int iovcnt)
{
- TAPState *s = vc->opaque;
+ TAPState *s = DO_UPCAST(TAPState, nc, nc);
const struct iovec *iovp = iov;
struct iovec iov_copy[iovcnt + 1];
- struct virtio_net_hdr hdr = { 0, };
+ struct virtio_net_hdr_mrg_rxbuf hdr = { };
- if (s->has_vnet_hdr && !s->using_vnet_hdr) {
+ if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
iov_copy[0].iov_base = &hdr;
- iov_copy[0].iov_len = sizeof(hdr);
+ iov_copy[0].iov_len = s->host_vnet_hdr_len;
memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
iovp = iov_copy;
iovcnt++;
return tap_write_packet(s, iovp, iovcnt);
}
-static ssize_t tap_receive_raw(VLANClientState *vc, const uint8_t *buf, size_t size)
+static ssize_t tap_receive_raw(VLANClientState *nc, const uint8_t *buf, size_t size)
{
- TAPState *s = vc->opaque;
+ TAPState *s = DO_UPCAST(TAPState, nc, nc);
struct iovec iov[2];
int iovcnt = 0;
- struct virtio_net_hdr hdr = { 0, };
+ struct virtio_net_hdr_mrg_rxbuf hdr = { };
- if (s->has_vnet_hdr) {
+ if (s->host_vnet_hdr_len) {
iov[iovcnt].iov_base = &hdr;
- iov[iovcnt].iov_len = sizeof(hdr);
+ iov[iovcnt].iov_len = s->host_vnet_hdr_len;
iovcnt++;
}
return tap_write_packet(s, iov, iovcnt);
}
-static ssize_t tap_receive(VLANClientState *vc, const uint8_t *buf, size_t size)
+static ssize_t tap_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
{
- TAPState *s = vc->opaque;
+ TAPState *s = DO_UPCAST(TAPState, nc, nc);
struct iovec iov[1];
- if (s->has_vnet_hdr && !s->using_vnet_hdr) {
- return tap_receive_raw(vc, buf, size);
+ if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
+ return tap_receive_raw(nc, buf, size);
}
iov[0].iov_base = (char *)buf;
{
TAPState *s = opaque;
- return qemu_can_send_packet(s->vc);
+ return qemu_can_send_packet(&s->nc);
}
#ifndef __sun__
}
#endif
-static void tap_send_completed(VLANClientState *vc, ssize_t len)
+static void tap_send_completed(VLANClientState *nc, ssize_t len)
{
- TAPState *s = vc->opaque;
+ TAPState *s = DO_UPCAST(TAPState, nc, nc);
tap_read_poll(s, 1);
}
break;
}
- if (s->has_vnet_hdr && !s->using_vnet_hdr) {
- buf += sizeof(struct virtio_net_hdr);
- size -= sizeof(struct virtio_net_hdr);
+ if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
+ buf += s->host_vnet_hdr_len;
+ size -= s->host_vnet_hdr_len;
}
- size = qemu_send_packet_async(s->vc, buf, size, tap_send_completed);
+ size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
if (size == 0) {
tap_read_poll(s, 0);
}
- } while (size > 0);
+ } while (size > 0 && qemu_can_send_packet(&s->nc));
}
-int tap_has_ufo(VLANClientState *vc)
+int tap_has_ufo(VLANClientState *nc)
{
- TAPState *s = vc->opaque;
+ TAPState *s = DO_UPCAST(TAPState, nc, nc);
- assert(vc->type == NET_CLIENT_TYPE_TAP);
+ assert(nc->info->type == NET_CLIENT_TYPE_TAP);
return s->has_ufo;
}
-int tap_has_vnet_hdr(VLANClientState *vc)
+/*
+ * Tell whether the tap client @nc currently has a non-zero host
+ * virtio-net header length.  @nc must be a tap client.
+ *
+ * Returns 1 if the header length is non-zero, 0 otherwise.
+ */
+int tap_has_vnet_hdr(VLANClientState *nc)
+{
+    TAPState *tap;
+
+    assert(nc->info->type == NET_CLIENT_TYPE_TAP);
+    tap = DO_UPCAST(TAPState, nc, nc);
+
+    return tap->host_vnet_hdr_len != 0;
+}
+
+int tap_has_vnet_hdr_len(VLANClientState *nc, int len)
{
- TAPState *s = vc->opaque;
+ TAPState *s = DO_UPCAST(TAPState, nc, nc);
- assert(vc->type == NET_CLIENT_TYPE_TAP);
+ assert(nc->info->type == NET_CLIENT_TYPE_TAP);
- return s->has_vnet_hdr;
+ return tap_probe_vnet_hdr_len(s->fd, len);
}
-void tap_using_vnet_hdr(VLANClientState *vc, int using_vnet_hdr)
+void tap_set_vnet_hdr_len(VLANClientState *nc, int len)
{
- TAPState *s = vc->opaque;
+ TAPState *s = DO_UPCAST(TAPState, nc, nc);
+
+ assert(nc->info->type == NET_CLIENT_TYPE_TAP);
+ assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
+ len == sizeof(struct virtio_net_hdr));
+
+ tap_fd_set_vnet_hdr_len(s->fd, len);
+ s->host_vnet_hdr_len = len;
+}
+
+void tap_using_vnet_hdr(VLANClientState *nc, int using_vnet_hdr)
+{
+ TAPState *s = DO_UPCAST(TAPState, nc, nc);
using_vnet_hdr = using_vnet_hdr != 0;
- assert(vc->type == NET_CLIENT_TYPE_TAP);
- assert(s->has_vnet_hdr == using_vnet_hdr);
+ assert(nc->info->type == NET_CLIENT_TYPE_TAP);
+ assert(!!s->host_vnet_hdr_len == using_vnet_hdr);
s->using_vnet_hdr = using_vnet_hdr;
}
-void tap_set_offload(VLANClientState *vc, int csum, int tso4,
+void tap_set_offload(VLANClientState *nc, int csum, int tso4,
int tso6, int ecn, int ufo)
{
- TAPState *s = vc->opaque;
- unsigned int offload = 0;
-
- if (csum) {
- offload |= TUN_F_CSUM;
- if (tso4)
- offload |= TUN_F_TSO4;
- if (tso6)
- offload |= TUN_F_TSO6;
- if ((tso4 || tso6) && ecn)
- offload |= TUN_F_TSO_ECN;
- if (ufo)
- offload |= TUN_F_UFO;
+ TAPState *s = DO_UPCAST(TAPState, nc, nc);
+ if (s->fd < 0) {
+ return;
}
- if (ioctl(s->fd, TUNSETOFFLOAD, offload) != 0) {
- offload &= ~TUN_F_UFO;
- if (ioctl(s->fd, TUNSETOFFLOAD, offload) != 0) {
- fprintf(stderr, "TUNSETOFFLOAD ioctl() failed: %s\n",
- strerror(errno));
- }
- }
+ tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo);
}
-static void tap_cleanup(VLANClientState *vc)
+static void tap_cleanup(VLANClientState *nc)
{
- TAPState *s = vc->opaque;
+ TAPState *s = DO_UPCAST(TAPState, nc, nc);
+
+ if (s->vhost_net) {
+ vhost_net_cleanup(s->vhost_net);
+ s->vhost_net = NULL;
+ }
- qemu_purge_queued_packets(vc);
+ qemu_purge_queued_packets(nc);
if (s->down_script[0])
launch_script(s->down_script, s->down_script_arg, s->fd);
tap_read_poll(s, 0);
tap_write_poll(s, 0);
close(s->fd);
- qemu_free(s);
+ s->fd = -1;
+}
+
+/*
+ * Poll callback for the tap client: switch read polling and write polling
+ * on or off together, according to @enable.
+ */
+static void tap_poll(VLANClientState *nc, bool enable)
+{
+    TAPState *tap = DO_UPCAST(TAPState, nc, nc);
+
+    tap_read_poll(tap, enable);
+    tap_write_poll(tap, enable);
+}
+
+/*
+ * Return the file descriptor stored in the tap state of @nc.
+ * @nc must be a tap client.
+ */
+int tap_get_fd(VLANClientState *nc)
+{
+    TAPState *tap;
+
+    assert(nc->info->type == NET_CLIENT_TYPE_TAP);
+    tap = DO_UPCAST(TAPState, nc, nc);
+
+    return tap->fd;
}
/* fd support */
+static NetClientInfo net_tap_info = { /* tap client description handed to qemu_new_net_client() in net_tap_fd_init() */
+ .type = NET_CLIENT_TYPE_TAP, /* the value the assert()s in the tap_*() accessors compare against */
+ .size = sizeof(TAPState), /* NOTE(review): presumably the allocation size for the client, since TAPState embeds it as 'nc' -- confirm */
+ .receive = tap_receive,
+ .receive_raw = tap_receive_raw,
+ .receive_iov = tap_receive_iov,
+ .poll = tap_poll,
+ .cleanup = tap_cleanup,
+};
+
static TAPState *net_tap_fd_init(VLANState *vlan,
const char *model,
const char *name,
int fd,
int vnet_hdr)
{
+ VLANClientState *nc;
TAPState *s;
- unsigned int offload;
- s = qemu_mallocz(sizeof(TAPState));
+ nc = qemu_new_net_client(&net_tap_info, vlan, NULL, model, name);
+
+ s = DO_UPCAST(TAPState, nc, nc);
+
s->fd = fd;
- s->has_vnet_hdr = vnet_hdr != 0;
+ s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
s->using_vnet_hdr = 0;
- s->vc = qemu_new_vlan_client(NET_CLIENT_TYPE_TAP,
- vlan, NULL, model, name, NULL,
- tap_receive, tap_receive_raw,
- tap_receive_iov, tap_cleanup, s);
- s->has_ufo = 0;
- /* Check if tap supports UFO */
- offload = TUN_F_CSUM | TUN_F_UFO;
- if (ioctl(s->fd, TUNSETOFFLOAD, offload) == 0)
- s->has_ufo = 1;
- tap_set_offload(s->vc, 0, 0, 0, 0, 0);
+ s->has_ufo = tap_probe_has_ufo(s->fd);
+ tap_set_offload(&s->nc, 0, 0, 0, 0, 0);
tap_read_poll(s, 1);
+ s->vhost_net = NULL;
return s;
}
static int launch_script(const char *setup_script, const char *ifname, int fd)
{
- sigset_t oldmask, mask;
int pid, status;
char *args[3];
char **parg;
- sigemptyset(&mask);
- sigaddset(&mask, SIGCHLD);
- sigprocmask(SIG_BLOCK, &mask, &oldmask);
-
/* try to launch network script */
pid = fork();
if (pid == 0) {
parg = args;
*parg++ = (char *)setup_script;
*parg++ = (char *)ifname;
- *parg++ = NULL;
+ *parg = NULL;
execv(setup_script, args);
_exit(1);
} else if (pid > 0) {
while (waitpid(pid, &status, 0) != pid) {
/* loop */
}
- sigprocmask(SIG_SETMASK, &oldmask, NULL);
if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
return 0;
return -1;
}
+/*
+ * Receive one file descriptor passed over the UNIX socket @c as
+ * SCM_RIGHTS ancillary data, together with a single byte of payload.
+ *
+ * Returns the received descriptor (>= 0) on success, or -1 with errno set
+ * on failure:
+ *   - errno is left as set by recvmsg() when the call itself fails, so a
+ *     caller can still retry on EINTR;
+ *   - EIO when the peer closed the socket without sending anything;
+ *   - EINVAL when data arrived without a usable SCM_RIGHTS message.
+ */
+static int recv_fd(int c)
+{
+    int fd;
+    uint8_t msgbuf[CMSG_SPACE(sizeof(fd))];
+    uint8_t req[1];
+    struct iovec iov = {
+        .iov_base = req,
+        .iov_len = sizeof(req),
+    };
+    struct msghdr msg = {
+        .msg_iov = &iov,
+        .msg_iovlen = 1,
+        .msg_control = msgbuf,
+        .msg_controllen = sizeof(msgbuf),
+    };
+    struct cmsghdr *cmsg;
+    ssize_t len;
+
+    memset(msgbuf, 0, sizeof(msgbuf));
+
+    len = recvmsg(c, &msg, 0);
+    if (len < 0) {
+        return -1;
+    }
+    if (len == 0) {
+        /* EOF must not be reported as 0: that is a valid descriptor number. */
+        errno = EIO;
+        return -1;
+    }
+
+    /* Only trust the control buffer if the kernel really filled it in. */
+    cmsg = CMSG_FIRSTHDR(&msg);
+    if (!cmsg || (msg.msg_flags & MSG_CTRUNC) ||
+        cmsg->cmsg_level != SOL_SOCKET ||
+        cmsg->cmsg_type != SCM_RIGHTS ||
+        cmsg->cmsg_len < CMSG_LEN(sizeof(fd))) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
+    return fd;
+}
+
+/*
+ * Run the bridge helper and collect the file descriptor it hands back.
+ *
+ * A socketpair is created and one end is passed to the helper as
+ * "--fd=N"; the descriptor is then read from the other end with recv_fd().
+ * When @helper contains a space or a tab it is treated as a shell command
+ * line and run through "/bin/sh -c", with "--br=@bridge" appended only if
+ * the command does not already contain "--br=".  Otherwise @helper is
+ * executed directly as a program path with "--br=@bridge".
+ *
+ * SIGCHLD is blocked in this process for the duration of the call; the
+ * previous signal mask is restored on every return path.
+ *
+ * Returns the received descriptor on success, -1 on failure.  A descriptor
+ * received from a helper that then exits unsuccessfully is closed rather
+ * than leaked.
+ */
+static int net_bridge_run_helper(const char *helper, const char *bridge)
+{
+    sigset_t oldmask, mask;
+    int pid, status;
+    char *args[5];
+    char **parg;
+    int sv[2];
+    int fd;
+
+    sigemptyset(&mask);
+    sigaddset(&mask, SIGCHLD);
+    sigprocmask(SIG_BLOCK, &mask, &oldmask);
+
+    if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
+        sigprocmask(SIG_SETMASK, &oldmask, NULL);
+        return -1;
+    }
+
+    /* try to launch bridge helper */
+    pid = fork();
+    if (pid == 0) {
+        int open_max = sysconf(_SC_OPEN_MAX), i;
+        char fd_buf[6+10];
+        char br_buf[6+IFNAMSIZ] = {0};
+        char helper_cmd[PATH_MAX + sizeof(fd_buf) + sizeof(br_buf) + 15];
+
+        /* keep only stdio and the helper's end of the socketpair open */
+        for (i = 0; i < open_max; i++) {
+            if (i != STDIN_FILENO &&
+                i != STDOUT_FILENO &&
+                i != STDERR_FILENO &&
+                i != sv[1]) {
+                close(i);
+            }
+        }
+
+        snprintf(fd_buf, sizeof(fd_buf), "%s%d", "--fd=", sv[1]);
+
+        if (strrchr(helper, ' ') || strrchr(helper, '\t')) {
+            /* assume helper is a command */
+
+            if (strstr(helper, "--br=") == NULL) {
+                snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge);
+            }
+
+            snprintf(helper_cmd, sizeof(helper_cmd), "%s %s %s %s",
+                     helper, "--use-vnet", fd_buf, br_buf);
+
+            parg = args;
+            *parg++ = (char *)"sh";
+            *parg++ = (char *)"-c";
+            *parg++ = helper_cmd;
+            *parg++ = NULL;
+
+            execv("/bin/sh", args);
+        } else {
+            /* assume helper is just the executable path name */
+
+            snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge);
+
+            parg = args;
+            *parg++ = (char *)helper;
+            *parg++ = (char *)"--use-vnet";
+            *parg++ = fd_buf;
+            *parg++ = br_buf;
+            *parg++ = NULL;
+
+            execv(helper, args);
+        }
+        /* only reached when execv() failed */
+        _exit(1);
+    }
+
+    if (pid < 0) {
+        /* fork() failed: release both socket ends and undo the mask change */
+        close(sv[0]);
+        close(sv[1]);
+        sigprocmask(SIG_SETMASK, &oldmask, NULL);
+        fprintf(stderr, "failed to launch bridge helper\n");
+        return -1;
+    }
+
+    close(sv[1]);
+
+    do {
+        fd = recv_fd(sv[0]);
+    } while (fd == -1 && errno == EINTR);
+
+    close(sv[0]);
+
+    while (waitpid(pid, &status, 0) != pid) {
+        /* loop */
+    }
+    sigprocmask(SIG_SETMASK, &oldmask, NULL);
+
+    if (fd < 0) {
+        fprintf(stderr, "failed to recv file descriptor\n");
+        return -1;
+    }
+
+    if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
+        return fd;
+    }
+
+    /* the helper reported failure: do not leak the descriptor it sent */
+    close(fd);
+    fprintf(stderr, "failed to launch bridge helper\n");
+    return -1;
+}
+
+/*
+ * Create a "bridge" network client backed by a descriptor obtained from
+ * the bridge helper.
+ *
+ * The "br" and "helper" options default to DEFAULT_BRIDGE_INTERFACE and
+ * DEFAULT_BRIDGE_HELPER when they are not set.  The descriptor returned by
+ * the helper is given O_NONBLOCK via fcntl(F_SETFL), probed for vnet
+ * header support and handed to net_tap_fd_init().
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int net_init_bridge(QemuOpts *opts, const char *name, VLANState *vlan)
+{
+    const char *helper_opt, *bridge_opt;
+    TAPState *tap;
+    int tap_fd, has_vnet_hdr;
+
+    if (!qemu_opt_get(opts, "br")) {
+        qemu_opt_set(opts, "br", DEFAULT_BRIDGE_INTERFACE);
+    }
+    if (!qemu_opt_get(opts, "helper")) {
+        qemu_opt_set(opts, "helper", DEFAULT_BRIDGE_HELPER);
+    }
+
+    /* the options are not modified past this point, so look them up once */
+    helper_opt = qemu_opt_get(opts, "helper");
+    bridge_opt = qemu_opt_get(opts, "br");
+
+    tap_fd = net_bridge_run_helper(helper_opt, bridge_opt);
+    if (tap_fd == -1) {
+        return -1;
+    }
+
+    fcntl(tap_fd, F_SETFL, O_NONBLOCK);
+
+    has_vnet_hdr = tap_probe_vnet_hdr(tap_fd);
+
+    tap = net_tap_fd_init(vlan, "bridge", name, tap_fd, has_vnet_hdr);
+    if (tap == NULL) {
+        close(tap_fd);
+        return -1;
+    }
+
+    snprintf(tap->nc.info_str, sizeof(tap->nc.info_str), "helper=%s,br=%s",
+             helper_opt, bridge_opt);
+
+    return 0;
+}
+
static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
{
int fd, vnet_hdr_required;
return fd;
}
-int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan)
+int net_init_tap(QemuOpts *opts, const char *name, VLANState *vlan)
{
TAPState *s;
- int fd, vnet_hdr;
+ int fd, vnet_hdr = 0;
+ const char *model;
if (qemu_opt_get(opts, "fd")) {
+ if (qemu_opt_get(opts, "ifname") ||
+ qemu_opt_get(opts, "script") ||
+ qemu_opt_get(opts, "downscript") ||
+ qemu_opt_get(opts, "vnet_hdr") ||
+ qemu_opt_get(opts, "helper")) {
+ error_report("ifname=, script=, downscript=, vnet_hdr=, "
+ "and helper= are invalid with fd=");
+ return -1;
+ }
+
+ fd = net_handle_fd_param(cur_mon, qemu_opt_get(opts, "fd"));
+ if (fd == -1) {
+ return -1;
+ }
+
+ fcntl(fd, F_SETFL, O_NONBLOCK);
+
+ vnet_hdr = tap_probe_vnet_hdr(fd);
+
+ model = "tap";
+
+ } else if (qemu_opt_get(opts, "helper")) {
if (qemu_opt_get(opts, "ifname") ||
qemu_opt_get(opts, "script") ||
qemu_opt_get(opts, "downscript") ||
qemu_opt_get(opts, "vnet_hdr")) {
- qemu_error("ifname=, script=, downscript= and vnet_hdr= is invalid with fd=\n");
+ error_report("ifname=, script=, downscript=, and vnet_hdr= "
+ "are invalid with helper=");
return -1;
}
- fd = net_handle_fd_param(mon, qemu_opt_get(opts, "fd"));
+ fd = net_bridge_run_helper(qemu_opt_get(opts, "helper"),
+ DEFAULT_BRIDGE_INTERFACE);
if (fd == -1) {
return -1;
}
fcntl(fd, F_SETFL, O_NONBLOCK);
vnet_hdr = tap_probe_vnet_hdr(fd);
+
+ model = "bridge";
+
} else {
if (!qemu_opt_get(opts, "script")) {
qemu_opt_set(opts, "script", DEFAULT_NETWORK_SCRIPT);
}
fd = net_tap_init(opts, &vnet_hdr);
+ if (fd == -1) {
+ return -1;
+ }
+
+ model = "tap";
}
- s = net_tap_fd_init(vlan, "tap", name, fd, vnet_hdr);
+ s = net_tap_fd_init(vlan, model, name, fd, vnet_hdr);
if (!s) {
close(fd);
return -1;
}
if (qemu_opt_get(opts, "fd")) {
- snprintf(s->vc->info_str, sizeof(s->vc->info_str), "fd=%d", fd);
+ snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd);
+ } else if (qemu_opt_get(opts, "helper")) {
+ snprintf(s->nc.info_str, sizeof(s->nc.info_str),
+ "helper=%s", qemu_opt_get(opts, "helper"));
} else {
const char *ifname, *script, *downscript;
script = qemu_opt_get(opts, "script");
downscript = qemu_opt_get(opts, "downscript");
- snprintf(s->vc->info_str, sizeof(s->vc->info_str),
+ snprintf(s->nc.info_str, sizeof(s->nc.info_str),
"ifname=%s,script=%s,downscript=%s",
ifname, script, downscript);
}
}
- if (vlan) {
- vlan->nb_host_devs++;
+ if (qemu_opt_get_bool(opts, "vhost", !!qemu_opt_get(opts, "vhostfd") ||
+ qemu_opt_get_bool(opts, "vhostforce", false))) {
+ int vhostfd, r;
+ bool force = qemu_opt_get_bool(opts, "vhostforce", false);
+ if (qemu_opt_get(opts, "vhostfd")) {
+ r = net_handle_fd_param(cur_mon, qemu_opt_get(opts, "vhostfd"));
+ if (r == -1) {
+ return -1;
+ }
+ vhostfd = r;
+ } else {
+ vhostfd = -1;
+ }
+ s->vhost_net = vhost_net_init(&s->nc, vhostfd, force);
+ if (!s->vhost_net) {
+ error_report("vhost-net requested but could not be initialized");
+ return -1;
+ }
+ } else if (qemu_opt_get(opts, "vhostfd")) {
+ error_report("vhostfd= is not valid without vhost");
+ return -1;
}
return 0;
}
+
+/*
+ * Return the vhost-net state stored in the tap state of @nc; this is NULL
+ * when none has been set.  @nc must be a tap client.
+ */
+VHostNetState *tap_get_vhost_net(VLANClientState *nc)
+{
+    TAPState *tap;
+
+    assert(nc->info->type == NET_CLIENT_TYPE_TAP);
+    tap = DO_UPCAST(TAPState, nc, nc);
+
+    return tap->vhost_net;
+}