]>
Commit | Line | Data |
---|---|---|
5281d757 MM |
1 | /* |
2 | * QEMU System Emulator | |
3 | * | |
4 | * Copyright (c) 2003-2008 Fabrice Bellard | |
5 | * Copyright (c) 2009 Red Hat, Inc. | |
6 | * | |
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy | |
8 | * of this software and associated documentation files (the "Software"), to deal | |
9 | * in the Software without restriction, including without limitation the rights | |
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
11 | * copies of the Software, and to permit persons to whom the Software is | |
12 | * furnished to do so, subject to the following conditions: | |
13 | * | |
14 | * The above copyright notice and this permission notice shall be included in | |
15 | * all copies or substantial portions of the Software. | |
16 | * | |
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
20 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
23 | * THE SOFTWARE. | |
24 | */ | |
25 | ||
2744d920 | 26 | #include "qemu/osdep.h" |
1422e32d | 27 | #include "tap_int.h" |
5281d757 | 28 | |
5281d757 | 29 | |
5281d757 | 30 | #include <sys/ioctl.h> |
5281d757 | 31 | #include <sys/wait.h> |
71f4effc | 32 | #include <sys/socket.h> |
5281d757 MM |
33 | #include <net/if.h> |
34 | ||
1422e32d | 35 | #include "net/net.h" |
a245fc18 | 36 | #include "clients.h" |
83c9089e | 37 | #include "monitor/monitor.h" |
9c17d615 | 38 | #include "sysemu/sysemu.h" |
da34e65c | 39 | #include "qapi/error.h" |
5281d757 | 40 | #include "qemu-common.h" |
f348b6d1 | 41 | #include "qemu/cutils.h" |
1de7afc9 | 42 | #include "qemu/error-report.h" |
5281d757 | 43 | |
1422e32d | 44 | #include "net/tap.h" |
5281d757 | 45 | |
0d09e41a | 46 | #include "net/vhost_net.h" |
82b0d80e | 47 | |
5281d757 | 48 | typedef struct TAPState { |
4e68f7a0 | 49 | NetClientState nc; |
5281d757 MM |
50 | int fd; |
51 | char down_script[1024]; | |
52 | char down_script_arg[128]; | |
d32fcad3 | 53 | uint8_t buf[NET_BUFSIZE]; |
ec45f083 JW |
54 | bool read_poll; |
55 | bool write_poll; | |
56 | bool using_vnet_hdr; | |
57 | bool has_ufo; | |
16dbaf90 | 58 | bool enabled; |
82b0d80e | 59 | VHostNetState *vhost_net; |
ef4252b1 | 60 | unsigned host_vnet_hdr_len; |
9e32ff32 | 61 | Notifier exit; |
5281d757 MM |
62 | } TAPState; |
63 | ||
ac4fcf56 MA |
64 | static void launch_script(const char *setup_script, const char *ifname, |
65 | int fd, Error **errp); | |
5281d757 | 66 | |
5281d757 MM |
67 | static void tap_send(void *opaque); |
68 | static void tap_writable(void *opaque); | |
69 | ||
70 | static void tap_update_fd_handler(TAPState *s) | |
71 | { | |
82e1cc4b FZ |
72 | qemu_set_fd_handler(s->fd, |
73 | s->read_poll && s->enabled ? tap_send : NULL, | |
74 | s->write_poll && s->enabled ? tap_writable : NULL, | |
75 | s); | |
5281d757 MM |
76 | } |
77 | ||
ec45f083 | 78 | static void tap_read_poll(TAPState *s, bool enable) |
5281d757 | 79 | { |
ec45f083 | 80 | s->read_poll = enable; |
5281d757 MM |
81 | tap_update_fd_handler(s); |
82 | } | |
83 | ||
ec45f083 | 84 | static void tap_write_poll(TAPState *s, bool enable) |
5281d757 | 85 | { |
ec45f083 | 86 | s->write_poll = enable; |
5281d757 MM |
87 | tap_update_fd_handler(s); |
88 | } | |
89 | ||
90 | static void tap_writable(void *opaque) | |
91 | { | |
92 | TAPState *s = opaque; | |
93 | ||
ec45f083 | 94 | tap_write_poll(s, false); |
5281d757 | 95 | |
3e35ba93 | 96 | qemu_flush_queued_packets(&s->nc); |
5281d757 MM |
97 | } |
98 | ||
99 | static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt) | |
100 | { | |
101 | ssize_t len; | |
102 | ||
103 | do { | |
104 | len = writev(s->fd, iov, iovcnt); | |
105 | } while (len == -1 && errno == EINTR); | |
106 | ||
107 | if (len == -1 && errno == EAGAIN) { | |
ec45f083 | 108 | tap_write_poll(s, true); |
5281d757 MM |
109 | return 0; |
110 | } | |
111 | ||
112 | return len; | |
113 | } | |
114 | ||
4e68f7a0 | 115 | static ssize_t tap_receive_iov(NetClientState *nc, const struct iovec *iov, |
5281d757 MM |
116 | int iovcnt) |
117 | { | |
3e35ba93 | 118 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
5281d757 MM |
119 | const struct iovec *iovp = iov; |
120 | struct iovec iov_copy[iovcnt + 1]; | |
ef4252b1 | 121 | struct virtio_net_hdr_mrg_rxbuf hdr = { }; |
5281d757 | 122 | |
ef4252b1 | 123 | if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { |
5281d757 | 124 | iov_copy[0].iov_base = &hdr; |
ef4252b1 | 125 | iov_copy[0].iov_len = s->host_vnet_hdr_len; |
5281d757 MM |
126 | memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov)); |
127 | iovp = iov_copy; | |
128 | iovcnt++; | |
129 | } | |
130 | ||
131 | return tap_write_packet(s, iovp, iovcnt); | |
132 | } | |
133 | ||
4e68f7a0 | 134 | static ssize_t tap_receive_raw(NetClientState *nc, const uint8_t *buf, size_t size) |
5281d757 | 135 | { |
3e35ba93 | 136 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
5281d757 MM |
137 | struct iovec iov[2]; |
138 | int iovcnt = 0; | |
ef4252b1 | 139 | struct virtio_net_hdr_mrg_rxbuf hdr = { }; |
5281d757 | 140 | |
ef4252b1 | 141 | if (s->host_vnet_hdr_len) { |
5281d757 | 142 | iov[iovcnt].iov_base = &hdr; |
ef4252b1 | 143 | iov[iovcnt].iov_len = s->host_vnet_hdr_len; |
5281d757 MM |
144 | iovcnt++; |
145 | } | |
146 | ||
147 | iov[iovcnt].iov_base = (char *)buf; | |
148 | iov[iovcnt].iov_len = size; | |
149 | iovcnt++; | |
150 | ||
151 | return tap_write_packet(s, iov, iovcnt); | |
152 | } | |
153 | ||
4e68f7a0 | 154 | static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t size) |
5281d757 | 155 | { |
3e35ba93 | 156 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
5281d757 MM |
157 | struct iovec iov[1]; |
158 | ||
ef4252b1 | 159 | if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { |
3e35ba93 | 160 | return tap_receive_raw(nc, buf, size); |
5281d757 MM |
161 | } |
162 | ||
163 | iov[0].iov_base = (char *)buf; | |
164 | iov[0].iov_len = size; | |
165 | ||
166 | return tap_write_packet(s, iov, 1); | |
167 | } | |
168 | ||
966ea5ec MM |
#ifndef __sun__
/*
 * Read up to maxlen bytes from tapfd into buf with a single read() call and
 * return its result.  Not compiled when __sun__ is defined.
 */
ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
{
    ssize_t nread = read(tapfd, buf, maxlen);

    return nread;
}
#endif
175 | ||
4e68f7a0 | 176 | static void tap_send_completed(NetClientState *nc, ssize_t len) |
5281d757 | 177 | { |
3e35ba93 | 178 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
ec45f083 | 179 | tap_read_poll(s, true); |
5281d757 MM |
180 | } |
181 | ||
182 | static void tap_send(void *opaque) | |
183 | { | |
184 | TAPState *s = opaque; | |
185 | int size; | |
756ae78b | 186 | int packets = 0; |
5281d757 | 187 | |
a90a7425 | 188 | while (true) { |
5819c918 MM |
189 | uint8_t *buf = s->buf; |
190 | ||
191 | size = tap_read_packet(s->fd, s->buf, sizeof(s->buf)); | |
192 | if (size <= 0) { | |
193 | break; | |
194 | } | |
195 | ||
ef4252b1 MT |
196 | if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { |
197 | buf += s->host_vnet_hdr_len; | |
198 | size -= s->host_vnet_hdr_len; | |
5819c918 MM |
199 | } |
200 | ||
3e35ba93 | 201 | size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed); |
5819c918 | 202 | if (size == 0) { |
ec45f083 | 203 | tap_read_poll(s, false); |
68e5ec64 SH |
204 | break; |
205 | } else if (size < 0) { | |
206 | break; | |
5819c918 | 207 | } |
756ae78b WK |
208 | |
209 | /* | |
210 | * When the host keeps receiving more packets while tap_send() is | |
211 | * running we can hog the QEMU global mutex. Limit the number of | |
212 | * packets that are processed per tap_send() callback to prevent | |
213 | * stalling the guest. | |
214 | */ | |
215 | packets++; | |
216 | if (packets >= 50) { | |
217 | break; | |
218 | } | |
68e5ec64 | 219 | } |
5281d757 MM |
220 | } |
221 | ||
3bac80d3 | 222 | static bool tap_has_ufo(NetClientState *nc) |
5281d757 | 223 | { |
3e35ba93 | 224 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
5281d757 | 225 | |
f394b2e2 | 226 | assert(nc->info->type == NET_CLIENT_DRIVER_TAP); |
5281d757 MM |
227 | |
228 | return s->has_ufo; | |
229 | } | |
230 | ||
3bac80d3 | 231 | static bool tap_has_vnet_hdr(NetClientState *nc) |
5281d757 | 232 | { |
3e35ba93 | 233 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
5281d757 | 234 | |
f394b2e2 | 235 | assert(nc->info->type == NET_CLIENT_DRIVER_TAP); |
5281d757 | 236 | |
ef4252b1 | 237 | return !!s->host_vnet_hdr_len; |
5281d757 MM |
238 | } |
239 | ||
3bac80d3 | 240 | static bool tap_has_vnet_hdr_len(NetClientState *nc, int len) |
445d892f MT |
241 | { |
242 | TAPState *s = DO_UPCAST(TAPState, nc, nc); | |
243 | ||
f394b2e2 | 244 | assert(nc->info->type == NET_CLIENT_DRIVER_TAP); |
445d892f | 245 | |
e96dfd11 | 246 | return !!tap_probe_vnet_hdr_len(s->fd, len); |
445d892f MT |
247 | } |
248 | ||
3bac80d3 | 249 | static void tap_set_vnet_hdr_len(NetClientState *nc, int len) |
445d892f MT |
250 | { |
251 | TAPState *s = DO_UPCAST(TAPState, nc, nc); | |
252 | ||
f394b2e2 | 253 | assert(nc->info->type == NET_CLIENT_DRIVER_TAP); |
445d892f MT |
254 | assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) || |
255 | len == sizeof(struct virtio_net_hdr)); | |
256 | ||
257 | tap_fd_set_vnet_hdr_len(s->fd, len); | |
258 | s->host_vnet_hdr_len = len; | |
259 | } | |
260 | ||
3bac80d3 | 261 | static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr) |
5281d757 | 262 | { |
3e35ba93 | 263 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
5281d757 | 264 | |
f394b2e2 | 265 | assert(nc->info->type == NET_CLIENT_DRIVER_TAP); |
ef4252b1 | 266 | assert(!!s->host_vnet_hdr_len == using_vnet_hdr); |
5281d757 MM |
267 | |
268 | s->using_vnet_hdr = using_vnet_hdr; | |
269 | } | |
270 | ||
c80cd6bb GK |
271 | static int tap_set_vnet_le(NetClientState *nc, bool is_le) |
272 | { | |
273 | TAPState *s = DO_UPCAST(TAPState, nc, nc); | |
274 | ||
275 | return tap_fd_set_vnet_le(s->fd, is_le); | |
276 | } | |
277 | ||
278 | static int tap_set_vnet_be(NetClientState *nc, bool is_be) | |
279 | { | |
280 | TAPState *s = DO_UPCAST(TAPState, nc, nc); | |
281 | ||
282 | return tap_fd_set_vnet_be(s->fd, is_be); | |
283 | } | |
284 | ||
3bac80d3 | 285 | static void tap_set_offload(NetClientState *nc, int csum, int tso4, |
5281d757 MM |
286 | int tso6, int ecn, int ufo) |
287 | { | |
3e35ba93 | 288 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
27a6375d MT |
289 | if (s->fd < 0) { |
290 | return; | |
291 | } | |
5281d757 | 292 | |
27a6375d | 293 | tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo); |
5281d757 MM |
294 | } |
295 | ||
9e32ff32 MAL |
296 | static void tap_exit_notify(Notifier *notifier, void *data) |
297 | { | |
298 | TAPState *s = container_of(notifier, TAPState, exit); | |
299 | Error *err = NULL; | |
300 | ||
301 | if (s->down_script[0]) { | |
302 | launch_script(s->down_script, s->down_script_arg, s->fd, &err); | |
303 | if (err) { | |
304 | error_report_err(err); | |
305 | } | |
306 | } | |
307 | } | |
308 | ||
4e68f7a0 | 309 | static void tap_cleanup(NetClientState *nc) |
5281d757 | 310 | { |
3e35ba93 | 311 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
5281d757 | 312 | |
82b0d80e MT |
313 | if (s->vhost_net) { |
314 | vhost_net_cleanup(s->vhost_net); | |
e6bcb1b6 | 315 | g_free(s->vhost_net); |
43849424 | 316 | s->vhost_net = NULL; |
82b0d80e MT |
317 | } |
318 | ||
3e35ba93 | 319 | qemu_purge_queued_packets(nc); |
5281d757 | 320 | |
9e32ff32 MAL |
321 | tap_exit_notify(&s->exit, NULL); |
322 | qemu_remove_exit_notifier(&s->exit); | |
5281d757 | 323 | |
ec45f083 JW |
324 | tap_read_poll(s, false); |
325 | tap_write_poll(s, false); | |
5281d757 | 326 | close(s->fd); |
27a6375d | 327 | s->fd = -1; |
5281d757 MM |
328 | } |
329 | ||
4e68f7a0 | 330 | static void tap_poll(NetClientState *nc, bool enable) |
ceb69615 MT |
331 | { |
332 | TAPState *s = DO_UPCAST(TAPState, nc, nc); | |
333 | tap_read_poll(s, enable); | |
334 | tap_write_poll(s, enable); | |
335 | } | |
336 | ||
4e68f7a0 | 337 | int tap_get_fd(NetClientState *nc) |
95d528a2 MT |
338 | { |
339 | TAPState *s = DO_UPCAST(TAPState, nc, nc); | |
f394b2e2 | 340 | assert(nc->info->type == NET_CLIENT_DRIVER_TAP); |
95d528a2 MT |
341 | return s->fd; |
342 | } | |
343 | ||
5281d757 MM |
344 | /* fd support */ |
345 | ||
3e35ba93 | 346 | static NetClientInfo net_tap_info = { |
f394b2e2 | 347 | .type = NET_CLIENT_DRIVER_TAP, |
3e35ba93 MM |
348 | .size = sizeof(TAPState), |
349 | .receive = tap_receive, | |
350 | .receive_raw = tap_receive_raw, | |
351 | .receive_iov = tap_receive_iov, | |
ceb69615 | 352 | .poll = tap_poll, |
3e35ba93 | 353 | .cleanup = tap_cleanup, |
2e753bcc VM |
354 | .has_ufo = tap_has_ufo, |
355 | .has_vnet_hdr = tap_has_vnet_hdr, | |
356 | .has_vnet_hdr_len = tap_has_vnet_hdr_len, | |
357 | .using_vnet_hdr = tap_using_vnet_hdr, | |
358 | .set_offload = tap_set_offload, | |
359 | .set_vnet_hdr_len = tap_set_vnet_hdr_len, | |
c80cd6bb GK |
360 | .set_vnet_le = tap_set_vnet_le, |
361 | .set_vnet_be = tap_set_vnet_be, | |
3e35ba93 MM |
362 | }; |
363 | ||
4e68f7a0 | 364 | static TAPState *net_tap_fd_init(NetClientState *peer, |
5281d757 MM |
365 | const char *model, |
366 | const char *name, | |
367 | int fd, | |
368 | int vnet_hdr) | |
369 | { | |
4e68f7a0 | 370 | NetClientState *nc; |
5281d757 | 371 | TAPState *s; |
5281d757 | 372 | |
ab5f3f84 | 373 | nc = qemu_new_net_client(&net_tap_info, peer, model, name); |
3e35ba93 MM |
374 | |
375 | s = DO_UPCAST(TAPState, nc, nc); | |
376 | ||
5281d757 | 377 | s->fd = fd; |
ef4252b1 | 378 | s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0; |
ec45f083 | 379 | s->using_vnet_hdr = false; |
9c282718 | 380 | s->has_ufo = tap_probe_has_ufo(s->fd); |
16dbaf90 | 381 | s->enabled = true; |
3e35ba93 | 382 | tap_set_offload(&s->nc, 0, 0, 0, 0, 0); |
58ddcd50 MT |
383 | /* |
384 | * Make sure host header length is set correctly in tap: | |
385 | * it might have been modified by another instance of qemu. | |
386 | */ | |
387 | if (tap_probe_vnet_hdr_len(s->fd, s->host_vnet_hdr_len)) { | |
388 | tap_fd_set_vnet_hdr_len(s->fd, s->host_vnet_hdr_len); | |
389 | } | |
ec45f083 | 390 | tap_read_poll(s, true); |
82b0d80e | 391 | s->vhost_net = NULL; |
9e32ff32 MAL |
392 | |
393 | s->exit.notify = tap_exit_notify; | |
394 | qemu_add_exit_notifier(&s->exit); | |
395 | ||
5281d757 MM |
396 | return s; |
397 | } | |
398 | ||
ac4fcf56 MA |
399 | static void launch_script(const char *setup_script, const char *ifname, |
400 | int fd, Error **errp) | |
5281d757 | 401 | { |
5281d757 MM |
402 | int pid, status; |
403 | char *args[3]; | |
404 | char **parg; | |
405 | ||
5281d757 MM |
406 | /* try to launch network script */ |
407 | pid = fork(); | |
ac4fcf56 MA |
408 | if (pid < 0) { |
409 | error_setg_errno(errp, errno, "could not launch network script %s", | |
410 | setup_script); | |
411 | return; | |
412 | } | |
5281d757 MM |
413 | if (pid == 0) { |
414 | int open_max = sysconf(_SC_OPEN_MAX), i; | |
415 | ||
13a12f86 PG |
416 | for (i = 3; i < open_max; i++) { |
417 | if (i != fd) { | |
5281d757 MM |
418 | close(i); |
419 | } | |
420 | } | |
421 | parg = args; | |
422 | *parg++ = (char *)setup_script; | |
423 | *parg++ = (char *)ifname; | |
9678d950 | 424 | *parg = NULL; |
5281d757 MM |
425 | execv(setup_script, args); |
426 | _exit(1); | |
ac4fcf56 | 427 | } else { |
5281d757 MM |
428 | while (waitpid(pid, &status, 0) != pid) { |
429 | /* loop */ | |
430 | } | |
5281d757 MM |
431 | |
432 | if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { | |
ac4fcf56 | 433 | return; |
5281d757 | 434 | } |
ac4fcf56 MA |
435 | error_setg(errp, "network script %s failed with status %d", |
436 | setup_script, status); | |
5281d757 | 437 | } |
5281d757 MM |
438 | } |
439 | ||
a7c36ee4 CB |
/*
 * Receive one file descriptor sent as SCM_RIGHTS ancillary data on the
 * UNIX socket 'c', together with one byte of ordinary data.
 *
 * Returns the received descriptor, or -1 with errno set:
 *   - errno from recvmsg() if the call itself failed (including EINTR),
 *   - ECONNRESET if the peer closed the socket without sending anything,
 *   - EBADMSG if data arrived without exactly one SCM_RIGHTS descriptor.
 */
static int recv_fd(int c)
{
    int fd;
    /* union keeps the control buffer suitably aligned for struct cmsghdr */
    union {
        struct cmsghdr align;
        uint8_t buf[CMSG_SPACE(sizeof(fd))];
    } control;
    struct msghdr msg;
    struct cmsghdr *cmsg;
    struct iovec iov;
    uint8_t req[1];
    ssize_t len;

    memset(&control, 0, sizeof(control));
    memset(&msg, 0, sizeof(msg));

    iov.iov_base = req;
    iov.iov_len = sizeof(req);

    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    msg.msg_control = control.buf;
    /* leave room for a single descriptor only */
    msg.msg_controllen = CMSG_LEN(sizeof(fd));

    len = recvmsg(c, &msg, 0);
    if (len < 0) {
        return -1;
    }
    if (len == 0) {
        /* EOF must not be reported as descriptor 0 */
        errno = ECONNRESET;
        return -1;
    }

    /* only trust the control data if the peer really sent a descriptor */
    cmsg = CMSG_FIRSTHDR(&msg);
    if (cmsg == NULL ||
        cmsg->cmsg_level != SOL_SOCKET ||
        cmsg->cmsg_type != SCM_RIGHTS ||
        cmsg->cmsg_len != CMSG_LEN(sizeof(fd))) {
        errno = EBADMSG;
        return -1;
    }

    memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
    return fd;
}
473 | ||
a8a21be9 MA |
474 | static int net_bridge_run_helper(const char *helper, const char *bridge, |
475 | Error **errp) | |
a7c36ee4 CB |
476 | { |
477 | sigset_t oldmask, mask; | |
478 | int pid, status; | |
479 | char *args[5]; | |
480 | char **parg; | |
481 | int sv[2]; | |
482 | ||
483 | sigemptyset(&mask); | |
484 | sigaddset(&mask, SIGCHLD); | |
485 | sigprocmask(SIG_BLOCK, &mask, &oldmask); | |
486 | ||
487 | if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { | |
a8a21be9 | 488 | error_setg_errno(errp, errno, "socketpair() failed"); |
a7c36ee4 CB |
489 | return -1; |
490 | } | |
491 | ||
492 | /* try to launch bridge helper */ | |
493 | pid = fork(); | |
a8a21be9 MA |
494 | if (pid < 0) { |
495 | error_setg_errno(errp, errno, "Can't fork bridge helper"); | |
496 | return -1; | |
497 | } | |
a7c36ee4 CB |
498 | if (pid == 0) { |
499 | int open_max = sysconf(_SC_OPEN_MAX), i; | |
500 | char fd_buf[6+10]; | |
501 | char br_buf[6+IFNAMSIZ] = {0}; | |
502 | char helper_cmd[PATH_MAX + sizeof(fd_buf) + sizeof(br_buf) + 15]; | |
503 | ||
13a12f86 PG |
504 | for (i = 3; i < open_max; i++) { |
505 | if (i != sv[1]) { | |
a7c36ee4 CB |
506 | close(i); |
507 | } | |
508 | } | |
509 | ||
510 | snprintf(fd_buf, sizeof(fd_buf), "%s%d", "--fd=", sv[1]); | |
511 | ||
512 | if (strrchr(helper, ' ') || strrchr(helper, '\t')) { | |
513 | /* assume helper is a command */ | |
514 | ||
515 | if (strstr(helper, "--br=") == NULL) { | |
516 | snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge); | |
517 | } | |
518 | ||
519 | snprintf(helper_cmd, sizeof(helper_cmd), "%s %s %s %s", | |
520 | helper, "--use-vnet", fd_buf, br_buf); | |
521 | ||
522 | parg = args; | |
523 | *parg++ = (char *)"sh"; | |
524 | *parg++ = (char *)"-c"; | |
525 | *parg++ = helper_cmd; | |
526 | *parg++ = NULL; | |
527 | ||
528 | execv("/bin/sh", args); | |
529 | } else { | |
530 | /* assume helper is just the executable path name */ | |
531 | ||
532 | snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge); | |
533 | ||
534 | parg = args; | |
535 | *parg++ = (char *)helper; | |
536 | *parg++ = (char *)"--use-vnet"; | |
537 | *parg++ = fd_buf; | |
538 | *parg++ = br_buf; | |
539 | *parg++ = NULL; | |
540 | ||
541 | execv(helper, args); | |
542 | } | |
543 | _exit(1); | |
544 | ||
a8a21be9 | 545 | } else { |
a7c36ee4 | 546 | int fd; |
a8a21be9 | 547 | int saved_errno; |
a7c36ee4 CB |
548 | |
549 | close(sv[1]); | |
550 | ||
551 | do { | |
552 | fd = recv_fd(sv[0]); | |
553 | } while (fd == -1 && errno == EINTR); | |
a8a21be9 | 554 | saved_errno = errno; |
a7c36ee4 CB |
555 | |
556 | close(sv[0]); | |
557 | ||
558 | while (waitpid(pid, &status, 0) != pid) { | |
559 | /* loop */ | |
560 | } | |
561 | sigprocmask(SIG_SETMASK, &oldmask, NULL); | |
562 | if (fd < 0) { | |
a8a21be9 MA |
563 | error_setg_errno(errp, saved_errno, |
564 | "failed to recv file descriptor"); | |
a7c36ee4 CB |
565 | return -1; |
566 | } | |
a8a21be9 MA |
567 | if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { |
568 | error_setg(errp, "bridge helper failed"); | |
569 | return -1; | |
a7c36ee4 | 570 | } |
a8a21be9 | 571 | return fd; |
a7c36ee4 | 572 | } |
a7c36ee4 CB |
573 | } |
574 | ||
cebea510 | 575 | int net_init_bridge(const Netdev *netdev, const char *name, |
a30ecde6 | 576 | NetClientState *peer, Error **errp) |
a7c36ee4 | 577 | { |
f79b51b0 LE |
578 | const NetdevBridgeOptions *bridge; |
579 | const char *helper, *br; | |
a7c36ee4 CB |
580 | TAPState *s; |
581 | int fd, vnet_hdr; | |
582 | ||
f394b2e2 EB |
583 | assert(netdev->type == NET_CLIENT_DRIVER_BRIDGE); |
584 | bridge = &netdev->u.bridge; | |
f79b51b0 LE |
585 | |
586 | helper = bridge->has_helper ? bridge->helper : DEFAULT_BRIDGE_HELPER; | |
587 | br = bridge->has_br ? bridge->br : DEFAULT_BRIDGE_INTERFACE; | |
a7c36ee4 | 588 | |
a8a21be9 | 589 | fd = net_bridge_run_helper(helper, br, errp); |
a7c36ee4 CB |
590 | if (fd == -1) { |
591 | return -1; | |
592 | } | |
593 | ||
594 | fcntl(fd, F_SETFL, O_NONBLOCK); | |
a7c36ee4 | 595 | vnet_hdr = tap_probe_vnet_hdr(fd); |
d33d93b2 | 596 | s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr); |
a7c36ee4 | 597 | |
f79b51b0 LE |
598 | snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s,br=%s", helper, |
599 | br); | |
a7c36ee4 CB |
600 | |
601 | return 0; | |
602 | } | |
603 | ||
08c573a8 LE |
604 | static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr, |
605 | const char *setup_script, char *ifname, | |
468dd824 | 606 | size_t ifname_sz, int mq_required, Error **errp) |
5281d757 | 607 | { |
ac4fcf56 | 608 | Error *err = NULL; |
5281d757 | 609 | int fd, vnet_hdr_required; |
5281d757 | 610 | |
08c573a8 LE |
611 | if (tap->has_vnet_hdr) { |
612 | *vnet_hdr = tap->vnet_hdr; | |
5281d757 MM |
613 | vnet_hdr_required = *vnet_hdr; |
614 | } else { | |
08c573a8 | 615 | *vnet_hdr = 1; |
5281d757 MM |
616 | vnet_hdr_required = 0; |
617 | } | |
618 | ||
264986e2 | 619 | TFR(fd = tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required, |
468dd824 | 620 | mq_required, errp)); |
5281d757 MM |
621 | if (fd < 0) { |
622 | return -1; | |
623 | } | |
624 | ||
5281d757 MM |
625 | if (setup_script && |
626 | setup_script[0] != '\0' && | |
ac4fcf56 MA |
627 | strcmp(setup_script, "no") != 0) { |
628 | launch_script(setup_script, ifname, fd, &err); | |
629 | if (err) { | |
468dd824 | 630 | error_propagate(errp, err); |
ac4fcf56 MA |
631 | close(fd); |
632 | return -1; | |
633 | } | |
5281d757 MM |
634 | } |
635 | ||
5281d757 MM |
636 | return fd; |
637 | } | |
638 | ||
264986e2 JW |
639 | #define MAX_TAP_QUEUES 1024 |
640 | ||
445f116c MA |
641 | static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, |
642 | const char *model, const char *name, | |
643 | const char *ifname, const char *script, | |
644 | const char *downscript, const char *vhostfdname, | |
645 | int vnet_hdr, int fd, Error **errp) | |
5193e5fb | 646 | { |
1677f4c6 | 647 | Error *err = NULL; |
da4a4eac | 648 | TAPState *s = net_tap_fd_init(peer, model, name, fd, vnet_hdr); |
81647a65 | 649 | int vhostfd; |
5193e5fb | 650 | |
80b832c3 MA |
651 | tap_set_sndbuf(s->fd, tap, &err); |
652 | if (err) { | |
445f116c MA |
653 | error_propagate(errp, err); |
654 | return; | |
5193e5fb JW |
655 | } |
656 | ||
264986e2 | 657 | if (tap->has_fd || tap->has_fds) { |
5193e5fb JW |
658 | snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd); |
659 | } else if (tap->has_helper) { | |
660 | snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s", | |
661 | tap->helper); | |
662 | } else { | |
5193e5fb JW |
663 | snprintf(s->nc.info_str, sizeof(s->nc.info_str), |
664 | "ifname=%s,script=%s,downscript=%s", ifname, script, | |
665 | downscript); | |
666 | ||
667 | if (strcmp(downscript, "no") != 0) { | |
668 | snprintf(s->down_script, sizeof(s->down_script), "%s", downscript); | |
669 | snprintf(s->down_script_arg, sizeof(s->down_script_arg), | |
670 | "%s", ifname); | |
671 | } | |
672 | } | |
673 | ||
674 | if (tap->has_vhost ? tap->vhost : | |
675 | vhostfdname || (tap->has_vhostforce && tap->vhostforce)) { | |
81647a65 NN |
676 | VhostNetOptions options; |
677 | ||
1a1bfac9 | 678 | options.backend_type = VHOST_BACKEND_TYPE_KERNEL; |
81647a65 | 679 | options.net_backend = &s->nc; |
69e87b32 JW |
680 | if (tap->has_poll_us) { |
681 | options.busyloop_timeout = tap->poll_us; | |
682 | } else { | |
683 | options.busyloop_timeout = 0; | |
684 | } | |
5193e5fb | 685 | |
3a2d44f6 | 686 | if (vhostfdname) { |
1677f4c6 | 687 | vhostfd = monitor_fd_param(cur_mon, vhostfdname, &err); |
5193e5fb | 688 | if (vhostfd == -1) { |
445f116c MA |
689 | error_propagate(errp, err); |
690 | return; | |
5193e5fb JW |
691 | } |
692 | } else { | |
81647a65 NN |
693 | vhostfd = open("/dev/vhost-net", O_RDWR); |
694 | if (vhostfd < 0) { | |
445f116c MA |
695 | error_setg_errno(errp, errno, |
696 | "tap: open vhost char device failed"); | |
697 | return; | |
81647a65 | 698 | } |
c471ad0e | 699 | fcntl(vhostfd, F_SETFL, O_NONBLOCK); |
5193e5fb | 700 | } |
81647a65 | 701 | options.opaque = (void *)(uintptr_t)vhostfd; |
5193e5fb | 702 | |
81647a65 | 703 | s->vhost_net = vhost_net_init(&options); |
5193e5fb | 704 | if (!s->vhost_net) { |
445f116c MA |
705 | error_setg(errp, |
706 | "vhost-net requested but could not be initialized"); | |
707 | return; | |
5193e5fb | 708 | } |
3a2d44f6 | 709 | } else if (vhostfdname) { |
69e87b32 | 710 | error_setg(errp, "vhostfd(s)= is not valid without vhost"); |
5193e5fb | 711 | } |
5193e5fb JW |
712 | } |
713 | ||
264986e2 JW |
/*
 * Split the ':'-separated list in 'str' into newly allocated strings.
 *
 * At most 'max' entries are stored in fds[]; each is allocated with
 * g_strdup()/g_strndup().  Scanning stops at the end of the string, so a
 * trailing ':' does not produce an extra empty entry.
 *
 * Returns the number of entries stored.
 */
static int get_fds(char *str, char *fds[], int max)
{
    const char *end = str + strlen(str);
    char *cursor = str;
    int count = 0;

    while (count < max && cursor < end) {
        char *colon = strchr(cursor, ':');

        if (colon == NULL) {
            /* last element: take the rest of the string */
            fds[count++] = g_strdup(cursor);
            break;
        }

        fds[count++] = g_strndup(cursor, colon - cursor);
        cursor = colon + 1;
    }

    return count;
}
739 | ||
cebea510 | 740 | int net_init_tap(const Netdev *netdev, const char *name, |
a30ecde6 | 741 | NetClientState *peer, Error **errp) |
5281d757 | 742 | { |
08c573a8 | 743 | const NetdevTapOptions *tap; |
264986e2 | 744 | int fd, vnet_hdr = 0, i = 0, queues; |
08c573a8 LE |
745 | /* for the no-fd, no-helper case */ |
746 | const char *script = NULL; /* suppress wrong "uninit'd use" gcc warning */ | |
5193e5fb | 747 | const char *downscript = NULL; |
1677f4c6 | 748 | Error *err = NULL; |
264986e2 | 749 | const char *vhostfdname; |
08c573a8 LE |
750 | char ifname[128]; |
751 | ||
f394b2e2 EB |
752 | assert(netdev->type == NET_CLIENT_DRIVER_TAP); |
753 | tap = &netdev->u.tap; | |
264986e2 JW |
754 | queues = tap->has_queues ? tap->queues : 1; |
755 | vhostfdname = tap->has_vhostfd ? tap->vhostfd : NULL; | |
5281d757 | 756 | |
ce675a75 JW |
757 | /* QEMU vlans does not support multiqueue tap, in this case peer is set. |
758 | * For -netdev, peer is always NULL. */ | |
759 | if (peer && (tap->has_queues || tap->has_fds || tap->has_vhostfds)) { | |
a3088177 | 760 | error_setg(errp, "Multiqueue tap cannot be used with QEMU vlans"); |
ce675a75 JW |
761 | return -1; |
762 | } | |
763 | ||
08c573a8 LE |
764 | if (tap->has_fd) { |
765 | if (tap->has_ifname || tap->has_script || tap->has_downscript || | |
264986e2 | 766 | tap->has_vnet_hdr || tap->has_helper || tap->has_queues || |
c87826a8 | 767 | tap->has_fds || tap->has_vhostfds) { |
a3088177 MA |
768 | error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, " |
769 | "helper=, queues=, fds=, and vhostfds= " | |
770 | "are invalid with fd="); | |
5281d757 MM |
771 | return -1; |
772 | } | |
773 | ||
1677f4c6 | 774 | fd = monitor_fd_param(cur_mon, tap->fd, &err); |
5281d757 | 775 | if (fd == -1) { |
a3088177 | 776 | error_propagate(errp, err); |
5281d757 MM |
777 | return -1; |
778 | } | |
779 | ||
780 | fcntl(fd, F_SETFL, O_NONBLOCK); | |
781 | ||
782 | vnet_hdr = tap_probe_vnet_hdr(fd); | |
a7c36ee4 | 783 | |
445f116c MA |
784 | net_init_tap_one(tap, peer, "tap", name, NULL, |
785 | script, downscript, | |
786 | vhostfdname, vnet_hdr, fd, &err); | |
787 | if (err) { | |
a3088177 | 788 | error_propagate(errp, err); |
264986e2 JW |
789 | return -1; |
790 | } | |
791 | } else if (tap->has_fds) { | |
fac7d7b1 PM |
792 | char **fds; |
793 | char **vhost_fds; | |
264986e2 JW |
794 | int nfds, nvhosts; |
795 | ||
796 | if (tap->has_ifname || tap->has_script || tap->has_downscript || | |
797 | tap->has_vnet_hdr || tap->has_helper || tap->has_queues || | |
c87826a8 | 798 | tap->has_vhostfd) { |
a3088177 MA |
799 | error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, " |
800 | "helper=, queues=, and vhostfd= " | |
801 | "are invalid with fds="); | |
264986e2 JW |
802 | return -1; |
803 | } | |
804 | ||
fac7d7b1 PM |
805 | fds = g_new0(char *, MAX_TAP_QUEUES); |
806 | vhost_fds = g_new0(char *, MAX_TAP_QUEUES); | |
807 | ||
264986e2 JW |
808 | nfds = get_fds(tap->fds, fds, MAX_TAP_QUEUES); |
809 | if (tap->has_vhostfds) { | |
810 | nvhosts = get_fds(tap->vhostfds, vhost_fds, MAX_TAP_QUEUES); | |
811 | if (nfds != nvhosts) { | |
a3088177 MA |
812 | error_setg(errp, "The number of fds passed does not match " |
813 | "the number of vhostfds passed"); | |
091a6b2a | 814 | goto free_fail; |
264986e2 JW |
815 | } |
816 | } | |
817 | ||
818 | for (i = 0; i < nfds; i++) { | |
1677f4c6 | 819 | fd = monitor_fd_param(cur_mon, fds[i], &err); |
264986e2 | 820 | if (fd == -1) { |
a3088177 | 821 | error_propagate(errp, err); |
091a6b2a | 822 | goto free_fail; |
264986e2 JW |
823 | } |
824 | ||
825 | fcntl(fd, F_SETFL, O_NONBLOCK); | |
a7c36ee4 | 826 | |
264986e2 JW |
827 | if (i == 0) { |
828 | vnet_hdr = tap_probe_vnet_hdr(fd); | |
829 | } else if (vnet_hdr != tap_probe_vnet_hdr(fd)) { | |
a3088177 MA |
830 | error_setg(errp, |
831 | "vnet_hdr not consistent across given tap fds"); | |
091a6b2a | 832 | goto free_fail; |
264986e2 JW |
833 | } |
834 | ||
445f116c MA |
835 | net_init_tap_one(tap, peer, "tap", name, ifname, |
836 | script, downscript, | |
837 | tap->has_vhostfds ? vhost_fds[i] : NULL, | |
838 | vnet_hdr, fd, &err); | |
839 | if (err) { | |
a3088177 | 840 | error_propagate(errp, err); |
091a6b2a | 841 | goto free_fail; |
264986e2 JW |
842 | } |
843 | } | |
11196e95 ZJ |
844 | g_free(fds); |
845 | g_free(vhost_fds); | |
091a6b2a PB |
846 | return 0; |
847 | ||
848 | free_fail: | |
849 | for (i = 0; i < nfds; i++) { | |
850 | g_free(fds[i]); | |
851 | g_free(vhost_fds[i]); | |
852 | } | |
853 | g_free(fds); | |
854 | g_free(vhost_fds); | |
855 | return -1; | |
08c573a8 LE |
856 | } else if (tap->has_helper) { |
857 | if (tap->has_ifname || tap->has_script || tap->has_downscript || | |
c87826a8 | 858 | tap->has_vnet_hdr || tap->has_queues || tap->has_vhostfds) { |
a3088177 MA |
859 | error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, " |
860 | "queues=, and vhostfds= are invalid with helper="); | |
a7c36ee4 CB |
861 | return -1; |
862 | } | |
863 | ||
584613ea AK |
864 | fd = net_bridge_run_helper(tap->helper, |
865 | tap->has_br ? | |
866 | tap->br : DEFAULT_BRIDGE_INTERFACE, | |
a8a21be9 | 867 | errp); |
a7c36ee4 CB |
868 | if (fd == -1) { |
869 | return -1; | |
870 | } | |
871 | ||
872 | fcntl(fd, F_SETFL, O_NONBLOCK); | |
a7c36ee4 CB |
873 | vnet_hdr = tap_probe_vnet_hdr(fd); |
874 | ||
445f116c MA |
875 | net_init_tap_one(tap, peer, "bridge", name, ifname, |
876 | script, downscript, vhostfdname, | |
877 | vnet_hdr, fd, &err); | |
878 | if (err) { | |
a3088177 | 879 | error_propagate(errp, err); |
84f8f3da | 880 | close(fd); |
264986e2 JW |
881 | return -1; |
882 | } | |
5281d757 | 883 | } else { |
c87826a8 | 884 | if (tap->has_vhostfds) { |
a3088177 | 885 | error_setg(errp, "vhostfds= is invalid if fds= wasn't specified"); |
c87826a8 JW |
886 | return -1; |
887 | } | |
08c573a8 | 888 | script = tap->has_script ? tap->script : DEFAULT_NETWORK_SCRIPT; |
5193e5fb JW |
889 | downscript = tap->has_downscript ? tap->downscript : |
890 | DEFAULT_NETWORK_DOWN_SCRIPT; | |
264986e2 JW |
891 | |
892 | if (tap->has_ifname) { | |
893 | pstrcpy(ifname, sizeof ifname, tap->ifname); | |
894 | } else { | |
895 | ifname[0] = '\0'; | |
929fe497 | 896 | } |
a7c36ee4 | 897 | |
264986e2 JW |
898 | for (i = 0; i < queues; i++) { |
899 | fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script, | |
a3088177 | 900 | ifname, sizeof ifname, queues > 1, errp); |
264986e2 JW |
901 | if (fd == -1) { |
902 | return -1; | |
903 | } | |
904 | ||
905 | if (queues > 1 && i == 0 && !tap->has_ifname) { | |
906 | if (tap_fd_get_ifname(fd, ifname)) { | |
a3088177 | 907 | error_setg(errp, "Fail to get ifname"); |
84f8f3da | 908 | close(fd); |
264986e2 JW |
909 | return -1; |
910 | } | |
911 | } | |
912 | ||
445f116c MA |
913 | net_init_tap_one(tap, peer, "tap", name, ifname, |
914 | i >= 1 ? "no" : script, | |
915 | i >= 1 ? "no" : downscript, | |
916 | vhostfdname, vnet_hdr, fd, &err); | |
917 | if (err) { | |
a3088177 | 918 | error_propagate(errp, err); |
84f8f3da | 919 | close(fd); |
264986e2 JW |
920 | return -1; |
921 | } | |
922 | } | |
5281d757 MM |
923 | } |
924 | ||
264986e2 | 925 | return 0; |
5281d757 | 926 | } |
b202554c | 927 | |
4e68f7a0 | 928 | VHostNetState *tap_get_vhost_net(NetClientState *nc) |
b202554c MT |
929 | { |
930 | TAPState *s = DO_UPCAST(TAPState, nc, nc); | |
f394b2e2 | 931 | assert(nc->info->type == NET_CLIENT_DRIVER_TAP); |
b202554c MT |
932 | return s->vhost_net; |
933 | } | |
16dbaf90 JW |
934 | |
935 | int tap_enable(NetClientState *nc) | |
936 | { | |
937 | TAPState *s = DO_UPCAST(TAPState, nc, nc); | |
938 | int ret; | |
939 | ||
940 | if (s->enabled) { | |
941 | return 0; | |
942 | } else { | |
943 | ret = tap_fd_enable(s->fd); | |
944 | if (ret == 0) { | |
945 | s->enabled = true; | |
946 | tap_update_fd_handler(s); | |
947 | } | |
948 | return ret; | |
949 | } | |
950 | } | |
951 | ||
952 | int tap_disable(NetClientState *nc) | |
953 | { | |
954 | TAPState *s = DO_UPCAST(TAPState, nc, nc); | |
955 | int ret; | |
956 | ||
957 | if (s->enabled == 0) { | |
958 | return 0; | |
959 | } else { | |
960 | ret = tap_fd_disable(s->fd); | |
961 | if (ret == 0) { | |
962 | qemu_purge_queued_packets(nc); | |
963 | s->enabled = false; | |
964 | tap_update_fd_handler(s); | |
965 | } | |
966 | return ret; | |
967 | } | |
968 | } |