]>
Commit | Line | Data |
---|---|---|
5281d757 MM |
1 | /* |
2 | * QEMU System Emulator | |
3 | * | |
4 | * Copyright (c) 2003-2008 Fabrice Bellard | |
5 | * Copyright (c) 2009 Red Hat, Inc. | |
6 | * | |
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy | |
8 | * of this software and associated documentation files (the "Software"), to deal | |
9 | * in the Software without restriction, including without limitation the rights | |
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
11 | * copies of the Software, and to permit persons to whom the Software is | |
12 | * furnished to do so, subject to the following conditions: | |
13 | * | |
14 | * The above copyright notice and this permission notice shall be included in | |
15 | * all copies or substantial portions of the Software. | |
16 | * | |
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
20 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
23 | * THE SOFTWARE. | |
24 | */ | |
25 | ||
2744d920 | 26 | #include "qemu/osdep.h" |
1422e32d | 27 | #include "tap_int.h" |
5281d757 | 28 | |
5281d757 | 29 | |
5281d757 | 30 | #include <sys/ioctl.h> |
5281d757 | 31 | #include <sys/wait.h> |
71f4effc | 32 | #include <sys/socket.h> |
5281d757 MM |
33 | #include <net/if.h> |
34 | ||
1422e32d | 35 | #include "net/net.h" |
a245fc18 | 36 | #include "clients.h" |
83c9089e | 37 | #include "monitor/monitor.h" |
9c17d615 | 38 | #include "sysemu/sysemu.h" |
da34e65c | 39 | #include "qapi/error.h" |
5281d757 | 40 | #include "qemu-common.h" |
f348b6d1 | 41 | #include "qemu/cutils.h" |
1de7afc9 | 42 | #include "qemu/error-report.h" |
5281d757 | 43 | |
1422e32d | 44 | #include "net/tap.h" |
5281d757 | 45 | |
0d09e41a | 46 | #include "net/vhost_net.h" |
82b0d80e | 47 | |
5281d757 | 48 | typedef struct TAPState { |
4e68f7a0 | 49 | NetClientState nc; |
5281d757 MM |
50 | int fd; |
51 | char down_script[1024]; | |
52 | char down_script_arg[128]; | |
d32fcad3 | 53 | uint8_t buf[NET_BUFSIZE]; |
ec45f083 JW |
54 | bool read_poll; |
55 | bool write_poll; | |
56 | bool using_vnet_hdr; | |
57 | bool has_ufo; | |
16dbaf90 | 58 | bool enabled; |
82b0d80e | 59 | VHostNetState *vhost_net; |
ef4252b1 | 60 | unsigned host_vnet_hdr_len; |
9e32ff32 | 61 | Notifier exit; |
5281d757 MM |
62 | } TAPState; |
63 | ||
ac4fcf56 MA |
64 | static void launch_script(const char *setup_script, const char *ifname, |
65 | int fd, Error **errp); | |
5281d757 | 66 | |
5281d757 MM |
67 | static void tap_send(void *opaque); |
68 | static void tap_writable(void *opaque); | |
69 | ||
70 | static void tap_update_fd_handler(TAPState *s) | |
71 | { | |
82e1cc4b FZ |
72 | qemu_set_fd_handler(s->fd, |
73 | s->read_poll && s->enabled ? tap_send : NULL, | |
74 | s->write_poll && s->enabled ? tap_writable : NULL, | |
75 | s); | |
5281d757 MM |
76 | } |
77 | ||
ec45f083 | 78 | static void tap_read_poll(TAPState *s, bool enable) |
5281d757 | 79 | { |
ec45f083 | 80 | s->read_poll = enable; |
5281d757 MM |
81 | tap_update_fd_handler(s); |
82 | } | |
83 | ||
ec45f083 | 84 | static void tap_write_poll(TAPState *s, bool enable) |
5281d757 | 85 | { |
ec45f083 | 86 | s->write_poll = enable; |
5281d757 MM |
87 | tap_update_fd_handler(s); |
88 | } | |
89 | ||
90 | static void tap_writable(void *opaque) | |
91 | { | |
92 | TAPState *s = opaque; | |
93 | ||
ec45f083 | 94 | tap_write_poll(s, false); |
5281d757 | 95 | |
3e35ba93 | 96 | qemu_flush_queued_packets(&s->nc); |
5281d757 MM |
97 | } |
98 | ||
99 | static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt) | |
100 | { | |
101 | ssize_t len; | |
102 | ||
103 | do { | |
104 | len = writev(s->fd, iov, iovcnt); | |
105 | } while (len == -1 && errno == EINTR); | |
106 | ||
107 | if (len == -1 && errno == EAGAIN) { | |
ec45f083 | 108 | tap_write_poll(s, true); |
5281d757 MM |
109 | return 0; |
110 | } | |
111 | ||
112 | return len; | |
113 | } | |
114 | ||
4e68f7a0 | 115 | static ssize_t tap_receive_iov(NetClientState *nc, const struct iovec *iov, |
5281d757 MM |
116 | int iovcnt) |
117 | { | |
3e35ba93 | 118 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
5281d757 MM |
119 | const struct iovec *iovp = iov; |
120 | struct iovec iov_copy[iovcnt + 1]; | |
ef4252b1 | 121 | struct virtio_net_hdr_mrg_rxbuf hdr = { }; |
5281d757 | 122 | |
ef4252b1 | 123 | if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { |
5281d757 | 124 | iov_copy[0].iov_base = &hdr; |
ef4252b1 | 125 | iov_copy[0].iov_len = s->host_vnet_hdr_len; |
5281d757 MM |
126 | memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov)); |
127 | iovp = iov_copy; | |
128 | iovcnt++; | |
129 | } | |
130 | ||
131 | return tap_write_packet(s, iovp, iovcnt); | |
132 | } | |
133 | ||
4e68f7a0 | 134 | static ssize_t tap_receive_raw(NetClientState *nc, const uint8_t *buf, size_t size) |
5281d757 | 135 | { |
3e35ba93 | 136 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
5281d757 MM |
137 | struct iovec iov[2]; |
138 | int iovcnt = 0; | |
ef4252b1 | 139 | struct virtio_net_hdr_mrg_rxbuf hdr = { }; |
5281d757 | 140 | |
ef4252b1 | 141 | if (s->host_vnet_hdr_len) { |
5281d757 | 142 | iov[iovcnt].iov_base = &hdr; |
ef4252b1 | 143 | iov[iovcnt].iov_len = s->host_vnet_hdr_len; |
5281d757 MM |
144 | iovcnt++; |
145 | } | |
146 | ||
147 | iov[iovcnt].iov_base = (char *)buf; | |
148 | iov[iovcnt].iov_len = size; | |
149 | iovcnt++; | |
150 | ||
151 | return tap_write_packet(s, iov, iovcnt); | |
152 | } | |
153 | ||
4e68f7a0 | 154 | static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t size) |
5281d757 | 155 | { |
3e35ba93 | 156 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
5281d757 MM |
157 | struct iovec iov[1]; |
158 | ||
ef4252b1 | 159 | if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { |
3e35ba93 | 160 | return tap_receive_raw(nc, buf, size); |
5281d757 MM |
161 | } |
162 | ||
163 | iov[0].iov_base = (char *)buf; | |
164 | iov[0].iov_len = size; | |
165 | ||
166 | return tap_write_packet(s, iov, 1); | |
167 | } | |
168 | ||
966ea5ec MM |
#ifndef __sun__
/*
 * Read one frame from tapfd into buf (at most maxlen bytes).
 * Returns whatever read() returns.
 */
ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
{
    ssize_t nread = read(tapfd, buf, maxlen);

    return nread;
}
#endif
175 | ||
4e68f7a0 | 176 | static void tap_send_completed(NetClientState *nc, ssize_t len) |
5281d757 | 177 | { |
3e35ba93 | 178 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
ec45f083 | 179 | tap_read_poll(s, true); |
5281d757 MM |
180 | } |
181 | ||
182 | static void tap_send(void *opaque) | |
183 | { | |
184 | TAPState *s = opaque; | |
185 | int size; | |
756ae78b | 186 | int packets = 0; |
5281d757 | 187 | |
a90a7425 | 188 | while (true) { |
5819c918 MM |
189 | uint8_t *buf = s->buf; |
190 | ||
191 | size = tap_read_packet(s->fd, s->buf, sizeof(s->buf)); | |
192 | if (size <= 0) { | |
193 | break; | |
194 | } | |
195 | ||
ef4252b1 MT |
196 | if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { |
197 | buf += s->host_vnet_hdr_len; | |
198 | size -= s->host_vnet_hdr_len; | |
5819c918 MM |
199 | } |
200 | ||
3e35ba93 | 201 | size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed); |
5819c918 | 202 | if (size == 0) { |
ec45f083 | 203 | tap_read_poll(s, false); |
68e5ec64 SH |
204 | break; |
205 | } else if (size < 0) { | |
206 | break; | |
5819c918 | 207 | } |
756ae78b WK |
208 | |
209 | /* | |
210 | * When the host keeps receiving more packets while tap_send() is | |
211 | * running we can hog the QEMU global mutex. Limit the number of | |
212 | * packets that are processed per tap_send() callback to prevent | |
213 | * stalling the guest. | |
214 | */ | |
215 | packets++; | |
216 | if (packets >= 50) { | |
217 | break; | |
218 | } | |
68e5ec64 | 219 | } |
5281d757 MM |
220 | } |
221 | ||
3bac80d3 | 222 | static bool tap_has_ufo(NetClientState *nc) |
5281d757 | 223 | { |
3e35ba93 | 224 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
5281d757 | 225 | |
2be64a68 | 226 | assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); |
5281d757 MM |
227 | |
228 | return s->has_ufo; | |
229 | } | |
230 | ||
3bac80d3 | 231 | static bool tap_has_vnet_hdr(NetClientState *nc) |
5281d757 | 232 | { |
3e35ba93 | 233 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
5281d757 | 234 | |
2be64a68 | 235 | assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); |
5281d757 | 236 | |
ef4252b1 | 237 | return !!s->host_vnet_hdr_len; |
5281d757 MM |
238 | } |
239 | ||
3bac80d3 | 240 | static bool tap_has_vnet_hdr_len(NetClientState *nc, int len) |
445d892f MT |
241 | { |
242 | TAPState *s = DO_UPCAST(TAPState, nc, nc); | |
243 | ||
2be64a68 | 244 | assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); |
445d892f | 245 | |
e96dfd11 | 246 | return !!tap_probe_vnet_hdr_len(s->fd, len); |
445d892f MT |
247 | } |
248 | ||
3bac80d3 | 249 | static void tap_set_vnet_hdr_len(NetClientState *nc, int len) |
445d892f MT |
250 | { |
251 | TAPState *s = DO_UPCAST(TAPState, nc, nc); | |
252 | ||
2be64a68 | 253 | assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); |
445d892f MT |
254 | assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) || |
255 | len == sizeof(struct virtio_net_hdr)); | |
256 | ||
257 | tap_fd_set_vnet_hdr_len(s->fd, len); | |
258 | s->host_vnet_hdr_len = len; | |
259 | } | |
260 | ||
3bac80d3 | 261 | static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr) |
5281d757 | 262 | { |
3e35ba93 | 263 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
5281d757 | 264 | |
2be64a68 | 265 | assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); |
ef4252b1 | 266 | assert(!!s->host_vnet_hdr_len == using_vnet_hdr); |
5281d757 MM |
267 | |
268 | s->using_vnet_hdr = using_vnet_hdr; | |
269 | } | |
270 | ||
c80cd6bb GK |
271 | static int tap_set_vnet_le(NetClientState *nc, bool is_le) |
272 | { | |
273 | TAPState *s = DO_UPCAST(TAPState, nc, nc); | |
274 | ||
275 | return tap_fd_set_vnet_le(s->fd, is_le); | |
276 | } | |
277 | ||
278 | static int tap_set_vnet_be(NetClientState *nc, bool is_be) | |
279 | { | |
280 | TAPState *s = DO_UPCAST(TAPState, nc, nc); | |
281 | ||
282 | return tap_fd_set_vnet_be(s->fd, is_be); | |
283 | } | |
284 | ||
3bac80d3 | 285 | static void tap_set_offload(NetClientState *nc, int csum, int tso4, |
5281d757 MM |
286 | int tso6, int ecn, int ufo) |
287 | { | |
3e35ba93 | 288 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
27a6375d MT |
289 | if (s->fd < 0) { |
290 | return; | |
291 | } | |
5281d757 | 292 | |
27a6375d | 293 | tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo); |
5281d757 MM |
294 | } |
295 | ||
9e32ff32 MAL |
296 | static void tap_exit_notify(Notifier *notifier, void *data) |
297 | { | |
298 | TAPState *s = container_of(notifier, TAPState, exit); | |
299 | Error *err = NULL; | |
300 | ||
301 | if (s->down_script[0]) { | |
302 | launch_script(s->down_script, s->down_script_arg, s->fd, &err); | |
303 | if (err) { | |
304 | error_report_err(err); | |
305 | } | |
306 | } | |
307 | } | |
308 | ||
4e68f7a0 | 309 | static void tap_cleanup(NetClientState *nc) |
5281d757 | 310 | { |
3e35ba93 | 311 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
5281d757 | 312 | |
82b0d80e MT |
313 | if (s->vhost_net) { |
314 | vhost_net_cleanup(s->vhost_net); | |
43849424 | 315 | s->vhost_net = NULL; |
82b0d80e MT |
316 | } |
317 | ||
3e35ba93 | 318 | qemu_purge_queued_packets(nc); |
5281d757 | 319 | |
9e32ff32 MAL |
320 | tap_exit_notify(&s->exit, NULL); |
321 | qemu_remove_exit_notifier(&s->exit); | |
5281d757 | 322 | |
ec45f083 JW |
323 | tap_read_poll(s, false); |
324 | tap_write_poll(s, false); | |
5281d757 | 325 | close(s->fd); |
27a6375d | 326 | s->fd = -1; |
5281d757 MM |
327 | } |
328 | ||
4e68f7a0 | 329 | static void tap_poll(NetClientState *nc, bool enable) |
ceb69615 MT |
330 | { |
331 | TAPState *s = DO_UPCAST(TAPState, nc, nc); | |
332 | tap_read_poll(s, enable); | |
333 | tap_write_poll(s, enable); | |
334 | } | |
335 | ||
4e68f7a0 | 336 | int tap_get_fd(NetClientState *nc) |
95d528a2 MT |
337 | { |
338 | TAPState *s = DO_UPCAST(TAPState, nc, nc); | |
2be64a68 | 339 | assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); |
95d528a2 MT |
340 | return s->fd; |
341 | } | |
342 | ||
5281d757 MM |
343 | /* fd support */ |
344 | ||
3e35ba93 | 345 | static NetClientInfo net_tap_info = { |
2be64a68 | 346 | .type = NET_CLIENT_OPTIONS_KIND_TAP, |
3e35ba93 MM |
347 | .size = sizeof(TAPState), |
348 | .receive = tap_receive, | |
349 | .receive_raw = tap_receive_raw, | |
350 | .receive_iov = tap_receive_iov, | |
ceb69615 | 351 | .poll = tap_poll, |
3e35ba93 | 352 | .cleanup = tap_cleanup, |
2e753bcc VM |
353 | .has_ufo = tap_has_ufo, |
354 | .has_vnet_hdr = tap_has_vnet_hdr, | |
355 | .has_vnet_hdr_len = tap_has_vnet_hdr_len, | |
356 | .using_vnet_hdr = tap_using_vnet_hdr, | |
357 | .set_offload = tap_set_offload, | |
358 | .set_vnet_hdr_len = tap_set_vnet_hdr_len, | |
c80cd6bb GK |
359 | .set_vnet_le = tap_set_vnet_le, |
360 | .set_vnet_be = tap_set_vnet_be, | |
3e35ba93 MM |
361 | }; |
362 | ||
4e68f7a0 | 363 | static TAPState *net_tap_fd_init(NetClientState *peer, |
5281d757 MM |
364 | const char *model, |
365 | const char *name, | |
366 | int fd, | |
367 | int vnet_hdr) | |
368 | { | |
4e68f7a0 | 369 | NetClientState *nc; |
5281d757 | 370 | TAPState *s; |
5281d757 | 371 | |
ab5f3f84 | 372 | nc = qemu_new_net_client(&net_tap_info, peer, model, name); |
3e35ba93 MM |
373 | |
374 | s = DO_UPCAST(TAPState, nc, nc); | |
375 | ||
5281d757 | 376 | s->fd = fd; |
ef4252b1 | 377 | s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0; |
ec45f083 | 378 | s->using_vnet_hdr = false; |
9c282718 | 379 | s->has_ufo = tap_probe_has_ufo(s->fd); |
16dbaf90 | 380 | s->enabled = true; |
3e35ba93 | 381 | tap_set_offload(&s->nc, 0, 0, 0, 0, 0); |
58ddcd50 MT |
382 | /* |
383 | * Make sure host header length is set correctly in tap: | |
384 | * it might have been modified by another instance of qemu. | |
385 | */ | |
386 | if (tap_probe_vnet_hdr_len(s->fd, s->host_vnet_hdr_len)) { | |
387 | tap_fd_set_vnet_hdr_len(s->fd, s->host_vnet_hdr_len); | |
388 | } | |
ec45f083 | 389 | tap_read_poll(s, true); |
82b0d80e | 390 | s->vhost_net = NULL; |
9e32ff32 MAL |
391 | |
392 | s->exit.notify = tap_exit_notify; | |
393 | qemu_add_exit_notifier(&s->exit); | |
394 | ||
5281d757 MM |
395 | return s; |
396 | } | |
397 | ||
ac4fcf56 MA |
398 | static void launch_script(const char *setup_script, const char *ifname, |
399 | int fd, Error **errp) | |
5281d757 | 400 | { |
5281d757 MM |
401 | int pid, status; |
402 | char *args[3]; | |
403 | char **parg; | |
404 | ||
5281d757 MM |
405 | /* try to launch network script */ |
406 | pid = fork(); | |
ac4fcf56 MA |
407 | if (pid < 0) { |
408 | error_setg_errno(errp, errno, "could not launch network script %s", | |
409 | setup_script); | |
410 | return; | |
411 | } | |
5281d757 MM |
412 | if (pid == 0) { |
413 | int open_max = sysconf(_SC_OPEN_MAX), i; | |
414 | ||
13a12f86 PG |
415 | for (i = 3; i < open_max; i++) { |
416 | if (i != fd) { | |
5281d757 MM |
417 | close(i); |
418 | } | |
419 | } | |
420 | parg = args; | |
421 | *parg++ = (char *)setup_script; | |
422 | *parg++ = (char *)ifname; | |
9678d950 | 423 | *parg = NULL; |
5281d757 MM |
424 | execv(setup_script, args); |
425 | _exit(1); | |
ac4fcf56 | 426 | } else { |
5281d757 MM |
427 | while (waitpid(pid, &status, 0) != pid) { |
428 | /* loop */ | |
429 | } | |
5281d757 MM |
430 | |
431 | if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { | |
ac4fcf56 | 432 | return; |
5281d757 | 433 | } |
ac4fcf56 MA |
434 | error_setg(errp, "network script %s failed with status %d", |
435 | setup_script, status); | |
5281d757 | 436 | } |
5281d757 MM |
437 | } |
438 | ||
a7c36ee4 CB |
/*
 * Receive one file descriptor sent as SCM_RIGHTS ancillary data on the
 * unix socket c.
 *
 * Returns the received descriptor (>= 0), or -1 with errno set:
 *   - errno from recvmsg() when the call itself fails (EINTR included, so
 *     callers can retry);
 *   - ECONNRESET when the peer closed the socket without sending anything;
 *   - EBADMSG when data arrived without a usable SCM_RIGHTS message.
 *
 * The last two cases must not be reported as a descriptor: 0 is a valid fd
 * number, and the control buffer holds no descriptor when the kernel did
 * not deliver one.
 */
static int recv_fd(int c)
{
    int fd;
    uint8_t msgbuf[CMSG_SPACE(sizeof(fd))];
    struct msghdr msg = {
        .msg_control = msgbuf,
        .msg_controllen = sizeof(msgbuf),
    };
    struct cmsghdr *cmsg;
    struct iovec iov;
    uint8_t req[1];
    ssize_t len;

    memset(msgbuf, 0, sizeof(msgbuf));

    iov.iov_base = req;
    iov.iov_len = sizeof(req);

    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;

    len = recvmsg(c, &msg, 0);
    if (len < 0) {
        return -1;
    }
    if (len == 0) {
        errno = ECONNRESET;
        return -1;
    }

    /* Look at what the kernel actually delivered, not at a pre-filled header. */
    cmsg = CMSG_FIRSTHDR(&msg);
    if (cmsg == NULL ||
        cmsg->cmsg_level != SOL_SOCKET ||
        cmsg->cmsg_type != SCM_RIGHTS ||
        cmsg->cmsg_len < CMSG_LEN(sizeof(fd))) {
        errno = EBADMSG;
        return -1;
    }

    memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
    return fd;
}
472 | ||
a8a21be9 MA |
473 | static int net_bridge_run_helper(const char *helper, const char *bridge, |
474 | Error **errp) | |
a7c36ee4 CB |
475 | { |
476 | sigset_t oldmask, mask; | |
477 | int pid, status; | |
478 | char *args[5]; | |
479 | char **parg; | |
480 | int sv[2]; | |
481 | ||
482 | sigemptyset(&mask); | |
483 | sigaddset(&mask, SIGCHLD); | |
484 | sigprocmask(SIG_BLOCK, &mask, &oldmask); | |
485 | ||
486 | if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { | |
a8a21be9 | 487 | error_setg_errno(errp, errno, "socketpair() failed"); |
a7c36ee4 CB |
488 | return -1; |
489 | } | |
490 | ||
491 | /* try to launch bridge helper */ | |
492 | pid = fork(); | |
a8a21be9 MA |
493 | if (pid < 0) { |
494 | error_setg_errno(errp, errno, "Can't fork bridge helper"); | |
495 | return -1; | |
496 | } | |
a7c36ee4 CB |
497 | if (pid == 0) { |
498 | int open_max = sysconf(_SC_OPEN_MAX), i; | |
499 | char fd_buf[6+10]; | |
500 | char br_buf[6+IFNAMSIZ] = {0}; | |
501 | char helper_cmd[PATH_MAX + sizeof(fd_buf) + sizeof(br_buf) + 15]; | |
502 | ||
13a12f86 PG |
503 | for (i = 3; i < open_max; i++) { |
504 | if (i != sv[1]) { | |
a7c36ee4 CB |
505 | close(i); |
506 | } | |
507 | } | |
508 | ||
509 | snprintf(fd_buf, sizeof(fd_buf), "%s%d", "--fd=", sv[1]); | |
510 | ||
511 | if (strrchr(helper, ' ') || strrchr(helper, '\t')) { | |
512 | /* assume helper is a command */ | |
513 | ||
514 | if (strstr(helper, "--br=") == NULL) { | |
515 | snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge); | |
516 | } | |
517 | ||
518 | snprintf(helper_cmd, sizeof(helper_cmd), "%s %s %s %s", | |
519 | helper, "--use-vnet", fd_buf, br_buf); | |
520 | ||
521 | parg = args; | |
522 | *parg++ = (char *)"sh"; | |
523 | *parg++ = (char *)"-c"; | |
524 | *parg++ = helper_cmd; | |
525 | *parg++ = NULL; | |
526 | ||
527 | execv("/bin/sh", args); | |
528 | } else { | |
529 | /* assume helper is just the executable path name */ | |
530 | ||
531 | snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge); | |
532 | ||
533 | parg = args; | |
534 | *parg++ = (char *)helper; | |
535 | *parg++ = (char *)"--use-vnet"; | |
536 | *parg++ = fd_buf; | |
537 | *parg++ = br_buf; | |
538 | *parg++ = NULL; | |
539 | ||
540 | execv(helper, args); | |
541 | } | |
542 | _exit(1); | |
543 | ||
a8a21be9 | 544 | } else { |
a7c36ee4 | 545 | int fd; |
a8a21be9 | 546 | int saved_errno; |
a7c36ee4 CB |
547 | |
548 | close(sv[1]); | |
549 | ||
550 | do { | |
551 | fd = recv_fd(sv[0]); | |
552 | } while (fd == -1 && errno == EINTR); | |
a8a21be9 | 553 | saved_errno = errno; |
a7c36ee4 CB |
554 | |
555 | close(sv[0]); | |
556 | ||
557 | while (waitpid(pid, &status, 0) != pid) { | |
558 | /* loop */ | |
559 | } | |
560 | sigprocmask(SIG_SETMASK, &oldmask, NULL); | |
561 | if (fd < 0) { | |
a8a21be9 MA |
562 | error_setg_errno(errp, saved_errno, |
563 | "failed to recv file descriptor"); | |
a7c36ee4 CB |
564 | return -1; |
565 | } | |
a8a21be9 MA |
566 | if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { |
567 | error_setg(errp, "bridge helper failed"); | |
568 | return -1; | |
a7c36ee4 | 569 | } |
a8a21be9 | 570 | return fd; |
a7c36ee4 | 571 | } |
a7c36ee4 CB |
572 | } |
573 | ||
1a0c0958 | 574 | int net_init_bridge(const NetClientOptions *opts, const char *name, |
a30ecde6 | 575 | NetClientState *peer, Error **errp) |
a7c36ee4 | 576 | { |
f79b51b0 LE |
577 | const NetdevBridgeOptions *bridge; |
578 | const char *helper, *br; | |
a7c36ee4 CB |
579 | TAPState *s; |
580 | int fd, vnet_hdr; | |
581 | ||
8d0bcba8 | 582 | assert(opts->type == NET_CLIENT_OPTIONS_KIND_BRIDGE); |
32bafa8f | 583 | bridge = opts->u.bridge.data; |
f79b51b0 LE |
584 | |
585 | helper = bridge->has_helper ? bridge->helper : DEFAULT_BRIDGE_HELPER; | |
586 | br = bridge->has_br ? bridge->br : DEFAULT_BRIDGE_INTERFACE; | |
a7c36ee4 | 587 | |
a8a21be9 | 588 | fd = net_bridge_run_helper(helper, br, errp); |
a7c36ee4 CB |
589 | if (fd == -1) { |
590 | return -1; | |
591 | } | |
592 | ||
593 | fcntl(fd, F_SETFL, O_NONBLOCK); | |
a7c36ee4 | 594 | vnet_hdr = tap_probe_vnet_hdr(fd); |
d33d93b2 | 595 | s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr); |
a7c36ee4 | 596 | |
f79b51b0 LE |
597 | snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s,br=%s", helper, |
598 | br); | |
a7c36ee4 CB |
599 | |
600 | return 0; | |
601 | } | |
602 | ||
08c573a8 LE |
603 | static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr, |
604 | const char *setup_script, char *ifname, | |
468dd824 | 605 | size_t ifname_sz, int mq_required, Error **errp) |
5281d757 | 606 | { |
ac4fcf56 | 607 | Error *err = NULL; |
5281d757 | 608 | int fd, vnet_hdr_required; |
5281d757 | 609 | |
08c573a8 LE |
610 | if (tap->has_vnet_hdr) { |
611 | *vnet_hdr = tap->vnet_hdr; | |
5281d757 MM |
612 | vnet_hdr_required = *vnet_hdr; |
613 | } else { | |
08c573a8 | 614 | *vnet_hdr = 1; |
5281d757 MM |
615 | vnet_hdr_required = 0; |
616 | } | |
617 | ||
264986e2 | 618 | TFR(fd = tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required, |
468dd824 | 619 | mq_required, errp)); |
5281d757 MM |
620 | if (fd < 0) { |
621 | return -1; | |
622 | } | |
623 | ||
5281d757 MM |
624 | if (setup_script && |
625 | setup_script[0] != '\0' && | |
ac4fcf56 MA |
626 | strcmp(setup_script, "no") != 0) { |
627 | launch_script(setup_script, ifname, fd, &err); | |
628 | if (err) { | |
468dd824 | 629 | error_propagate(errp, err); |
ac4fcf56 MA |
630 | close(fd); |
631 | return -1; | |
632 | } | |
5281d757 MM |
633 | } |
634 | ||
5281d757 MM |
635 | return fd; |
636 | } | |
637 | ||
264986e2 JW |
638 | #define MAX_TAP_QUEUES 1024 |
639 | ||
445f116c MA |
640 | static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, |
641 | const char *model, const char *name, | |
642 | const char *ifname, const char *script, | |
643 | const char *downscript, const char *vhostfdname, | |
644 | int vnet_hdr, int fd, Error **errp) | |
5193e5fb | 645 | { |
1677f4c6 | 646 | Error *err = NULL; |
da4a4eac | 647 | TAPState *s = net_tap_fd_init(peer, model, name, fd, vnet_hdr); |
81647a65 | 648 | int vhostfd; |
5193e5fb | 649 | |
80b832c3 MA |
650 | tap_set_sndbuf(s->fd, tap, &err); |
651 | if (err) { | |
445f116c MA |
652 | error_propagate(errp, err); |
653 | return; | |
5193e5fb JW |
654 | } |
655 | ||
264986e2 | 656 | if (tap->has_fd || tap->has_fds) { |
5193e5fb JW |
657 | snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd); |
658 | } else if (tap->has_helper) { | |
659 | snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s", | |
660 | tap->helper); | |
661 | } else { | |
5193e5fb JW |
662 | snprintf(s->nc.info_str, sizeof(s->nc.info_str), |
663 | "ifname=%s,script=%s,downscript=%s", ifname, script, | |
664 | downscript); | |
665 | ||
666 | if (strcmp(downscript, "no") != 0) { | |
667 | snprintf(s->down_script, sizeof(s->down_script), "%s", downscript); | |
668 | snprintf(s->down_script_arg, sizeof(s->down_script_arg), | |
669 | "%s", ifname); | |
670 | } | |
671 | } | |
672 | ||
673 | if (tap->has_vhost ? tap->vhost : | |
674 | vhostfdname || (tap->has_vhostforce && tap->vhostforce)) { | |
81647a65 NN |
675 | VhostNetOptions options; |
676 | ||
1a1bfac9 | 677 | options.backend_type = VHOST_BACKEND_TYPE_KERNEL; |
81647a65 | 678 | options.net_backend = &s->nc; |
69e87b32 JW |
679 | if (tap->has_poll_us) { |
680 | options.busyloop_timeout = tap->poll_us; | |
681 | } else { | |
682 | options.busyloop_timeout = 0; | |
683 | } | |
5193e5fb | 684 | |
3a2d44f6 | 685 | if (vhostfdname) { |
1677f4c6 | 686 | vhostfd = monitor_fd_param(cur_mon, vhostfdname, &err); |
5193e5fb | 687 | if (vhostfd == -1) { |
445f116c MA |
688 | error_propagate(errp, err); |
689 | return; | |
5193e5fb JW |
690 | } |
691 | } else { | |
81647a65 NN |
692 | vhostfd = open("/dev/vhost-net", O_RDWR); |
693 | if (vhostfd < 0) { | |
445f116c MA |
694 | error_setg_errno(errp, errno, |
695 | "tap: open vhost char device failed"); | |
696 | return; | |
81647a65 | 697 | } |
5193e5fb | 698 | } |
81647a65 | 699 | options.opaque = (void *)(uintptr_t)vhostfd; |
5193e5fb | 700 | |
81647a65 | 701 | s->vhost_net = vhost_net_init(&options); |
5193e5fb | 702 | if (!s->vhost_net) { |
445f116c MA |
703 | error_setg(errp, |
704 | "vhost-net requested but could not be initialized"); | |
705 | return; | |
5193e5fb | 706 | } |
3a2d44f6 | 707 | } else if (vhostfdname) { |
69e87b32 | 708 | error_setg(errp, "vhostfd(s)= is not valid without vhost"); |
5193e5fb | 709 | } |
5193e5fb JW |
710 | } |
711 | ||
264986e2 JW |
/*
 * Split the ':'-separated list in str into newly allocated strings.
 *
 * At most max entries are stored in fds[]; the number stored is returned.
 * An empty str yields 0 entries, and a trailing ':' does not produce an
 * extra empty entry.  The strings come from g_strdup()/g_strndup().
 */
static int get_fds(char *str, char *fds[], int max)
{
    char *end = str + strlen(str);
    char *cursor = str;
    int count;

    for (count = 0; count < max && cursor < end; ) {
        char *colon = strchr(cursor, ':');

        if (colon == NULL) {
            /* Last element: take the rest of the string. */
            fds[count++] = g_strdup(cursor);
            break;
        }

        fds[count++] = g_strndup(cursor, colon - cursor);
        cursor = colon + 1;
    }

    return count;
}
737 | ||
1a0c0958 | 738 | int net_init_tap(const NetClientOptions *opts, const char *name, |
a30ecde6 | 739 | NetClientState *peer, Error **errp) |
5281d757 | 740 | { |
08c573a8 | 741 | const NetdevTapOptions *tap; |
264986e2 | 742 | int fd, vnet_hdr = 0, i = 0, queues; |
08c573a8 LE |
743 | /* for the no-fd, no-helper case */ |
744 | const char *script = NULL; /* suppress wrong "uninit'd use" gcc warning */ | |
5193e5fb | 745 | const char *downscript = NULL; |
1677f4c6 | 746 | Error *err = NULL; |
264986e2 | 747 | const char *vhostfdname; |
08c573a8 LE |
748 | char ifname[128]; |
749 | ||
8d0bcba8 | 750 | assert(opts->type == NET_CLIENT_OPTIONS_KIND_TAP); |
32bafa8f | 751 | tap = opts->u.tap.data; |
264986e2 JW |
752 | queues = tap->has_queues ? tap->queues : 1; |
753 | vhostfdname = tap->has_vhostfd ? tap->vhostfd : NULL; | |
5281d757 | 754 | |
ce675a75 JW |
755 | /* QEMU vlans does not support multiqueue tap, in this case peer is set. |
756 | * For -netdev, peer is always NULL. */ | |
757 | if (peer && (tap->has_queues || tap->has_fds || tap->has_vhostfds)) { | |
a3088177 | 758 | error_setg(errp, "Multiqueue tap cannot be used with QEMU vlans"); |
ce675a75 JW |
759 | return -1; |
760 | } | |
761 | ||
08c573a8 LE |
762 | if (tap->has_fd) { |
763 | if (tap->has_ifname || tap->has_script || tap->has_downscript || | |
264986e2 | 764 | tap->has_vnet_hdr || tap->has_helper || tap->has_queues || |
c87826a8 | 765 | tap->has_fds || tap->has_vhostfds) { |
a3088177 MA |
766 | error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, " |
767 | "helper=, queues=, fds=, and vhostfds= " | |
768 | "are invalid with fd="); | |
5281d757 MM |
769 | return -1; |
770 | } | |
771 | ||
1677f4c6 | 772 | fd = monitor_fd_param(cur_mon, tap->fd, &err); |
5281d757 | 773 | if (fd == -1) { |
a3088177 | 774 | error_propagate(errp, err); |
5281d757 MM |
775 | return -1; |
776 | } | |
777 | ||
778 | fcntl(fd, F_SETFL, O_NONBLOCK); | |
779 | ||
780 | vnet_hdr = tap_probe_vnet_hdr(fd); | |
a7c36ee4 | 781 | |
445f116c MA |
782 | net_init_tap_one(tap, peer, "tap", name, NULL, |
783 | script, downscript, | |
784 | vhostfdname, vnet_hdr, fd, &err); | |
785 | if (err) { | |
a3088177 | 786 | error_propagate(errp, err); |
264986e2 JW |
787 | return -1; |
788 | } | |
789 | } else if (tap->has_fds) { | |
11196e95 ZJ |
790 | char **fds = g_new(char *, MAX_TAP_QUEUES); |
791 | char **vhost_fds = g_new(char *, MAX_TAP_QUEUES); | |
264986e2 JW |
792 | int nfds, nvhosts; |
793 | ||
794 | if (tap->has_ifname || tap->has_script || tap->has_downscript || | |
795 | tap->has_vnet_hdr || tap->has_helper || tap->has_queues || | |
c87826a8 | 796 | tap->has_vhostfd) { |
a3088177 MA |
797 | error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, " |
798 | "helper=, queues=, and vhostfd= " | |
799 | "are invalid with fds="); | |
264986e2 JW |
800 | return -1; |
801 | } | |
802 | ||
803 | nfds = get_fds(tap->fds, fds, MAX_TAP_QUEUES); | |
804 | if (tap->has_vhostfds) { | |
805 | nvhosts = get_fds(tap->vhostfds, vhost_fds, MAX_TAP_QUEUES); | |
806 | if (nfds != nvhosts) { | |
a3088177 MA |
807 | error_setg(errp, "The number of fds passed does not match " |
808 | "the number of vhostfds passed"); | |
264986e2 JW |
809 | return -1; |
810 | } | |
811 | } | |
812 | ||
813 | for (i = 0; i < nfds; i++) { | |
1677f4c6 | 814 | fd = monitor_fd_param(cur_mon, fds[i], &err); |
264986e2 | 815 | if (fd == -1) { |
a3088177 | 816 | error_propagate(errp, err); |
264986e2 JW |
817 | return -1; |
818 | } | |
819 | ||
820 | fcntl(fd, F_SETFL, O_NONBLOCK); | |
a7c36ee4 | 821 | |
264986e2 JW |
822 | if (i == 0) { |
823 | vnet_hdr = tap_probe_vnet_hdr(fd); | |
824 | } else if (vnet_hdr != tap_probe_vnet_hdr(fd)) { | |
a3088177 MA |
825 | error_setg(errp, |
826 | "vnet_hdr not consistent across given tap fds"); | |
264986e2 JW |
827 | return -1; |
828 | } | |
829 | ||
445f116c MA |
830 | net_init_tap_one(tap, peer, "tap", name, ifname, |
831 | script, downscript, | |
832 | tap->has_vhostfds ? vhost_fds[i] : NULL, | |
833 | vnet_hdr, fd, &err); | |
834 | if (err) { | |
a3088177 | 835 | error_propagate(errp, err); |
264986e2 JW |
836 | return -1; |
837 | } | |
838 | } | |
11196e95 ZJ |
839 | g_free(fds); |
840 | g_free(vhost_fds); | |
08c573a8 LE |
841 | } else if (tap->has_helper) { |
842 | if (tap->has_ifname || tap->has_script || tap->has_downscript || | |
c87826a8 | 843 | tap->has_vnet_hdr || tap->has_queues || tap->has_vhostfds) { |
a3088177 MA |
844 | error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, " |
845 | "queues=, and vhostfds= are invalid with helper="); | |
a7c36ee4 CB |
846 | return -1; |
847 | } | |
848 | ||
a8a21be9 MA |
849 | fd = net_bridge_run_helper(tap->helper, DEFAULT_BRIDGE_INTERFACE, |
850 | errp); | |
a7c36ee4 CB |
851 | if (fd == -1) { |
852 | return -1; | |
853 | } | |
854 | ||
855 | fcntl(fd, F_SETFL, O_NONBLOCK); | |
a7c36ee4 CB |
856 | vnet_hdr = tap_probe_vnet_hdr(fd); |
857 | ||
445f116c MA |
858 | net_init_tap_one(tap, peer, "bridge", name, ifname, |
859 | script, downscript, vhostfdname, | |
860 | vnet_hdr, fd, &err); | |
861 | if (err) { | |
a3088177 | 862 | error_propagate(errp, err); |
84f8f3da | 863 | close(fd); |
264986e2 JW |
864 | return -1; |
865 | } | |
5281d757 | 866 | } else { |
c87826a8 | 867 | if (tap->has_vhostfds) { |
a3088177 | 868 | error_setg(errp, "vhostfds= is invalid if fds= wasn't specified"); |
c87826a8 JW |
869 | return -1; |
870 | } | |
08c573a8 | 871 | script = tap->has_script ? tap->script : DEFAULT_NETWORK_SCRIPT; |
5193e5fb JW |
872 | downscript = tap->has_downscript ? tap->downscript : |
873 | DEFAULT_NETWORK_DOWN_SCRIPT; | |
264986e2 JW |
874 | |
875 | if (tap->has_ifname) { | |
876 | pstrcpy(ifname, sizeof ifname, tap->ifname); | |
877 | } else { | |
878 | ifname[0] = '\0'; | |
929fe497 | 879 | } |
a7c36ee4 | 880 | |
264986e2 JW |
881 | for (i = 0; i < queues; i++) { |
882 | fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script, | |
a3088177 | 883 | ifname, sizeof ifname, queues > 1, errp); |
264986e2 JW |
884 | if (fd == -1) { |
885 | return -1; | |
886 | } | |
887 | ||
888 | if (queues > 1 && i == 0 && !tap->has_ifname) { | |
889 | if (tap_fd_get_ifname(fd, ifname)) { | |
a3088177 | 890 | error_setg(errp, "Fail to get ifname"); |
84f8f3da | 891 | close(fd); |
264986e2 JW |
892 | return -1; |
893 | } | |
894 | } | |
895 | ||
445f116c MA |
896 | net_init_tap_one(tap, peer, "tap", name, ifname, |
897 | i >= 1 ? "no" : script, | |
898 | i >= 1 ? "no" : downscript, | |
899 | vhostfdname, vnet_hdr, fd, &err); | |
900 | if (err) { | |
a3088177 | 901 | error_propagate(errp, err); |
84f8f3da | 902 | close(fd); |
264986e2 JW |
903 | return -1; |
904 | } | |
905 | } | |
5281d757 MM |
906 | } |
907 | ||
264986e2 | 908 | return 0; |
5281d757 | 909 | } |
b202554c | 910 | |
4e68f7a0 | 911 | VHostNetState *tap_get_vhost_net(NetClientState *nc) |
b202554c MT |
912 | { |
913 | TAPState *s = DO_UPCAST(TAPState, nc, nc); | |
2be64a68 | 914 | assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); |
b202554c MT |
915 | return s->vhost_net; |
916 | } | |
16dbaf90 JW |
917 | |
918 | int tap_enable(NetClientState *nc) | |
919 | { | |
920 | TAPState *s = DO_UPCAST(TAPState, nc, nc); | |
921 | int ret; | |
922 | ||
923 | if (s->enabled) { | |
924 | return 0; | |
925 | } else { | |
926 | ret = tap_fd_enable(s->fd); | |
927 | if (ret == 0) { | |
928 | s->enabled = true; | |
929 | tap_update_fd_handler(s); | |
930 | } | |
931 | return ret; | |
932 | } | |
933 | } | |
934 | ||
935 | int tap_disable(NetClientState *nc) | |
936 | { | |
937 | TAPState *s = DO_UPCAST(TAPState, nc, nc); | |
938 | int ret; | |
939 | ||
940 | if (s->enabled == 0) { | |
941 | return 0; | |
942 | } else { | |
943 | ret = tap_fd_disable(s->fd); | |
944 | if (ret == 0) { | |
945 | qemu_purge_queued_packets(nc); | |
946 | s->enabled = false; | |
947 | tap_update_fd_handler(s); | |
948 | } | |
949 | return ret; | |
950 | } | |
951 | } |