]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Vhost User Bridge | |
3 | * | |
4 | * Copyright (c) 2015 Red Hat, Inc. | |
5 | * | |
6 | * Authors: | |
7 | * Victor Kaplansky <[email protected]> | |
8 | * | |
9 | * This work is licensed under the terms of the GNU GPL, version 2 or | |
10 | * later. See the COPYING file in the top-level directory. | |
11 | */ | |
12 | ||
13 | /* | |
14 | * TODO: | |
15 | * - main should get parameters from the command line. | |
16 | * - implement all request handlers. Still not implemented: | |
17 | * vubr_get_queue_num_exec() | |
18 | * vubr_send_rarp_exec() | |
19 | * - test for broken requests and virtqueue. | |
20 | * - implement features defined by Virtio 1.0 spec. | |
21 | * - support mergeable buffers and indirect descriptors. | |
22 | * - implement clean shutdown. | |
23 | * - implement non-blocking writes to UDP backend. | |
24 | * - implement polling strategy. | |
25 | * - implement clean starting/stopping of vq processing | |
26 | * - implement clean starting/stopping of used and buffers | |
27 | * dirty page logging. | |
28 | */ | |
29 | ||
30 | #define _FILE_OFFSET_BITS 64 | |
31 | ||
32 | #include "qemu/osdep.h" | |
33 | #include <sys/socket.h> | |
34 | #include <sys/un.h> | |
35 | #include <sys/unistd.h> | |
36 | #include <sys/eventfd.h> | |
37 | #include <arpa/inet.h> | |
38 | #include <netdb.h> | |
39 | #include <linux/vhost.h> | |
40 | ||
41 | #include "qemu/atomic.h" | |
42 | #include "standard-headers/linux/virtio_net.h" | |
43 | #include "standard-headers/linux/virtio_ring.h" | |
44 | ||
45 | #define VHOST_USER_BRIDGE_DEBUG 1 | |
46 | ||
/* Debug trace helper: forwards its arguments to printf() when
 * VHOST_USER_BRIDGE_DEBUG is non-zero. The do/while (0) wrapper makes
 * the expansion usable as a single statement. */
#define DPRINT(...) \
    do { \
        if (VHOST_USER_BRIDGE_DEBUG) { \
            printf(__VA_ARGS__); \
        } \
    } while (0)
53 | ||
/* Signature of a dispatcher callback: it receives the ready descriptor
 * and the opaque context that was registered together with it. */
typedef void (*CallbackFunc)(int sock, void *ctx);

/* One registered watcher: the callback and the context passed to it. */
typedef struct Event {
    void *ctx;
    CallbackFunc callback;
} Event;

/* State of the select()-based event dispatcher. */
typedef struct Dispatcher {
    /* Highest descriptor added so far; -1 right after dispatcher_init(). */
    int max_sock;
    /* Descriptors currently watched for readability. */
    fd_set fdset;
    /* Watchers, indexed by descriptor number. */
    Event events[FD_SETSIZE];
} Dispatcher;
66 | ||
/* Report a fatal error and terminate.
 *
 * The message s is printed through perror(), so the text for the
 * current errno value is appended; the process then exits with
 * status 1. This function does not return. */
static void
vubr_die(const char *s)
{
    const int exit_status = 1;

    perror(s);
    exit(exit_status);
}
73 | ||
74 | static int | |
75 | dispatcher_init(Dispatcher *dispr) | |
76 | { | |
77 | FD_ZERO(&dispr->fdset); | |
78 | dispr->max_sock = -1; | |
79 | return 0; | |
80 | } | |
81 | ||
82 | static int | |
83 | dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb) | |
84 | { | |
85 | if (sock >= FD_SETSIZE) { | |
86 | fprintf(stderr, | |
87 | "Error: Failed to add new event. sock %d should be less than %d\n", | |
88 | sock, FD_SETSIZE); | |
89 | return -1; | |
90 | } | |
91 | ||
92 | dispr->events[sock].ctx = ctx; | |
93 | dispr->events[sock].callback = cb; | |
94 | ||
95 | FD_SET(sock, &dispr->fdset); | |
96 | if (sock > dispr->max_sock) { | |
97 | dispr->max_sock = sock; | |
98 | } | |
99 | DPRINT("Added sock %d for watching. max_sock: %d\n", | |
100 | sock, dispr->max_sock); | |
101 | return 0; | |
102 | } | |
103 | ||
104 | /* dispatcher_remove() is not currently in use but may be useful | |
105 | * in the future. */ | |
106 | static int | |
107 | dispatcher_remove(Dispatcher *dispr, int sock) | |
108 | { | |
109 | if (sock >= FD_SETSIZE) { | |
110 | fprintf(stderr, | |
111 | "Error: Failed to remove event. sock %d should be less than %d\n", | |
112 | sock, FD_SETSIZE); | |
113 | return -1; | |
114 | } | |
115 | ||
116 | FD_CLR(sock, &dispr->fdset); | |
117 | DPRINT("Sock %d removed from dispatcher watch.\n", sock); | |
118 | return 0; | |
119 | } | |
120 | ||
121 | /* timeout in us */ | |
122 | static int | |
123 | dispatcher_wait(Dispatcher *dispr, uint32_t timeout) | |
124 | { | |
125 | struct timeval tv; | |
126 | tv.tv_sec = timeout / 1000000; | |
127 | tv.tv_usec = timeout % 1000000; | |
128 | ||
129 | fd_set fdset = dispr->fdset; | |
130 | ||
131 | /* wait until some of sockets become readable. */ | |
132 | int rc = select(dispr->max_sock + 1, &fdset, 0, 0, &tv); | |
133 | ||
134 | if (rc == -1) { | |
135 | vubr_die("select"); | |
136 | } | |
137 | ||
138 | /* Timeout */ | |
139 | if (rc == 0) { | |
140 | return 0; | |
141 | } | |
142 | ||
143 | /* Now call callback for every ready socket. */ | |
144 | ||
145 | int sock; | |
146 | for (sock = 0; sock < dispr->max_sock + 1; sock++) { | |
147 | /* The callback on a socket can remove other sockets from the | |
148 | * dispatcher, thus we have to check that the socket is | |
149 | * still not removed from dispatcher's list | |
150 | */ | |
151 | if (FD_ISSET(sock, &fdset) && FD_ISSET(sock, &dispr->fdset)) { | |
152 | Event *e = &dispr->events[sock]; | |
153 | e->callback(sock, e->ctx); | |
154 | } | |
155 | } | |
156 | ||
157 | return 0; | |
158 | } | |
159 | ||
/* Per-virtqueue state kept by the bridge. */
typedef struct VubrVirtq {
    /* eventfd written by vubr_virtqueue_kick() to notify the guest;
     * -1 when no descriptor is held. */
    int call_fd;
    /* eventfd the bridge reads kicks from; -1 when no descriptor is held. */
    int kick_fd;
    /* Number of ring entries, as set by VHOST_USER_SET_VRING_NUM;
     * ring indexes are taken modulo this value. */
    uint32_t size;
    /* Free-running index of the next avail ring entry to consume. */
    uint16_t last_avail_index;
    /* Free-running index of the next used ring entry to fill. */
    uint16_t last_used_index;
    /* Ring pointers in our address space, set by
     * vubr_set_vring_addr_exec() through qva_to_va(). */
    struct vring_desc *desc;
    struct vring_avail *avail;
    struct vring_used *used;
    /* Base address used when logging writes to the used ring. */
    uint64_t log_guest_addr;
    /* NOTE(review): not referenced in the visible part of the file. */
    int enable;
} VubrVirtq;
172 | ||
173 | /* Based on qemu/hw/virtio/vhost-user.c */ | |
174 | ||
/* Capacity of the memory region tables and of the fd array carried in
 * a message. */
#define VHOST_MEMORY_MAX_NREGIONS 8
/* Feature bit number advertised by vubr_get_features_exec(). */
#define VHOST_USER_F_PROTOCOL_FEATURES 30
/* v1.0 compliant. */
#define VIRTIO_F_VERSION_1 32

/* Number of guest bytes covered by one bit of the dirty log. */
#define VHOST_LOG_PAGE 4096

/* Bit numbers of the vhost-user protocol features. */
enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,

    /* One past the last defined feature bit. */
    VHOST_USER_PROTOCOL_F_MAX
};

/* Mask with one bit set for every feature below VHOST_USER_PROTOCOL_F_MAX. */
#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
191 | ||
/* Request codes carried in VhostUserMsg.request. The values also index
 * the vubr_request_str[] name table. */
typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    /* One past the last defined request code. */
    VHOST_USER_MAX
} VhostUserRequest;
215 | ||
/* One guest memory region as described in a SET_MEM_TABLE payload. */
typedef struct VhostUserMemoryRegion {
    /* Guest physical start address of the region. */
    uint64_t guest_phys_addr;
    /* Length of the region in bytes. */
    uint64_t memory_size;
    /* Address of the region in the sender's (QEMU's) address space. */
    uint64_t userspace_addr;
    /* Offset of the region inside the file descriptor passed with it. */
    uint64_t mmap_offset;
} VhostUserMemoryRegion;

/* Payload of VHOST_USER_SET_MEM_TABLE. */
typedef struct VhostUserMemory {
    /* Number of valid entries in regions[]. */
    uint32_t nregions;
    uint32_t padding;
    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
} VhostUserMemory;

/* Payload of VHOST_USER_SET_LOG_BASE: the size and offset passed to
 * mmap() for the dirty log. */
typedef struct VhostUserLog {
    uint64_t mmap_size;
    uint64_t mmap_offset;
} VhostUserLog;
233 | ||
/* A vhost-user message as kept in memory by the bridge.
 *
 * The bytes up to payload form the header (see VHOST_USER_HDR_SIZE) and
 * are followed by size bytes of payload. fds and fd_num come after the
 * payload and are filled in locally by vubr_message_read() from the
 * SCM_RIGHTS control data. */
typedef struct VhostUserMsg {
    VhostUserRequest request;

#define VHOST_USER_VERSION_MASK (0x3)
#define VHOST_USER_REPLY_MASK (0x1<<2)
    uint32_t flags;
    uint32_t size; /* the following payload size */
    union {
#define VHOST_USER_VRING_IDX_MASK (0xff)
#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
        /* Plain 64-bit argument; for the vring fd requests the low bits
         * hold the ring index (VHOST_USER_VRING_IDX_MASK). */
        uint64_t u64;
        struct vhost_vring_state state;
        struct vhost_vring_addr addr;
        VhostUserMemory memory;
        VhostUserLog log;
    } payload;
    /* File descriptors received together with the message. */
    int fds[VHOST_MEMORY_MAX_NREGIONS];
    /* Number of valid entries in fds[]. */
    int fd_num;
} QEMU_PACKED VhostUserMsg;
253 | ||
/* Size in bytes of the message header, i.e. everything in VhostUserMsg
 * that precedes the payload. */
#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)

/* The version of the protocol we support */
#define VHOST_USER_VERSION (0x1)

/* Number of entries in VubrDev.vq[]. */
#define MAX_NR_VIRTQUEUE (8)
260 | ||
/* One guest memory region as mapped into the bridge's address space.
 * Filled in by vubr_set_mem_table_exec() and consulted by gpa_to_va()
 * and qva_to_va(). */
typedef struct VubrDevRegion {
    /* Guest Physical address. */
    uint64_t gpa;
    /* Memory region size. */
    uint64_t size;
    /* QEMU virtual address (userspace). */
    uint64_t qva;
    /* Starting offset in our mmaped space. */
    uint64_t mmap_offset;
    /* Start address of mmaped space. */
    uint64_t mmap_addr;
} VubrDevRegion;
273 | ||
/* Whole state of one bridge instance. */
typedef struct VubrDev {
    int sock;
    /* Event loop state; kick descriptors are registered here. */
    Dispatcher dispatcher;
    /* Number of valid entries in regions[]. */
    uint32_t nregions;
    VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
    VubrVirtq vq[MAX_NR_VIRTQUEUE];
    /* eventfd written after dirty-log updates; -1 when unset. */
    int log_call_fd;
    /* Size in bytes of the mapping behind log_table. */
    uint64_t log_size;
    /* Dirty page bitmap shared with QEMU, or NULL when not mapped. */
    uint8_t *log_table;
    /* UDP socket and peer address used as the packet backend. */
    int backend_udp_sock;
    struct sockaddr_in backend_udp_dest;
    /* Non-zero while received UDP packets may be posted to the guest. */
    int ready;
    /* Feature bits received with VHOST_USER_SET_FEATURES. */
    uint64_t features;
    /* Length of the virtio-net header in bytes (10 or 12), chosen in
     * vubr_set_features_exec(). */
    int hdrlen;
} VubrDev;
289 | ||
/* Printable names of the request codes, indexed by VhostUserRequest
 * value (including VHOST_USER_MAX itself). */
static const char *vubr_request_str[] = {
    [VHOST_USER_NONE] = "VHOST_USER_NONE",
    [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
    [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
    [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
    [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
    [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
    [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
    [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
    [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
    [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
    [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
    [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
    [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
    [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
    [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR",
    [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES",
    [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES",
    [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM",
    [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE",
    [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP",
    [VHOST_USER_MAX] = "VHOST_USER_MAX",
};
313 | ||
/* Hex-dump len bytes of buf to stdout.
 *
 * Bytes are printed as two hex digits each, with an extra space before
 * every group of four and a line break before every group of sixteen.
 * A header line precedes the dump and a dotted separator follows it. */
static void
print_buffer(uint8_t *buf, size_t len)
{
    /* The index has the same type as len so that the comparison is not
     * between signed and unsigned values and cannot overflow. */
    size_t i;

    printf("Raw buffer:\n");
    for (i = 0; i < len; i++) {
        if (i % 16 == 0) {
            printf("\n");
        }
        if (i % 4 == 0) {
            printf("   ");
        }
        printf("%02x ", buf[i]);
    }
    printf("\n............................................................\n");
}
330 | ||
331 | /* Translate guest physical address to our virtual address. */ | |
332 | static uint64_t | |
333 | gpa_to_va(VubrDev *dev, uint64_t guest_addr) | |
334 | { | |
335 | int i; | |
336 | ||
337 | /* Find matching memory region. */ | |
338 | for (i = 0; i < dev->nregions; i++) { | |
339 | VubrDevRegion *r = &dev->regions[i]; | |
340 | ||
341 | if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) { | |
342 | return guest_addr - r->gpa + r->mmap_addr + r->mmap_offset; | |
343 | } | |
344 | } | |
345 | ||
346 | assert(!"address not found in regions"); | |
347 | return 0; | |
348 | } | |
349 | ||
350 | /* Translate qemu virtual address to our virtual address. */ | |
351 | static uint64_t | |
352 | qva_to_va(VubrDev *dev, uint64_t qemu_addr) | |
353 | { | |
354 | int i; | |
355 | ||
356 | /* Find matching memory region. */ | |
357 | for (i = 0; i < dev->nregions; i++) { | |
358 | VubrDevRegion *r = &dev->regions[i]; | |
359 | ||
360 | if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) { | |
361 | return qemu_addr - r->qva + r->mmap_addr + r->mmap_offset; | |
362 | } | |
363 | } | |
364 | ||
365 | assert(!"address not found in regions"); | |
366 | return 0; | |
367 | } | |
368 | ||
369 | static void | |
370 | vubr_message_read(int conn_fd, VhostUserMsg *vmsg) | |
371 | { | |
372 | char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { }; | |
373 | struct iovec iov = { | |
374 | .iov_base = (char *)vmsg, | |
375 | .iov_len = VHOST_USER_HDR_SIZE, | |
376 | }; | |
377 | struct msghdr msg = { | |
378 | .msg_iov = &iov, | |
379 | .msg_iovlen = 1, | |
380 | .msg_control = control, | |
381 | .msg_controllen = sizeof(control), | |
382 | }; | |
383 | size_t fd_size; | |
384 | struct cmsghdr *cmsg; | |
385 | int rc; | |
386 | ||
387 | rc = recvmsg(conn_fd, &msg, 0); | |
388 | ||
389 | if (rc == 0) { | |
390 | vubr_die("recvmsg"); | |
391 | fprintf(stderr, "Peer disconnected.\n"); | |
392 | exit(1); | |
393 | } | |
394 | if (rc < 0) { | |
395 | vubr_die("recvmsg"); | |
396 | } | |
397 | ||
398 | vmsg->fd_num = 0; | |
399 | for (cmsg = CMSG_FIRSTHDR(&msg); | |
400 | cmsg != NULL; | |
401 | cmsg = CMSG_NXTHDR(&msg, cmsg)) | |
402 | { | |
403 | if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) { | |
404 | fd_size = cmsg->cmsg_len - CMSG_LEN(0); | |
405 | vmsg->fd_num = fd_size / sizeof(int); | |
406 | memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size); | |
407 | break; | |
408 | } | |
409 | } | |
410 | ||
411 | if (vmsg->size > sizeof(vmsg->payload)) { | |
412 | fprintf(stderr, | |
413 | "Error: too big message request: %d, size: vmsg->size: %u, " | |
414 | "while sizeof(vmsg->payload) = %zu\n", | |
415 | vmsg->request, vmsg->size, sizeof(vmsg->payload)); | |
416 | exit(1); | |
417 | } | |
418 | ||
419 | if (vmsg->size) { | |
420 | rc = read(conn_fd, &vmsg->payload, vmsg->size); | |
421 | if (rc == 0) { | |
422 | vubr_die("recvmsg"); | |
423 | fprintf(stderr, "Peer disconnected.\n"); | |
424 | exit(1); | |
425 | } | |
426 | if (rc < 0) { | |
427 | vubr_die("recvmsg"); | |
428 | } | |
429 | ||
430 | assert(rc == vmsg->size); | |
431 | } | |
432 | } | |
433 | ||
434 | static void | |
435 | vubr_message_write(int conn_fd, VhostUserMsg *vmsg) | |
436 | { | |
437 | int rc; | |
438 | ||
439 | do { | |
440 | rc = write(conn_fd, vmsg, VHOST_USER_HDR_SIZE + vmsg->size); | |
441 | } while (rc < 0 && errno == EINTR); | |
442 | ||
443 | if (rc < 0) { | |
444 | vubr_die("write"); | |
445 | } | |
446 | } | |
447 | ||
448 | static void | |
449 | vubr_backend_udp_sendbuf(VubrDev *dev, uint8_t *buf, size_t len) | |
450 | { | |
451 | int slen = sizeof(struct sockaddr_in); | |
452 | ||
453 | if (sendto(dev->backend_udp_sock, buf, len, 0, | |
454 | (struct sockaddr *) &dev->backend_udp_dest, slen) == -1) { | |
455 | vubr_die("sendto()"); | |
456 | } | |
457 | } | |
458 | ||
459 | static int | |
460 | vubr_backend_udp_recvbuf(VubrDev *dev, uint8_t *buf, size_t buflen) | |
461 | { | |
462 | int slen = sizeof(struct sockaddr_in); | |
463 | int rc; | |
464 | ||
465 | rc = recvfrom(dev->backend_udp_sock, buf, buflen, 0, | |
466 | (struct sockaddr *) &dev->backend_udp_dest, | |
467 | (socklen_t *)&slen); | |
468 | if (rc == -1) { | |
469 | vubr_die("recvfrom()"); | |
470 | } | |
471 | ||
472 | return rc; | |
473 | } | |
474 | ||
475 | static void | |
476 | vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len) | |
477 | { | |
478 | int hdrlen = dev->hdrlen; | |
479 | DPRINT(" hdrlen = %d\n", dev->hdrlen); | |
480 | ||
481 | if (VHOST_USER_BRIDGE_DEBUG) { | |
482 | print_buffer(buf, len); | |
483 | } | |
484 | vubr_backend_udp_sendbuf(dev, buf + hdrlen, len - hdrlen); | |
485 | } | |
486 | ||
487 | /* Kick the log_call_fd if required. */ | |
488 | static void | |
489 | vubr_log_kick(VubrDev *dev) | |
490 | { | |
491 | if (dev->log_call_fd != -1) { | |
492 | DPRINT("Kicking the QEMU's log...\n"); | |
493 | eventfd_write(dev->log_call_fd, 1); | |
494 | } | |
495 | } | |
496 | ||
497 | /* Kick the guest if necessary. */ | |
498 | static void | |
499 | vubr_virtqueue_kick(VubrVirtq *vq) | |
500 | { | |
501 | if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { | |
502 | DPRINT("Kicking the guest...\n"); | |
503 | eventfd_write(vq->call_fd, 1); | |
504 | } | |
505 | } | |
506 | ||
/* Mark one page as dirty in the log bitmap.
 *
 * Bit (page % 8) of byte (page / 8) of log_table is set with an atomic
 * OR. */
static void
vubr_log_page(uint8_t *log_table, uint64_t page)
{
    uint8_t *log_byte = &log_table[page / 8];

    DPRINT("Logged dirty guest page: %"PRId64"\n", page);
    atomic_or(log_byte, 1 << (page % 8));
}
513 | ||
514 | static void | |
515 | vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length) | |
516 | { | |
517 | uint64_t page; | |
518 | ||
519 | if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) || | |
520 | !dev->log_table || !length) { | |
521 | return; | |
522 | } | |
523 | ||
524 | assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8)); | |
525 | ||
526 | page = address / VHOST_LOG_PAGE; | |
527 | while (page * VHOST_LOG_PAGE < address + length) { | |
528 | vubr_log_page(dev->log_table, page); | |
529 | page += VHOST_LOG_PAGE; | |
530 | } | |
531 | vubr_log_kick(dev); | |
532 | } | |
533 | ||
/* Copy a packet into the next available descriptor chain of vq and
 * hand it to the guest.
 *
 * buf holds len bytes. The data is spread over the chain that starts
 * at the next avail ring entry; the chain head is then placed in the
 * used ring, both ring indexes are advanced, the touched guest memory
 * is reported to the dirty log and the guest is kicked. If the chain
 * is too small for the packet, the packet is dropped and the ring
 * indexes are left unchanged. A non-writable descriptor terminates the
 * process.
 *
 * The caller must have checked that an avail entry exists. */
static void
vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
{
    struct vring_desc *desc = vq->desc;
    struct vring_avail *avail = vq->avail;
    struct vring_used *used = vq->used;
    uint64_t log_guest_addr = vq->log_guest_addr;
    int32_t remaining_len = len;

    unsigned int size = vq->size;

    uint16_t avail_index = atomic_mb_read(&avail->idx);

    /* We check the available descriptors before posting the
     * buffer, so here we assume that enough available
     * descriptors. */
    assert(vq->last_avail_index != avail_index);
    /* Ring slots are the free-running indexes modulo the ring size. */
    uint16_t a_index = vq->last_avail_index % size;
    uint16_t u_index = vq->last_used_index % size;
    /* Head of the descriptor chain offered by the guest.
     * NOTE(review): d_index and desc[i].next come from guest memory and
     * are used as indexes into desc[] without a range check here. */
    uint16_t d_index = avail->ring[a_index];

    int i = d_index;
    uint32_t written_len = 0;

    do {
        DPRINT("Post packet to guest on vq:\n");
        DPRINT("    size             = %d\n", vq->size);
        DPRINT("    last_avail_index = %d\n", vq->last_avail_index);
        DPRINT("    last_used_index  = %d\n", vq->last_used_index);
        DPRINT("    a_index = %d\n", a_index);
        DPRINT("    u_index = %d\n", u_index);
        DPRINT("    d_index = %d\n", d_index);
        DPRINT("    desc[%d].addr    = 0x%016"PRIx64"\n", i, desc[i].addr);
        DPRINT("    desc[%d].len     = %d\n", i, desc[i].len);
        DPRINT("    desc[%d].flags   = %d\n", i, desc[i].flags);
        DPRINT("    avail->idx = %d\n", avail_index);
        DPRINT("    used->idx  = %d\n", used->idx);

        if (!(desc[i].flags & VRING_DESC_F_WRITE)) {
            /* FIXME: we should find writable descriptor. */
            fprintf(stderr, "Error: descriptor is not writable. Exiting.\n");
            exit(1);
        }

        void *chunk_start = (void *)(uintptr_t)gpa_to_va(dev, desc[i].addr);
        uint32_t chunk_len = desc[i].len;
        /* Write as much of the rest of the packet as this descriptor
         * can take. */
        uint32_t chunk_write_len = MIN(remaining_len, chunk_len);

        memcpy(chunk_start, buf + written_len, chunk_write_len);
        vubr_log_write(dev, desc[i].addr, chunk_write_len);
        remaining_len -= chunk_write_len;
        written_len += chunk_write_len;

        /* Stop when the packet is complete or the chain ends. */
        if ((remaining_len == 0) || !(desc[i].flags & VRING_DESC_F_NEXT)) {
            break;
        }

        i = desc[i].next;
    } while (1);

    if (remaining_len > 0) {
            fprintf(stderr,
                    "Too long packet for RX, remaining_len = %d, Dropping...\n",
                    remaining_len);
            return;
    }

    /* Add descriptor to the used ring. */
    used->ring[u_index].id = d_index;
    used->ring[u_index].len = len;
    vubr_log_write(dev,
                   log_guest_addr + offsetof(struct vring_used, ring[u_index]),
                   sizeof(used->ring[u_index]));

    vq->last_avail_index++;
    vq->last_used_index++;

    /* Publish the new used index only after the ring entry is filled. */
    atomic_mb_set(&used->idx, vq->last_used_index);
    vubr_log_write(dev,
                   log_guest_addr + offsetof(struct vring_used, idx),
                   sizeof(used->idx));

    /* Kick the guest if necessary. */
    vubr_virtqueue_kick(vq);
}
619 | ||
620 | static int | |
621 | vubr_process_desc(VubrDev *dev, VubrVirtq *vq) | |
622 | { | |
623 | struct vring_desc *desc = vq->desc; | |
624 | struct vring_avail *avail = vq->avail; | |
625 | struct vring_used *used = vq->used; | |
626 | uint64_t log_guest_addr = vq->log_guest_addr; | |
627 | ||
628 | unsigned int size = vq->size; | |
629 | ||
630 | uint16_t a_index = vq->last_avail_index % size; | |
631 | uint16_t u_index = vq->last_used_index % size; | |
632 | uint16_t d_index = avail->ring[a_index]; | |
633 | ||
634 | uint32_t i, len = 0; | |
635 | size_t buf_size = 4096; | |
636 | uint8_t buf[4096]; | |
637 | ||
638 | DPRINT("Chunks: "); | |
639 | i = d_index; | |
640 | do { | |
641 | void *chunk_start = (void *)(uintptr_t)gpa_to_va(dev, desc[i].addr); | |
642 | uint32_t chunk_len = desc[i].len; | |
643 | ||
644 | assert(!(desc[i].flags & VRING_DESC_F_WRITE)); | |
645 | ||
646 | if (len + chunk_len < buf_size) { | |
647 | memcpy(buf + len, chunk_start, chunk_len); | |
648 | DPRINT("%d ", chunk_len); | |
649 | } else { | |
650 | fprintf(stderr, "Error: too long packet. Dropping...\n"); | |
651 | break; | |
652 | } | |
653 | ||
654 | len += chunk_len; | |
655 | ||
656 | if (!(desc[i].flags & VRING_DESC_F_NEXT)) { | |
657 | break; | |
658 | } | |
659 | ||
660 | i = desc[i].next; | |
661 | } while (1); | |
662 | DPRINT("\n"); | |
663 | ||
664 | if (!len) { | |
665 | return -1; | |
666 | } | |
667 | ||
668 | /* Add descriptor to the used ring. */ | |
669 | used->ring[u_index].id = d_index; | |
670 | used->ring[u_index].len = len; | |
671 | vubr_log_write(dev, | |
672 | log_guest_addr + offsetof(struct vring_used, ring[u_index]), | |
673 | sizeof(used->ring[u_index])); | |
674 | ||
675 | vubr_consume_raw_packet(dev, buf, len); | |
676 | ||
677 | return 0; | |
678 | } | |
679 | ||
680 | static void | |
681 | vubr_process_avail(VubrDev *dev, VubrVirtq *vq) | |
682 | { | |
683 | struct vring_avail *avail = vq->avail; | |
684 | struct vring_used *used = vq->used; | |
685 | uint64_t log_guest_addr = vq->log_guest_addr; | |
686 | ||
687 | while (vq->last_avail_index != atomic_mb_read(&avail->idx)) { | |
688 | vubr_process_desc(dev, vq); | |
689 | vq->last_avail_index++; | |
690 | vq->last_used_index++; | |
691 | } | |
692 | ||
693 | atomic_mb_set(&used->idx, vq->last_used_index); | |
694 | vubr_log_write(dev, | |
695 | log_guest_addr + offsetof(struct vring_used, idx), | |
696 | sizeof(used->idx)); | |
697 | } | |
698 | ||
699 | static void | |
700 | vubr_backend_recv_cb(int sock, void *ctx) | |
701 | { | |
702 | VubrDev *dev = (VubrDev *) ctx; | |
703 | VubrVirtq *rx_vq = &dev->vq[0]; | |
704 | uint8_t buf[4096]; | |
705 | struct virtio_net_hdr_v1 *hdr = (struct virtio_net_hdr_v1 *)buf; | |
706 | int hdrlen = dev->hdrlen; | |
707 | int buflen = sizeof(buf); | |
708 | int len; | |
709 | ||
710 | if (!dev->ready) { | |
711 | return; | |
712 | } | |
713 | ||
714 | DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n"); | |
715 | DPRINT(" hdrlen = %d\n", hdrlen); | |
716 | ||
717 | uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx); | |
718 | ||
719 | /* If there is no available descriptors, just do nothing. | |
720 | * The buffer will be handled by next arrived UDP packet, | |
721 | * or next kick on receive virtq. */ | |
722 | if (rx_vq->last_avail_index == avail_index) { | |
723 | DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n"); | |
724 | return; | |
725 | } | |
726 | ||
727 | memset(buf, 0, hdrlen); | |
728 | /* TODO: support mergeable buffers. */ | |
729 | if (hdrlen == 12) | |
730 | hdr->num_buffers = 1; | |
731 | len = vubr_backend_udp_recvbuf(dev, buf + hdrlen, buflen - hdrlen); | |
732 | ||
733 | vubr_post_buffer(dev, rx_vq, buf, len + hdrlen); | |
734 | } | |
735 | ||
736 | static void | |
737 | vubr_kick_cb(int sock, void *ctx) | |
738 | { | |
739 | VubrDev *dev = (VubrDev *) ctx; | |
740 | eventfd_t kick_data; | |
741 | ssize_t rc; | |
742 | ||
743 | rc = eventfd_read(sock, &kick_data); | |
744 | if (rc == -1) { | |
745 | vubr_die("eventfd_read()"); | |
746 | } else { | |
747 | DPRINT("Got kick_data: %016"PRIx64"\n", kick_data); | |
748 | vubr_process_avail(dev, &dev->vq[1]); | |
749 | } | |
750 | } | |
751 | ||
752 | static int | |
753 | vubr_none_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
754 | { | |
755 | DPRINT("Function %s() not implemented yet.\n", __func__); | |
756 | return 0; | |
757 | } | |
758 | ||
759 | static int | |
760 | vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
761 | { | |
762 | vmsg->payload.u64 = | |
763 | ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | | |
764 | (1ULL << VHOST_F_LOG_ALL) | | |
765 | (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | | |
766 | (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)); | |
767 | ||
768 | vmsg->size = sizeof(vmsg->payload.u64); | |
769 | ||
770 | DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64); | |
771 | ||
772 | /* Reply */ | |
773 | return 1; | |
774 | } | |
775 | ||
776 | static int | |
777 | vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
778 | { | |
779 | DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); | |
780 | ||
781 | dev->features = vmsg->payload.u64; | |
782 | if ((dev->features & (1ULL << VIRTIO_F_VERSION_1)) || | |
783 | (dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))) { | |
784 | dev->hdrlen = 12; | |
785 | } else { | |
786 | dev->hdrlen = 10; | |
787 | } | |
788 | ||
789 | return 0; | |
790 | } | |
791 | ||
792 | static int | |
793 | vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
794 | { | |
795 | return 0; | |
796 | } | |
797 | ||
798 | static void | |
799 | vubr_close_log(VubrDev *dev) | |
800 | { | |
801 | if (dev->log_table) { | |
802 | if (munmap(dev->log_table, dev->log_size) != 0) { | |
803 | vubr_die("munmap()"); | |
804 | } | |
805 | ||
806 | dev->log_table = 0; | |
807 | } | |
808 | if (dev->log_call_fd != -1) { | |
809 | close(dev->log_call_fd); | |
810 | dev->log_call_fd = -1; | |
811 | } | |
812 | } | |
813 | ||
814 | static int | |
815 | vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
816 | { | |
817 | vubr_close_log(dev); | |
818 | dev->ready = 0; | |
819 | dev->features = 0; | |
820 | return 0; | |
821 | } | |
822 | ||
823 | static int | |
824 | vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
825 | { | |
826 | int i; | |
827 | VhostUserMemory *memory = &vmsg->payload.memory; | |
828 | dev->nregions = memory->nregions; | |
829 | ||
830 | DPRINT("Nregions: %d\n", memory->nregions); | |
831 | for (i = 0; i < dev->nregions; i++) { | |
832 | void *mmap_addr; | |
833 | VhostUserMemoryRegion *msg_region = &memory->regions[i]; | |
834 | VubrDevRegion *dev_region = &dev->regions[i]; | |
835 | ||
836 | DPRINT("Region %d\n", i); | |
837 | DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n", | |
838 | msg_region->guest_phys_addr); | |
839 | DPRINT(" memory_size: 0x%016"PRIx64"\n", | |
840 | msg_region->memory_size); | |
841 | DPRINT(" userspace_addr 0x%016"PRIx64"\n", | |
842 | msg_region->userspace_addr); | |
843 | DPRINT(" mmap_offset 0x%016"PRIx64"\n", | |
844 | msg_region->mmap_offset); | |
845 | ||
846 | dev_region->gpa = msg_region->guest_phys_addr; | |
847 | dev_region->size = msg_region->memory_size; | |
848 | dev_region->qva = msg_region->userspace_addr; | |
849 | dev_region->mmap_offset = msg_region->mmap_offset; | |
850 | ||
851 | /* We don't use offset argument of mmap() since the | |
852 | * mapped address has to be page aligned, and we use huge | |
853 | * pages. */ | |
854 | mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset, | |
855 | PROT_READ | PROT_WRITE, MAP_SHARED, | |
856 | vmsg->fds[i], 0); | |
857 | ||
858 | if (mmap_addr == MAP_FAILED) { | |
859 | vubr_die("mmap"); | |
860 | } | |
861 | dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr; | |
862 | DPRINT(" mmap_addr: 0x%016"PRIx64"\n", dev_region->mmap_addr); | |
863 | ||
864 | close(vmsg->fds[i]); | |
865 | } | |
866 | ||
867 | return 0; | |
868 | } | |
869 | ||
870 | static int | |
871 | vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
872 | { | |
873 | int fd; | |
874 | uint64_t log_mmap_size, log_mmap_offset; | |
875 | void *rc; | |
876 | ||
877 | assert(vmsg->fd_num == 1); | |
878 | fd = vmsg->fds[0]; | |
879 | ||
880 | assert(vmsg->size == sizeof(vmsg->payload.log)); | |
881 | log_mmap_offset = vmsg->payload.log.mmap_offset; | |
882 | log_mmap_size = vmsg->payload.log.mmap_size; | |
883 | DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset); | |
884 | DPRINT("Log mmap_size: %"PRId64"\n", log_mmap_size); | |
885 | ||
886 | rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, | |
887 | log_mmap_offset); | |
888 | if (rc == MAP_FAILED) { | |
889 | vubr_die("mmap"); | |
890 | } | |
891 | dev->log_table = rc; | |
892 | dev->log_size = log_mmap_size; | |
893 | ||
894 | vmsg->size = sizeof(vmsg->payload.u64); | |
895 | /* Reply */ | |
896 | return 1; | |
897 | } | |
898 | ||
899 | static int | |
900 | vubr_set_log_fd_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
901 | { | |
902 | assert(vmsg->fd_num == 1); | |
903 | dev->log_call_fd = vmsg->fds[0]; | |
904 | DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]); | |
905 | return 0; | |
906 | } | |
907 | ||
908 | static int | |
909 | vubr_set_vring_num_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
910 | { | |
911 | unsigned int index = vmsg->payload.state.index; | |
912 | unsigned int num = vmsg->payload.state.num; | |
913 | ||
914 | DPRINT("State.index: %d\n", index); | |
915 | DPRINT("State.num: %d\n", num); | |
916 | dev->vq[index].size = num; | |
917 | return 0; | |
918 | } | |
919 | ||
920 | static int | |
921 | vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
922 | { | |
923 | struct vhost_vring_addr *vra = &vmsg->payload.addr; | |
924 | unsigned int index = vra->index; | |
925 | VubrVirtq *vq = &dev->vq[index]; | |
926 | ||
927 | DPRINT("vhost_vring_addr:\n"); | |
928 | DPRINT(" index: %d\n", vra->index); | |
929 | DPRINT(" flags: %d\n", vra->flags); | |
930 | DPRINT(" desc_user_addr: 0x%016llx\n", vra->desc_user_addr); | |
931 | DPRINT(" used_user_addr: 0x%016llx\n", vra->used_user_addr); | |
932 | DPRINT(" avail_user_addr: 0x%016llx\n", vra->avail_user_addr); | |
933 | DPRINT(" log_guest_addr: 0x%016llx\n", vra->log_guest_addr); | |
934 | ||
935 | vq->desc = (struct vring_desc *)(uintptr_t)qva_to_va(dev, vra->desc_user_addr); | |
936 | vq->used = (struct vring_used *)(uintptr_t)qva_to_va(dev, vra->used_user_addr); | |
937 | vq->avail = (struct vring_avail *)(uintptr_t)qva_to_va(dev, vra->avail_user_addr); | |
938 | vq->log_guest_addr = vra->log_guest_addr; | |
939 | ||
940 | DPRINT("Setting virtq addresses:\n"); | |
941 | DPRINT(" vring_desc at %p\n", vq->desc); | |
942 | DPRINT(" vring_used at %p\n", vq->used); | |
943 | DPRINT(" vring_avail at %p\n", vq->avail); | |
944 | ||
945 | vq->last_used_index = vq->used->idx; | |
946 | ||
947 | if (vq->last_avail_index != vq->used->idx) { | |
948 | DPRINT("Last avail index != used index: %d != %d, resuming", | |
949 | vq->last_avail_index, vq->used->idx); | |
950 | vq->last_avail_index = vq->used->idx; | |
951 | } | |
952 | ||
953 | return 0; | |
954 | } | |
955 | ||
956 | static int | |
957 | vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
958 | { | |
959 | unsigned int index = vmsg->payload.state.index; | |
960 | unsigned int num = vmsg->payload.state.num; | |
961 | ||
962 | DPRINT("State.index: %d\n", index); | |
963 | DPRINT("State.num: %d\n", num); | |
964 | dev->vq[index].last_avail_index = num; | |
965 | ||
966 | return 0; | |
967 | } | |
968 | ||
969 | static int | |
970 | vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
971 | { | |
972 | unsigned int index = vmsg->payload.state.index; | |
973 | ||
974 | DPRINT("State.index: %d\n", index); | |
975 | vmsg->payload.state.num = dev->vq[index].last_avail_index; | |
976 | vmsg->size = sizeof(vmsg->payload.state); | |
977 | /* FIXME: this is a work-around for a bug in QEMU enabling | |
978 | * too early vrings. When protocol features are enabled, | |
979 | * we have to respect * VHOST_USER_SET_VRING_ENABLE request. */ | |
980 | dev->ready = 0; | |
981 | ||
982 | if (dev->vq[index].call_fd != -1) { | |
983 | close(dev->vq[index].call_fd); | |
984 | dispatcher_remove(&dev->dispatcher, dev->vq[index].call_fd); | |
985 | dev->vq[index].call_fd = -1; | |
986 | } | |
987 | if (dev->vq[index].kick_fd != -1) { | |
988 | close(dev->vq[index].kick_fd); | |
989 | dispatcher_remove(&dev->dispatcher, dev->vq[index].kick_fd); | |
990 | dev->vq[index].kick_fd = -1; | |
991 | } | |
992 | ||
993 | /* Reply */ | |
994 | return 1; | |
995 | } | |
996 | ||
997 | static int | |
998 | vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
999 | { | |
1000 | uint64_t u64_arg = vmsg->payload.u64; | |
1001 | int index = u64_arg & VHOST_USER_VRING_IDX_MASK; | |
1002 | ||
1003 | DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); | |
1004 | ||
1005 | assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0); | |
1006 | assert(vmsg->fd_num == 1); | |
1007 | ||
1008 | if (dev->vq[index].kick_fd != -1) { | |
1009 | close(dev->vq[index].kick_fd); | |
1010 | dispatcher_remove(&dev->dispatcher, dev->vq[index].kick_fd); | |
1011 | } | |
1012 | dev->vq[index].kick_fd = vmsg->fds[0]; | |
1013 | DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index); | |
1014 | ||
1015 | if (index % 2 == 1) { | |
1016 | /* TX queue. */ | |
1017 | dispatcher_add(&dev->dispatcher, dev->vq[index].kick_fd, | |
1018 | dev, vubr_kick_cb); | |
1019 | ||
1020 | DPRINT("Waiting for kicks on fd: %d for vq: %d\n", | |
1021 | dev->vq[index].kick_fd, index); | |
1022 | } | |
1023 | /* We temporarily use this hack to determine that both TX and RX | |
1024 | * queues are set up and ready for processing. | |
1025 | * FIXME: we need to rely in VHOST_USER_SET_VRING_ENABLE and | |
1026 | * actual kicks. */ | |
1027 | if (dev->vq[0].kick_fd != -1 && | |
1028 | dev->vq[1].kick_fd != -1) { | |
1029 | dev->ready = 1; | |
1030 | DPRINT("vhost-user-bridge is ready for processing queues.\n"); | |
1031 | } | |
1032 | return 0; | |
1033 | ||
1034 | } | |
1035 | ||
1036 | static int | |
1037 | vubr_set_vring_call_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
1038 | { | |
1039 | uint64_t u64_arg = vmsg->payload.u64; | |
1040 | int index = u64_arg & VHOST_USER_VRING_IDX_MASK; | |
1041 | ||
1042 | DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); | |
1043 | assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0); | |
1044 | assert(vmsg->fd_num == 1); | |
1045 | ||
1046 | if (dev->vq[index].call_fd != -1) { | |
1047 | close(dev->vq[index].call_fd); | |
1048 | dispatcher_remove(&dev->dispatcher, dev->vq[index].call_fd); | |
1049 | } | |
1050 | dev->vq[index].call_fd = vmsg->fds[0]; | |
1051 | DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index); | |
1052 | ||
1053 | return 0; | |
1054 | } | |
1055 | ||
1056 | static int | |
1057 | vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
1058 | { | |
1059 | DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); | |
1060 | return 0; | |
1061 | } | |
1062 | ||
1063 | static int | |
1064 | vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
1065 | { | |
1066 | vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD; | |
1067 | DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); | |
1068 | vmsg->size = sizeof(vmsg->payload.u64); | |
1069 | ||
1070 | /* Reply */ | |
1071 | return 1; | |
1072 | } | |
1073 | ||
1074 | static int | |
1075 | vubr_set_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
1076 | { | |
1077 | /* FIXME: unimplented */ | |
1078 | DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); | |
1079 | return 0; | |
1080 | } | |
1081 | ||
1082 | static int | |
1083 | vubr_get_queue_num_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
1084 | { | |
1085 | DPRINT("Function %s() not implemented yet.\n", __func__); | |
1086 | return 0; | |
1087 | } | |
1088 | ||
1089 | static int | |
1090 | vubr_set_vring_enable_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
1091 | { | |
1092 | unsigned int index = vmsg->payload.state.index; | |
1093 | unsigned int enable = vmsg->payload.state.num; | |
1094 | ||
1095 | DPRINT("State.index: %d\n", index); | |
1096 | DPRINT("State.enable: %d\n", enable); | |
1097 | dev->vq[index].enable = enable; | |
1098 | return 0; | |
1099 | } | |
1100 | ||
1101 | static int | |
1102 | vubr_send_rarp_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
1103 | { | |
1104 | DPRINT("Function %s() not implemented yet.\n", __func__); | |
1105 | return 0; | |
1106 | } | |
1107 | ||
1108 | static int | |
1109 | vubr_execute_request(VubrDev *dev, VhostUserMsg *vmsg) | |
1110 | { | |
1111 | /* Print out generic part of the request. */ | |
1112 | DPRINT( | |
1113 | "================== Vhost user message from QEMU ==================\n"); | |
1114 | DPRINT("Request: %s (%d)\n", vubr_request_str[vmsg->request], | |
1115 | vmsg->request); | |
1116 | DPRINT("Flags: 0x%x\n", vmsg->flags); | |
1117 | DPRINT("Size: %d\n", vmsg->size); | |
1118 | ||
1119 | if (vmsg->fd_num) { | |
1120 | int i; | |
1121 | DPRINT("Fds:"); | |
1122 | for (i = 0; i < vmsg->fd_num; i++) { | |
1123 | DPRINT(" %d", vmsg->fds[i]); | |
1124 | } | |
1125 | DPRINT("\n"); | |
1126 | } | |
1127 | ||
1128 | switch (vmsg->request) { | |
1129 | case VHOST_USER_NONE: | |
1130 | return vubr_none_exec(dev, vmsg); | |
1131 | case VHOST_USER_GET_FEATURES: | |
1132 | return vubr_get_features_exec(dev, vmsg); | |
1133 | case VHOST_USER_SET_FEATURES: | |
1134 | return vubr_set_features_exec(dev, vmsg); | |
1135 | case VHOST_USER_SET_OWNER: | |
1136 | return vubr_set_owner_exec(dev, vmsg); | |
1137 | case VHOST_USER_RESET_OWNER: | |
1138 | return vubr_reset_device_exec(dev, vmsg); | |
1139 | case VHOST_USER_SET_MEM_TABLE: | |
1140 | return vubr_set_mem_table_exec(dev, vmsg); | |
1141 | case VHOST_USER_SET_LOG_BASE: | |
1142 | return vubr_set_log_base_exec(dev, vmsg); | |
1143 | case VHOST_USER_SET_LOG_FD: | |
1144 | return vubr_set_log_fd_exec(dev, vmsg); | |
1145 | case VHOST_USER_SET_VRING_NUM: | |
1146 | return vubr_set_vring_num_exec(dev, vmsg); | |
1147 | case VHOST_USER_SET_VRING_ADDR: | |
1148 | return vubr_set_vring_addr_exec(dev, vmsg); | |
1149 | case VHOST_USER_SET_VRING_BASE: | |
1150 | return vubr_set_vring_base_exec(dev, vmsg); | |
1151 | case VHOST_USER_GET_VRING_BASE: | |
1152 | return vubr_get_vring_base_exec(dev, vmsg); | |
1153 | case VHOST_USER_SET_VRING_KICK: | |
1154 | return vubr_set_vring_kick_exec(dev, vmsg); | |
1155 | case VHOST_USER_SET_VRING_CALL: | |
1156 | return vubr_set_vring_call_exec(dev, vmsg); | |
1157 | case VHOST_USER_SET_VRING_ERR: | |
1158 | return vubr_set_vring_err_exec(dev, vmsg); | |
1159 | case VHOST_USER_GET_PROTOCOL_FEATURES: | |
1160 | return vubr_get_protocol_features_exec(dev, vmsg); | |
1161 | case VHOST_USER_SET_PROTOCOL_FEATURES: | |
1162 | return vubr_set_protocol_features_exec(dev, vmsg); | |
1163 | case VHOST_USER_GET_QUEUE_NUM: | |
1164 | return vubr_get_queue_num_exec(dev, vmsg); | |
1165 | case VHOST_USER_SET_VRING_ENABLE: | |
1166 | return vubr_set_vring_enable_exec(dev, vmsg); | |
1167 | case VHOST_USER_SEND_RARP: | |
1168 | return vubr_send_rarp_exec(dev, vmsg); | |
1169 | ||
1170 | case VHOST_USER_MAX: | |
1171 | assert(vmsg->request != VHOST_USER_MAX); | |
1172 | } | |
1173 | return 0; | |
1174 | } | |
1175 | ||
1176 | static void | |
1177 | vubr_receive_cb(int sock, void *ctx) | |
1178 | { | |
1179 | VubrDev *dev = (VubrDev *) ctx; | |
1180 | VhostUserMsg vmsg; | |
1181 | int reply_requested; | |
1182 | ||
1183 | vubr_message_read(sock, &vmsg); | |
1184 | reply_requested = vubr_execute_request(dev, &vmsg); | |
1185 | if (reply_requested) { | |
1186 | /* Set the version in the flags when sending the reply */ | |
1187 | vmsg.flags &= ~VHOST_USER_VERSION_MASK; | |
1188 | vmsg.flags |= VHOST_USER_VERSION; | |
1189 | vmsg.flags |= VHOST_USER_REPLY_MASK; | |
1190 | vubr_message_write(sock, &vmsg); | |
1191 | } | |
1192 | } | |
1193 | ||
1194 | static void | |
1195 | vubr_accept_cb(int sock, void *ctx) | |
1196 | { | |
1197 | VubrDev *dev = (VubrDev *)ctx; | |
1198 | int conn_fd; | |
1199 | struct sockaddr_un un; | |
1200 | socklen_t len = sizeof(un); | |
1201 | ||
1202 | conn_fd = accept(sock, (struct sockaddr *) &un, &len); | |
1203 | if (conn_fd == -1) { | |
1204 | vubr_die("accept()"); | |
1205 | } | |
1206 | DPRINT("Got connection from remote peer on sock %d\n", conn_fd); | |
1207 | dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb); | |
1208 | } | |
1209 | ||
1210 | static VubrDev * | |
1211 | vubr_new(const char *path, bool client) | |
1212 | { | |
1213 | VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev)); | |
1214 | dev->nregions = 0; | |
1215 | int i; | |
1216 | struct sockaddr_un un; | |
1217 | CallbackFunc cb; | |
1218 | size_t len; | |
1219 | ||
1220 | for (i = 0; i < MAX_NR_VIRTQUEUE; i++) { | |
1221 | dev->vq[i] = (VubrVirtq) { | |
1222 | .call_fd = -1, .kick_fd = -1, | |
1223 | .size = 0, | |
1224 | .last_avail_index = 0, .last_used_index = 0, | |
1225 | .desc = 0, .avail = 0, .used = 0, | |
1226 | .enable = 0, | |
1227 | }; | |
1228 | } | |
1229 | ||
1230 | /* Init log */ | |
1231 | dev->log_call_fd = -1; | |
1232 | dev->log_size = 0; | |
1233 | dev->log_table = 0; | |
1234 | dev->ready = 0; | |
1235 | dev->features = 0; | |
1236 | ||
1237 | /* Get a UNIX socket. */ | |
1238 | dev->sock = socket(AF_UNIX, SOCK_STREAM, 0); | |
1239 | if (dev->sock == -1) { | |
1240 | vubr_die("socket"); | |
1241 | } | |
1242 | ||
1243 | un.sun_family = AF_UNIX; | |
1244 | strcpy(un.sun_path, path); | |
1245 | len = sizeof(un.sun_family) + strlen(path); | |
1246 | ||
1247 | if (!client) { | |
1248 | unlink(path); | |
1249 | ||
1250 | if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) { | |
1251 | vubr_die("bind"); | |
1252 | } | |
1253 | ||
1254 | if (listen(dev->sock, 1) == -1) { | |
1255 | vubr_die("listen"); | |
1256 | } | |
1257 | cb = vubr_accept_cb; | |
1258 | ||
1259 | DPRINT("Waiting for connections on UNIX socket %s ...\n", path); | |
1260 | } else { | |
1261 | if (connect(dev->sock, (struct sockaddr *)&un, len) == -1) { | |
1262 | vubr_die("connect"); | |
1263 | } | |
1264 | cb = vubr_receive_cb; | |
1265 | } | |
1266 | ||
1267 | dispatcher_init(&dev->dispatcher); | |
1268 | dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev, cb); | |
1269 | ||
1270 | return dev; | |
1271 | } | |
1272 | ||
/*
 * Fill saddr->sin_addr from host.
 *
 * A host that starts with a digit is parsed as a numeric IPv4 address with
 * inet_aton(); anything else is resolved with gethostbyname(). On any
 * failure a message is written to stderr and the process exits with
 * status 1.
 */
static void
vubr_set_host(struct sockaddr_in *saddr, const char *host)
{
    /* <ctype.h> functions need an unsigned char value; a plain char may be
     * negative. */
    if (isdigit((unsigned char)host[0])) {
        if (!inet_aton(host, &saddr->sin_addr)) {
            fprintf(stderr, "inet_aton() failed.\n");
            exit(1);
        }
    } else {
        struct hostent *he = gethostbyname(host);

        if (!he) {
            fprintf(stderr, "gethostbyname() failed.\n");
            exit(1);
        }
        /* Only an IPv4 address of the expected size can be stored here. */
        if (he->h_addrtype != AF_INET ||
            (size_t)he->h_length != sizeof(saddr->sin_addr) ||
            !he->h_addr_list[0]) {
            fprintf(stderr, "gethostbyname() returned no IPv4 address.\n");
            exit(1);
        }
        /* Copy the bytes rather than dereferencing a cast char pointer,
         * which carries no alignment guarantee. */
        memcpy(&saddr->sin_addr, he->h_addr_list[0],
               sizeof(saddr->sin_addr));
    }
}
1291 | ||
1292 | static void | |
1293 | vubr_backend_udp_setup(VubrDev *dev, | |
1294 | const char *local_host, | |
1295 | const char *local_port, | |
1296 | const char *remote_host, | |
1297 | const char *remote_port) | |
1298 | { | |
1299 | int sock; | |
1300 | const char *r; | |
1301 | ||
1302 | int lport, rport; | |
1303 | ||
1304 | lport = strtol(local_port, (char **)&r, 0); | |
1305 | if (r == local_port) { | |
1306 | fprintf(stderr, "lport parsing failed.\n"); | |
1307 | exit(1); | |
1308 | } | |
1309 | ||
1310 | rport = strtol(remote_port, (char **)&r, 0); | |
1311 | if (r == remote_port) { | |
1312 | fprintf(stderr, "rport parsing failed.\n"); | |
1313 | exit(1); | |
1314 | } | |
1315 | ||
1316 | struct sockaddr_in si_local = { | |
1317 | .sin_family = AF_INET, | |
1318 | .sin_port = htons(lport), | |
1319 | }; | |
1320 | ||
1321 | vubr_set_host(&si_local, local_host); | |
1322 | ||
1323 | /* setup destination for sends */ | |
1324 | dev->backend_udp_dest = (struct sockaddr_in) { | |
1325 | .sin_family = AF_INET, | |
1326 | .sin_port = htons(rport), | |
1327 | }; | |
1328 | vubr_set_host(&dev->backend_udp_dest, remote_host); | |
1329 | ||
1330 | sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); | |
1331 | if (sock == -1) { | |
1332 | vubr_die("socket"); | |
1333 | } | |
1334 | ||
1335 | if (bind(sock, (struct sockaddr *)&si_local, sizeof(si_local)) == -1) { | |
1336 | vubr_die("bind"); | |
1337 | } | |
1338 | ||
1339 | dev->backend_udp_sock = sock; | |
1340 | dispatcher_add(&dev->dispatcher, sock, dev, vubr_backend_recv_cb); | |
1341 | DPRINT("Waiting for data from udp backend on %s:%d...\n", | |
1342 | local_host, lport); | |
1343 | } | |
1344 | ||
1345 | static void | |
1346 | vubr_run(VubrDev *dev) | |
1347 | { | |
1348 | while (1) { | |
1349 | /* timeout 200ms */ | |
1350 | dispatcher_wait(&dev->dispatcher, 200000); | |
1351 | /* Here one can try polling strategy. */ | |
1352 | } | |
1353 | } | |
1354 | ||
/*
 * Split a "host:port" string at its first ':'.
 *
 * On success *host and *port are set to newly allocated copies of the two
 * halves and 0 is returned. buf itself is never modified.
 *
 * Returns -1, leaving *host and *port untouched, when buf contains no ':'
 * or when memory cannot be allocated.
 */
static int
vubr_parse_host_port(const char **host, const char **port, const char *buf)
{
    const char *sep = strchr(buf, ':');
    size_t host_len;
    size_t port_len;
    char *host_copy;
    char *port_copy;

    if (!sep) {
        return -1;
    }

    host_len = (size_t)(sep - buf);
    port_len = strlen(sep + 1);

    host_copy = malloc(host_len + 1);
    port_copy = malloc(port_len + 1);
    if (!host_copy || !port_copy) {
        free(host_copy);
        free(port_copy);
        return -1;
    }

    /* Copy the two halves out instead of cutting the caller's string. */
    memcpy(host_copy, buf, host_len);
    host_copy[host_len] = '\0';
    memcpy(port_copy, sep + 1, port_len + 1);

    *host = host_copy;
    *port = port_copy;
    return 0;
}
1368 | ||
/*
 * Built-in defaults and the option variables that start out with them.
 * main() overwrites the variables from the command line and prints the
 * defaults in its usage text.
 */

/* Path of the vhost-user UNIX socket (-u). */
#define DEFAULT_UD_SOCKET "/tmp/vubr.sock"
static const char *ud_socket_path = DEFAULT_UD_SOCKET;

/* Local host and port (-l). */
#define DEFAULT_LHOST "127.0.0.1"
#define DEFAULT_LPORT "4444"
static const char *lhost = DEFAULT_LHOST;
static const char *lport = DEFAULT_LPORT;

/* Remote host and port (-r). */
#define DEFAULT_RHOST "127.0.0.1"
#define DEFAULT_RPORT "5555"
static const char *rhost = DEFAULT_RHOST;
static const char *rport = DEFAULT_RPORT;
1380 | ||
1381 | int | |
1382 | main(int argc, char *argv[]) | |
1383 | { | |
1384 | VubrDev *dev; | |
1385 | int opt; | |
1386 | bool client = false; | |
1387 | ||
1388 | while ((opt = getopt(argc, argv, "l:r:u:c")) != -1) { | |
1389 | ||
1390 | switch (opt) { | |
1391 | case 'l': | |
1392 | if (vubr_parse_host_port(&lhost, &lport, optarg) < 0) { | |
1393 | goto out; | |
1394 | } | |
1395 | break; | |
1396 | case 'r': | |
1397 | if (vubr_parse_host_port(&rhost, &rport, optarg) < 0) { | |
1398 | goto out; | |
1399 | } | |
1400 | break; | |
1401 | case 'u': | |
1402 | ud_socket_path = strdup(optarg); | |
1403 | break; | |
1404 | case 'c': | |
1405 | client = true; | |
1406 | break; | |
1407 | default: | |
1408 | goto out; | |
1409 | } | |
1410 | } | |
1411 | ||
1412 | DPRINT("ud socket: %s (%s)\n", ud_socket_path, | |
1413 | client ? "client" : "server"); | |
1414 | DPRINT("local: %s:%s\n", lhost, lport); | |
1415 | DPRINT("remote: %s:%s\n", rhost, rport); | |
1416 | ||
1417 | dev = vubr_new(ud_socket_path, client); | |
1418 | if (!dev) { | |
1419 | return 1; | |
1420 | } | |
1421 | ||
1422 | vubr_backend_udp_setup(dev, lhost, lport, rhost, rport); | |
1423 | vubr_run(dev); | |
1424 | return 0; | |
1425 | ||
1426 | out: | |
1427 | fprintf(stderr, "Usage: %s ", argv[0]); | |
1428 | fprintf(stderr, "[-c] [-u ud_socket_path] [-l lhost:lport] [-r rhost:rport]\n"); | |
1429 | fprintf(stderr, "\t-u path to unix doman socket. default: %s\n", | |
1430 | DEFAULT_UD_SOCKET); | |
1431 | fprintf(stderr, "\t-l local host and port. default: %s:%s\n", | |
1432 | DEFAULT_LHOST, DEFAULT_LPORT); | |
1433 | fprintf(stderr, "\t-r remote host and port. default: %s:%s\n", | |
1434 | DEFAULT_RHOST, DEFAULT_RPORT); | |
1435 | fprintf(stderr, "\t-c client mode\n"); | |
1436 | ||
1437 | return 1; | |
1438 | } |