4 * Copyright (c) 2015 Red Hat, Inc.
9 * This work is licensed under the terms of the GNU GPL, version 2 or
10 * later. See the COPYING file in the top-level directory.
15 * - main should get parameters from the command line.
16 * - implement all request handlers. Still not implemented:
17 * vubr_get_queue_num_exec()
18 * vubr_send_rarp_exec()
19 * - test for broken requests and virtqueue.
20 * - implement features defined by Virtio 1.0 spec.
21 * - support mergeable buffers and indirect descriptors.
22 * - implement clean shutdown.
23 * - implement non-blocking writes to UDP backend.
24 * - implement polling strategy.
25 * - implement clean starting/stopping of vq processing
26 * - implement clean starting/stopping of used and buffers
30 #define _FILE_OFFSET_BITS 64
32 #include "qemu/osdep.h"
33 #include <sys/socket.h>
35 #include <sys/unistd.h>
37 #include <sys/eventfd.h>
38 #include <arpa/inet.h>
40 #include <qemu/osdep.h>
42 #include <linux/vhost.h>
44 #include "qemu/atomic.h"
45 #include "standard-headers/linux/virtio_net.h"
46 #include "standard-headers/linux/virtio_ring.h"
48 #define VHOST_USER_BRIDGE_DEBUG 1
52 if (VHOST_USER_BRIDGE_DEBUG) { \
53 printf(__VA_ARGS__); \
57 typedef void (*CallbackFunc)(int sock, void *ctx);
59 typedef struct Event {
61 CallbackFunc callback;
64 typedef struct Dispatcher {
67 Event events[FD_SETSIZE];
71 vubr_die(const char *s)
78 dispatcher_init(Dispatcher *dispr)
80 FD_ZERO(&dispr->fdset);
86 dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb)
88 if (sock >= FD_SETSIZE) {
90 "Error: Failed to add new event. sock %d should be less than %d\n",
95 dispr->events[sock].ctx = ctx;
96 dispr->events[sock].callback = cb;
98 FD_SET(sock, &dispr->fdset);
99 if (sock > dispr->max_sock) {
100 dispr->max_sock = sock;
102 DPRINT("Added sock %d for watching. max_sock: %d\n",
103 sock, dispr->max_sock);
107 /* dispatcher_remove() is not currently in use but may be useful
110 dispatcher_remove(Dispatcher *dispr, int sock)
112 if (sock >= FD_SETSIZE) {
114 "Error: Failed to remove event. sock %d should be less than %d\n",
119 FD_CLR(sock, &dispr->fdset);
120 DPRINT("Sock %d removed from dispatcher watch.\n", sock);
/*
 * Wait for watched descriptors to become readable and run their callbacks.
 *
 * @timeout is expressed in microseconds: it is split into tv_sec/tv_usec
 * and handed to select().  select() operates on a copy of the watch set so
 * the dispatcher's own set is not clobbered.  A callback runs only for a
 * descriptor that is both reported ready and still present in
 * dispr->fdset, since an earlier callback in the same pass may have
 * removed it.
 */
126 dispatcher_wait(Dispatcher *dispr, uint32_t timeout)
129 tv.tv_sec = timeout / 1000000;
130 tv.tv_usec = timeout % 1000000;
132 fd_set fdset = dispr->fdset;
134 /* Wait until some of the sockets become readable. */
135 int rc = select(dispr->max_sock + 1, &fdset, 0, 0, &tv);
146 /* Now call the callback for every ready socket. */
149 for (sock = 0; sock < dispr->max_sock + 1; sock++) {
150 /* The callback on a socket can remove other sockets from the
151 * dispatcher, thus we have to check that the socket has
152 * not been removed from the dispatcher's list
154 if (FD_ISSET(sock, &fdset) && FD_ISSET(sock, &dispr->fdset)) {
155 Event *e = &dispr->events[sock];
156 e->callback(sock, e->ctx);
163 typedef struct VubrVirtq {
167 uint16_t last_avail_index;
168 uint16_t last_used_index;
169 struct vring_desc *desc;
170 struct vring_avail *avail;
171 struct vring_used *used;
172 uint64_t log_guest_addr;
176 /* Based on qemu/hw/virtio/vhost-user.c */
178 #define VHOST_MEMORY_MAX_NREGIONS 8
179 #define VHOST_USER_F_PROTOCOL_FEATURES 30
180 /* v1.0 compliant. */
181 #define VIRTIO_F_VERSION_1 32
183 #define VHOST_LOG_PAGE 4096
185 enum VhostUserProtocolFeature {
186 VHOST_USER_PROTOCOL_F_MQ = 0,
187 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
188 VHOST_USER_PROTOCOL_F_RARP = 2,
190 VHOST_USER_PROTOCOL_F_MAX
193 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
195 typedef enum VhostUserRequest {
197 VHOST_USER_GET_FEATURES = 1,
198 VHOST_USER_SET_FEATURES = 2,
199 VHOST_USER_SET_OWNER = 3,
200 VHOST_USER_RESET_OWNER = 4,
201 VHOST_USER_SET_MEM_TABLE = 5,
202 VHOST_USER_SET_LOG_BASE = 6,
203 VHOST_USER_SET_LOG_FD = 7,
204 VHOST_USER_SET_VRING_NUM = 8,
205 VHOST_USER_SET_VRING_ADDR = 9,
206 VHOST_USER_SET_VRING_BASE = 10,
207 VHOST_USER_GET_VRING_BASE = 11,
208 VHOST_USER_SET_VRING_KICK = 12,
209 VHOST_USER_SET_VRING_CALL = 13,
210 VHOST_USER_SET_VRING_ERR = 14,
211 VHOST_USER_GET_PROTOCOL_FEATURES = 15,
212 VHOST_USER_SET_PROTOCOL_FEATURES = 16,
213 VHOST_USER_GET_QUEUE_NUM = 17,
214 VHOST_USER_SET_VRING_ENABLE = 18,
215 VHOST_USER_SEND_RARP = 19,
219 typedef struct VhostUserMemoryRegion {
220 uint64_t guest_phys_addr;
221 uint64_t memory_size;
222 uint64_t userspace_addr;
223 uint64_t mmap_offset;
224 } VhostUserMemoryRegion;
226 typedef struct VhostUserMemory {
229 VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
232 typedef struct VhostUserLog {
234 uint64_t mmap_offset;
237 typedef struct VhostUserMsg {
238 VhostUserRequest request;
240 #define VHOST_USER_VERSION_MASK (0x3)
241 #define VHOST_USER_REPLY_MASK (0x1<<2)
243 uint32_t size; /* the following payload size */
245 #define VHOST_USER_VRING_IDX_MASK (0xff)
246 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
248 struct vhost_vring_state state;
249 struct vhost_vring_addr addr;
250 VhostUserMemory memory;
253 int fds[VHOST_MEMORY_MAX_NREGIONS];
255 } QEMU_PACKED VhostUserMsg;
257 #define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
259 /* The version of the protocol we support */
260 #define VHOST_USER_VERSION (0x1)
262 #define MAX_NR_VIRTQUEUE (8)
264 typedef struct VubrDevRegion {
265 /* Guest Physical address. */
267 /* Memory region size. */
269 /* QEMU virtual address (userspace). */
271 /* Starting offset in our mmaped space. */
272 uint64_t mmap_offset;
273 /* Start address of mmaped space. */
277 typedef struct VubrDev {
279 Dispatcher dispatcher;
281 VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
282 VubrVirtq vq[MAX_NR_VIRTQUEUE];
286 int backend_udp_sock;
287 struct sockaddr_in backend_udp_dest;
293 static const char *vubr_request_str[] = {
294 [VHOST_USER_NONE] = "VHOST_USER_NONE",
295 [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
296 [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
297 [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
298 [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
299 [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
300 [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
301 [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
302 [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
303 [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
304 [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
305 [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
306 [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
307 [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
308 [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR",
309 [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES",
310 [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES",
311 [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM",
312 [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE",
313 [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP",
314 [VHOST_USER_MAX] = "VHOST_USER_MAX",
318 print_buffer(uint8_t *buf, size_t len)
321 printf("Raw buffer:\n");
322 for (i = 0; i < len; i++) {
329 printf("%02x ", buf[i]);
331 printf("\n............................................................\n");
334 /* Translate guest physical address to our virtual address. */
336 gpa_to_va(VubrDev *dev, uint64_t guest_addr)
340 /* Find matching memory region. */
341 for (i = 0; i < dev->nregions; i++) {
342 VubrDevRegion *r = &dev->regions[i];
344 if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) {
345 return guest_addr - r->gpa + r->mmap_addr + r->mmap_offset;
349 assert(!"address not found in regions");
353 /* Translate qemu virtual address to our virtual address. */
355 qva_to_va(VubrDev *dev, uint64_t qemu_addr)
359 /* Find matching memory region. */
360 for (i = 0; i < dev->nregions; i++) {
361 VubrDevRegion *r = &dev->regions[i];
363 if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
364 return qemu_addr - r->qva + r->mmap_addr + r->mmap_offset;
368 assert(!"address not found in regions");
/*
 * Read one vhost-user message from @conn_fd into @vmsg.
 *
 * The fixed-size header (VHOST_USER_HDR_SIZE bytes) is received with
 * recvmsg() so that ancillary data can be collected: every SCM_RIGHTS
 * control message has its descriptors copied into vmsg->fds and counted in
 * vmsg->fd_num.  The control buffer is sized for VHOST_MEMORY_MAX_NREGIONS
 * descriptors.  A header announcing a payload larger than
 * sizeof(vmsg->payload) is reported as an error; otherwise vmsg->size
 * payload bytes are read and the read is asserted to be complete.
 */
373 vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
375 char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { };
377 .iov_base = (char *)vmsg,
378 .iov_len = VHOST_USER_HDR_SIZE,
380 struct msghdr msg = {
383 .msg_control = control,
384 .msg_controllen = sizeof(control),
387 struct cmsghdr *cmsg;
390 rc = recvmsg(conn_fd, &msg, 0);
394 fprintf(stderr, "Peer disconnected.\n");
402 for (cmsg = CMSG_FIRSTHDR(&msg);
404 cmsg = CMSG_NXTHDR(&msg, cmsg))
406 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
407 fd_size = cmsg->cmsg_len - CMSG_LEN(0);
408 vmsg->fd_num = fd_size / sizeof(int);
409 memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
414 if (vmsg->size > sizeof(vmsg->payload)) {
416 "Error: too big message request: %d, size: vmsg->size: %u, "
417 "while sizeof(vmsg->payload) = %zu\n",
418 vmsg->request, vmsg->size, sizeof(vmsg->payload));
423 rc = read(conn_fd, &vmsg->payload, vmsg->size);
426 fprintf(stderr, "Peer disconnected.\n");
433 assert(rc == vmsg->size);
438 vubr_message_write(int conn_fd, VhostUserMsg *vmsg)
443 rc = write(conn_fd, vmsg, VHOST_USER_HDR_SIZE + vmsg->size);
444 } while (rc < 0 && errno == EINTR);
452 vubr_backend_udp_sendbuf(VubrDev *dev, uint8_t *buf, size_t len)
454 int slen = sizeof(struct sockaddr_in);
456 if (sendto(dev->backend_udp_sock, buf, len, 0,
457 (struct sockaddr *) &dev->backend_udp_dest, slen) == -1) {
458 vubr_die("sendto()");
463 vubr_backend_udp_recvbuf(VubrDev *dev, uint8_t *buf, size_t buflen)
465 int slen = sizeof(struct sockaddr_in);
468 rc = recvfrom(dev->backend_udp_sock, buf, buflen, 0,
469 (struct sockaddr *) &dev->backend_udp_dest,
472 vubr_die("recvfrom()");
479 vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len)
481 int hdrlen = dev->hdrlen;
482 DPRINT(" hdrlen = %d\n", dev->hdrlen);
484 if (VHOST_USER_BRIDGE_DEBUG) {
485 print_buffer(buf, len);
487 vubr_backend_udp_sendbuf(dev, buf + hdrlen, len - hdrlen);
490 /* Kick the log_call_fd if required. */
492 vubr_log_kick(VubrDev *dev)
494 if (dev->log_call_fd != -1) {
495 DPRINT("Kicking the QEMU's log...\n");
496 eventfd_write(dev->log_call_fd, 1);
500 /* Kick the guest if necessary. */
502 vubr_virtqueue_kick(VubrVirtq *vq)
504 if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
505 DPRINT("Kicking the guest...\n");
506 eventfd_write(vq->call_fd, 1);
/*
 * Mark page number @page dirty in @log_table, a bitmap holding one bit per
 * page (eight pages per byte).  The bit is set with an atomic OR.
 */
static void
vubr_log_page(uint8_t *log_table, uint64_t page)
{
    uint8_t *slot = &log_table[page / 8];

    DPRINT("Logged dirty guest page: %"PRId64"\n", page);
    atomic_or(slot, 1 << (page % 8));
}
518 vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length)
522 if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) ||
523 !dev->log_table || !length) {
527 assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8));
529 page = address / VHOST_LOG_PAGE;
530 while (page * VHOST_LOG_PAGE < address + length) {
531 vubr_log_page(dev->log_table, page);
532 page += VHOST_LOG_PAGE;
/*
 * Deliver @len bytes from @buf to the guest through virtqueue @vq.
 *
 * The next available ring entry (last_avail_index % size) selects the
 * head descriptor.  Data is copied chunk by chunk into the descriptors'
 * buffers (translated with gpa_to_va()), each write being recorded with
 * vubr_log_write().  Copying stops when everything has been written or a
 * descriptor has no VRING_DESC_F_NEXT flag; a non-writable descriptor or
 * leftover data is reported as an error.  The head index and @len are then
 * stored in the used ring, both ring indexes are advanced, used->idx is
 * published and the guest is kicked.
 *
 * The caller must have checked that an available descriptor exists; this
 * is asserted.
 */
538 vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
540 struct vring_desc *desc = vq->desc;
541 struct vring_avail *avail = vq->avail;
542 struct vring_used *used = vq->used;
543 uint64_t log_guest_addr = vq->log_guest_addr;
544 int32_t remaining_len = len;
546 unsigned int size = vq->size;
548 uint16_t avail_index = atomic_mb_read(&avail->idx);
550 /* We check the available descriptors before posting the
551 * buffer, so here we assume that enough available
553 assert(vq->last_avail_index != avail_index);
554 uint16_t a_index = vq->last_avail_index % size;
555 uint16_t u_index = vq->last_used_index % size;
556 uint16_t d_index = avail->ring[a_index];
559 uint32_t written_len = 0;
562 DPRINT("Post packet to guest on vq:\n");
563 DPRINT(" size = %d\n", vq->size);
564 DPRINT(" last_avail_index = %d\n", vq->last_avail_index);
565 DPRINT(" last_used_index = %d\n", vq->last_used_index);
566 DPRINT(" a_index = %d\n", a_index);
567 DPRINT(" u_index = %d\n", u_index);
568 DPRINT(" d_index = %d\n", d_index);
569 DPRINT(" desc[%d].addr = 0x%016"PRIx64"\n", i, desc[i].addr);
570 DPRINT(" desc[%d].len = %d\n", i, desc[i].len);
571 DPRINT(" desc[%d].flags = %d\n", i, desc[i].flags);
572 DPRINT(" avail->idx = %d\n", avail_index);
573 DPRINT(" used->idx = %d\n", used->idx);
575 if (!(desc[i].flags & VRING_DESC_F_WRITE)) {
576 /* FIXME: we should find a writable descriptor. */
577 fprintf(stderr, "Error: descriptor is not writable. Exiting.\n");
581 void *chunk_start = (void *)(uintptr_t)gpa_to_va(dev, desc[i].addr);
582 uint32_t chunk_len = desc[i].len;
583 uint32_t chunk_write_len = MIN(remaining_len, chunk_len);
585 memcpy(chunk_start, buf + written_len, chunk_write_len);
586 vubr_log_write(dev, desc[i].addr, chunk_write_len);
587 remaining_len -= chunk_write_len;
588 written_len += chunk_write_len;
590 if ((remaining_len == 0) || !(desc[i].flags & VRING_DESC_F_NEXT)) {
597 if (remaining_len > 0) {
599 "Too long packet for RX, remaining_len = %d, Dropping...\n",
604 /* Add descriptor to the used ring. */
605 used->ring[u_index].id = d_index;
606 used->ring[u_index].len = len;
608 log_guest_addr + offsetof(struct vring_used, ring[u_index]),
609 sizeof(used->ring[u_index]));
611 vq->last_avail_index++;
612 vq->last_used_index++;
614 atomic_mb_set(&used->idx, vq->last_used_index);
616 log_guest_addr + offsetof(struct vring_used, idx),
619 /* Kick the guest if necessary. */
620 vubr_virtqueue_kick(vq);
/*
 * Consume one packet that the guest placed on virtqueue @vq.
 *
 * The available ring entry at last_avail_index % size selects the head
 * descriptor.  Each descriptor's buffer (translated with gpa_to_va()) is
 * asserted to be device-readable and appended to a local buffer of
 * buf_size (4096) bytes; a chunk that would not fit is reported as a too
 * long packet.  Gathering stops at a descriptor without VRING_DESC_F_NEXT.
 * The head index and gathered length are stored in the used ring and the
 * data is handed to vubr_consume_raw_packet().
 *
 * The ring indexes themselves are advanced by the caller
 * (vubr_process_avail()).
 */
624 vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
626 struct vring_desc *desc = vq->desc;
627 struct vring_avail *avail = vq->avail;
628 struct vring_used *used = vq->used;
629 uint64_t log_guest_addr = vq->log_guest_addr;
631 unsigned int size = vq->size;
633 uint16_t a_index = vq->last_avail_index % size;
634 uint16_t u_index = vq->last_used_index % size;
635 uint16_t d_index = avail->ring[a_index];
638 size_t buf_size = 4096;
644 void *chunk_start = (void *)(uintptr_t)gpa_to_va(dev, desc[i].addr);
645 uint32_t chunk_len = desc[i].len;
647 assert(!(desc[i].flags & VRING_DESC_F_WRITE));
649 if (len + chunk_len < buf_size) {
650 memcpy(buf + len, chunk_start, chunk_len);
651 DPRINT("%d ", chunk_len);
653 fprintf(stderr, "Error: too long packet. Dropping...\n");
659 if (!(desc[i].flags & VRING_DESC_F_NEXT)) {
671 /* Add descriptor to the used ring. */
672 used->ring[u_index].id = d_index;
673 used->ring[u_index].len = len;
675 log_guest_addr + offsetof(struct vring_used, ring[u_index]),
676 sizeof(used->ring[u_index]));
678 vubr_consume_raw_packet(dev, buf, len);
/*
 * Drain virtqueue @vq: while the guest's avail->idx is ahead of
 * last_avail_index, process one descriptor chain with vubr_process_desc()
 * and advance both last_avail_index and last_used_index.  Afterwards the
 * new used index is published to the guest with a barrier-protected store.
 */
684 vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
686 struct vring_avail *avail = vq->avail;
687 struct vring_used *used = vq->used;
688 uint64_t log_guest_addr = vq->log_guest_addr;
690 while (vq->last_avail_index != atomic_mb_read(&avail->idx)) {
691 vubr_process_desc(dev, vq);
692 vq->last_avail_index++;
693 vq->last_used_index++;
696 atomic_mb_set(&used->idx, vq->last_used_index);
698 log_guest_addr + offsetof(struct vring_used, idx),
/*
 * Dispatcher callback for the UDP backend socket; @ctx is the VubrDev.
 *
 * If virtqueue 0 (used here as the receive queue) has an available
 * descriptor, a zeroed header of dev->hdrlen bytes with num_buffers = 1 is
 * prepared, one datagram is received right after it and the whole buffer
 * is posted to the guest with vubr_post_buffer().
 */
703 vubr_backend_recv_cb(int sock, void *ctx)
705 VubrDev *dev = (VubrDev *) ctx;
706 VubrVirtq *rx_vq = &dev->vq[0];
708 struct virtio_net_hdr_v1 *hdr = (struct virtio_net_hdr_v1 *)buf;
709 int hdrlen = dev->hdrlen;
710 int buflen = sizeof(buf);
717 DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n");
718 DPRINT(" hdrlen = %d\n", hdrlen);
720 uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);
722 /* If there are no available descriptors, just do nothing.
723 * The buffer will be handled by the next arrived UDP packet,
724 * or the next kick on the receive virtq. */
725 if (rx_vq->last_avail_index == avail_index) {
726 DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
730 memset(buf, 0, hdrlen);
731 /* TODO: support mergeable buffers. */
733 hdr->num_buffers = 1;
734 len = vubr_backend_udp_recvbuf(dev, buf + hdrlen, buflen - hdrlen);
736 vubr_post_buffer(dev, rx_vq, buf, len + hdrlen);
740 vubr_kick_cb(int sock, void *ctx)
742 VubrDev *dev = (VubrDev *) ctx;
746 rc = eventfd_read(sock, &kick_data);
748 vubr_die("eventfd_read()");
750 DPRINT("Got kick_data: %016"PRIx64"\n", kick_data);
751 vubr_process_avail(dev, &dev->vq[1]);
756 vubr_none_exec(VubrDev *dev, VhostUserMsg *vmsg)
758 DPRINT("Function %s() not implemented yet.\n", __func__);
763 vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
766 ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
767 (1ULL << VHOST_F_LOG_ALL) |
768 (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
769 (1ULL << VHOST_USER_F_PROTOCOL_FEATURES));
771 vmsg->size = sizeof(vmsg->payload.u64);
773 DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
780 vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
782 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
784 dev->features = vmsg->payload.u64;
785 if ((dev->features & (1ULL << VIRTIO_F_VERSION_1)) ||
786 (dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))) {
796 vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
802 vubr_close_log(VubrDev *dev)
804 if (dev->log_table) {
805 if (munmap(dev->log_table, dev->log_size) != 0) {
806 vubr_die("munmap()");
811 if (dev->log_call_fd != -1) {
812 close(dev->log_call_fd);
813 dev->log_call_fd = -1;
818 vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
/*
 * Handle VHOST_USER_SET_MEM_TABLE: record the guest memory layout.
 *
 * For every region in the message the guest physical address, size, QEMU
 * userspace address and mmap offset are copied into dev->regions[], and
 * the region is mapped read/write and shared.  The mapping covers
 * size + mmap_offset bytes; the resulting base address is stored in
 * mmap_addr for later use by gpa_to_va()/qva_to_va().
 *
 * NOTE(review): memory->nregions comes from the peer and is used as the
 * loop bound for dev->regions[] (VHOST_MEMORY_MAX_NREGIONS entries); no
 * range check is visible here — confirm it is validated.
 */
827 vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
830 VhostUserMemory *memory = &vmsg->payload.memory;
831 dev->nregions = memory->nregions;
833 DPRINT("Nregions: %d\n", memory->nregions);
834 for (i = 0; i < dev->nregions; i++) {
836 VhostUserMemoryRegion *msg_region = &memory->regions[i];
837 VubrDevRegion *dev_region = &dev->regions[i];
839 DPRINT("Region %d\n", i);
840 DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n",
841 msg_region->guest_phys_addr);
842 DPRINT(" memory_size: 0x%016"PRIx64"\n",
843 msg_region->memory_size);
844 DPRINT(" userspace_addr 0x%016"PRIx64"\n",
845 msg_region->userspace_addr);
846 DPRINT(" mmap_offset 0x%016"PRIx64"\n",
847 msg_region->mmap_offset);
849 dev_region->gpa = msg_region->guest_phys_addr;
850 dev_region->size = msg_region->memory_size;
851 dev_region->qva = msg_region->userspace_addr;
852 dev_region->mmap_offset = msg_region->mmap_offset;
854 /* We don't use the offset argument of mmap() since the
855 * mapped address has to be page aligned, and we use huge
857 mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
858 PROT_READ | PROT_WRITE, MAP_SHARED,
861 if (mmap_addr == MAP_FAILED) {
864 dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
865 DPRINT(" mmap_addr: 0x%016"PRIx64"\n", dev_region->mmap_addr);
874 vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
877 uint64_t log_mmap_size, log_mmap_offset;
880 assert(vmsg->fd_num == 1);
883 assert(vmsg->size == sizeof(vmsg->payload.log));
884 log_mmap_offset = vmsg->payload.log.mmap_offset;
885 log_mmap_size = vmsg->payload.log.mmap_size;
886 DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
887 DPRINT("Log mmap_size: %"PRId64"\n", log_mmap_size);
889 rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
891 if (rc == MAP_FAILED) {
895 dev->log_size = log_mmap_size;
897 vmsg->size = sizeof(vmsg->payload.u64);
903 vubr_set_log_fd_exec(VubrDev *dev, VhostUserMsg *vmsg)
905 assert(vmsg->fd_num == 1);
906 dev->log_call_fd = vmsg->fds[0];
907 DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]);
912 vubr_set_vring_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
914 unsigned int index = vmsg->payload.state.index;
915 unsigned int num = vmsg->payload.state.num;
917 DPRINT("State.index: %d\n", index);
918 DPRINT("State.num: %d\n", num);
919 dev->vq[index].size = num;
924 vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
926 struct vhost_vring_addr *vra = &vmsg->payload.addr;
927 unsigned int index = vra->index;
928 VubrVirtq *vq = &dev->vq[index];
930 DPRINT("vhost_vring_addr:\n");
931 DPRINT(" index: %d\n", vra->index);
932 DPRINT(" flags: %d\n", vra->flags);
933 DPRINT(" desc_user_addr: 0x%016llx\n", vra->desc_user_addr);
934 DPRINT(" used_user_addr: 0x%016llx\n", vra->used_user_addr);
935 DPRINT(" avail_user_addr: 0x%016llx\n", vra->avail_user_addr);
936 DPRINT(" log_guest_addr: 0x%016llx\n", vra->log_guest_addr);
938 vq->desc = (struct vring_desc *)(uintptr_t)qva_to_va(dev, vra->desc_user_addr);
939 vq->used = (struct vring_used *)(uintptr_t)qva_to_va(dev, vra->used_user_addr);
940 vq->avail = (struct vring_avail *)(uintptr_t)qva_to_va(dev, vra->avail_user_addr);
941 vq->log_guest_addr = vra->log_guest_addr;
943 DPRINT("Setting virtq addresses:\n");
944 DPRINT(" vring_desc at %p\n", vq->desc);
945 DPRINT(" vring_used at %p\n", vq->used);
946 DPRINT(" vring_avail at %p\n", vq->avail);
948 vq->last_used_index = vq->used->idx;
953 vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
955 unsigned int index = vmsg->payload.state.index;
956 unsigned int num = vmsg->payload.state.num;
958 DPRINT("State.index: %d\n", index);
959 DPRINT("State.num: %d\n", num);
960 dev->vq[index].last_avail_index = num;
/*
 * Handle VHOST_USER_GET_VRING_BASE: report last_avail_index of the
 * virtqueue selected by state.index back in state.num and size the reply
 * accordingly.  The queue's call and kick descriptors, when set, are
 * closed, removed from the dispatcher and reset to -1.
 *
 * NOTE(review): state.index comes from the peer and indexes dev->vq[]
 * (MAX_NR_VIRTQUEUE entries) without a visible range check; the
 * descriptors are also closed before dispatcher_remove() is given their
 * number — confirm both are intended.
 */
966 vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
968 unsigned int index = vmsg->payload.state.index;
970 DPRINT("State.index: %d\n", index);
971 vmsg->payload.state.num = dev->vq[index].last_avail_index;
972 vmsg->size = sizeof(vmsg->payload.state);
973 /* FIXME: this is a work-around for a bug in QEMU enabling
974 * vrings too early. When protocol features are enabled,
975 * we have to respect the VHOST_USER_SET_VRING_ENABLE request. */
978 if (dev->vq[index].call_fd != -1) {
979 close(dev->vq[index].call_fd);
980 dispatcher_remove(&dev->dispatcher, dev->vq[index].call_fd);
981 dev->vq[index].call_fd = -1;
983 if (dev->vq[index].kick_fd != -1) {
984 close(dev->vq[index].kick_fd);
985 dispatcher_remove(&dev->dispatcher, dev->vq[index].kick_fd);
986 dev->vq[index].kick_fd = -1;
/*
 * Handle VHOST_USER_SET_VRING_KICK: install the eventfd on which the guest
 * signals new buffers for a virtqueue.
 *
 * The queue index is the low byte of payload.u64 (VHOST_USER_VRING_IDX_MASK);
 * the "no fd" flag must be clear and exactly one descriptor must accompany
 * the message.  A previously installed kick descriptor is closed and
 * removed from the dispatcher.  For odd-numbered queues the new descriptor
 * is added to the dispatcher so that kicks are processed.
 *
 * NOTE(review): the masked index can be as large as 255 while dev->vq[]
 * has MAX_NR_VIRTQUEUE entries; no range check is visible here.
 */
994 vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
996 uint64_t u64_arg = vmsg->payload.u64;
997 int index = u64_arg & VHOST_USER_VRING_IDX_MASK;
999 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
1001 assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
1002 assert(vmsg->fd_num == 1);
1004 if (dev->vq[index].kick_fd != -1) {
1005 close(dev->vq[index].kick_fd);
1006 dispatcher_remove(&dev->dispatcher, dev->vq[index].kick_fd);
1008 dev->vq[index].kick_fd = vmsg->fds[0];
1009 DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);
1011 if (index % 2 == 1) {
1013 dispatcher_add(&dev->dispatcher, dev->vq[index].kick_fd,
1016 DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
1017 dev->vq[index].kick_fd, index);
1019 /* We temporarily use this hack to determine that both TX and RX
1020 * queues are set up and ready for processing.
1021 * FIXME: we need to rely on VHOST_USER_SET_VRING_ENABLE and
1023 if (dev->vq[0].kick_fd != -1 &&
1024 dev->vq[1].kick_fd != -1) {
1026 DPRINT("vhost-user-bridge is ready for processing queues.\n");
1033 vubr_set_vring_call_exec(VubrDev *dev, VhostUserMsg *vmsg)
1035 uint64_t u64_arg = vmsg->payload.u64;
1036 int index = u64_arg & VHOST_USER_VRING_IDX_MASK;
1038 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
1039 assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
1040 assert(vmsg->fd_num == 1);
1042 if (dev->vq[index].call_fd != -1) {
1043 close(dev->vq[index].call_fd);
1044 dispatcher_remove(&dev->dispatcher, dev->vq[index].call_fd);
1046 dev->vq[index].call_fd = vmsg->fds[0];
1047 DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);
1053 vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg)
1055 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
1060 vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
1062 vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
1063 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
1064 vmsg->size = sizeof(vmsg->payload.u64);
/*
 * Handle VHOST_USER_SET_PROTOCOL_FEATURES.  The visible code only prints
 * the requested feature bits.
 */
1071 vubr_set_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
1073 /* FIXME: unimplemented */
1074 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
1079 vubr_get_queue_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
1081 DPRINT("Function %s() not implemented yet.\n", __func__);
1086 vubr_set_vring_enable_exec(VubrDev *dev, VhostUserMsg *vmsg)
1088 unsigned int index = vmsg->payload.state.index;
1089 unsigned int enable = vmsg->payload.state.num;
1091 DPRINT("State.index: %d\n", index);
1092 DPRINT("State.enable: %d\n", enable);
1093 dev->vq[index].enable = enable;
1098 vubr_send_rarp_exec(VubrDev *dev, VhostUserMsg *vmsg)
1100 DPRINT("Function %s() not implemented yet.\n", __func__);
/*
 * Log the generic part of a received vhost-user message (request name and
 * number, flags, payload size, attached descriptors) and dispatch it to
 * the handler for its request type.  The handler's return value is passed
 * through; the caller treats a non-zero value as "send a reply".
 *
 * NOTE(review): vmsg->request comes from the peer and is used to index
 * vubr_request_str[] for the debug print before any visible validation —
 * confirm out-of-range request numbers cannot reach that lookup.
 */
1105 vubr_execute_request(VubrDev *dev, VhostUserMsg *vmsg)
1107 /* Print out generic part of the request. */
1109 "================== Vhost user message from QEMU ==================\n");
1110 DPRINT("Request: %s (%d)\n", vubr_request_str[vmsg->request],
1112 DPRINT("Flags: 0x%x\n", vmsg->flags);
1113 DPRINT("Size: %d\n", vmsg->size);
1118 for (i = 0; i < vmsg->fd_num; i++) {
1119 DPRINT(" %d", vmsg->fds[i]);
1124 switch (vmsg->request) {
1125 case VHOST_USER_NONE:
1126 return vubr_none_exec(dev, vmsg);
1127 case VHOST_USER_GET_FEATURES:
1128 return vubr_get_features_exec(dev, vmsg);
1129 case VHOST_USER_SET_FEATURES:
1130 return vubr_set_features_exec(dev, vmsg);
1131 case VHOST_USER_SET_OWNER:
1132 return vubr_set_owner_exec(dev, vmsg);
1133 case VHOST_USER_RESET_OWNER:
1134 return vubr_reset_device_exec(dev, vmsg);
1135 case VHOST_USER_SET_MEM_TABLE:
1136 return vubr_set_mem_table_exec(dev, vmsg);
1137 case VHOST_USER_SET_LOG_BASE:
1138 return vubr_set_log_base_exec(dev, vmsg);
1139 case VHOST_USER_SET_LOG_FD:
1140 return vubr_set_log_fd_exec(dev, vmsg);
1141 case VHOST_USER_SET_VRING_NUM:
1142 return vubr_set_vring_num_exec(dev, vmsg);
1143 case VHOST_USER_SET_VRING_ADDR:
1144 return vubr_set_vring_addr_exec(dev, vmsg);
1145 case VHOST_USER_SET_VRING_BASE:
1146 return vubr_set_vring_base_exec(dev, vmsg);
1147 case VHOST_USER_GET_VRING_BASE:
1148 return vubr_get_vring_base_exec(dev, vmsg);
1149 case VHOST_USER_SET_VRING_KICK:
1150 return vubr_set_vring_kick_exec(dev, vmsg);
1151 case VHOST_USER_SET_VRING_CALL:
1152 return vubr_set_vring_call_exec(dev, vmsg);
1153 case VHOST_USER_SET_VRING_ERR:
1154 return vubr_set_vring_err_exec(dev, vmsg);
1155 case VHOST_USER_GET_PROTOCOL_FEATURES:
1156 return vubr_get_protocol_features_exec(dev, vmsg);
1157 case VHOST_USER_SET_PROTOCOL_FEATURES:
1158 return vubr_set_protocol_features_exec(dev, vmsg);
1159 case VHOST_USER_GET_QUEUE_NUM:
1160 return vubr_get_queue_num_exec(dev, vmsg);
1161 case VHOST_USER_SET_VRING_ENABLE:
1162 return vubr_set_vring_enable_exec(dev, vmsg);
1163 case VHOST_USER_SEND_RARP:
1164 return vubr_send_rarp_exec(dev, vmsg);
1166 case VHOST_USER_MAX:
1167 assert(vmsg->request != VHOST_USER_MAX);
1173 vubr_receive_cb(int sock, void *ctx)
1175 VubrDev *dev = (VubrDev *) ctx;
1177 int reply_requested;
1179 vubr_message_read(sock, &vmsg);
1180 reply_requested = vubr_execute_request(dev, &vmsg);
1181 if (reply_requested) {
1182 /* Set the version in the flags when sending the reply */
1183 vmsg.flags &= ~VHOST_USER_VERSION_MASK;
1184 vmsg.flags |= VHOST_USER_VERSION;
1185 vmsg.flags |= VHOST_USER_REPLY_MASK;
1186 vubr_message_write(sock, &vmsg);
1191 vubr_accept_cb(int sock, void *ctx)
1193 VubrDev *dev = (VubrDev *)ctx;
1195 struct sockaddr_un un;
1196 socklen_t len = sizeof(un);
1198 conn_fd = accept(sock, (struct sockaddr *) &un, &len);
1199 if (conn_fd == -1) {
1200 vubr_die("accept()");
1202 DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
1203 dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb);
/*
 * Allocate a zeroed VubrDev and start listening for a vhost-user peer on
 * the UNIX stream socket bound to @path.
 *
 * Every virtqueue starts with call_fd/kick_fd set to -1, ring indexes at 0
 * and no ring pointers; log_call_fd starts at -1.  The listening socket is
 * registered with the device's dispatcher.
 *
 * NOTE(review): @path is copied into un.sun_path with strcpy() and no
 * length check is visible here; a path longer than sun_path would
 * overflow it — confirm or bound the copy.
 */
1207 vubr_new(const char *path)
1209 VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev));
1212 struct sockaddr_un un;
1215 for (i = 0; i < MAX_NR_VIRTQUEUE; i++) {
1216 dev->vq[i] = (VubrVirtq) {
1217 .call_fd = -1, .kick_fd = -1,
1219 .last_avail_index = 0, .last_used_index = 0,
1220 .desc = 0, .avail = 0, .used = 0,
1226 dev->log_call_fd = -1;
1232 /* Get a UNIX socket. */
1233 dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
1234 if (dev->sock == -1) {
1238 un.sun_family = AF_UNIX;
1239 strcpy(un.sun_path, path);
1240 len = sizeof(un.sun_family) + strlen(path);
1243 if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) {
1247 if (listen(dev->sock, 1) == -1) {
1251 dispatcher_init(&dev->dispatcher);
1252 dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev,
1255 DPRINT("Waiting for connections on UNIX socket %s ...\n", path);
1260 vubr_set_host(struct sockaddr_in *saddr, const char *host)
1262 if (isdigit(host[0])) {
1263 if (!inet_aton(host, &saddr->sin_addr)) {
1264 fprintf(stderr, "inet_aton() failed.\n");
1268 struct hostent *he = gethostbyname(host);
1271 fprintf(stderr, "gethostbyname() failed.\n");
1274 saddr->sin_addr = *(struct in_addr *)he->h_addr;
1279 vubr_backend_udp_setup(VubrDev *dev,
1280 const char *local_host,
1281 const char *local_port,
1282 const char *remote_host,
1283 const char *remote_port)
1290 lport = strtol(local_port, (char **)&r, 0);
1291 if (r == local_port) {
1292 fprintf(stderr, "lport parsing failed.\n");
1296 rport = strtol(remote_port, (char **)&r, 0);
1297 if (r == remote_port) {
1298 fprintf(stderr, "rport parsing failed.\n");
1302 struct sockaddr_in si_local = {
1303 .sin_family = AF_INET,
1304 .sin_port = htons(lport),
1307 vubr_set_host(&si_local, local_host);
1309 /* setup destination for sends */
1310 dev->backend_udp_dest = (struct sockaddr_in) {
1311 .sin_family = AF_INET,
1312 .sin_port = htons(rport),
1314 vubr_set_host(&dev->backend_udp_dest, remote_host);
1316 sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
1321 if (bind(sock, (struct sockaddr *)&si_local, sizeof(si_local)) == -1) {
1325 dev->backend_udp_sock = sock;
1326 dispatcher_add(&dev->dispatcher, sock, dev, vubr_backend_recv_cb);
1327 DPRINT("Waiting for data from udp backend on %s:%d...\n",
1332 vubr_run(VubrDev *dev)
1336 dispatcher_wait(&dev->dispatcher, 200000);
1337 /* Here one can try polling strategy. */
/*
 * Split a "host:port" string at its first ':'.
 *
 * On success, newly allocated copies of the text before and after the
 * separator are stored in *host and *port (the caller owns them) and 0 is
 * returned.  Returns -1, leaving *host and *port untouched, when @buf has
 * no ':' or memory cannot be allocated.  @buf itself is never modified.
 */
static int
vubr_parse_host_port(const char **host, const char **port, const char *buf)
{
    const char *sep = strchr(buf, ':');
    size_t host_len;
    size_t port_len;
    char *host_copy;
    char *port_copy;

    if (!sep) {
        return -1;
    }

    host_len = (size_t)(sep - buf);
    port_len = strlen(sep + 1);

    /* Copy both halves instead of cutting the caller's (const) string. */
    host_copy = malloc(host_len + 1);
    port_copy = malloc(port_len + 1);
    if (!host_copy || !port_copy) {
        free(host_copy);
        free(port_copy);
        return -1;
    }

    memcpy(host_copy, buf, host_len);
    host_copy[host_len] = '\0';
    memcpy(port_copy, sep + 1, port_len + 1);

    *host = host_copy;
    *port = port_copy;
    return 0;
}
1355 #define DEFAULT_UD_SOCKET "/tmp/vubr.sock"
1356 #define DEFAULT_LHOST "127.0.0.1"
1357 #define DEFAULT_LPORT "4444"
1358 #define DEFAULT_RHOST "127.0.0.1"
1359 #define DEFAULT_RPORT "5555"
1361 static const char *ud_socket_path = DEFAULT_UD_SOCKET;
1362 static const char *lhost = DEFAULT_LHOST;
1363 static const char *lport = DEFAULT_LPORT;
1364 static const char *rhost = DEFAULT_RHOST;
1365 static const char *rport = DEFAULT_RPORT;
/*
 * Program entry point.
 *
 * Options: -l lhost:lport (local UDP endpoint), -r rhost:rport (remote UDP
 * endpoint) and -u path (UNIX socket for the vhost-user peer); each falls
 * back to its DEFAULT_* value.  After printing the effective settings the
 * device is created with vubr_new() and the UDP backend is set up.  The
 * trailing fprintf() calls print the usage text.
 *
 * NOTE(review): the usage text spells "doman" (for "domain"); it is a
 * runtime string and is left unchanged here.
 */
1368 main(int argc, char *argv[])
1373 while ((opt = getopt(argc, argv, "l:r:u:")) != -1) {
1377 if (vubr_parse_host_port(&lhost, &lport, optarg) < 0) {
1382 if (vubr_parse_host_port(&rhost, &rport, optarg) < 0) {
1387 ud_socket_path = strdup(optarg);
1394 DPRINT("ud socket: %s\n", ud_socket_path);
1395 DPRINT("local: %s:%s\n", lhost, lport);
1396 DPRINT("remote: %s:%s\n", rhost, rport);
1398 dev = vubr_new(ud_socket_path);
1403 vubr_backend_udp_setup(dev, lhost, lport, rhost, rport);
1408 fprintf(stderr, "Usage: %s ", argv[0]);
1409 fprintf(stderr, "[-u ud_socket_path] [-l lhost:lport] [-r rhost:rport]\n");
1410 fprintf(stderr, "\t-u path to unix doman socket. default: %s\n",
1412 fprintf(stderr, "\t-l local host and port. default: %s:%s\n",
1413 DEFAULT_LHOST, DEFAULT_LPORT);
1414 fprintf(stderr, "\t-r remote host and port. default: %s:%s\n",
1415 DEFAULT_RHOST, DEFAULT_RPORT);