Git Repo - qemu.git/blame - hw/virtio/vhost-user.c
vhost user: add support of live migration
[qemu.git] / hw / virtio / vhost-user.c
CommitLineData
5f6f6664
NN
1/*
2 * vhost-user
3 *
4 * Copyright (c) 2013 Virtual Open Systems Sarl.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
8 *
9 */
10
11#include "hw/virtio/vhost.h"
12#include "hw/virtio/vhost-backend.h"
13#include "sysemu/char.h"
14#include "sysemu/kvm.h"
15#include "qemu/error-report.h"
16#include "qemu/sockets.h"
3fd74b84 17#include "exec/ram_addr.h"
d2fc4402 18#include "migration/migration.h"
5f6f6664
NN
19
20#include <fcntl.h>
21#include <unistd.h>
22#include <sys/ioctl.h>
23#include <sys/socket.h>
24#include <sys/un.h>
25#include <linux/vhost.h>
26
/* Number of entries in the region array of a memory-table message. */
#define VHOST_MEMORY_MAX_NREGIONS        8
/* Feature bit tested to decide whether protocol features are negotiated. */
#define VHOST_USER_F_PROTOCOL_FEATURES   30

/*
 * Protocol feature bit numbers, and the mask of the bits this file keeps
 * when it stores the backend's protocol features.
 */
#define VHOST_USER_PROTOCOL_FEATURE_MASK 0x3ULL
#define VHOST_USER_PROTOCOL_F_MQ         0
#define VHOST_USER_PROTOCOL_F_LOG_SHMFD  1
5f6f6664
NN
33
/*
 * Request codes stored in the 'request' field of a vhost-user message.
 * The numeric values are spelled out explicitly; VHOST_USER_MAX is one
 * past the last defined request.
 */
typedef enum VhostUserRequest {
    VHOST_USER_NONE                  = 0,
    VHOST_USER_GET_FEATURES          = 1,
    VHOST_USER_SET_FEATURES          = 2,
    VHOST_USER_SET_OWNER             = 3,
    VHOST_USER_RESET_DEVICE          = 4,
    VHOST_USER_SET_MEM_TABLE         = 5,
    VHOST_USER_SET_LOG_BASE          = 6,
    VHOST_USER_SET_LOG_FD            = 7,
    VHOST_USER_SET_VRING_NUM         = 8,
    VHOST_USER_SET_VRING_ADDR        = 9,
    VHOST_USER_SET_VRING_BASE        = 10,
    VHOST_USER_GET_VRING_BASE        = 11,
    VHOST_USER_SET_VRING_KICK        = 12,
    VHOST_USER_SET_VRING_CALL        = 13,
    VHOST_USER_SET_VRING_ERR         = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM         = 17,
    VHOST_USER_SET_VRING_ENABLE      = 18,
    VHOST_USER_MAX
} VhostUserRequest;
56
/* One entry of the memory table sent with VHOST_USER_SET_MEM_TABLE. */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;   /* copied from the vhost region */
    uint64_t memory_size;       /* copied from the vhost region */
    uint64_t userspace_addr;    /* copied from the vhost region */
    uint64_t mmap_offset;       /* userspace_addr minus RAM block host ptr */
} VhostUserMemoryRegion;
63
64typedef struct VhostUserMemory {
65 uint32_t nregions;
66 uint32_t padding;
67 VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
68} VhostUserMemory;
69
70typedef struct VhostUserMsg {
71 VhostUserRequest request;
72
73#define VHOST_USER_VERSION_MASK (0x3)
74#define VHOST_USER_REPLY_MASK (0x1<<2)
75 uint32_t flags;
76 uint32_t size; /* the following payload size */
77 union {
78#define VHOST_USER_VRING_IDX_MASK (0xff)
79#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
80 uint64_t u64;
81 struct vhost_vring_state state;
82 struct vhost_vring_addr addr;
83 VhostUserMemory memory;
84 };
85} QEMU_PACKED VhostUserMsg;
86
87static VhostUserMsg m __attribute__ ((unused));
88#define VHOST_USER_HDR_SIZE (sizeof(m.request) \
89 + sizeof(m.flags) \
90 + sizeof(m.size))
91
92#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)
93
94/* The version of the protocol we support */
95#define VHOST_USER_VERSION (0x1)
96
/* True only when KVM is enabled and KVM reports eventfds as enabled. */
static bool ioeventfd_enabled(void)
{
    if (!kvm_enabled()) {
        return false;
    }

    return kvm_eventfds_enabled();
}
101
5f6f6664
NN
102static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
103{
104 CharDriverState *chr = dev->opaque;
105 uint8_t *p = (uint8_t *) msg;
106 int r, size = VHOST_USER_HDR_SIZE;
107
108 r = qemu_chr_fe_read_all(chr, p, size);
109 if (r != size) {
ab7c5aaf 110 error_report("Failed to read msg header. Read %d instead of %d.", r,
5f6f6664
NN
111 size);
112 goto fail;
113 }
114
115 /* validate received flags */
116 if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
117 error_report("Failed to read msg header."
ab7c5aaf 118 " Flags 0x%x instead of 0x%x.", msg->flags,
5f6f6664
NN
119 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
120 goto fail;
121 }
122
123 /* validate message size is sane */
124 if (msg->size > VHOST_USER_PAYLOAD_SIZE) {
125 error_report("Failed to read msg header."
ab7c5aaf 126 " Size %d exceeds the maximum %zu.", msg->size,
5f6f6664
NN
127 VHOST_USER_PAYLOAD_SIZE);
128 goto fail;
129 }
130
131 if (msg->size) {
132 p += VHOST_USER_HDR_SIZE;
133 size = msg->size;
134 r = qemu_chr_fe_read_all(chr, p, size);
135 if (r != size) {
136 error_report("Failed to read msg payload."
ab7c5aaf 137 " Read %d instead of %d.", r, msg->size);
5f6f6664
NN
138 goto fail;
139 }
140 }
141
142 return 0;
143
144fail:
145 return -1;
146}
147
21e70425
MAL
148static bool vhost_user_one_time_request(VhostUserRequest request)
149{
150 switch (request) {
151 case VHOST_USER_SET_OWNER:
152 case VHOST_USER_RESET_DEVICE:
153 case VHOST_USER_SET_MEM_TABLE:
154 case VHOST_USER_GET_QUEUE_NUM:
155 return true;
156 default:
157 return false;
158 }
159}
160
161/* most non-init callers ignore the error */
5f6f6664
NN
162static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
163 int *fds, int fd_num)
164{
165 CharDriverState *chr = dev->opaque;
166 int size = VHOST_USER_HDR_SIZE + msg->size;
167
21e70425
MAL
168 /*
169 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
170 * we just need send it once in the first time. For later such
171 * request, we just ignore it.
172 */
173 if (vhost_user_one_time_request(msg->request) && dev->vq_index != 0) {
174 return 0;
175 }
176
5f6f6664
NN
177 if (fd_num) {
178 qemu_chr_fe_set_msgfds(chr, fds, fd_num);
179 }
180
181 return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ?
182 0 : -1;
183}
184
21e70425
MAL
185static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
186 struct vhost_log *log)
b931bfbf 187{
21e70425
MAL
188 int fds[VHOST_MEMORY_MAX_NREGIONS];
189 size_t fd_num = 0;
190 bool shmfd = virtio_has_feature(dev->protocol_features,
191 VHOST_USER_PROTOCOL_F_LOG_SHMFD);
192 VhostUserMsg msg = {
193 .request = VHOST_USER_SET_LOG_BASE,
194 .flags = VHOST_USER_VERSION,
195 .u64 = base,
196 .size = sizeof(m.u64),
197 };
198
199 if (shmfd && log->fd != -1) {
200 fds[fd_num++] = log->fd;
201 }
202
203 vhost_user_write(dev, &msg, fds, fd_num);
204
205 if (shmfd) {
206 msg.size = 0;
207 if (vhost_user_read(dev, &msg) < 0) {
208 return 0;
209 }
210
211 if (msg.request != VHOST_USER_SET_LOG_BASE) {
212 error_report("Received unexpected msg type. "
213 "Expected %d received %d",
214 VHOST_USER_SET_LOG_BASE, msg.request);
215 return -1;
216 }
b931bfbf 217 }
21e70425
MAL
218
219 return 0;
b931bfbf
CO
220}
221
21e70425
MAL
222static int vhost_user_set_mem_table(struct vhost_dev *dev,
223 struct vhost_memory *mem)
5f6f6664 224{
5f6f6664 225 int fds[VHOST_MEMORY_MAX_NREGIONS];
3fd74b84 226 int i, fd;
5f6f6664 227 size_t fd_num = 0;
21e70425
MAL
228 VhostUserMsg msg = {
229 .request = VHOST_USER_SET_MEM_TABLE,
230 .flags = VHOST_USER_VERSION,
231 };
5f6f6664 232
21e70425
MAL
233 for (i = 0; i < dev->mem->nregions; ++i) {
234 struct vhost_memory_region *reg = dev->mem->regions + i;
235 ram_addr_t ram_addr;
236
237 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
238 qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr,
239 &ram_addr);
240 fd = qemu_get_ram_fd(ram_addr);
241 if (fd > 0) {
242 msg.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
243 msg.memory.regions[fd_num].memory_size = reg->memory_size;
244 msg.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
245 msg.memory.regions[fd_num].mmap_offset = reg->userspace_addr -
246 (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr);
247 assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
248 fds[fd_num++] = fd;
249 }
7305483a
YL
250 }
251
21e70425
MAL
252 msg.memory.nregions = fd_num;
253
254 if (!fd_num) {
255 error_report("Failed initializing vhost-user memory map, "
256 "consider using -object memory-backend-file share=on");
257 return -1;
b931bfbf
CO
258 }
259
21e70425
MAL
260 msg.size = sizeof(m.memory.nregions);
261 msg.size += sizeof(m.memory.padding);
262 msg.size += fd_num * sizeof(VhostUserMemoryRegion);
5f6f6664 263
21e70425 264 vhost_user_write(dev, &msg, fds, fd_num);
5f6f6664 265
21e70425
MAL
266 return 0;
267}
5f6f6664 268
21e70425
MAL
269static int vhost_user_set_vring_addr(struct vhost_dev *dev,
270 struct vhost_vring_addr *addr)
271{
272 VhostUserMsg msg = {
273 .request = VHOST_USER_SET_VRING_ADDR,
274 .flags = VHOST_USER_VERSION,
275 .addr = *addr,
276 .size = sizeof(*addr),
277 };
5f6f6664 278
21e70425 279 vhost_user_write(dev, &msg, NULL, 0);
5f6f6664 280
21e70425
MAL
281 return 0;
282}
5f6f6664 283
21e70425
MAL
/*
 * Setting the vring endianness is not implemented for vhost-user:
 * report an error and fail without sending anything.
 */
static int vhost_user_set_vring_endian(struct vhost_dev *dev,
                                       struct vhost_vring_state *ring)
{
    error_report("vhost-user trying to send unhandled ioctl");

    return -1;
}
5f6f6664 290
21e70425
MAL
291static int vhost_set_vring(struct vhost_dev *dev,
292 unsigned long int request,
293 struct vhost_vring_state *ring)
294{
295 VhostUserMsg msg = {
296 .request = request,
297 .flags = VHOST_USER_VERSION,
298 .state = *ring,
299 .size = sizeof(*ring),
300 };
301
302 vhost_user_write(dev, &msg, NULL, 0);
303
304 return 0;
305}
306
307static int vhost_user_set_vring_num(struct vhost_dev *dev,
308 struct vhost_vring_state *ring)
309{
310 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
311}
312
313static int vhost_user_set_vring_base(struct vhost_dev *dev,
314 struct vhost_vring_state *ring)
315{
316 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
317}
318
319static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
320{
321 struct vhost_vring_state state = {
322 .index = dev->vq_index,
323 .num = enable,
324 };
325
326 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ))) {
5f6f6664 327 return -1;
5f6f6664
NN
328 }
329
21e70425
MAL
330 return vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
331}
332
333
334static int vhost_user_get_vring_base(struct vhost_dev *dev,
335 struct vhost_vring_state *ring)
336{
337 VhostUserMsg msg = {
338 .request = VHOST_USER_GET_VRING_BASE,
339 .flags = VHOST_USER_VERSION,
340 .state = *ring,
341 .size = sizeof(*ring),
342 };
343
344 vhost_user_write(dev, &msg, NULL, 0);
345
346 if (vhost_user_read(dev, &msg) < 0) {
5f6f6664
NN
347 return 0;
348 }
349
21e70425
MAL
350 if (msg.request != VHOST_USER_GET_VRING_BASE) {
351 error_report("Received unexpected msg type. Expected %d received %d",
352 VHOST_USER_GET_VRING_BASE, msg.request);
353 return -1;
354 }
5f6f6664 355
21e70425
MAL
356 if (msg.size != sizeof(m.state)) {
357 error_report("Received bad msg size.");
358 return -1;
5f6f6664
NN
359 }
360
21e70425
MAL
361 *ring = msg.state;
362
5f6f6664
NN
363 return 0;
364}
365
21e70425
MAL
366static int vhost_set_vring_file(struct vhost_dev *dev,
367 VhostUserRequest request,
368 struct vhost_vring_file *file)
c2bea314 369{
9a78a5dd
MAL
370 int fds[VHOST_MEMORY_MAX_NREGIONS];
371 size_t fd_num = 0;
c2bea314 372 VhostUserMsg msg = {
21e70425 373 .request = request,
c2bea314 374 .flags = VHOST_USER_VERSION,
21e70425 375 .u64 = file->index & VHOST_USER_VRING_IDX_MASK,
c2bea314
MAL
376 .size = sizeof(m.u64),
377 };
378
21e70425
MAL
379 if (ioeventfd_enabled() && file->fd > 0) {
380 fds[fd_num++] = file->fd;
381 } else {
382 msg.u64 |= VHOST_USER_VRING_NOFD_MASK;
9a78a5dd
MAL
383 }
384
385 vhost_user_write(dev, &msg, fds, fd_num);
386
21e70425
MAL
387 return 0;
388}
9a78a5dd 389
21e70425
MAL
390static int vhost_user_set_vring_kick(struct vhost_dev *dev,
391 struct vhost_vring_file *file)
392{
393 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
394}
395
396static int vhost_user_set_vring_call(struct vhost_dev *dev,
397 struct vhost_vring_file *file)
398{
399 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
400}
401
402static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64)
403{
404 VhostUserMsg msg = {
405 .request = request,
406 .flags = VHOST_USER_VERSION,
407 .u64 = u64,
408 .size = sizeof(m.u64),
409 };
410
411 vhost_user_write(dev, &msg, NULL, 0);
412
413 return 0;
414}
415
416static int vhost_user_set_features(struct vhost_dev *dev,
417 uint64_t features)
418{
419 return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features);
420}
421
422static int vhost_user_set_protocol_features(struct vhost_dev *dev,
423 uint64_t features)
424{
425 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features);
426}
427
428static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
429{
430 VhostUserMsg msg = {
431 .request = request,
432 .flags = VHOST_USER_VERSION,
433 };
434
435 if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
436 return 0;
9a78a5dd 437 }
c2bea314 438
21e70425
MAL
439 vhost_user_write(dev, &msg, NULL, 0);
440
441 if (vhost_user_read(dev, &msg) < 0) {
442 return 0;
443 }
444
445 if (msg.request != request) {
446 error_report("Received unexpected msg type. Expected %d received %d",
447 request, msg.request);
448 return -1;
449 }
450
451 if (msg.size != sizeof(m.u64)) {
452 error_report("Received bad msg size.");
453 return -1;
454 }
455
456 *u64 = msg.u64;
457
458 return 0;
459}
460
461static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
462{
463 return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features);
464}
465
466static int vhost_user_set_owner(struct vhost_dev *dev)
467{
468 VhostUserMsg msg = {
469 .request = VHOST_USER_SET_OWNER,
470 .flags = VHOST_USER_VERSION,
471 };
472
473 vhost_user_write(dev, &msg, NULL, 0);
474
475 return 0;
476}
477
478static int vhost_user_reset_device(struct vhost_dev *dev)
479{
480 VhostUserMsg msg = {
481 .request = VHOST_USER_RESET_DEVICE,
482 .flags = VHOST_USER_VERSION,
483 };
484
485 vhost_user_write(dev, &msg, NULL, 0);
486
c2bea314
MAL
487 return 0;
488}
489
5f6f6664
NN
490static int vhost_user_init(struct vhost_dev *dev, void *opaque)
491{
21e70425 492 uint64_t features;
dcb10c00
MT
493 int err;
494
5f6f6664
NN
495 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
496
497 dev->opaque = opaque;
498
21e70425 499 err = vhost_user_get_features(dev, &features);
dcb10c00
MT
500 if (err < 0) {
501 return err;
502 }
503
504 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
505 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
506
21e70425
MAL
507 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
508 &features);
dcb10c00
MT
509 if (err < 0) {
510 return err;
511 }
512
513 dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK;
21e70425 514 err = vhost_user_set_protocol_features(dev, dev->protocol_features);
dcb10c00
MT
515 if (err < 0) {
516 return err;
517 }
e2051e9e
YL
518
519 /* query the max queues we support if backend supports Multiple Queue */
520 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
21e70425
MAL
521 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
522 &dev->max_queues);
e2051e9e
YL
523 if (err < 0) {
524 return err;
525 }
526 }
dcb10c00
MT
527 }
528
d2fc4402
MAL
529 if (dev->migration_blocker == NULL &&
530 !virtio_has_feature(dev->protocol_features,
531 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
532 error_setg(&dev->migration_blocker,
533 "Migration disabled: vhost-user backend lacks "
534 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
535 }
536
5f6f6664
NN
537 return 0;
538}
539
540static int vhost_user_cleanup(struct vhost_dev *dev)
541{
542 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
543
544 dev->opaque = 0;
545
546 return 0;
547}
548
fc57fd99
YL
549static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
550{
551 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
552
553 return idx;
554}
555
2ce68e4c
IM
556static int vhost_user_memslots_limit(struct vhost_dev *dev)
557{
558 return VHOST_MEMORY_MAX_NREGIONS;
559}
560
1be0ac21
MAL
561static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
562{
563 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
564
565 return virtio_has_feature(dev->protocol_features,
566 VHOST_USER_PROTOCOL_F_LOG_SHMFD);
567}
568
5f6f6664
NN
569const VhostOps user_ops = {
570 .backend_type = VHOST_BACKEND_TYPE_USER,
5f6f6664 571 .vhost_backend_init = vhost_user_init,
fc57fd99 572 .vhost_backend_cleanup = vhost_user_cleanup,
2ce68e4c 573 .vhost_backend_memslots_limit = vhost_user_memslots_limit,
21e70425
MAL
574 .vhost_set_log_base = vhost_user_set_log_base,
575 .vhost_set_mem_table = vhost_user_set_mem_table,
576 .vhost_set_vring_addr = vhost_user_set_vring_addr,
577 .vhost_set_vring_endian = vhost_user_set_vring_endian,
578 .vhost_set_vring_num = vhost_user_set_vring_num,
579 .vhost_set_vring_base = vhost_user_set_vring_base,
580 .vhost_get_vring_base = vhost_user_get_vring_base,
581 .vhost_set_vring_kick = vhost_user_set_vring_kick,
582 .vhost_set_vring_call = vhost_user_set_vring_call,
583 .vhost_set_features = vhost_user_set_features,
584 .vhost_get_features = vhost_user_get_features,
585 .vhost_set_owner = vhost_user_set_owner,
586 .vhost_reset_device = vhost_user_reset_device,
587 .vhost_get_vq_index = vhost_user_get_vq_index,
588 .vhost_set_vring_enable = vhost_user_set_vring_enable,
1be0ac21 589 .vhost_requires_shm_log = vhost_user_requires_shm_log,
fc57fd99 590};
This page took 0.19264 seconds and 4 git commands to generate.