]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Virtio Support | |
3 | * | |
4 | * Copyright IBM, Corp. 2007 | |
5 | * | |
6 | * Authors: | |
7 | * Anthony Liguori <[email protected]> | |
8 | * | |
9 | * This work is licensed under the terms of the GNU GPL, version 2. See | |
10 | * the COPYING file in the top-level directory. | |
11 | * | |
12 | */ | |
13 | ||
14 | #include <inttypes.h> | |
15 | ||
16 | #include "trace.h" | |
17 | #include "qemu/error-report.h" | |
18 | #include "virtio.h" | |
19 | #include "qemu/atomic.h" | |
20 | #include "virtio-bus.h" | |
21 | ||
/* Alignment, in bytes, applied between the consumer and producer parts of
 * the vring; virtqueue_init() uses it to place the used ring. */
#define VIRTIO_PCI_VRING_ALIGN 4096
25 | ||
/* One entry of a descriptor table as it is laid out in guest memory. */
typedef struct VRingDesc {
    uint64_t addr;   /* guest address of the buffer (or of an indirect table) */
    uint32_t len;    /* length in bytes */
    uint16_t flags;  /* VRING_DESC_F_* bits */
    uint16_t next;   /* index of the following descriptor when chained */
} VRingDesc;
33 | ||
/* Header of the avail ring in guest memory, followed by the ring entries.
 * The entries are only ever addressed through offsetof(), so a C99 flexible
 * array member is used instead of the non-standard zero-length array. */
typedef struct VRingAvail {
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[];
} VRingAvail;
40 | ||
/* One entry of the used ring in guest memory. */
typedef struct VRingUsedElem {
    uint32_t id;   /* head index of the completed element */
    uint32_t len;  /* length value reported back for that element */
} VRingUsedElem;
46 | ||
47 | typedef struct VRingUsed | |
48 | { | |
49 | uint16_t flags; | |
50 | uint16_t idx; | |
51 | VRingUsedElem ring[0]; | |
52 | } VRingUsed; | |
53 | ||
54 | typedef struct VRing | |
55 | { | |
56 | unsigned int num; | |
57 | hwaddr desc; | |
58 | hwaddr avail; | |
59 | hwaddr used; | |
60 | } VRing; | |
61 | ||
62 | struct VirtQueue | |
63 | { | |
64 | VRing vring; | |
65 | hwaddr pa; | |
66 | uint16_t last_avail_idx; | |
67 | /* Last used index value we have signalled on */ | |
68 | uint16_t signalled_used; | |
69 | ||
70 | /* Last used index value we have signalled on */ | |
71 | bool signalled_used_valid; | |
72 | ||
73 | /* Notification enabled? */ | |
74 | bool notification; | |
75 | ||
76 | int inuse; | |
77 | ||
78 | uint16_t vector; | |
79 | void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq); | |
80 | VirtIODevice *vdev; | |
81 | EventNotifier guest_notifier; | |
82 | EventNotifier host_notifier; | |
83 | }; | |
84 | ||
85 | /* virt queue functions */ | |
86 | static void virtqueue_init(VirtQueue *vq) | |
87 | { | |
88 | hwaddr pa = vq->pa; | |
89 | ||
90 | vq->vring.desc = pa; | |
91 | vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc); | |
92 | vq->vring.used = vring_align(vq->vring.avail + | |
93 | offsetof(VRingAvail, ring[vq->vring.num]), | |
94 | VIRTIO_PCI_VRING_ALIGN); | |
95 | } | |
96 | ||
97 | static inline uint64_t vring_desc_addr(hwaddr desc_pa, int i) | |
98 | { | |
99 | hwaddr pa; | |
100 | pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr); | |
101 | return ldq_phys(pa); | |
102 | } | |
103 | ||
104 | static inline uint32_t vring_desc_len(hwaddr desc_pa, int i) | |
105 | { | |
106 | hwaddr pa; | |
107 | pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len); | |
108 | return ldl_phys(pa); | |
109 | } | |
110 | ||
111 | static inline uint16_t vring_desc_flags(hwaddr desc_pa, int i) | |
112 | { | |
113 | hwaddr pa; | |
114 | pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags); | |
115 | return lduw_phys(pa); | |
116 | } | |
117 | ||
118 | static inline uint16_t vring_desc_next(hwaddr desc_pa, int i) | |
119 | { | |
120 | hwaddr pa; | |
121 | pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next); | |
122 | return lduw_phys(pa); | |
123 | } | |
124 | ||
125 | static inline uint16_t vring_avail_flags(VirtQueue *vq) | |
126 | { | |
127 | hwaddr pa; | |
128 | pa = vq->vring.avail + offsetof(VRingAvail, flags); | |
129 | return lduw_phys(pa); | |
130 | } | |
131 | ||
132 | static inline uint16_t vring_avail_idx(VirtQueue *vq) | |
133 | { | |
134 | hwaddr pa; | |
135 | pa = vq->vring.avail + offsetof(VRingAvail, idx); | |
136 | return lduw_phys(pa); | |
137 | } | |
138 | ||
139 | static inline uint16_t vring_avail_ring(VirtQueue *vq, int i) | |
140 | { | |
141 | hwaddr pa; | |
142 | pa = vq->vring.avail + offsetof(VRingAvail, ring[i]); | |
143 | return lduw_phys(pa); | |
144 | } | |
145 | ||
146 | static inline uint16_t vring_used_event(VirtQueue *vq) | |
147 | { | |
148 | return vring_avail_ring(vq, vq->vring.num); | |
149 | } | |
150 | ||
151 | static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val) | |
152 | { | |
153 | hwaddr pa; | |
154 | pa = vq->vring.used + offsetof(VRingUsed, ring[i].id); | |
155 | stl_phys(pa, val); | |
156 | } | |
157 | ||
158 | static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val) | |
159 | { | |
160 | hwaddr pa; | |
161 | pa = vq->vring.used + offsetof(VRingUsed, ring[i].len); | |
162 | stl_phys(pa, val); | |
163 | } | |
164 | ||
165 | static uint16_t vring_used_idx(VirtQueue *vq) | |
166 | { | |
167 | hwaddr pa; | |
168 | pa = vq->vring.used + offsetof(VRingUsed, idx); | |
169 | return lduw_phys(pa); | |
170 | } | |
171 | ||
172 | static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val) | |
173 | { | |
174 | hwaddr pa; | |
175 | pa = vq->vring.used + offsetof(VRingUsed, idx); | |
176 | stw_phys(pa, val); | |
177 | } | |
178 | ||
179 | static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask) | |
180 | { | |
181 | hwaddr pa; | |
182 | pa = vq->vring.used + offsetof(VRingUsed, flags); | |
183 | stw_phys(pa, lduw_phys(pa) | mask); | |
184 | } | |
185 | ||
186 | static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask) | |
187 | { | |
188 | hwaddr pa; | |
189 | pa = vq->vring.used + offsetof(VRingUsed, flags); | |
190 | stw_phys(pa, lduw_phys(pa) & ~mask); | |
191 | } | |
192 | ||
193 | static inline void vring_avail_event(VirtQueue *vq, uint16_t val) | |
194 | { | |
195 | hwaddr pa; | |
196 | if (!vq->notification) { | |
197 | return; | |
198 | } | |
199 | pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]); | |
200 | stw_phys(pa, val); | |
201 | } | |
202 | ||
203 | void virtio_queue_set_notification(VirtQueue *vq, int enable) | |
204 | { | |
205 | vq->notification = enable; | |
206 | if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) { | |
207 | vring_avail_event(vq, vring_avail_idx(vq)); | |
208 | } else if (enable) { | |
209 | vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY); | |
210 | } else { | |
211 | vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY); | |
212 | } | |
213 | if (enable) { | |
214 | /* Expose avail event/used flags before caller checks the avail idx. */ | |
215 | smp_mb(); | |
216 | } | |
217 | } | |
218 | ||
219 | int virtio_queue_ready(VirtQueue *vq) | |
220 | { | |
221 | return vq->vring.avail != 0; | |
222 | } | |
223 | ||
224 | int virtio_queue_empty(VirtQueue *vq) | |
225 | { | |
226 | return vring_avail_idx(vq) == vq->last_avail_idx; | |
227 | } | |
228 | ||
229 | void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, | |
230 | unsigned int len, unsigned int idx) | |
231 | { | |
232 | unsigned int offset; | |
233 | int i; | |
234 | ||
235 | trace_virtqueue_fill(vq, elem, len, idx); | |
236 | ||
237 | offset = 0; | |
238 | for (i = 0; i < elem->in_num; i++) { | |
239 | size_t size = MIN(len - offset, elem->in_sg[i].iov_len); | |
240 | ||
241 | cpu_physical_memory_unmap(elem->in_sg[i].iov_base, | |
242 | elem->in_sg[i].iov_len, | |
243 | 1, size); | |
244 | ||
245 | offset += size; | |
246 | } | |
247 | ||
248 | for (i = 0; i < elem->out_num; i++) | |
249 | cpu_physical_memory_unmap(elem->out_sg[i].iov_base, | |
250 | elem->out_sg[i].iov_len, | |
251 | 0, elem->out_sg[i].iov_len); | |
252 | ||
253 | idx = (idx + vring_used_idx(vq)) % vq->vring.num; | |
254 | ||
255 | /* Get a pointer to the next entry in the used ring. */ | |
256 | vring_used_ring_id(vq, idx, elem->index); | |
257 | vring_used_ring_len(vq, idx, len); | |
258 | } | |
259 | ||
260 | void virtqueue_flush(VirtQueue *vq, unsigned int count) | |
261 | { | |
262 | uint16_t old, new; | |
263 | /* Make sure buffer is written before we update index. */ | |
264 | smp_wmb(); | |
265 | trace_virtqueue_flush(vq, count); | |
266 | old = vring_used_idx(vq); | |
267 | new = old + count; | |
268 | vring_used_idx_set(vq, new); | |
269 | vq->inuse -= count; | |
270 | if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) | |
271 | vq->signalled_used_valid = false; | |
272 | } | |
273 | ||
274 | void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, | |
275 | unsigned int len) | |
276 | { | |
277 | virtqueue_fill(vq, elem, len, 0); | |
278 | virtqueue_flush(vq, 1); | |
279 | } | |
280 | ||
281 | static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx) | |
282 | { | |
283 | uint16_t num_heads = vring_avail_idx(vq) - idx; | |
284 | ||
285 | /* Check it isn't doing very strange things with descriptor numbers. */ | |
286 | if (num_heads > vq->vring.num) { | |
287 | error_report("Guest moved used index from %u to %u", | |
288 | idx, vring_avail_idx(vq)); | |
289 | exit(1); | |
290 | } | |
291 | /* On success, callers read a descriptor at vq->last_avail_idx. | |
292 | * Make sure descriptor read does not bypass avail index read. */ | |
293 | if (num_heads) { | |
294 | smp_rmb(); | |
295 | } | |
296 | ||
297 | return num_heads; | |
298 | } | |
299 | ||
300 | static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx) | |
301 | { | |
302 | unsigned int head; | |
303 | ||
304 | /* Grab the next descriptor number they're advertising, and increment | |
305 | * the index we've seen. */ | |
306 | head = vring_avail_ring(vq, idx % vq->vring.num); | |
307 | ||
308 | /* If their number is silly, that's a fatal mistake. */ | |
309 | if (head >= vq->vring.num) { | |
310 | error_report("Guest says index %u is available", head); | |
311 | exit(1); | |
312 | } | |
313 | ||
314 | return head; | |
315 | } | |
316 | ||
317 | static unsigned virtqueue_next_desc(hwaddr desc_pa, | |
318 | unsigned int i, unsigned int max) | |
319 | { | |
320 | unsigned int next; | |
321 | ||
322 | /* If this descriptor says it doesn't chain, we're done. */ | |
323 | if (!(vring_desc_flags(desc_pa, i) & VRING_DESC_F_NEXT)) | |
324 | return max; | |
325 | ||
326 | /* Check they're not leading us off end of descriptors. */ | |
327 | next = vring_desc_next(desc_pa, i); | |
328 | /* Make sure compiler knows to grab that: we don't want it changing! */ | |
329 | smp_wmb(); | |
330 | ||
331 | if (next >= max) { | |
332 | error_report("Desc next is %u", next); | |
333 | exit(1); | |
334 | } | |
335 | ||
336 | return next; | |
337 | } | |
338 | ||
339 | void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, | |
340 | unsigned int *out_bytes, | |
341 | unsigned max_in_bytes, unsigned max_out_bytes) | |
342 | { | |
343 | unsigned int idx; | |
344 | unsigned int total_bufs, in_total, out_total; | |
345 | ||
346 | idx = vq->last_avail_idx; | |
347 | ||
348 | total_bufs = in_total = out_total = 0; | |
349 | while (virtqueue_num_heads(vq, idx)) { | |
350 | unsigned int max, num_bufs, indirect = 0; | |
351 | hwaddr desc_pa; | |
352 | int i; | |
353 | ||
354 | max = vq->vring.num; | |
355 | num_bufs = total_bufs; | |
356 | i = virtqueue_get_head(vq, idx++); | |
357 | desc_pa = vq->vring.desc; | |
358 | ||
359 | if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) { | |
360 | if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) { | |
361 | error_report("Invalid size for indirect buffer table"); | |
362 | exit(1); | |
363 | } | |
364 | ||
365 | /* If we've got too many, that implies a descriptor loop. */ | |
366 | if (num_bufs >= max) { | |
367 | error_report("Looped descriptor"); | |
368 | exit(1); | |
369 | } | |
370 | ||
371 | /* loop over the indirect descriptor table */ | |
372 | indirect = 1; | |
373 | max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc); | |
374 | num_bufs = i = 0; | |
375 | desc_pa = vring_desc_addr(desc_pa, i); | |
376 | } | |
377 | ||
378 | do { | |
379 | /* If we've got too many, that implies a descriptor loop. */ | |
380 | if (++num_bufs > max) { | |
381 | error_report("Looped descriptor"); | |
382 | exit(1); | |
383 | } | |
384 | ||
385 | if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) { | |
386 | in_total += vring_desc_len(desc_pa, i); | |
387 | } else { | |
388 | out_total += vring_desc_len(desc_pa, i); | |
389 | } | |
390 | if (in_total >= max_in_bytes && out_total >= max_out_bytes) { | |
391 | goto done; | |
392 | } | |
393 | } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max); | |
394 | ||
395 | if (!indirect) | |
396 | total_bufs = num_bufs; | |
397 | else | |
398 | total_bufs++; | |
399 | } | |
400 | done: | |
401 | if (in_bytes) { | |
402 | *in_bytes = in_total; | |
403 | } | |
404 | if (out_bytes) { | |
405 | *out_bytes = out_total; | |
406 | } | |
407 | } | |
408 | ||
409 | int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes, | |
410 | unsigned int out_bytes) | |
411 | { | |
412 | unsigned int in_total, out_total; | |
413 | ||
414 | virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes); | |
415 | return in_bytes <= in_total && out_bytes <= out_total; | |
416 | } | |
417 | ||
418 | void virtqueue_map_sg(struct iovec *sg, hwaddr *addr, | |
419 | size_t num_sg, int is_write) | |
420 | { | |
421 | unsigned int i; | |
422 | hwaddr len; | |
423 | ||
424 | for (i = 0; i < num_sg; i++) { | |
425 | len = sg[i].iov_len; | |
426 | sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write); | |
427 | if (sg[i].iov_base == NULL || len != sg[i].iov_len) { | |
428 | error_report("virtio: trying to map MMIO memory"); | |
429 | exit(1); | |
430 | } | |
431 | } | |
432 | } | |
433 | ||
434 | int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) | |
435 | { | |
436 | unsigned int i, head, max; | |
437 | hwaddr desc_pa = vq->vring.desc; | |
438 | ||
439 | if (!virtqueue_num_heads(vq, vq->last_avail_idx)) | |
440 | return 0; | |
441 | ||
442 | /* When we start there are none of either input nor output. */ | |
443 | elem->out_num = elem->in_num = 0; | |
444 | ||
445 | max = vq->vring.num; | |
446 | ||
447 | i = head = virtqueue_get_head(vq, vq->last_avail_idx++); | |
448 | if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) { | |
449 | vring_avail_event(vq, vring_avail_idx(vq)); | |
450 | } | |
451 | ||
452 | if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) { | |
453 | if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) { | |
454 | error_report("Invalid size for indirect buffer table"); | |
455 | exit(1); | |
456 | } | |
457 | ||
458 | /* loop over the indirect descriptor table */ | |
459 | max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc); | |
460 | desc_pa = vring_desc_addr(desc_pa, i); | |
461 | i = 0; | |
462 | } | |
463 | ||
464 | /* Collect all the descriptors */ | |
465 | do { | |
466 | struct iovec *sg; | |
467 | ||
468 | if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) { | |
469 | if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) { | |
470 | error_report("Too many write descriptors in indirect table"); | |
471 | exit(1); | |
472 | } | |
473 | elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i); | |
474 | sg = &elem->in_sg[elem->in_num++]; | |
475 | } else { | |
476 | if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) { | |
477 | error_report("Too many read descriptors in indirect table"); | |
478 | exit(1); | |
479 | } | |
480 | elem->out_addr[elem->out_num] = vring_desc_addr(desc_pa, i); | |
481 | sg = &elem->out_sg[elem->out_num++]; | |
482 | } | |
483 | ||
484 | sg->iov_len = vring_desc_len(desc_pa, i); | |
485 | ||
486 | /* If we've got too many, that implies a descriptor loop. */ | |
487 | if ((elem->in_num + elem->out_num) > max) { | |
488 | error_report("Looped descriptor"); | |
489 | exit(1); | |
490 | } | |
491 | } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max); | |
492 | ||
493 | /* Now map what we have collected */ | |
494 | virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1); | |
495 | virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0); | |
496 | ||
497 | elem->index = head; | |
498 | ||
499 | vq->inuse++; | |
500 | ||
501 | trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num); | |
502 | return elem->in_num + elem->out_num; | |
503 | } | |
504 | ||
505 | /* virtio device */ | |
506 | static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector) | |
507 | { | |
508 | if (vdev->binding->notify) { | |
509 | vdev->binding->notify(vdev->binding_opaque, vector); | |
510 | } | |
511 | } | |
512 | ||
513 | void virtio_update_irq(VirtIODevice *vdev) | |
514 | { | |
515 | virtio_notify_vector(vdev, VIRTIO_NO_VECTOR); | |
516 | } | |
517 | ||
518 | void virtio_set_status(VirtIODevice *vdev, uint8_t val) | |
519 | { | |
520 | trace_virtio_set_status(vdev, val); | |
521 | ||
522 | if (vdev->set_status) { | |
523 | vdev->set_status(vdev, val); | |
524 | } | |
525 | vdev->status = val; | |
526 | } | |
527 | ||
528 | void virtio_reset(void *opaque) | |
529 | { | |
530 | VirtIODevice *vdev = opaque; | |
531 | int i; | |
532 | ||
533 | virtio_set_status(vdev, 0); | |
534 | ||
535 | if (vdev->reset) | |
536 | vdev->reset(vdev); | |
537 | ||
538 | vdev->guest_features = 0; | |
539 | vdev->queue_sel = 0; | |
540 | vdev->status = 0; | |
541 | vdev->isr = 0; | |
542 | vdev->config_vector = VIRTIO_NO_VECTOR; | |
543 | virtio_notify_vector(vdev, vdev->config_vector); | |
544 | ||
545 | for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
546 | vdev->vq[i].vring.desc = 0; | |
547 | vdev->vq[i].vring.avail = 0; | |
548 | vdev->vq[i].vring.used = 0; | |
549 | vdev->vq[i].last_avail_idx = 0; | |
550 | vdev->vq[i].pa = 0; | |
551 | vdev->vq[i].vector = VIRTIO_NO_VECTOR; | |
552 | vdev->vq[i].signalled_used = 0; | |
553 | vdev->vq[i].signalled_used_valid = false; | |
554 | vdev->vq[i].notification = true; | |
555 | } | |
556 | } | |
557 | ||
558 | uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr) | |
559 | { | |
560 | uint8_t val; | |
561 | ||
562 | vdev->get_config(vdev, vdev->config); | |
563 | ||
564 | if (addr > (vdev->config_len - sizeof(val))) | |
565 | return (uint32_t)-1; | |
566 | ||
567 | val = ldub_p(vdev->config + addr); | |
568 | return val; | |
569 | } | |
570 | ||
571 | uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr) | |
572 | { | |
573 | uint16_t val; | |
574 | ||
575 | vdev->get_config(vdev, vdev->config); | |
576 | ||
577 | if (addr > (vdev->config_len - sizeof(val))) | |
578 | return (uint32_t)-1; | |
579 | ||
580 | val = lduw_p(vdev->config + addr); | |
581 | return val; | |
582 | } | |
583 | ||
584 | uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr) | |
585 | { | |
586 | uint32_t val; | |
587 | ||
588 | vdev->get_config(vdev, vdev->config); | |
589 | ||
590 | if (addr > (vdev->config_len - sizeof(val))) | |
591 | return (uint32_t)-1; | |
592 | ||
593 | val = ldl_p(vdev->config + addr); | |
594 | return val; | |
595 | } | |
596 | ||
597 | void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data) | |
598 | { | |
599 | uint8_t val = data; | |
600 | ||
601 | if (addr > (vdev->config_len - sizeof(val))) | |
602 | return; | |
603 | ||
604 | stb_p(vdev->config + addr, val); | |
605 | ||
606 | if (vdev->set_config) | |
607 | vdev->set_config(vdev, vdev->config); | |
608 | } | |
609 | ||
610 | void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data) | |
611 | { | |
612 | uint16_t val = data; | |
613 | ||
614 | if (addr > (vdev->config_len - sizeof(val))) | |
615 | return; | |
616 | ||
617 | stw_p(vdev->config + addr, val); | |
618 | ||
619 | if (vdev->set_config) | |
620 | vdev->set_config(vdev, vdev->config); | |
621 | } | |
622 | ||
623 | void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data) | |
624 | { | |
625 | uint32_t val = data; | |
626 | ||
627 | if (addr > (vdev->config_len - sizeof(val))) | |
628 | return; | |
629 | ||
630 | stl_p(vdev->config + addr, val); | |
631 | ||
632 | if (vdev->set_config) | |
633 | vdev->set_config(vdev, vdev->config); | |
634 | } | |
635 | ||
636 | void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr) | |
637 | { | |
638 | vdev->vq[n].pa = addr; | |
639 | virtqueue_init(&vdev->vq[n]); | |
640 | } | |
641 | ||
642 | hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n) | |
643 | { | |
644 | return vdev->vq[n].pa; | |
645 | } | |
646 | ||
647 | int virtio_queue_get_num(VirtIODevice *vdev, int n) | |
648 | { | |
649 | return vdev->vq[n].vring.num; | |
650 | } | |
651 | ||
652 | int virtio_queue_get_id(VirtQueue *vq) | |
653 | { | |
654 | VirtIODevice *vdev = vq->vdev; | |
655 | assert(vq >= &vdev->vq[0] && vq < &vdev->vq[VIRTIO_PCI_QUEUE_MAX]); | |
656 | return vq - &vdev->vq[0]; | |
657 | } | |
658 | ||
659 | void virtio_queue_notify_vq(VirtQueue *vq) | |
660 | { | |
661 | if (vq->vring.desc) { | |
662 | VirtIODevice *vdev = vq->vdev; | |
663 | trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); | |
664 | vq->handle_output(vdev, vq); | |
665 | } | |
666 | } | |
667 | ||
668 | void virtio_queue_notify(VirtIODevice *vdev, int n) | |
669 | { | |
670 | virtio_queue_notify_vq(&vdev->vq[n]); | |
671 | } | |
672 | ||
673 | uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) | |
674 | { | |
675 | return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector : | |
676 | VIRTIO_NO_VECTOR; | |
677 | } | |
678 | ||
679 | void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector) | |
680 | { | |
681 | if (n < VIRTIO_PCI_QUEUE_MAX) | |
682 | vdev->vq[n].vector = vector; | |
683 | } | |
684 | ||
685 | VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, | |
686 | void (*handle_output)(VirtIODevice *, VirtQueue *)) | |
687 | { | |
688 | int i; | |
689 | ||
690 | for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
691 | if (vdev->vq[i].vring.num == 0) | |
692 | break; | |
693 | } | |
694 | ||
695 | if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) | |
696 | abort(); | |
697 | ||
698 | vdev->vq[i].vring.num = queue_size; | |
699 | vdev->vq[i].handle_output = handle_output; | |
700 | ||
701 | return &vdev->vq[i]; | |
702 | } | |
703 | ||
704 | void virtio_irq(VirtQueue *vq) | |
705 | { | |
706 | trace_virtio_irq(vq); | |
707 | vq->vdev->isr |= 0x01; | |
708 | virtio_notify_vector(vq->vdev, vq->vector); | |
709 | } | |
710 | ||
/* Assuming a given event index value from the other side, if we have just
 * advanced our index from old to new, should we trigger an event?
 *
 * All arithmetic is modulo 2^16, so the answer stays correct across index
 * wrap-around: an event is needed when event lies in [old, new). */
static inline int vring_need_event(uint16_t event, uint16_t new, uint16_t old)
{
    /* Note: Xen has similar logic for notification hold-off
     * in include/xen/interface/io/ring.h with req_event and req_prod
     * corresponding to event_idx + 1 and new respectively.
     * Note also that req_event and req_prod in Xen start at 1,
     * event indexes in virtio start at 0. */
    const uint16_t past_event = (uint16_t)(new - event - 1);
    const uint16_t advanced = (uint16_t)(new - old);

    return past_event < advanced;
}
723 | ||
724 | static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq) | |
725 | { | |
726 | uint16_t old, new; | |
727 | bool v; | |
728 | /* We need to expose used array entries before checking used event. */ | |
729 | smp_mb(); | |
730 | /* Always notify when queue is empty (when feature acknowledge) */ | |
731 | if (((vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) && | |
732 | !vq->inuse && vring_avail_idx(vq) == vq->last_avail_idx)) { | |
733 | return true; | |
734 | } | |
735 | ||
736 | if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) { | |
737 | return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT); | |
738 | } | |
739 | ||
740 | v = vq->signalled_used_valid; | |
741 | vq->signalled_used_valid = true; | |
742 | old = vq->signalled_used; | |
743 | new = vq->signalled_used = vring_used_idx(vq); | |
744 | return !v || vring_need_event(vring_used_event(vq), new, old); | |
745 | } | |
746 | ||
747 | void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) | |
748 | { | |
749 | if (!vring_notify(vdev, vq)) { | |
750 | return; | |
751 | } | |
752 | ||
753 | trace_virtio_notify(vdev, vq); | |
754 | vdev->isr |= 0x01; | |
755 | virtio_notify_vector(vdev, vq->vector); | |
756 | } | |
757 | ||
758 | void virtio_notify_config(VirtIODevice *vdev) | |
759 | { | |
760 | if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) | |
761 | return; | |
762 | ||
763 | vdev->isr |= 0x03; | |
764 | virtio_notify_vector(vdev, vdev->config_vector); | |
765 | } | |
766 | ||
767 | void virtio_save(VirtIODevice *vdev, QEMUFile *f) | |
768 | { | |
769 | int i; | |
770 | ||
771 | if (vdev->binding->save_config) | |
772 | vdev->binding->save_config(vdev->binding_opaque, f); | |
773 | ||
774 | qemu_put_8s(f, &vdev->status); | |
775 | qemu_put_8s(f, &vdev->isr); | |
776 | qemu_put_be16s(f, &vdev->queue_sel); | |
777 | qemu_put_be32s(f, &vdev->guest_features); | |
778 | qemu_put_be32(f, vdev->config_len); | |
779 | qemu_put_buffer(f, vdev->config, vdev->config_len); | |
780 | ||
781 | for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
782 | if (vdev->vq[i].vring.num == 0) | |
783 | break; | |
784 | } | |
785 | ||
786 | qemu_put_be32(f, i); | |
787 | ||
788 | for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
789 | if (vdev->vq[i].vring.num == 0) | |
790 | break; | |
791 | ||
792 | qemu_put_be32(f, vdev->vq[i].vring.num); | |
793 | qemu_put_be64(f, vdev->vq[i].pa); | |
794 | qemu_put_be16s(f, &vdev->vq[i].last_avail_idx); | |
795 | if (vdev->binding->save_queue) | |
796 | vdev->binding->save_queue(vdev->binding_opaque, i, f); | |
797 | } | |
798 | } | |
799 | ||
800 | int virtio_set_features(VirtIODevice *vdev, uint32_t val) | |
801 | { | |
802 | uint32_t supported_features = | |
803 | vdev->binding->get_features(vdev->binding_opaque); | |
804 | bool bad = (val & ~supported_features) != 0; | |
805 | ||
806 | val &= supported_features; | |
807 | if (vdev->set_features) { | |
808 | vdev->set_features(vdev, val); | |
809 | } | |
810 | vdev->guest_features = val; | |
811 | return bad ? -1 : 0; | |
812 | } | |
813 | ||
814 | int virtio_load(VirtIODevice *vdev, QEMUFile *f) | |
815 | { | |
816 | int num, i, ret; | |
817 | uint32_t features; | |
818 | uint32_t supported_features; | |
819 | ||
820 | if (vdev->binding->load_config) { | |
821 | ret = vdev->binding->load_config(vdev->binding_opaque, f); | |
822 | if (ret) | |
823 | return ret; | |
824 | } | |
825 | ||
826 | qemu_get_8s(f, &vdev->status); | |
827 | qemu_get_8s(f, &vdev->isr); | |
828 | qemu_get_be16s(f, &vdev->queue_sel); | |
829 | qemu_get_be32s(f, &features); | |
830 | ||
831 | if (virtio_set_features(vdev, features) < 0) { | |
832 | supported_features = vdev->binding->get_features(vdev->binding_opaque); | |
833 | error_report("Features 0x%x unsupported. Allowed features: 0x%x", | |
834 | features, supported_features); | |
835 | return -1; | |
836 | } | |
837 | vdev->config_len = qemu_get_be32(f); | |
838 | qemu_get_buffer(f, vdev->config, vdev->config_len); | |
839 | ||
840 | num = qemu_get_be32(f); | |
841 | ||
842 | for (i = 0; i < num; i++) { | |
843 | vdev->vq[i].vring.num = qemu_get_be32(f); | |
844 | vdev->vq[i].pa = qemu_get_be64(f); | |
845 | qemu_get_be16s(f, &vdev->vq[i].last_avail_idx); | |
846 | vdev->vq[i].signalled_used_valid = false; | |
847 | vdev->vq[i].notification = true; | |
848 | ||
849 | if (vdev->vq[i].pa) { | |
850 | uint16_t nheads; | |
851 | virtqueue_init(&vdev->vq[i]); | |
852 | nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx; | |
853 | /* Check it isn't doing very strange things with descriptor numbers. */ | |
854 | if (nheads > vdev->vq[i].vring.num) { | |
855 | error_report("VQ %d size 0x%x Guest index 0x%x " | |
856 | "inconsistent with Host index 0x%x: delta 0x%x", | |
857 | i, vdev->vq[i].vring.num, | |
858 | vring_avail_idx(&vdev->vq[i]), | |
859 | vdev->vq[i].last_avail_idx, nheads); | |
860 | return -1; | |
861 | } | |
862 | } else if (vdev->vq[i].last_avail_idx) { | |
863 | error_report("VQ %d address 0x0 " | |
864 | "inconsistent with Host index 0x%x", | |
865 | i, vdev->vq[i].last_avail_idx); | |
866 | return -1; | |
867 | } | |
868 | if (vdev->binding->load_queue) { | |
869 | ret = vdev->binding->load_queue(vdev->binding_opaque, i, f); | |
870 | if (ret) | |
871 | return ret; | |
872 | } | |
873 | } | |
874 | ||
875 | virtio_notify_vector(vdev, VIRTIO_NO_VECTOR); | |
876 | return 0; | |
877 | } | |
878 | ||
879 | void virtio_common_cleanup(VirtIODevice *vdev) | |
880 | { | |
881 | qemu_del_vm_change_state_handler(vdev->vmstate); | |
882 | g_free(vdev->config); | |
883 | g_free(vdev->vq); | |
884 | } | |
885 | ||
886 | void virtio_cleanup(VirtIODevice *vdev) | |
887 | { | |
888 | virtio_common_cleanup(vdev); | |
889 | g_free(vdev); | |
890 | } | |
891 | ||
892 | static void virtio_vmstate_change(void *opaque, int running, RunState state) | |
893 | { | |
894 | VirtIODevice *vdev = opaque; | |
895 | bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK); | |
896 | vdev->vm_running = running; | |
897 | ||
898 | if (backend_run) { | |
899 | virtio_set_status(vdev, vdev->status); | |
900 | } | |
901 | ||
902 | if (vdev->binding->vmstate_change) { | |
903 | vdev->binding->vmstate_change(vdev->binding_opaque, backend_run); | |
904 | } | |
905 | ||
906 | if (!backend_run) { | |
907 | virtio_set_status(vdev, vdev->status); | |
908 | } | |
909 | } | |
910 | ||
911 | void virtio_init(VirtIODevice *vdev, const char *name, | |
912 | uint16_t device_id, size_t config_size) | |
913 | { | |
914 | int i; | |
915 | vdev->device_id = device_id; | |
916 | vdev->status = 0; | |
917 | vdev->isr = 0; | |
918 | vdev->queue_sel = 0; | |
919 | vdev->config_vector = VIRTIO_NO_VECTOR; | |
920 | vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX); | |
921 | vdev->vm_running = runstate_is_running(); | |
922 | for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
923 | vdev->vq[i].vector = VIRTIO_NO_VECTOR; | |
924 | vdev->vq[i].vdev = vdev; | |
925 | } | |
926 | ||
927 | vdev->name = name; | |
928 | vdev->config_len = config_size; | |
929 | if (vdev->config_len) { | |
930 | vdev->config = g_malloc0(config_size); | |
931 | } else { | |
932 | vdev->config = NULL; | |
933 | } | |
934 | vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change, | |
935 | vdev); | |
936 | } | |
937 | ||
938 | VirtIODevice *virtio_common_init(const char *name, uint16_t device_id, | |
939 | size_t config_size, size_t struct_size) | |
940 | { | |
941 | VirtIODevice *vdev; | |
942 | vdev = g_malloc0(struct_size); | |
943 | virtio_init(vdev, name, device_id, config_size); | |
944 | return vdev; | |
945 | } | |
946 | ||
947 | void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding, | |
948 | DeviceState *opaque) | |
949 | { | |
950 | vdev->binding = binding; | |
951 | vdev->binding_opaque = opaque; | |
952 | } | |
953 | ||
954 | hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n) | |
955 | { | |
956 | return vdev->vq[n].vring.desc; | |
957 | } | |
958 | ||
959 | hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n) | |
960 | { | |
961 | return vdev->vq[n].vring.avail; | |
962 | } | |
963 | ||
964 | hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n) | |
965 | { | |
966 | return vdev->vq[n].vring.used; | |
967 | } | |
968 | ||
969 | hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n) | |
970 | { | |
971 | return vdev->vq[n].vring.desc; | |
972 | } | |
973 | ||
974 | hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n) | |
975 | { | |
976 | return sizeof(VRingDesc) * vdev->vq[n].vring.num; | |
977 | } | |
978 | ||
979 | hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n) | |
980 | { | |
981 | return offsetof(VRingAvail, ring) + | |
982 | sizeof(uint64_t) * vdev->vq[n].vring.num; | |
983 | } | |
984 | ||
985 | hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n) | |
986 | { | |
987 | return offsetof(VRingUsed, ring) + | |
988 | sizeof(VRingUsedElem) * vdev->vq[n].vring.num; | |
989 | } | |
990 | ||
991 | hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n) | |
992 | { | |
993 | return vdev->vq[n].vring.used - vdev->vq[n].vring.desc + | |
994 | virtio_queue_get_used_size(vdev, n); | |
995 | } | |
996 | ||
997 | uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) | |
998 | { | |
999 | return vdev->vq[n].last_avail_idx; | |
1000 | } | |
1001 | ||
1002 | void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx) | |
1003 | { | |
1004 | vdev->vq[n].last_avail_idx = idx; | |
1005 | } | |
1006 | ||
1007 | VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n) | |
1008 | { | |
1009 | return vdev->vq + n; | |
1010 | } | |
1011 | ||
1012 | static void virtio_queue_guest_notifier_read(EventNotifier *n) | |
1013 | { | |
1014 | VirtQueue *vq = container_of(n, VirtQueue, guest_notifier); | |
1015 | if (event_notifier_test_and_clear(n)) { | |
1016 | virtio_irq(vq); | |
1017 | } | |
1018 | } | |
1019 | ||
1020 | void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, | |
1021 | bool with_irqfd) | |
1022 | { | |
1023 | if (assign && !with_irqfd) { | |
1024 | event_notifier_set_handler(&vq->guest_notifier, | |
1025 | virtio_queue_guest_notifier_read); | |
1026 | } else { | |
1027 | event_notifier_set_handler(&vq->guest_notifier, NULL); | |
1028 | } | |
1029 | if (!assign) { | |
1030 | /* Test and clear notifier before closing it, | |
1031 | * in case poll callback didn't have time to run. */ | |
1032 | virtio_queue_guest_notifier_read(&vq->guest_notifier); | |
1033 | } | |
1034 | } | |
1035 | ||
1036 | EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq) | |
1037 | { | |
1038 | return &vq->guest_notifier; | |
1039 | } | |
1040 | ||
1041 | static void virtio_queue_host_notifier_read(EventNotifier *n) | |
1042 | { | |
1043 | VirtQueue *vq = container_of(n, VirtQueue, host_notifier); | |
1044 | if (event_notifier_test_and_clear(n)) { | |
1045 | virtio_queue_notify_vq(vq); | |
1046 | } | |
1047 | } | |
1048 | ||
1049 | void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign, | |
1050 | bool set_handler) | |
1051 | { | |
1052 | if (assign && set_handler) { | |
1053 | event_notifier_set_handler(&vq->host_notifier, | |
1054 | virtio_queue_host_notifier_read); | |
1055 | } else { | |
1056 | event_notifier_set_handler(&vq->host_notifier, NULL); | |
1057 | } | |
1058 | if (!assign) { | |
1059 | /* Test and clear notifier before after disabling event, | |
1060 | * in case poll callback didn't have time to run. */ | |
1061 | virtio_queue_host_notifier_read(&vq->host_notifier); | |
1062 | } | |
1063 | } | |
1064 | ||
1065 | EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq) | |
1066 | { | |
1067 | return &vq->host_notifier; | |
1068 | } | |
1069 | ||
1070 | static int virtio_device_init(DeviceState *qdev) | |
1071 | { | |
1072 | VirtIODevice *vdev = VIRTIO_DEVICE(qdev); | |
1073 | VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(qdev); | |
1074 | assert(k->init != NULL); | |
1075 | if (k->init(vdev) < 0) { | |
1076 | return -1; | |
1077 | } | |
1078 | virtio_bus_plug_device(vdev); | |
1079 | return 0; | |
1080 | } | |
1081 | ||
1082 | static void virtio_device_class_init(ObjectClass *klass, void *data) | |
1083 | { | |
1084 | /* Set the default value here. */ | |
1085 | DeviceClass *dc = DEVICE_CLASS(klass); | |
1086 | dc->init = virtio_device_init; | |
1087 | dc->bus_type = TYPE_VIRTIO_BUS; | |
1088 | } | |
1089 | ||
1090 | static const TypeInfo virtio_device_info = { | |
1091 | .name = TYPE_VIRTIO_DEVICE, | |
1092 | .parent = TYPE_DEVICE, | |
1093 | .instance_size = sizeof(VirtIODevice), | |
1094 | .class_init = virtio_device_class_init, | |
1095 | .abstract = true, | |
1096 | .class_size = sizeof(VirtioDeviceClass), | |
1097 | }; | |
1098 | ||
1099 | static void virtio_register_types(void) | |
1100 | { | |
1101 | type_register_static(&virtio_device_info); | |
1102 | } | |
1103 | ||
1104 | type_init(virtio_register_types) |