]>
Commit | Line | Data |
---|---|---|
967f97fa AL |
1 | /* |
2 | * Virtio Support | |
3 | * | |
4 | * Copyright IBM, Corp. 2007 | |
5 | * | |
6 | * Authors: | |
7 | * Anthony Liguori <[email protected]> | |
8 | * | |
9 | * This work is licensed under the terms of the GNU GPL, version 2. See | |
10 | * the COPYING file in the top-level directory. | |
11 | * | |
12 | */ | |
13 | ||
14 | #include <inttypes.h> | |
967f97fa AL |
15 | |
16 | #include "virtio.h" | |
17 | #include "sysemu.h" | |
18 | ||
19 | //#define VIRTIO_ZERO_COPY | |
20 | ||
/*
 * Register layout of the virtio PCI I/O region, taken from Linux's
 * linux/virtio_pci.h.  Each value is a byte offset into the region.
 */

/* 32-bit, read-only: bitmask of the features supported by the host */
#define VIRTIO_PCI_HOST_FEATURES        0

/* 32-bit, read/write: bitmask of the features activated by the guest */
#define VIRTIO_PCI_GUEST_FEATURES       4

/* 32-bit, read/write: PFN for the currently selected queue */
#define VIRTIO_PCI_QUEUE_PFN            8

/* 16-bit, read-only: queue size for the currently selected queue */
#define VIRTIO_PCI_QUEUE_NUM            12

/* 16-bit, read/write: queue selector */
#define VIRTIO_PCI_QUEUE_SEL            14

/* 16-bit, read/write: queue notifier */
#define VIRTIO_PCI_QUEUE_NOTIFY         16

/* 8-bit: device status register */
#define VIRTIO_PCI_STATUS               18

/*
 * 8-bit, read-only: interrupt status register.  A read returns the current
 * ISR contents and clears them, i.e. it is a read-and-acknowledge.
 */
#define VIRTIO_PCI_ISR                  19

/* Offset subtracted by the config accessors to index the config buffer */
#define VIRTIO_PCI_CONFIG               20

/* Virtio ABI version; incrementing it breaks the guest driver. */
#define VIRTIO_PCI_ABI_VERSION          0
53 | ||
f46f15bc AL |
/*
 * Number of bits by which the value written to QUEUE_PFN is shifted to
 * obtain the physical queue address.  12 is historical (x86 page size).
 */
#define VIRTIO_PCI_QUEUE_ADDR_SHIFT     12

/*
 * Alignment between the consumer and producer parts of the vring;
 * again the x86 page size.
 */
#define VIRTIO_PCI_VRING_ALIGN          4096
61 | ||
967f97fa AL |
/*
 * Write-barrier placeholder.  QEMU runs everything in lock-step, so a real
 * barrier is not strictly needed; the call sites are kept so the ordering
 * requirement stays obvious for KVM, or if kqemu gets SMP support.
 * Expands to an empty statement.
 */
#define wmb() do { } while (0)
67 | ||
/*
 * One entry of the descriptor table in guest memory.  The fields are only
 * ever accessed through offsetof()/sizeof() by the vring_desc_*() helpers.
 */
typedef struct VRingDesc {
    uint64_t addr;      /* guest address of the buffer */
    uint32_t len;       /* buffer length */
    uint16_t flags;     /* tested against VRING_DESC_F_NEXT / _F_WRITE */
    uint16_t next;      /* index of the next descriptor in the chain */
} VRingDesc;
75 | ||
/*
 * Layout of the available ring in guest memory.  Only used for
 * offsetof() computations by the vring_avail_*() helpers.
 *
 * The trailing array is a C99 flexible array member rather than the GNU
 * zero-length array extension; size and field offsets are unchanged.
 */
typedef struct VRingAvail {
    uint16_t flags;     /* tested against VRING_AVAIL_F_NO_INTERRUPT */
    uint16_t idx;       /* index the guest advances as it adds heads */
    uint16_t ring[];    /* head descriptor indices, vring.num entries */
} VRingAvail;
82 | ||
/* One entry of the used ring: which chain was consumed and the length reported. */
typedef struct VRingUsedElem {
    uint32_t id;        /* head index of the completed descriptor chain */
    uint32_t len;       /* length value reported back for that chain */
} VRingUsedElem;
88 | ||
89 | typedef struct VRingUsed | |
90 | { | |
91 | uint16_t flags; | |
92 | uint16_t idx; | |
93 | VRingUsedElem ring[0]; | |
94 | } VRingUsed; | |
95 | ||
96 | typedef struct VRing | |
97 | { | |
98 | unsigned int num; | |
99 | target_phys_addr_t desc; | |
100 | target_phys_addr_t avail; | |
101 | target_phys_addr_t used; | |
102 | } VRing; | |
103 | ||
104 | struct VirtQueue | |
105 | { | |
106 | VRing vring; | |
107 | uint32_t pfn; | |
108 | uint16_t last_avail_idx; | |
109 | int inuse; | |
110 | void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq); | |
111 | }; | |
112 | ||
113 | #define VIRTIO_PCI_QUEUE_MAX 16 | |
114 | ||
115 | /* virt queue functions */ | |
#ifdef VIRTIO_ZERO_COPY
/*
 * Translate the guest physical range starting at addr, size bytes long,
 * into a host pointer inside phys_ram_base.
 *
 * The first page and every page visited while stepping through the range
 * must be ordinary RAM, and their RAM offsets must be contiguous.  If
 * either condition fails a message is printed and the process exits.
 */
static void *virtio_map_gpa(target_phys_addr_t addr, size_t size)
{
    ram_addr_t base;
    target_phys_addr_t page;

    base = cpu_get_physical_page_desc(addr);
    if ((base & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
        fprintf(stderr, "virtio DMA to IO ram\n");
        exit(1);
    }

    /* Combine the page's RAM offset with the offset inside the page. */
    base = (base & TARGET_PAGE_MASK) | (addr & ~TARGET_PAGE_MASK);

    page = addr + TARGET_PAGE_SIZE;
    while (page < TARGET_PAGE_ALIGN(addr + size)) {
        ram_addr_t cur;

        cur = cpu_get_physical_page_desc(page);
        if ((cur & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
            fprintf(stderr, "virtio DMA to IO ram\n");
            exit(1);
        }

        cur = (cur & TARGET_PAGE_MASK) | (page & ~TARGET_PAGE_MASK);

        /* The RAM offset must advance in step with the guest address. */
        if (cur != (base + (page - addr))) {
            fprintf(stderr, "discontigous virtio memory\n");
            exit(1);
        }

        page += TARGET_PAGE_SIZE;
    }

    return phys_ram_base + base;
}
#endif
152 | ||
153 | static void virtqueue_init(VirtQueue *vq, target_phys_addr_t pa) | |
154 | { | |
155 | vq->vring.desc = pa; | |
156 | vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc); | |
f46f15bc AL |
157 | vq->vring.used = vring_align(vq->vring.avail + |
158 | offsetof(VRingAvail, ring[vq->vring.num]), | |
159 | VIRTIO_PCI_VRING_ALIGN); | |
967f97fa AL |
160 | } |
161 | ||
162 | static inline uint64_t vring_desc_addr(VirtQueue *vq, int i) | |
163 | { | |
164 | target_phys_addr_t pa; | |
165 | pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr); | |
166 | return ldq_phys(pa); | |
167 | } | |
168 | ||
169 | static inline uint32_t vring_desc_len(VirtQueue *vq, int i) | |
170 | { | |
171 | target_phys_addr_t pa; | |
172 | pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, len); | |
173 | return ldl_phys(pa); | |
174 | } | |
175 | ||
176 | static inline uint16_t vring_desc_flags(VirtQueue *vq, int i) | |
177 | { | |
178 | target_phys_addr_t pa; | |
179 | pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags); | |
180 | return lduw_phys(pa); | |
181 | } | |
182 | ||
183 | static inline uint16_t vring_desc_next(VirtQueue *vq, int i) | |
184 | { | |
185 | target_phys_addr_t pa; | |
186 | pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, next); | |
187 | return lduw_phys(pa); | |
188 | } | |
189 | ||
190 | static inline uint16_t vring_avail_flags(VirtQueue *vq) | |
191 | { | |
192 | target_phys_addr_t pa; | |
193 | pa = vq->vring.avail + offsetof(VRingAvail, flags); | |
194 | return lduw_phys(pa); | |
195 | } | |
196 | ||
197 | static inline uint16_t vring_avail_idx(VirtQueue *vq) | |
198 | { | |
199 | target_phys_addr_t pa; | |
200 | pa = vq->vring.avail + offsetof(VRingAvail, idx); | |
201 | return lduw_phys(pa); | |
202 | } | |
203 | ||
204 | static inline uint16_t vring_avail_ring(VirtQueue *vq, int i) | |
205 | { | |
206 | target_phys_addr_t pa; | |
207 | pa = vq->vring.avail + offsetof(VRingAvail, ring[i]); | |
208 | return lduw_phys(pa); | |
209 | } | |
210 | ||
211 | static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val) | |
212 | { | |
213 | target_phys_addr_t pa; | |
214 | pa = vq->vring.used + offsetof(VRingUsed, ring[i].id); | |
215 | stl_phys(pa, val); | |
216 | } | |
217 | ||
218 | static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val) | |
219 | { | |
220 | target_phys_addr_t pa; | |
221 | pa = vq->vring.used + offsetof(VRingUsed, ring[i].len); | |
222 | stl_phys(pa, val); | |
223 | } | |
224 | ||
225 | static uint16_t vring_used_idx(VirtQueue *vq) | |
226 | { | |
227 | target_phys_addr_t pa; | |
228 | pa = vq->vring.used + offsetof(VRingUsed, idx); | |
229 | return lduw_phys(pa); | |
230 | } | |
231 | ||
232 | static inline void vring_used_idx_increment(VirtQueue *vq, uint16_t val) | |
233 | { | |
234 | target_phys_addr_t pa; | |
235 | pa = vq->vring.used + offsetof(VRingUsed, idx); | |
236 | stw_phys(pa, vring_used_idx(vq) + val); | |
237 | } | |
238 | ||
239 | static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask) | |
240 | { | |
241 | target_phys_addr_t pa; | |
242 | pa = vq->vring.used + offsetof(VRingUsed, flags); | |
243 | stw_phys(pa, lduw_phys(pa) | mask); | |
244 | } | |
245 | ||
246 | static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask) | |
247 | { | |
248 | target_phys_addr_t pa; | |
249 | pa = vq->vring.used + offsetof(VRingUsed, flags); | |
250 | stw_phys(pa, lduw_phys(pa) & ~mask); | |
251 | } | |
252 | ||
253 | void virtio_queue_set_notification(VirtQueue *vq, int enable) | |
254 | { | |
255 | if (enable) | |
256 | vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY); | |
257 | else | |
258 | vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY); | |
259 | } | |
260 | ||
261 | int virtio_queue_ready(VirtQueue *vq) | |
262 | { | |
263 | return vq->vring.avail != 0; | |
264 | } | |
265 | ||
266 | int virtio_queue_empty(VirtQueue *vq) | |
267 | { | |
268 | return vring_avail_idx(vq) == vq->last_avail_idx; | |
269 | } | |
270 | ||
271 | void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, | |
272 | unsigned int len, unsigned int idx) | |
273 | { | |
274 | unsigned int offset; | |
275 | int i; | |
276 | ||
277 | #ifndef VIRTIO_ZERO_COPY | |
278 | for (i = 0; i < elem->out_num; i++) | |
279 | qemu_free(elem->out_sg[i].iov_base); | |
280 | #endif | |
281 | ||
282 | offset = 0; | |
283 | for (i = 0; i < elem->in_num; i++) { | |
284 | size_t size = MIN(len - offset, elem->in_sg[i].iov_len); | |
285 | ||
286 | #ifdef VIRTIO_ZERO_COPY | |
287 | if (size) { | |
288 | ram_addr_t addr = (uint8_t *)elem->in_sg[i].iov_base - phys_ram_base; | |
289 | ram_addr_t off; | |
290 | ||
291 | for (off = 0; off < size; off += TARGET_PAGE_SIZE) | |
292 | cpu_physical_memory_set_dirty(addr + off); | |
293 | } | |
294 | #else | |
295 | if (size) | |
296 | cpu_physical_memory_write(elem->in_addr[i], | |
297 | elem->in_sg[i].iov_base, | |
298 | size); | |
299 | ||
300 | qemu_free(elem->in_sg[i].iov_base); | |
301 | #endif | |
302 | ||
303 | offset += size; | |
304 | } | |
305 | ||
306 | idx = (idx + vring_used_idx(vq)) % vq->vring.num; | |
307 | ||
308 | /* Get a pointer to the next entry in the used ring. */ | |
309 | vring_used_ring_id(vq, idx, elem->index); | |
310 | vring_used_ring_len(vq, idx, len); | |
311 | } | |
312 | ||
313 | void virtqueue_flush(VirtQueue *vq, unsigned int count) | |
314 | { | |
315 | /* Make sure buffer is written before we update index. */ | |
316 | wmb(); | |
317 | vring_used_idx_increment(vq, count); | |
318 | vq->inuse -= count; | |
319 | } | |
320 | ||
321 | void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, | |
322 | unsigned int len) | |
323 | { | |
324 | virtqueue_fill(vq, elem, len, 0); | |
325 | virtqueue_flush(vq, 1); | |
326 | } | |
327 | ||
328 | static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx) | |
329 | { | |
330 | uint16_t num_heads = vring_avail_idx(vq) - idx; | |
331 | ||
332 | /* Check it isn't doing very strange things with descriptor numbers. */ | |
bb6834cf AL |
333 | if (num_heads > vq->vring.num) { |
334 | fprintf(stderr, "Guest moved used index from %u to %u", | |
335 | idx, vring_avail_idx(vq)); | |
336 | exit(1); | |
337 | } | |
967f97fa AL |
338 | |
339 | return num_heads; | |
340 | } | |
341 | ||
342 | static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx) | |
343 | { | |
344 | unsigned int head; | |
345 | ||
346 | /* Grab the next descriptor number they're advertising, and increment | |
347 | * the index we've seen. */ | |
348 | head = vring_avail_ring(vq, idx % vq->vring.num); | |
349 | ||
350 | /* If their number is silly, that's a fatal mistake. */ | |
bb6834cf AL |
351 | if (head >= vq->vring.num) { |
352 | fprintf(stderr, "Guest says index %u is available", head); | |
353 | exit(1); | |
354 | } | |
967f97fa AL |
355 | |
356 | return head; | |
357 | } | |
358 | ||
359 | static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i) | |
360 | { | |
361 | unsigned int next; | |
362 | ||
363 | /* If this descriptor says it doesn't chain, we're done. */ | |
364 | if (!(vring_desc_flags(vq, i) & VRING_DESC_F_NEXT)) | |
365 | return vq->vring.num; | |
366 | ||
367 | /* Check they're not leading us off end of descriptors. */ | |
368 | next = vring_desc_next(vq, i); | |
369 | /* Make sure compiler knows to grab that: we don't want it changing! */ | |
370 | wmb(); | |
371 | ||
bb6834cf AL |
372 | if (next >= vq->vring.num) { |
373 | fprintf(stderr, "Desc next is %u", next); | |
374 | exit(1); | |
375 | } | |
967f97fa AL |
376 | |
377 | return next; | |
378 | } | |
379 | ||
380 | int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes) | |
381 | { | |
382 | unsigned int idx; | |
383 | int num_bufs, in_total, out_total; | |
384 | ||
385 | idx = vq->last_avail_idx; | |
386 | ||
387 | num_bufs = in_total = out_total = 0; | |
388 | while (virtqueue_num_heads(vq, idx)) { | |
389 | int i; | |
390 | ||
391 | i = virtqueue_get_head(vq, idx++); | |
392 | do { | |
393 | /* If we've got too many, that implies a descriptor loop. */ | |
bb6834cf AL |
394 | if (++num_bufs > vq->vring.num) { |
395 | fprintf(stderr, "Looped descriptor"); | |
396 | exit(1); | |
397 | } | |
967f97fa AL |
398 | |
399 | if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) { | |
400 | if (in_bytes > 0 && | |
401 | (in_total += vring_desc_len(vq, i)) >= in_bytes) | |
402 | return 1; | |
403 | } else { | |
404 | if (out_bytes > 0 && | |
405 | (out_total += vring_desc_len(vq, i)) >= out_bytes) | |
406 | return 1; | |
407 | } | |
408 | } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num); | |
409 | } | |
410 | ||
411 | return 0; | |
412 | } | |
413 | ||
414 | int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) | |
415 | { | |
416 | unsigned int i, head; | |
417 | ||
418 | if (!virtqueue_num_heads(vq, vq->last_avail_idx)) | |
419 | return 0; | |
420 | ||
421 | /* When we start there are none of either input nor output. */ | |
422 | elem->out_num = elem->in_num = 0; | |
423 | ||
424 | i = head = virtqueue_get_head(vq, vq->last_avail_idx++); | |
425 | do { | |
426 | struct iovec *sg; | |
427 | ||
428 | if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) { | |
429 | elem->in_addr[elem->in_num] = vring_desc_addr(vq, i); | |
430 | sg = &elem->in_sg[elem->in_num++]; | |
431 | } else | |
432 | sg = &elem->out_sg[elem->out_num++]; | |
433 | ||
434 | /* Grab the first descriptor, and check it's OK. */ | |
435 | sg->iov_len = vring_desc_len(vq, i); | |
436 | ||
437 | #ifdef VIRTIO_ZERO_COPY | |
438 | sg->iov_base = virtio_map_gpa(vring_desc_addr(vq, i), sg->iov_len); | |
439 | #else | |
440 | /* cap individual scatter element size to prevent unbounded allocations | |
441 | of memory from the guest. Practically speaking, no virtio driver | |
442 | will ever pass more than a page in each element. We set the cap to | |
443 | be 2MB in case for some reason a large page makes it way into the | |
444 | sg list. When we implement a zero copy API, this limitation will | |
445 | disappear */ | |
446 | if (sg->iov_len > (2 << 20)) | |
447 | sg->iov_len = 2 << 20; | |
448 | ||
449 | sg->iov_base = qemu_malloc(sg->iov_len); | |
450 | if (sg->iov_base && | |
451 | !(vring_desc_flags(vq, i) & VRING_DESC_F_WRITE)) { | |
452 | cpu_physical_memory_read(vring_desc_addr(vq, i), | |
453 | sg->iov_base, | |
454 | sg->iov_len); | |
455 | } | |
456 | #endif | |
bb6834cf AL |
457 | if (sg->iov_base == NULL) { |
458 | fprintf(stderr, "Invalid mapping\n"); | |
459 | exit(1); | |
460 | } | |
967f97fa AL |
461 | |
462 | /* If we've got too many, that implies a descriptor loop. */ | |
bb6834cf AL |
463 | if ((elem->in_num + elem->out_num) > vq->vring.num) { |
464 | fprintf(stderr, "Looped descriptor"); | |
465 | exit(1); | |
466 | } | |
967f97fa AL |
467 | } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num); |
468 | ||
469 | elem->index = head; | |
470 | ||
471 | vq->inuse++; | |
472 | ||
473 | return elem->in_num + elem->out_num; | |
474 | } | |
475 | ||
476 | /* virtio device */ | |
477 | ||
478 | static VirtIODevice *to_virtio_device(PCIDevice *pci_dev) | |
479 | { | |
480 | return (VirtIODevice *)pci_dev; | |
481 | } | |
482 | ||
483 | static void virtio_update_irq(VirtIODevice *vdev) | |
484 | { | |
485 | qemu_set_irq(vdev->pci_dev.irq[0], vdev->isr & 1); | |
486 | } | |
487 | ||
69d6451c | 488 | static void virtio_reset(void *opaque) |
967f97fa AL |
489 | { |
490 | VirtIODevice *vdev = opaque; | |
491 | int i; | |
492 | ||
493 | if (vdev->reset) | |
494 | vdev->reset(vdev); | |
495 | ||
496 | vdev->features = 0; | |
497 | vdev->queue_sel = 0; | |
498 | vdev->status = 0; | |
499 | vdev->isr = 0; | |
500 | virtio_update_irq(vdev); | |
501 | ||
502 | for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
503 | vdev->vq[i].vring.desc = 0; | |
504 | vdev->vq[i].vring.avail = 0; | |
505 | vdev->vq[i].vring.used = 0; | |
506 | vdev->vq[i].last_avail_idx = 0; | |
507 | vdev->vq[i].pfn = 0; | |
508 | } | |
509 | } | |
510 | ||
511 | static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) | |
512 | { | |
513 | VirtIODevice *vdev = to_virtio_device(opaque); | |
514 | ram_addr_t pa; | |
515 | ||
516 | addr -= vdev->addr; | |
517 | ||
518 | switch (addr) { | |
519 | case VIRTIO_PCI_GUEST_FEATURES: | |
520 | if (vdev->set_features) | |
521 | vdev->set_features(vdev, val); | |
522 | vdev->features = val; | |
523 | break; | |
524 | case VIRTIO_PCI_QUEUE_PFN: | |
f46f15bc | 525 | pa = (ram_addr_t)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT; |
967f97fa AL |
526 | vdev->vq[vdev->queue_sel].pfn = val; |
527 | if (pa == 0) { | |
528 | virtio_reset(vdev); | |
529 | } else { | |
530 | virtqueue_init(&vdev->vq[vdev->queue_sel], pa); | |
531 | } | |
532 | break; | |
533 | case VIRTIO_PCI_QUEUE_SEL: | |
534 | if (val < VIRTIO_PCI_QUEUE_MAX) | |
535 | vdev->queue_sel = val; | |
536 | break; | |
537 | case VIRTIO_PCI_QUEUE_NOTIFY: | |
538 | if (val < VIRTIO_PCI_QUEUE_MAX && vdev->vq[val].vring.desc) | |
539 | vdev->vq[val].handle_output(vdev, &vdev->vq[val]); | |
540 | break; | |
541 | case VIRTIO_PCI_STATUS: | |
542 | vdev->status = val & 0xFF; | |
543 | if (vdev->status == 0) | |
544 | virtio_reset(vdev); | |
545 | break; | |
546 | } | |
547 | } | |
548 | ||
549 | static uint32_t virtio_ioport_read(void *opaque, uint32_t addr) | |
550 | { | |
551 | VirtIODevice *vdev = to_virtio_device(opaque); | |
552 | uint32_t ret = 0xFFFFFFFF; | |
553 | ||
554 | addr -= vdev->addr; | |
555 | ||
556 | switch (addr) { | |
557 | case VIRTIO_PCI_HOST_FEATURES: | |
558 | ret = vdev->get_features(vdev); | |
559 | ret |= (1 << VIRTIO_F_NOTIFY_ON_EMPTY); | |
560 | break; | |
561 | case VIRTIO_PCI_GUEST_FEATURES: | |
562 | ret = vdev->features; | |
563 | break; | |
564 | case VIRTIO_PCI_QUEUE_PFN: | |
565 | ret = vdev->vq[vdev->queue_sel].pfn; | |
566 | break; | |
567 | case VIRTIO_PCI_QUEUE_NUM: | |
568 | ret = vdev->vq[vdev->queue_sel].vring.num; | |
569 | break; | |
570 | case VIRTIO_PCI_QUEUE_SEL: | |
571 | ret = vdev->queue_sel; | |
572 | break; | |
573 | case VIRTIO_PCI_STATUS: | |
574 | ret = vdev->status; | |
575 | break; | |
576 | case VIRTIO_PCI_ISR: | |
577 | /* reading from the ISR also clears it. */ | |
578 | ret = vdev->isr; | |
579 | vdev->isr = 0; | |
580 | virtio_update_irq(vdev); | |
581 | break; | |
582 | default: | |
583 | break; | |
584 | } | |
585 | ||
586 | return ret; | |
587 | } | |
588 | ||
589 | static uint32_t virtio_config_readb(void *opaque, uint32_t addr) | |
590 | { | |
591 | VirtIODevice *vdev = opaque; | |
592 | uint8_t val; | |
593 | ||
594 | vdev->get_config(vdev, vdev->config); | |
595 | ||
596 | addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
597 | if (addr > (vdev->config_len - sizeof(val))) | |
598 | return (uint32_t)-1; | |
599 | ||
600 | memcpy(&val, vdev->config + addr, sizeof(val)); | |
601 | return val; | |
602 | } | |
603 | ||
604 | static uint32_t virtio_config_readw(void *opaque, uint32_t addr) | |
605 | { | |
606 | VirtIODevice *vdev = opaque; | |
607 | uint16_t val; | |
608 | ||
609 | vdev->get_config(vdev, vdev->config); | |
610 | ||
611 | addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
612 | if (addr > (vdev->config_len - sizeof(val))) | |
613 | return (uint32_t)-1; | |
614 | ||
615 | memcpy(&val, vdev->config + addr, sizeof(val)); | |
616 | return val; | |
617 | } | |
618 | ||
619 | static uint32_t virtio_config_readl(void *opaque, uint32_t addr) | |
620 | { | |
621 | VirtIODevice *vdev = opaque; | |
622 | uint32_t val; | |
623 | ||
624 | vdev->get_config(vdev, vdev->config); | |
625 | ||
626 | addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
627 | if (addr > (vdev->config_len - sizeof(val))) | |
628 | return (uint32_t)-1; | |
629 | ||
630 | memcpy(&val, vdev->config + addr, sizeof(val)); | |
631 | return val; | |
632 | } | |
633 | ||
634 | static void virtio_config_writeb(void *opaque, uint32_t addr, uint32_t data) | |
635 | { | |
636 | VirtIODevice *vdev = opaque; | |
637 | uint8_t val = data; | |
638 | ||
639 | addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
640 | if (addr > (vdev->config_len - sizeof(val))) | |
641 | return; | |
642 | ||
643 | memcpy(vdev->config + addr, &val, sizeof(val)); | |
644 | ||
645 | if (vdev->set_config) | |
646 | vdev->set_config(vdev, vdev->config); | |
647 | } | |
648 | ||
649 | static void virtio_config_writew(void *opaque, uint32_t addr, uint32_t data) | |
650 | { | |
651 | VirtIODevice *vdev = opaque; | |
652 | uint16_t val = data; | |
653 | ||
654 | addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
655 | if (addr > (vdev->config_len - sizeof(val))) | |
656 | return; | |
657 | ||
658 | memcpy(vdev->config + addr, &val, sizeof(val)); | |
659 | ||
660 | if (vdev->set_config) | |
661 | vdev->set_config(vdev, vdev->config); | |
662 | } | |
663 | ||
664 | static void virtio_config_writel(void *opaque, uint32_t addr, uint32_t data) | |
665 | { | |
666 | VirtIODevice *vdev = opaque; | |
667 | uint32_t val = data; | |
668 | ||
669 | addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
670 | if (addr > (vdev->config_len - sizeof(val))) | |
671 | return; | |
672 | ||
673 | memcpy(vdev->config + addr, &val, sizeof(val)); | |
674 | ||
675 | if (vdev->set_config) | |
676 | vdev->set_config(vdev, vdev->config); | |
677 | } | |
678 | ||
679 | static void virtio_map(PCIDevice *pci_dev, int region_num, | |
680 | uint32_t addr, uint32_t size, int type) | |
681 | { | |
682 | VirtIODevice *vdev = to_virtio_device(pci_dev); | |
683 | int i; | |
684 | ||
685 | vdev->addr = addr; | |
686 | for (i = 0; i < 3; i++) { | |
687 | register_ioport_write(addr, 20, 1 << i, virtio_ioport_write, vdev); | |
688 | register_ioport_read(addr, 20, 1 << i, virtio_ioport_read, vdev); | |
689 | } | |
690 | ||
691 | if (vdev->config_len) { | |
692 | register_ioport_write(addr + 20, vdev->config_len, 1, | |
693 | virtio_config_writeb, vdev); | |
694 | register_ioport_write(addr + 20, vdev->config_len, 2, | |
695 | virtio_config_writew, vdev); | |
696 | register_ioport_write(addr + 20, vdev->config_len, 4, | |
697 | virtio_config_writel, vdev); | |
698 | register_ioport_read(addr + 20, vdev->config_len, 1, | |
699 | virtio_config_readb, vdev); | |
700 | register_ioport_read(addr + 20, vdev->config_len, 2, | |
701 | virtio_config_readw, vdev); | |
702 | register_ioport_read(addr + 20, vdev->config_len, 4, | |
703 | virtio_config_readl, vdev); | |
704 | ||
705 | vdev->get_config(vdev, vdev->config); | |
706 | } | |
707 | } | |
708 | ||
709 | VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, | |
710 | void (*handle_output)(VirtIODevice *, VirtQueue *)) | |
711 | { | |
712 | int i; | |
713 | ||
714 | for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
715 | if (vdev->vq[i].vring.num == 0) | |
716 | break; | |
717 | } | |
718 | ||
719 | if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) | |
720 | abort(); | |
721 | ||
722 | vdev->vq[i].vring.num = queue_size; | |
723 | vdev->vq[i].handle_output = handle_output; | |
724 | ||
725 | return &vdev->vq[i]; | |
726 | } | |
727 | ||
728 | void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) | |
729 | { | |
730 | /* Always notify when queue is empty */ | |
731 | if ((vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx) && | |
732 | (vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT)) | |
733 | return; | |
734 | ||
735 | vdev->isr |= 0x01; | |
736 | virtio_update_irq(vdev); | |
737 | } | |
738 | ||
739 | void virtio_notify_config(VirtIODevice *vdev) | |
740 | { | |
741 | vdev->isr |= 0x03; | |
742 | virtio_update_irq(vdev); | |
743 | } | |
744 | ||
745 | void virtio_save(VirtIODevice *vdev, QEMUFile *f) | |
746 | { | |
747 | int i; | |
748 | ||
749 | pci_device_save(&vdev->pci_dev, f); | |
750 | ||
751 | qemu_put_be32s(f, &vdev->addr); | |
752 | qemu_put_8s(f, &vdev->status); | |
753 | qemu_put_8s(f, &vdev->isr); | |
754 | qemu_put_be16s(f, &vdev->queue_sel); | |
755 | qemu_put_be32s(f, &vdev->features); | |
756 | qemu_put_be32(f, vdev->config_len); | |
757 | qemu_put_buffer(f, vdev->config, vdev->config_len); | |
758 | ||
759 | for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
760 | if (vdev->vq[i].vring.num == 0) | |
761 | break; | |
762 | } | |
763 | ||
764 | qemu_put_be32(f, i); | |
765 | ||
766 | for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
767 | if (vdev->vq[i].vring.num == 0) | |
768 | break; | |
769 | ||
770 | qemu_put_be32(f, vdev->vq[i].vring.num); | |
771 | qemu_put_be32s(f, &vdev->vq[i].pfn); | |
772 | qemu_put_be16s(f, &vdev->vq[i].last_avail_idx); | |
773 | } | |
774 | } | |
775 | ||
776 | void virtio_load(VirtIODevice *vdev, QEMUFile *f) | |
777 | { | |
778 | int num, i; | |
779 | ||
780 | pci_device_load(&vdev->pci_dev, f); | |
781 | ||
782 | qemu_get_be32s(f, &vdev->addr); | |
783 | qemu_get_8s(f, &vdev->status); | |
784 | qemu_get_8s(f, &vdev->isr); | |
785 | qemu_get_be16s(f, &vdev->queue_sel); | |
786 | qemu_get_be32s(f, &vdev->features); | |
787 | vdev->config_len = qemu_get_be32(f); | |
788 | qemu_get_buffer(f, vdev->config, vdev->config_len); | |
789 | ||
790 | num = qemu_get_be32(f); | |
791 | ||
792 | for (i = 0; i < num; i++) { | |
793 | vdev->vq[i].vring.num = qemu_get_be32(f); | |
794 | qemu_get_be32s(f, &vdev->vq[i].pfn); | |
795 | qemu_get_be16s(f, &vdev->vq[i].last_avail_idx); | |
796 | ||
797 | if (vdev->vq[i].pfn) { | |
798 | target_phys_addr_t pa; | |
799 | ||
f46f15bc | 800 | pa = (ram_addr_t)vdev->vq[i].pfn << VIRTIO_PCI_QUEUE_ADDR_SHIFT; |
967f97fa AL |
801 | virtqueue_init(&vdev->vq[i], pa); |
802 | } | |
803 | } | |
804 | ||
805 | virtio_update_irq(vdev); | |
806 | } | |
807 | ||
808 | VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name, | |
809 | uint16_t vendor, uint16_t device, | |
810 | uint16_t subvendor, uint16_t subdevice, | |
811 | uint8_t class_code, uint8_t subclass_code, | |
812 | uint8_t pif, size_t config_size, | |
813 | size_t struct_size) | |
814 | { | |
815 | VirtIODevice *vdev; | |
816 | PCIDevice *pci_dev; | |
817 | uint8_t *config; | |
818 | uint32_t size; | |
819 | ||
820 | pci_dev = pci_register_device(bus, name, struct_size, | |
821 | -1, NULL, NULL); | |
822 | if (!pci_dev) | |
823 | return NULL; | |
824 | ||
825 | vdev = to_virtio_device(pci_dev); | |
826 | ||
827 | vdev->status = 0; | |
828 | vdev->isr = 0; | |
829 | vdev->queue_sel = 0; | |
830 | vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX); | |
831 | ||
832 | config = pci_dev->config; | |
833 | config[0x00] = vendor & 0xFF; | |
834 | config[0x01] = (vendor >> 8) & 0xFF; | |
835 | config[0x02] = device & 0xFF; | |
836 | config[0x03] = (device >> 8) & 0xFF; | |
837 | ||
838 | config[0x08] = VIRTIO_PCI_ABI_VERSION; | |
839 | ||
840 | config[0x09] = pif; | |
841 | config[0x0a] = subclass_code; | |
842 | config[0x0b] = class_code; | |
843 | config[0x0e] = 0x00; | |
844 | ||
845 | config[0x2c] = subvendor & 0xFF; | |
846 | config[0x2d] = (subvendor >> 8) & 0xFF; | |
847 | config[0x2e] = subdevice & 0xFF; | |
848 | config[0x2f] = (subdevice >> 8) & 0xFF; | |
849 | ||
850 | config[0x3d] = 1; | |
851 | ||
852 | vdev->name = name; | |
853 | vdev->config_len = config_size; | |
854 | if (vdev->config_len) | |
855 | vdev->config = qemu_mallocz(config_size); | |
856 | else | |
857 | vdev->config = NULL; | |
858 | ||
859 | size = 20 + config_size; | |
860 | if (size & (size-1)) | |
ad46db9a | 861 | size = 1 << qemu_fls(size); |
967f97fa AL |
862 | |
863 | pci_register_io_region(pci_dev, 0, size, PCI_ADDRESS_SPACE_IO, | |
864 | virtio_map); | |
865 | qemu_register_reset(virtio_reset, vdev); | |
866 | ||
867 | return vdev; | |
868 | } |