]>
Commit | Line | Data |
---|---|---|
967f97fa AL |
1 | /* |
2 | * Virtio Support | |
3 | * | |
4 | * Copyright IBM, Corp. 2007 | |
5 | * | |
6 | * Authors: | |
7 | * Anthony Liguori <[email protected]> | |
8 | * | |
9 | * This work is licensed under the terms of the GNU GPL, version 2. See | |
10 | * the COPYING file in the top-level directory. | |
11 | * | |
12 | */ | |
13 | ||
14 | #include <inttypes.h> | |
967f97fa | 15 | |
64979a4d | 16 | #include "trace.h" |
ce67ed65 | 17 | #include "qemu-error.h" |
967f97fa | 18 | #include "virtio.h" |
967f97fa | 19 | |
f46f15bc AL |
20 | /* The alignment to use between consumer and producer parts of vring. |
21 | * x86 pagesize again. */ | |
22 | #define VIRTIO_PCI_VRING_ALIGN 4096 | |
23 | ||
967f97fa AL |
24 | /* QEMU doesn't strictly need write barriers since everything runs in |
25 | * lock-step. We'll leave the calls to wmb() in though to make it obvious for | |
26 | * KVM or if kqemu gets SMP support. | |
79758e95 MT |
27 | * In any case, we must prevent the compiler from reordering the code. |
28 | * TODO: we likely need some rmb()/mb() as well. | |
967f97fa | 29 | */ |
79758e95 MT |
30 | |
31 | #define wmb() __asm__ __volatile__("": : :"memory") | |
967f97fa AL |
32 | |
/* One descriptor-table entry, mirroring its layout in guest memory. */
typedef struct VRingDesc {
    uint64_t addr;   /* guest-physical address of the buffer */
    uint32_t len;    /* length of the buffer in bytes */
    uint16_t flags;  /* VRING_DESC_F_* bits */
    uint16_t next;   /* index of the chained descriptor, if any */
} VRingDesc;
40 | ||
/* Header of the available ring; `ring` holds vring.num head indexes. */
typedef struct VRingAvail {
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[];
} VRingAvail;
47 | ||
/* One used-ring entry: which descriptor chain was consumed and how much
 * was written into it. */
typedef struct VRingUsedElem {
    uint32_t id;   /* head index of the completed chain */
    uint32_t len;  /* byte count reported for the chain */
} VRingUsedElem;
53 | ||
54 | typedef struct VRingUsed | |
55 | { | |
56 | uint16_t flags; | |
57 | uint16_t idx; | |
58 | VRingUsedElem ring[0]; | |
59 | } VRingUsed; | |
60 | ||
61 | typedef struct VRing | |
62 | { | |
63 | unsigned int num; | |
c227f099 AL |
64 | target_phys_addr_t desc; |
65 | target_phys_addr_t avail; | |
66 | target_phys_addr_t used; | |
967f97fa AL |
67 | } VRing; |
68 | ||
69 | struct VirtQueue | |
70 | { | |
71 | VRing vring; | |
c227f099 | 72 | target_phys_addr_t pa; |
967f97fa | 73 | uint16_t last_avail_idx; |
bcbabae8 MT |
74 | /* Last used index value we have signalled on */ |
75 | uint16_t signalled_used; | |
76 | ||
77 | /* Last used index value we have signalled on */ | |
78 | bool signalled_used_valid; | |
79 | ||
80 | /* Notification enabled? */ | |
81 | bool notification; | |
82 | ||
967f97fa | 83 | int inuse; |
bcbabae8 | 84 | |
7055e687 | 85 | uint16_t vector; |
967f97fa | 86 | void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq); |
1cbdabe2 MT |
87 | VirtIODevice *vdev; |
88 | EventNotifier guest_notifier; | |
89 | EventNotifier host_notifier; | |
967f97fa AL |
90 | }; |
91 | ||
967f97fa | 92 | /* virt queue functions */ |
53c25cea | 93 | static void virtqueue_init(VirtQueue *vq) |
967f97fa | 94 | { |
c227f099 | 95 | target_phys_addr_t pa = vq->pa; |
53c25cea | 96 | |
967f97fa AL |
97 | vq->vring.desc = pa; |
98 | vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc); | |
f46f15bc AL |
99 | vq->vring.used = vring_align(vq->vring.avail + |
100 | offsetof(VRingAvail, ring[vq->vring.num]), | |
101 | VIRTIO_PCI_VRING_ALIGN); | |
967f97fa AL |
102 | } |
103 | ||
c227f099 | 104 | static inline uint64_t vring_desc_addr(target_phys_addr_t desc_pa, int i) |
967f97fa | 105 | { |
c227f099 | 106 | target_phys_addr_t pa; |
5774cf98 | 107 | pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr); |
967f97fa AL |
108 | return ldq_phys(pa); |
109 | } | |
110 | ||
c227f099 | 111 | static inline uint32_t vring_desc_len(target_phys_addr_t desc_pa, int i) |
967f97fa | 112 | { |
c227f099 | 113 | target_phys_addr_t pa; |
5774cf98 | 114 | pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len); |
967f97fa AL |
115 | return ldl_phys(pa); |
116 | } | |
117 | ||
c227f099 | 118 | static inline uint16_t vring_desc_flags(target_phys_addr_t desc_pa, int i) |
967f97fa | 119 | { |
c227f099 | 120 | target_phys_addr_t pa; |
5774cf98 | 121 | pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags); |
967f97fa AL |
122 | return lduw_phys(pa); |
123 | } | |
124 | ||
c227f099 | 125 | static inline uint16_t vring_desc_next(target_phys_addr_t desc_pa, int i) |
967f97fa | 126 | { |
c227f099 | 127 | target_phys_addr_t pa; |
5774cf98 | 128 | pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next); |
967f97fa AL |
129 | return lduw_phys(pa); |
130 | } | |
131 | ||
132 | static inline uint16_t vring_avail_flags(VirtQueue *vq) | |
133 | { | |
c227f099 | 134 | target_phys_addr_t pa; |
967f97fa AL |
135 | pa = vq->vring.avail + offsetof(VRingAvail, flags); |
136 | return lduw_phys(pa); | |
137 | } | |
138 | ||
139 | static inline uint16_t vring_avail_idx(VirtQueue *vq) | |
140 | { | |
c227f099 | 141 | target_phys_addr_t pa; |
967f97fa AL |
142 | pa = vq->vring.avail + offsetof(VRingAvail, idx); |
143 | return lduw_phys(pa); | |
144 | } | |
145 | ||
146 | static inline uint16_t vring_avail_ring(VirtQueue *vq, int i) | |
147 | { | |
c227f099 | 148 | target_phys_addr_t pa; |
967f97fa AL |
149 | pa = vq->vring.avail + offsetof(VRingAvail, ring[i]); |
150 | return lduw_phys(pa); | |
151 | } | |
152 | ||
bcbabae8 MT |
153 | static inline uint16_t vring_used_event(VirtQueue *vq) |
154 | { | |
155 | return vring_avail_ring(vq, vq->vring.num); | |
156 | } | |
157 | ||
967f97fa AL |
158 | static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val) |
159 | { | |
c227f099 | 160 | target_phys_addr_t pa; |
967f97fa AL |
161 | pa = vq->vring.used + offsetof(VRingUsed, ring[i].id); |
162 | stl_phys(pa, val); | |
163 | } | |
164 | ||
165 | static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val) | |
166 | { | |
c227f099 | 167 | target_phys_addr_t pa; |
967f97fa AL |
168 | pa = vq->vring.used + offsetof(VRingUsed, ring[i].len); |
169 | stl_phys(pa, val); | |
170 | } | |
171 | ||
172 | static uint16_t vring_used_idx(VirtQueue *vq) | |
173 | { | |
c227f099 | 174 | target_phys_addr_t pa; |
967f97fa AL |
175 | pa = vq->vring.used + offsetof(VRingUsed, idx); |
176 | return lduw_phys(pa); | |
177 | } | |
178 | ||
bcbabae8 | 179 | static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val) |
967f97fa | 180 | { |
c227f099 | 181 | target_phys_addr_t pa; |
967f97fa | 182 | pa = vq->vring.used + offsetof(VRingUsed, idx); |
bcbabae8 | 183 | stw_phys(pa, val); |
967f97fa AL |
184 | } |
185 | ||
186 | static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask) | |
187 | { | |
c227f099 | 188 | target_phys_addr_t pa; |
967f97fa AL |
189 | pa = vq->vring.used + offsetof(VRingUsed, flags); |
190 | stw_phys(pa, lduw_phys(pa) | mask); | |
191 | } | |
192 | ||
193 | static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask) | |
194 | { | |
c227f099 | 195 | target_phys_addr_t pa; |
967f97fa AL |
196 | pa = vq->vring.used + offsetof(VRingUsed, flags); |
197 | stw_phys(pa, lduw_phys(pa) & ~mask); | |
198 | } | |
199 | ||
bcbabae8 MT |
200 | static inline void vring_avail_event(VirtQueue *vq, uint16_t val) |
201 | { | |
202 | target_phys_addr_t pa; | |
203 | if (!vq->notification) { | |
204 | return; | |
205 | } | |
206 | pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]); | |
207 | stw_phys(pa, val); | |
208 | } | |
209 | ||
967f97fa AL |
210 | void virtio_queue_set_notification(VirtQueue *vq, int enable) |
211 | { | |
bcbabae8 MT |
212 | vq->notification = enable; |
213 | if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) { | |
214 | vring_avail_event(vq, vring_avail_idx(vq)); | |
215 | } else if (enable) { | |
967f97fa | 216 | vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY); |
bcbabae8 | 217 | } else { |
967f97fa | 218 | vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY); |
bcbabae8 | 219 | } |
967f97fa AL |
220 | } |
221 | ||
222 | int virtio_queue_ready(VirtQueue *vq) | |
223 | { | |
224 | return vq->vring.avail != 0; | |
225 | } | |
226 | ||
227 | int virtio_queue_empty(VirtQueue *vq) | |
228 | { | |
229 | return vring_avail_idx(vq) == vq->last_avail_idx; | |
230 | } | |
231 | ||
232 | void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, | |
233 | unsigned int len, unsigned int idx) | |
234 | { | |
235 | unsigned int offset; | |
236 | int i; | |
237 | ||
64979a4d SH |
238 | trace_virtqueue_fill(vq, elem, len, idx); |
239 | ||
967f97fa AL |
240 | offset = 0; |
241 | for (i = 0; i < elem->in_num; i++) { | |
242 | size_t size = MIN(len - offset, elem->in_sg[i].iov_len); | |
243 | ||
26b258e1 AL |
244 | cpu_physical_memory_unmap(elem->in_sg[i].iov_base, |
245 | elem->in_sg[i].iov_len, | |
246 | 1, size); | |
967f97fa | 247 | |
26b258e1 | 248 | offset += elem->in_sg[i].iov_len; |
967f97fa AL |
249 | } |
250 | ||
26b258e1 AL |
251 | for (i = 0; i < elem->out_num; i++) |
252 | cpu_physical_memory_unmap(elem->out_sg[i].iov_base, | |
253 | elem->out_sg[i].iov_len, | |
254 | 0, elem->out_sg[i].iov_len); | |
255 | ||
967f97fa AL |
256 | idx = (idx + vring_used_idx(vq)) % vq->vring.num; |
257 | ||
258 | /* Get a pointer to the next entry in the used ring. */ | |
259 | vring_used_ring_id(vq, idx, elem->index); | |
260 | vring_used_ring_len(vq, idx, len); | |
261 | } | |
262 | ||
263 | void virtqueue_flush(VirtQueue *vq, unsigned int count) | |
264 | { | |
bcbabae8 | 265 | uint16_t old, new; |
967f97fa AL |
266 | /* Make sure buffer is written before we update index. */ |
267 | wmb(); | |
64979a4d | 268 | trace_virtqueue_flush(vq, count); |
bcbabae8 MT |
269 | old = vring_used_idx(vq); |
270 | new = old + count; | |
271 | vring_used_idx_set(vq, new); | |
967f97fa | 272 | vq->inuse -= count; |
bcbabae8 MT |
273 | if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) |
274 | vq->signalled_used_valid = false; | |
967f97fa AL |
275 | } |
276 | ||
277 | void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, | |
278 | unsigned int len) | |
279 | { | |
280 | virtqueue_fill(vq, elem, len, 0); | |
281 | virtqueue_flush(vq, 1); | |
282 | } | |
283 | ||
284 | static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx) | |
285 | { | |
286 | uint16_t num_heads = vring_avail_idx(vq) - idx; | |
287 | ||
288 | /* Check it isn't doing very strange things with descriptor numbers. */ | |
bb6834cf | 289 | if (num_heads > vq->vring.num) { |
ce67ed65 SH |
290 | error_report("Guest moved used index from %u to %u", |
291 | idx, vring_avail_idx(vq)); | |
bb6834cf AL |
292 | exit(1); |
293 | } | |
967f97fa AL |
294 | |
295 | return num_heads; | |
296 | } | |
297 | ||
298 | static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx) | |
299 | { | |
300 | unsigned int head; | |
301 | ||
302 | /* Grab the next descriptor number they're advertising, and increment | |
303 | * the index we've seen. */ | |
304 | head = vring_avail_ring(vq, idx % vq->vring.num); | |
305 | ||
306 | /* If their number is silly, that's a fatal mistake. */ | |
bb6834cf | 307 | if (head >= vq->vring.num) { |
ce67ed65 | 308 | error_report("Guest says index %u is available", head); |
bb6834cf AL |
309 | exit(1); |
310 | } | |
967f97fa AL |
311 | |
312 | return head; | |
313 | } | |
314 | ||
c227f099 | 315 | static unsigned virtqueue_next_desc(target_phys_addr_t desc_pa, |
5774cf98 | 316 | unsigned int i, unsigned int max) |
967f97fa AL |
317 | { |
318 | unsigned int next; | |
319 | ||
320 | /* If this descriptor says it doesn't chain, we're done. */ | |
5774cf98 MM |
321 | if (!(vring_desc_flags(desc_pa, i) & VRING_DESC_F_NEXT)) |
322 | return max; | |
967f97fa AL |
323 | |
324 | /* Check they're not leading us off end of descriptors. */ | |
5774cf98 | 325 | next = vring_desc_next(desc_pa, i); |
967f97fa AL |
326 | /* Make sure compiler knows to grab that: we don't want it changing! */ |
327 | wmb(); | |
328 | ||
5774cf98 | 329 | if (next >= max) { |
ce67ed65 | 330 | error_report("Desc next is %u", next); |
bb6834cf AL |
331 | exit(1); |
332 | } | |
967f97fa AL |
333 | |
334 | return next; | |
335 | } | |
336 | ||
337 | int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes) | |
338 | { | |
efeea6d0 MM |
339 | unsigned int idx; |
340 | int total_bufs, in_total, out_total; | |
967f97fa AL |
341 | |
342 | idx = vq->last_avail_idx; | |
343 | ||
efeea6d0 | 344 | total_bufs = in_total = out_total = 0; |
967f97fa | 345 | while (virtqueue_num_heads(vq, idx)) { |
efeea6d0 | 346 | unsigned int max, num_bufs, indirect = 0; |
c227f099 | 347 | target_phys_addr_t desc_pa; |
967f97fa AL |
348 | int i; |
349 | ||
efeea6d0 MM |
350 | max = vq->vring.num; |
351 | num_bufs = total_bufs; | |
967f97fa | 352 | i = virtqueue_get_head(vq, idx++); |
efeea6d0 MM |
353 | desc_pa = vq->vring.desc; |
354 | ||
355 | if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) { | |
356 | if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) { | |
ce67ed65 | 357 | error_report("Invalid size for indirect buffer table"); |
efeea6d0 MM |
358 | exit(1); |
359 | } | |
360 | ||
361 | /* If we've got too many, that implies a descriptor loop. */ | |
362 | if (num_bufs >= max) { | |
ce67ed65 | 363 | error_report("Looped descriptor"); |
efeea6d0 MM |
364 | exit(1); |
365 | } | |
366 | ||
367 | /* loop over the indirect descriptor table */ | |
368 | indirect = 1; | |
369 | max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc); | |
370 | num_bufs = i = 0; | |
371 | desc_pa = vring_desc_addr(desc_pa, i); | |
372 | } | |
373 | ||
967f97fa AL |
374 | do { |
375 | /* If we've got too many, that implies a descriptor loop. */ | |
5774cf98 | 376 | if (++num_bufs > max) { |
ce67ed65 | 377 | error_report("Looped descriptor"); |
bb6834cf AL |
378 | exit(1); |
379 | } | |
967f97fa | 380 | |
5774cf98 | 381 | if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) { |
967f97fa | 382 | if (in_bytes > 0 && |
5774cf98 | 383 | (in_total += vring_desc_len(desc_pa, i)) >= in_bytes) |
967f97fa AL |
384 | return 1; |
385 | } else { | |
386 | if (out_bytes > 0 && | |
5774cf98 | 387 | (out_total += vring_desc_len(desc_pa, i)) >= out_bytes) |
967f97fa AL |
388 | return 1; |
389 | } | |
5774cf98 | 390 | } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max); |
efeea6d0 MM |
391 | |
392 | if (!indirect) | |
393 | total_bufs = num_bufs; | |
394 | else | |
395 | total_bufs++; | |
967f97fa AL |
396 | } |
397 | ||
398 | return 0; | |
399 | } | |
400 | ||
42fb2e07 KW |
401 | void virtqueue_map_sg(struct iovec *sg, target_phys_addr_t *addr, |
402 | size_t num_sg, int is_write) | |
403 | { | |
404 | unsigned int i; | |
405 | target_phys_addr_t len; | |
406 | ||
407 | for (i = 0; i < num_sg; i++) { | |
408 | len = sg[i].iov_len; | |
409 | sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write); | |
410 | if (sg[i].iov_base == NULL || len != sg[i].iov_len) { | |
ce67ed65 | 411 | error_report("virtio: trying to map MMIO memory"); |
42fb2e07 KW |
412 | exit(1); |
413 | } | |
414 | } | |
415 | } | |
416 | ||
967f97fa AL |
417 | int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) |
418 | { | |
5774cf98 | 419 | unsigned int i, head, max; |
c227f099 | 420 | target_phys_addr_t desc_pa = vq->vring.desc; |
967f97fa AL |
421 | |
422 | if (!virtqueue_num_heads(vq, vq->last_avail_idx)) | |
423 | return 0; | |
424 | ||
425 | /* When we start there are none of either input nor output. */ | |
426 | elem->out_num = elem->in_num = 0; | |
427 | ||
5774cf98 MM |
428 | max = vq->vring.num; |
429 | ||
967f97fa | 430 | i = head = virtqueue_get_head(vq, vq->last_avail_idx++); |
bcbabae8 MT |
431 | if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) { |
432 | vring_avail_event(vq, vring_avail_idx(vq)); | |
433 | } | |
efeea6d0 MM |
434 | |
435 | if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) { | |
436 | if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) { | |
ce67ed65 | 437 | error_report("Invalid size for indirect buffer table"); |
efeea6d0 MM |
438 | exit(1); |
439 | } | |
440 | ||
441 | /* loop over the indirect descriptor table */ | |
442 | max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc); | |
443 | desc_pa = vring_desc_addr(desc_pa, i); | |
444 | i = 0; | |
445 | } | |
446 | ||
42fb2e07 | 447 | /* Collect all the descriptors */ |
967f97fa AL |
448 | do { |
449 | struct iovec *sg; | |
450 | ||
5774cf98 | 451 | if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) { |
c8eac1cf MT |
452 | if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) { |
453 | error_report("Too many write descriptors in indirect table"); | |
454 | exit(1); | |
455 | } | |
5774cf98 | 456 | elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i); |
967f97fa | 457 | sg = &elem->in_sg[elem->in_num++]; |
42fb2e07 | 458 | } else { |
c8eac1cf MT |
459 | if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) { |
460 | error_report("Too many read descriptors in indirect table"); | |
461 | exit(1); | |
462 | } | |
42fb2e07 | 463 | elem->out_addr[elem->out_num] = vring_desc_addr(desc_pa, i); |
967f97fa | 464 | sg = &elem->out_sg[elem->out_num++]; |
42fb2e07 | 465 | } |
967f97fa | 466 | |
5774cf98 | 467 | sg->iov_len = vring_desc_len(desc_pa, i); |
967f97fa AL |
468 | |
469 | /* If we've got too many, that implies a descriptor loop. */ | |
5774cf98 | 470 | if ((elem->in_num + elem->out_num) > max) { |
ce67ed65 | 471 | error_report("Looped descriptor"); |
bb6834cf AL |
472 | exit(1); |
473 | } | |
5774cf98 | 474 | } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max); |
967f97fa | 475 | |
42fb2e07 KW |
476 | /* Now map what we have collected */ |
477 | virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1); | |
478 | virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0); | |
479 | ||
967f97fa AL |
480 | elem->index = head; |
481 | ||
482 | vq->inuse++; | |
483 | ||
64979a4d | 484 | trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num); |
967f97fa AL |
485 | return elem->in_num + elem->out_num; |
486 | } | |
487 | ||
488 | /* virtio device */ | |
7055e687 MT |
489 | static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector) |
490 | { | |
491 | if (vdev->binding->notify) { | |
492 | vdev->binding->notify(vdev->binding_opaque, vector); | |
493 | } | |
494 | } | |
967f97fa | 495 | |
53c25cea | 496 | void virtio_update_irq(VirtIODevice *vdev) |
967f97fa | 497 | { |
7055e687 | 498 | virtio_notify_vector(vdev, VIRTIO_NO_VECTOR); |
967f97fa AL |
499 | } |
500 | ||
53c25cea | 501 | void virtio_reset(void *opaque) |
967f97fa AL |
502 | { |
503 | VirtIODevice *vdev = opaque; | |
504 | int i; | |
505 | ||
e0c472d8 MT |
506 | virtio_set_status(vdev, 0); |
507 | ||
967f97fa AL |
508 | if (vdev->reset) |
509 | vdev->reset(vdev); | |
510 | ||
704a76fc | 511 | vdev->guest_features = 0; |
967f97fa AL |
512 | vdev->queue_sel = 0; |
513 | vdev->status = 0; | |
514 | vdev->isr = 0; | |
7055e687 MT |
515 | vdev->config_vector = VIRTIO_NO_VECTOR; |
516 | virtio_notify_vector(vdev, vdev->config_vector); | |
967f97fa AL |
517 | |
518 | for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
519 | vdev->vq[i].vring.desc = 0; | |
520 | vdev->vq[i].vring.avail = 0; | |
521 | vdev->vq[i].vring.used = 0; | |
522 | vdev->vq[i].last_avail_idx = 0; | |
53c25cea | 523 | vdev->vq[i].pa = 0; |
7055e687 | 524 | vdev->vq[i].vector = VIRTIO_NO_VECTOR; |
bcbabae8 MT |
525 | vdev->vq[i].signalled_used = 0; |
526 | vdev->vq[i].signalled_used_valid = false; | |
527 | vdev->vq[i].notification = true; | |
967f97fa AL |
528 | } |
529 | } | |
530 | ||
53c25cea | 531 | uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr) |
967f97fa | 532 | { |
967f97fa AL |
533 | uint8_t val; |
534 | ||
535 | vdev->get_config(vdev, vdev->config); | |
536 | ||
967f97fa AL |
537 | if (addr > (vdev->config_len - sizeof(val))) |
538 | return (uint32_t)-1; | |
539 | ||
540 | memcpy(&val, vdev->config + addr, sizeof(val)); | |
541 | return val; | |
542 | } | |
543 | ||
53c25cea | 544 | uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr) |
967f97fa | 545 | { |
967f97fa AL |
546 | uint16_t val; |
547 | ||
548 | vdev->get_config(vdev, vdev->config); | |
549 | ||
967f97fa AL |
550 | if (addr > (vdev->config_len - sizeof(val))) |
551 | return (uint32_t)-1; | |
552 | ||
553 | memcpy(&val, vdev->config + addr, sizeof(val)); | |
554 | return val; | |
555 | } | |
556 | ||
53c25cea | 557 | uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr) |
967f97fa | 558 | { |
967f97fa AL |
559 | uint32_t val; |
560 | ||
561 | vdev->get_config(vdev, vdev->config); | |
562 | ||
967f97fa AL |
563 | if (addr > (vdev->config_len - sizeof(val))) |
564 | return (uint32_t)-1; | |
565 | ||
566 | memcpy(&val, vdev->config + addr, sizeof(val)); | |
567 | return val; | |
568 | } | |
569 | ||
53c25cea | 570 | void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data) |
967f97fa | 571 | { |
967f97fa AL |
572 | uint8_t val = data; |
573 | ||
967f97fa AL |
574 | if (addr > (vdev->config_len - sizeof(val))) |
575 | return; | |
576 | ||
577 | memcpy(vdev->config + addr, &val, sizeof(val)); | |
578 | ||
579 | if (vdev->set_config) | |
580 | vdev->set_config(vdev, vdev->config); | |
581 | } | |
582 | ||
53c25cea | 583 | void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data) |
967f97fa | 584 | { |
967f97fa AL |
585 | uint16_t val = data; |
586 | ||
967f97fa AL |
587 | if (addr > (vdev->config_len - sizeof(val))) |
588 | return; | |
589 | ||
590 | memcpy(vdev->config + addr, &val, sizeof(val)); | |
591 | ||
592 | if (vdev->set_config) | |
593 | vdev->set_config(vdev, vdev->config); | |
594 | } | |
595 | ||
53c25cea | 596 | void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data) |
967f97fa | 597 | { |
967f97fa AL |
598 | uint32_t val = data; |
599 | ||
967f97fa AL |
600 | if (addr > (vdev->config_len - sizeof(val))) |
601 | return; | |
602 | ||
603 | memcpy(vdev->config + addr, &val, sizeof(val)); | |
604 | ||
605 | if (vdev->set_config) | |
606 | vdev->set_config(vdev, vdev->config); | |
607 | } | |
608 | ||
c227f099 | 609 | void virtio_queue_set_addr(VirtIODevice *vdev, int n, target_phys_addr_t addr) |
967f97fa | 610 | { |
7055e687 MT |
611 | vdev->vq[n].pa = addr; |
612 | virtqueue_init(&vdev->vq[n]); | |
53c25cea PB |
613 | } |
614 | ||
c227f099 | 615 | target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n) |
53c25cea PB |
616 | { |
617 | return vdev->vq[n].pa; | |
618 | } | |
619 | ||
620 | int virtio_queue_get_num(VirtIODevice *vdev, int n) | |
621 | { | |
622 | return vdev->vq[n].vring.num; | |
623 | } | |
967f97fa | 624 | |
25db9ebe SH |
625 | void virtio_queue_notify_vq(VirtQueue *vq) |
626 | { | |
627 | if (vq->vring.desc) { | |
628 | VirtIODevice *vdev = vq->vdev; | |
629 | trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); | |
630 | vq->handle_output(vdev, vq); | |
631 | } | |
632 | } | |
633 | ||
53c25cea PB |
634 | void virtio_queue_notify(VirtIODevice *vdev, int n) |
635 | { | |
7157e2e2 | 636 | virtio_queue_notify_vq(&vdev->vq[n]); |
967f97fa AL |
637 | } |
638 | ||
7055e687 MT |
639 | uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) |
640 | { | |
641 | return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector : | |
642 | VIRTIO_NO_VECTOR; | |
643 | } | |
644 | ||
645 | void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector) | |
646 | { | |
647 | if (n < VIRTIO_PCI_QUEUE_MAX) | |
648 | vdev->vq[n].vector = vector; | |
649 | } | |
650 | ||
967f97fa AL |
651 | VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, |
652 | void (*handle_output)(VirtIODevice *, VirtQueue *)) | |
653 | { | |
654 | int i; | |
655 | ||
656 | for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
657 | if (vdev->vq[i].vring.num == 0) | |
658 | break; | |
659 | } | |
660 | ||
661 | if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) | |
662 | abort(); | |
663 | ||
664 | vdev->vq[i].vring.num = queue_size; | |
665 | vdev->vq[i].handle_output = handle_output; | |
666 | ||
667 | return &vdev->vq[i]; | |
668 | } | |
669 | ||
1cbdabe2 MT |
670 | void virtio_irq(VirtQueue *vq) |
671 | { | |
64979a4d | 672 | trace_virtio_irq(vq); |
1cbdabe2 MT |
673 | vq->vdev->isr |= 0x01; |
674 | virtio_notify_vector(vq->vdev, vq->vector); | |
675 | } | |
676 | ||
bcbabae8 MT |
/* Assuming a given event_idx value from the other side, if
 * we have just incremented index from old to new,
 * should we trigger an event?
 *
 * Returns non-zero when event lies in the window [old, new), computed
 * with 16-bit wrap-around arithmetic. */
static inline int vring_need_event(uint16_t event, uint16_t new, uint16_t old)
{
    /* Note: Xen has similar logic for notification hold-off
     * in include/xen/interface/io/ring.h with req_event and req_prod
     * corresponding to event_idx + 1 and new respectively.
     * Note also that req_event and req_prod in Xen start at 1,
     * event indexes in virtio start at 0. */
    const uint16_t past_event = (uint16_t)(new - event - 1);
    const uint16_t advanced = (uint16_t)(new - old);

    return past_event < advanced;
}
689 | ||
690 | static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq) | |
691 | { | |
692 | uint16_t old, new; | |
693 | bool v; | |
97b83deb | 694 | /* Always notify when queue is empty (when feature acknowledge) */ |
bcbabae8 MT |
695 | if (((vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) && |
696 | !vq->inuse && vring_avail_idx(vq) == vq->last_avail_idx)) { | |
697 | return true; | |
698 | } | |
699 | ||
700 | if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) { | |
701 | return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT); | |
702 | } | |
703 | ||
704 | v = vq->signalled_used_valid; | |
705 | vq->signalled_used_valid = true; | |
706 | old = vq->signalled_used; | |
707 | new = vq->signalled_used = vring_used_idx(vq); | |
708 | return !v || vring_need_event(vring_used_event(vq), new, old); | |
709 | } | |
710 | ||
711 | void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) | |
712 | { | |
713 | if (!vring_notify(vdev, vq)) { | |
967f97fa | 714 | return; |
bcbabae8 | 715 | } |
967f97fa | 716 | |
64979a4d | 717 | trace_virtio_notify(vdev, vq); |
967f97fa | 718 | vdev->isr |= 0x01; |
7055e687 | 719 | virtio_notify_vector(vdev, vq->vector); |
967f97fa AL |
720 | } |
721 | ||
722 | void virtio_notify_config(VirtIODevice *vdev) | |
723 | { | |
7625162c AL |
724 | if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) |
725 | return; | |
726 | ||
967f97fa | 727 | vdev->isr |= 0x03; |
7055e687 | 728 | virtio_notify_vector(vdev, vdev->config_vector); |
967f97fa AL |
729 | } |
730 | ||
731 | void virtio_save(VirtIODevice *vdev, QEMUFile *f) | |
732 | { | |
733 | int i; | |
734 | ||
ff24bd58 MT |
735 | if (vdev->binding->save_config) |
736 | vdev->binding->save_config(vdev->binding_opaque, f); | |
967f97fa | 737 | |
967f97fa AL |
738 | qemu_put_8s(f, &vdev->status); |
739 | qemu_put_8s(f, &vdev->isr); | |
740 | qemu_put_be16s(f, &vdev->queue_sel); | |
704a76fc | 741 | qemu_put_be32s(f, &vdev->guest_features); |
967f97fa AL |
742 | qemu_put_be32(f, vdev->config_len); |
743 | qemu_put_buffer(f, vdev->config, vdev->config_len); | |
744 | ||
745 | for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
746 | if (vdev->vq[i].vring.num == 0) | |
747 | break; | |
748 | } | |
749 | ||
750 | qemu_put_be32(f, i); | |
751 | ||
752 | for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
753 | if (vdev->vq[i].vring.num == 0) | |
754 | break; | |
755 | ||
756 | qemu_put_be32(f, vdev->vq[i].vring.num); | |
53c25cea | 757 | qemu_put_be64(f, vdev->vq[i].pa); |
967f97fa | 758 | qemu_put_be16s(f, &vdev->vq[i].last_avail_idx); |
ff24bd58 MT |
759 | if (vdev->binding->save_queue) |
760 | vdev->binding->save_queue(vdev->binding_opaque, i, f); | |
967f97fa AL |
761 | } |
762 | } | |
763 | ||
ff24bd58 | 764 | int virtio_load(VirtIODevice *vdev, QEMUFile *f) |
967f97fa | 765 | { |
ff24bd58 | 766 | int num, i, ret; |
6d74ca5a | 767 | uint32_t features; |
8172539d | 768 | uint32_t supported_features = |
6d74ca5a | 769 | vdev->binding->get_features(vdev->binding_opaque); |
967f97fa | 770 | |
ff24bd58 MT |
771 | if (vdev->binding->load_config) { |
772 | ret = vdev->binding->load_config(vdev->binding_opaque, f); | |
773 | if (ret) | |
774 | return ret; | |
775 | } | |
967f97fa | 776 | |
967f97fa AL |
777 | qemu_get_8s(f, &vdev->status); |
778 | qemu_get_8s(f, &vdev->isr); | |
779 | qemu_get_be16s(f, &vdev->queue_sel); | |
6d74ca5a MT |
780 | qemu_get_be32s(f, &features); |
781 | if (features & ~supported_features) { | |
ce67ed65 SH |
782 | error_report("Features 0x%x unsupported. Allowed features: 0x%x", |
783 | features, supported_features); | |
6d74ca5a MT |
784 | return -1; |
785 | } | |
fae054b0 MT |
786 | if (vdev->set_features) |
787 | vdev->set_features(vdev, features); | |
704a76fc | 788 | vdev->guest_features = features; |
967f97fa AL |
789 | vdev->config_len = qemu_get_be32(f); |
790 | qemu_get_buffer(f, vdev->config, vdev->config_len); | |
791 | ||
792 | num = qemu_get_be32(f); | |
793 | ||
794 | for (i = 0; i < num; i++) { | |
795 | vdev->vq[i].vring.num = qemu_get_be32(f); | |
53c25cea | 796 | vdev->vq[i].pa = qemu_get_be64(f); |
967f97fa | 797 | qemu_get_be16s(f, &vdev->vq[i].last_avail_idx); |
bcbabae8 MT |
798 | vdev->vq[i].signalled_used_valid = false; |
799 | vdev->vq[i].notification = true; | |
967f97fa | 800 | |
53c25cea | 801 | if (vdev->vq[i].pa) { |
1abeb5a6 | 802 | uint16_t nheads; |
53c25cea | 803 | virtqueue_init(&vdev->vq[i]); |
1abeb5a6 MT |
804 | nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx; |
805 | /* Check it isn't doing very strange things with descriptor numbers. */ | |
806 | if (nheads > vdev->vq[i].vring.num) { | |
807 | error_report("VQ %d size 0x%x Guest index 0x%x " | |
6daf194d | 808 | "inconsistent with Host index 0x%x: delta 0x%x", |
1abeb5a6 MT |
809 | i, vdev->vq[i].vring.num, |
810 | vring_avail_idx(&vdev->vq[i]), | |
811 | vdev->vq[i].last_avail_idx, nheads); | |
812 | return -1; | |
813 | } | |
814 | } else if (vdev->vq[i].last_avail_idx) { | |
815 | error_report("VQ %d address 0x0 " | |
6daf194d | 816 | "inconsistent with Host index 0x%x", |
1abeb5a6 MT |
817 | i, vdev->vq[i].last_avail_idx); |
818 | return -1; | |
258dc7c9 | 819 | } |
ff24bd58 MT |
820 | if (vdev->binding->load_queue) { |
821 | ret = vdev->binding->load_queue(vdev->binding_opaque, i, f); | |
822 | if (ret) | |
823 | return ret; | |
7055e687 | 824 | } |
967f97fa AL |
825 | } |
826 | ||
7055e687 | 827 | virtio_notify_vector(vdev, VIRTIO_NO_VECTOR); |
ff24bd58 | 828 | return 0; |
967f97fa AL |
829 | } |
830 | ||
b946a153 AL |
831 | void virtio_cleanup(VirtIODevice *vdev) |
832 | { | |
85cf2a8d | 833 | qemu_del_vm_change_state_handler(vdev->vmstate); |
b946a153 | 834 | if (vdev->config) |
7267c094 AL |
835 | g_free(vdev->config); |
836 | g_free(vdev->vq); | |
837 | g_free(vdev); | |
b946a153 AL |
838 | } |
839 | ||
1dfb4dd9 | 840 | static void virtio_vmstate_change(void *opaque, int running, RunState state) |
85cf2a8d MT |
841 | { |
842 | VirtIODevice *vdev = opaque; | |
843 | bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK); | |
844 | vdev->vm_running = running; | |
845 | ||
846 | if (backend_run) { | |
847 | virtio_set_status(vdev, vdev->status); | |
848 | } | |
849 | ||
850 | if (vdev->binding->vmstate_change) { | |
851 | vdev->binding->vmstate_change(vdev->binding_opaque, backend_run); | |
852 | } | |
853 | ||
854 | if (!backend_run) { | |
855 | virtio_set_status(vdev, vdev->status); | |
856 | } | |
857 | } | |
858 | ||
53c25cea PB |
859 | VirtIODevice *virtio_common_init(const char *name, uint16_t device_id, |
860 | size_t config_size, size_t struct_size) | |
967f97fa AL |
861 | { |
862 | VirtIODevice *vdev; | |
b8193adb | 863 | int i; |
967f97fa | 864 | |
7267c094 | 865 | vdev = g_malloc0(struct_size); |
967f97fa | 866 | |
53c25cea | 867 | vdev->device_id = device_id; |
967f97fa AL |
868 | vdev->status = 0; |
869 | vdev->isr = 0; | |
870 | vdev->queue_sel = 0; | |
7055e687 | 871 | vdev->config_vector = VIRTIO_NO_VECTOR; |
7267c094 | 872 | vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX); |
d3674c57 | 873 | vdev->vm_running = vm_running; |
1cbdabe2 | 874 | for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { |
b8193adb | 875 | vdev->vq[i].vector = VIRTIO_NO_VECTOR; |
1cbdabe2 MT |
876 | vdev->vq[i].vdev = vdev; |
877 | } | |
967f97fa | 878 | |
967f97fa AL |
879 | vdev->name = name; |
880 | vdev->config_len = config_size; | |
881 | if (vdev->config_len) | |
7267c094 | 882 | vdev->config = g_malloc0(config_size); |
967f97fa AL |
883 | else |
884 | vdev->config = NULL; | |
885 | ||
85cf2a8d MT |
886 | vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change, vdev); |
887 | ||
967f97fa AL |
888 | return vdev; |
889 | } | |
53c25cea PB |
890 | |
891 | void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding, | |
892 | void *opaque) | |
893 | { | |
894 | vdev->binding = binding; | |
895 | vdev->binding_opaque = opaque; | |
896 | } | |
1cbdabe2 MT |
897 | |
898 | target_phys_addr_t virtio_queue_get_desc_addr(VirtIODevice *vdev, int n) | |
899 | { | |
900 | return vdev->vq[n].vring.desc; | |
901 | } | |
902 | ||
903 | target_phys_addr_t virtio_queue_get_avail_addr(VirtIODevice *vdev, int n) | |
904 | { | |
905 | return vdev->vq[n].vring.avail; | |
906 | } | |
907 | ||
908 | target_phys_addr_t virtio_queue_get_used_addr(VirtIODevice *vdev, int n) | |
909 | { | |
910 | return vdev->vq[n].vring.used; | |
911 | } | |
912 | ||
913 | target_phys_addr_t virtio_queue_get_ring_addr(VirtIODevice *vdev, int n) | |
914 | { | |
915 | return vdev->vq[n].vring.desc; | |
916 | } | |
917 | ||
918 | target_phys_addr_t virtio_queue_get_desc_size(VirtIODevice *vdev, int n) | |
919 | { | |
920 | return sizeof(VRingDesc) * vdev->vq[n].vring.num; | |
921 | } | |
922 | ||
923 | target_phys_addr_t virtio_queue_get_avail_size(VirtIODevice *vdev, int n) | |
924 | { | |
925 | return offsetof(VRingAvail, ring) + | |
2b3af999 | 926 | sizeof(uint64_t) * vdev->vq[n].vring.num; |
1cbdabe2 MT |
927 | } |
928 | ||
929 | target_phys_addr_t virtio_queue_get_used_size(VirtIODevice *vdev, int n) | |
930 | { | |
931 | return offsetof(VRingUsed, ring) + | |
932 | sizeof(VRingUsedElem) * vdev->vq[n].vring.num; | |
933 | } | |
934 | ||
935 | target_phys_addr_t virtio_queue_get_ring_size(VirtIODevice *vdev, int n) | |
936 | { | |
937 | return vdev->vq[n].vring.used - vdev->vq[n].vring.desc + | |
938 | virtio_queue_get_used_size(vdev, n); | |
939 | } | |
940 | ||
941 | uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) | |
942 | { | |
943 | return vdev->vq[n].last_avail_idx; | |
944 | } | |
945 | ||
946 | void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx) | |
947 | { | |
948 | vdev->vq[n].last_avail_idx = idx; | |
949 | } | |
950 | ||
951 | VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n) | |
952 | { | |
953 | return vdev->vq + n; | |
954 | } | |
955 | ||
956 | EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq) | |
957 | { | |
958 | return &vq->guest_notifier; | |
959 | } | |
960 | EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq) | |
961 | { | |
962 | return &vq->host_notifier; | |
963 | } |