]>
Commit | Line | Data |
---|---|---|
88807f89 SH |
1 | /* Copyright 2012 Red Hat, Inc. |
2 | * Copyright IBM, Corp. 2012 | |
3 | * | |
4 | * Based on Linux 2.6.39 vhost code: | |
5 | * Copyright (C) 2009 Red Hat, Inc. | |
6 | * Copyright (C) 2006 Rusty Russell IBM Corporation | |
7 | * | |
8 | * Author: Michael S. Tsirkin <[email protected]> | |
9 | * Stefan Hajnoczi <[email protected]> | |
10 | * | |
11 | * Inspiration, some code, and most witty comments come from | |
12 | * Documentation/virtual/lguest/lguest.c, by Rusty Russell | |
13 | * | |
14 | * This work is licensed under the terms of the GNU GPL, version 2. | |
15 | */ | |
16 | ||
17 | #include "trace.h" | |
0d09e41a | 18 | #include "hw/virtio/dataplane/vring.h" |
b4a42f81 | 19 | #include "qemu/error-report.h" |
88807f89 SH |
20 | |
/* Map the guest's vring to host memory
 *
 * Maps the memory holding virtqueue @n of @vdev and caches the resulting
 * descriptor/avail/used pointers in @vring.  Returns true on success; on
 * failure the vring is marked broken and false is returned.
 */
bool vring_setup(Vring *vring, VirtIODevice *vdev, int n)
{
    hwaddr vring_addr = virtio_queue_get_ring_addr(vdev, n);
    hwaddr vring_size = virtio_queue_get_ring_size(vdev, n);
    void *vring_ptr;

    vring->broken = false;

    hostmem_init(&vring->hostmem);
    /* NOTE(review): assumes the ring is mappable as one contiguous host
     * region; lookup fails otherwise (see TODO in get_desc()). */
    vring_ptr = hostmem_lookup(&vring->hostmem, vring_addr, vring_size, true);
    if (!vring_ptr) {
        error_report("Failed to map vring "
                     "addr %#" HWADDR_PRIx " size %" HWADDR_PRIu,
                     vring_addr, vring_size);
        vring->broken = true;
        return false;
    }

    /* 4096 is presumably the ring alignment expected by vring_init() --
     * confirm against its definition. */
    vring_init(&vring->vr, virtio_queue_get_num(vdev, n), vring_ptr, 4096);

    /* Resume from the indices recorded in the device state. */
    vring->last_avail_idx = virtio_queue_get_last_avail_idx(vdev, n);
    vring->last_used_idx = vring->vr.used->idx;
    vring->signalled_used = 0;
    vring->signalled_used_valid = false;

    trace_vring_setup(virtio_queue_get_ring_addr(vdev, n),
                      vring->vr.desc, vring->vr.avail, vring->vr.used);
    return true;
}
51 | ||
/* Unmap the vring and persist state back into the device
 *
 * Counterpart to vring_setup().  Saves last_avail_idx into @vdev so a
 * later vring_setup() (or another code path) can resume correctly, and
 * releases the host memory mapping.
 */
void vring_teardown(Vring *vring, VirtIODevice *vdev, int n)
{
    virtio_queue_set_last_avail_idx(vdev, n, vring->last_avail_idx);
    virtio_queue_invalidate_signalled_used(vdev, n);

    hostmem_finalize(&vring->hostmem);
}
59 | ||
60 | /* Disable guest->host notifies */ | |
61 | void vring_disable_notification(VirtIODevice *vdev, Vring *vring) | |
62 | { | |
63 | if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) { | |
64 | vring->vr.used->flags |= VRING_USED_F_NO_NOTIFY; | |
65 | } | |
66 | } | |
67 | ||
/* Enable guest->host notifies
 *
 * Return true if the vring is empty, false if there are more requests.
 */
bool vring_enable_notification(VirtIODevice *vdev, Vring *vring)
{
    if (vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
        /* Ask the guest to notify us once it advances past the current
         * avail index. */
        vring_avail_event(&vring->vr) = vring->vr.avail->idx;
    } else {
        /* Legacy mode: clear the "don't notify" flag in the used ring. */
        vring->vr.used->flags &= ~VRING_USED_F_NO_NOTIFY;
    }
    smp_mb(); /* ensure update is seen before reading avail_idx */
    return !vring_more_avail(vring);
}
82 | ||
83 | /* This is stolen from linux/drivers/vhost/vhost.c:vhost_notify() */ | |
84 | bool vring_should_notify(VirtIODevice *vdev, Vring *vring) | |
85 | { | |
86 | uint16_t old, new; | |
87 | bool v; | |
88 | /* Flush out used index updates. This is paired | |
89 | * with the barrier that the Guest executes when enabling | |
90 | * interrupts. */ | |
91 | smp_mb(); | |
92 | ||
93 | if ((vdev->guest_features & VIRTIO_F_NOTIFY_ON_EMPTY) && | |
94 | unlikely(vring->vr.avail->idx == vring->last_avail_idx)) { | |
95 | return true; | |
96 | } | |
97 | ||
98 | if (!(vdev->guest_features & VIRTIO_RING_F_EVENT_IDX)) { | |
99 | return !(vring->vr.avail->flags & VRING_AVAIL_F_NO_INTERRUPT); | |
100 | } | |
101 | old = vring->signalled_used; | |
102 | v = vring->signalled_used_valid; | |
103 | new = vring->signalled_used = vring->last_used_idx; | |
104 | vring->signalled_used_valid = true; | |
105 | ||
106 | if (unlikely(!v)) { | |
107 | return true; | |
108 | } | |
109 | ||
110 | return vring_need_event(vring_used_event(&vring->vr), new, old); | |
111 | } | |
112 | ||
4d684832 | 113 | |
8c1b566f | 114 | static int get_desc(Vring *vring, VirtQueueElement *elem, |
4d684832 PB |
115 | struct vring_desc *desc) |
116 | { | |
117 | unsigned *num; | |
8c1b566f PB |
118 | struct iovec *iov; |
119 | hwaddr *addr; | |
4d684832 PB |
120 | |
121 | if (desc->flags & VRING_DESC_F_WRITE) { | |
8c1b566f PB |
122 | num = &elem->in_num; |
123 | iov = &elem->in_sg[*num]; | |
124 | addr = &elem->in_addr[*num]; | |
4d684832 | 125 | } else { |
8c1b566f PB |
126 | num = &elem->out_num; |
127 | iov = &elem->out_sg[*num]; | |
128 | addr = &elem->out_addr[*num]; | |
4d684832 PB |
129 | |
130 | /* If it's an output descriptor, they're all supposed | |
131 | * to come before any input descriptors. */ | |
8c1b566f | 132 | if (unlikely(elem->in_num)) { |
4d684832 PB |
133 | error_report("Descriptor has out after in"); |
134 | return -EFAULT; | |
135 | } | |
136 | } | |
137 | ||
138 | /* Stop for now if there are not enough iovecs available. */ | |
8c1b566f | 139 | if (*num >= VIRTQUEUE_MAX_SIZE) { |
4d684832 PB |
140 | return -ENOBUFS; |
141 | } | |
142 | ||
143 | /* TODO handle non-contiguous memory across region boundaries */ | |
144 | iov->iov_base = hostmem_lookup(&vring->hostmem, desc->addr, desc->len, | |
145 | desc->flags & VRING_DESC_F_WRITE); | |
146 | if (!iov->iov_base) { | |
147 | error_report("Failed to map descriptor addr %#" PRIx64 " len %u", | |
148 | (uint64_t)desc->addr, desc->len); | |
149 | return -EFAULT; | |
150 | } | |
151 | ||
152 | iov->iov_len = desc->len; | |
8c1b566f | 153 | *addr = desc->addr; |
4d684832 PB |
154 | *num += 1; |
155 | return 0; | |
156 | } | |
157 | ||
/* Walk an indirect descriptor table and append its entries to @elem.
 *
 * Returns 0 on success or a negative errno: -EFAULT for malformed guest
 * state (also marks the vring broken), -ENOBUFS propagated from
 * get_desc() when the element runs out of iovec slots.
 *
 * This is stolen from linux/drivers/vhost/vhost.c. */
static int get_indirect(Vring *vring, VirtQueueElement *elem,
                        struct vring_desc *indirect)
{
    struct vring_desc desc;
    unsigned int i = 0, count, found = 0;
    int ret;

    /* Sanity check: the table must be a whole number of descriptors. */
    if (unlikely(indirect->len % sizeof(desc))) {
        error_report("Invalid length in indirect descriptor: "
                     "len %#x not multiple of %#zx",
                     indirect->len, sizeof(desc));
        vring->broken = true;
        return -EFAULT;
    }

    count = indirect->len / sizeof(desc);
    /* Buffers are chained via a 16 bit next field, so
     * we can have at most 2^16 of these. */
    if (unlikely(count > USHRT_MAX + 1)) {
        error_report("Indirect buffer length too big: %d", indirect->len);
        vring->broken = true;
        return -EFAULT;
    }

    do {
        struct vring_desc *desc_ptr;

        /* Translate indirect descriptor; "found" doubles as the table
         * index since entries are consumed sequentially. */
        desc_ptr = hostmem_lookup(&vring->hostmem,
                                  indirect->addr + found * sizeof(desc),
                                  sizeof(desc), false);
        if (!desc_ptr) {
            error_report("Failed to map indirect descriptor "
                         "addr %#" PRIx64 " len %zu",
                         (uint64_t)indirect->addr + found * sizeof(desc),
                         sizeof(desc));
            vring->broken = true;
            return -EFAULT;
        }
        /* Copy out of guest memory so later field reads are stable. */
        desc = *desc_ptr;

        /* Ensure descriptor has been loaded before accessing fields */
        barrier(); /* read_barrier_depends(); */

        /* More chain links than table entries means the guest built a
         * cycle. */
        if (unlikely(++found > count)) {
            error_report("Loop detected: last one at %u "
                         "indirect size %u", i, count);
            vring->broken = true;
            return -EFAULT;
        }

        if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) {
            error_report("Nested indirect descriptor");
            vring->broken = true;
            return -EFAULT;
        }

        ret = get_desc(vring, elem, &desc);
        if (ret < 0) {
            /* Only mapping/format errors poison the ring; -ENOBUFS is a
             * recoverable "try again later". */
            vring->broken |= (ret == -EFAULT);
            return ret;
        }
        i = desc.next;
    } while (desc.flags & VRING_DESC_F_NEXT);
    return 0;
}
226 | ||
/* Release a VirtQueueElement allocated by vring_pop() */
void vring_free_element(VirtQueueElement *elem)
{
    g_slice_free(VirtQueueElement, elem);
}
231 | ||
88807f89 SH |
232 | /* This looks in the virtqueue and for the first available buffer, and converts |
233 | * it to an iovec for convenient access. Since descriptors consist of some | |
234 | * number of output then some number of input descriptors, it's actually two | |
235 | * iovecs, but we pack them into one and note how many of each there were. | |
236 | * | |
237 | * This function returns the descriptor number found, or vq->num (which is | |
238 | * never a valid descriptor number) if none was found. A negative code is | |
239 | * returned on error. | |
240 | * | |
241 | * Stolen from linux/drivers/vhost/vhost.c. | |
242 | */ | |
243 | int vring_pop(VirtIODevice *vdev, Vring *vring, | |
8c1b566f | 244 | VirtQueueElement **p_elem) |
88807f89 SH |
245 | { |
246 | struct vring_desc desc; | |
247 | unsigned int i, head, found = 0, num = vring->vr.num; | |
248 | uint16_t avail_idx, last_avail_idx; | |
8c1b566f | 249 | VirtQueueElement *elem = NULL; |
4d684832 | 250 | int ret; |
88807f89 SH |
251 | |
252 | /* If there was a fatal error then refuse operation */ | |
253 | if (vring->broken) { | |
781c117f PB |
254 | ret = -EFAULT; |
255 | goto out; | |
88807f89 SH |
256 | } |
257 | ||
258 | /* Check it isn't doing very strange things with descriptor numbers. */ | |
259 | last_avail_idx = vring->last_avail_idx; | |
260 | avail_idx = vring->vr.avail->idx; | |
261 | barrier(); /* load indices now and not again later */ | |
262 | ||
263 | if (unlikely((uint16_t)(avail_idx - last_avail_idx) > num)) { | |
264 | error_report("Guest moved used index from %u to %u", | |
265 | last_avail_idx, avail_idx); | |
781c117f PB |
266 | ret = -EFAULT; |
267 | goto out; | |
88807f89 SH |
268 | } |
269 | ||
270 | /* If there's nothing new since last we looked. */ | |
271 | if (avail_idx == last_avail_idx) { | |
781c117f PB |
272 | ret = -EAGAIN; |
273 | goto out; | |
88807f89 SH |
274 | } |
275 | ||
276 | /* Only get avail ring entries after they have been exposed by guest. */ | |
277 | smp_rmb(); | |
278 | ||
279 | /* Grab the next descriptor number they're advertising, and increment | |
280 | * the index we've seen. */ | |
281 | head = vring->vr.avail->ring[last_avail_idx % num]; | |
282 | ||
8c1b566f PB |
283 | elem = g_slice_new(VirtQueueElement); |
284 | elem->index = head; | |
285 | elem->in_num = elem->out_num = 0; | |
286 | ||
88807f89 SH |
287 | /* If their number is silly, that's an error. */ |
288 | if (unlikely(head >= num)) { | |
289 | error_report("Guest says index %u > %u is available", head, num); | |
781c117f PB |
290 | ret = -EFAULT; |
291 | goto out; | |
88807f89 SH |
292 | } |
293 | ||
294 | if (vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) { | |
295 | vring_avail_event(&vring->vr) = vring->vr.avail->idx; | |
296 | } | |
297 | ||
88807f89 SH |
298 | i = head; |
299 | do { | |
300 | if (unlikely(i >= num)) { | |
301 | error_report("Desc index is %u > %u, head = %u", i, num, head); | |
781c117f PB |
302 | ret = -EFAULT; |
303 | goto out; | |
88807f89 SH |
304 | } |
305 | if (unlikely(++found > num)) { | |
306 | error_report("Loop detected: last one at %u vq size %u head %u", | |
307 | i, num, head); | |
781c117f PB |
308 | ret = -EFAULT; |
309 | goto out; | |
88807f89 SH |
310 | } |
311 | desc = vring->vr.desc[i]; | |
312 | ||
313 | /* Ensure descriptor is loaded before accessing fields */ | |
314 | barrier(); | |
315 | ||
316 | if (desc.flags & VRING_DESC_F_INDIRECT) { | |
8c1b566f | 317 | int ret = get_indirect(vring, elem, &desc); |
88807f89 | 318 | if (ret < 0) { |
781c117f | 319 | goto out; |
88807f89 SH |
320 | } |
321 | continue; | |
322 | } | |
323 | ||
8c1b566f | 324 | ret = get_desc(vring, elem, &desc); |
4d684832 | 325 | if (ret < 0) { |
781c117f | 326 | goto out; |
88807f89 SH |
327 | } |
328 | ||
88807f89 SH |
329 | i = desc.next; |
330 | } while (desc.flags & VRING_DESC_F_NEXT); | |
331 | ||
332 | /* On success, increment avail index. */ | |
333 | vring->last_avail_idx++; | |
8c1b566f | 334 | *p_elem = elem; |
88807f89 | 335 | return head; |
781c117f PB |
336 | |
337 | out: | |
338 | assert(ret < 0); | |
339 | if (ret == -EFAULT) { | |
340 | vring->broken = true; | |
341 | } | |
8c1b566f PB |
342 | if (elem) { |
343 | vring_free_element(elem); | |
344 | } | |
345 | *p_elem = NULL; | |
781c117f | 346 | return ret; |
88807f89 SH |
347 | } |
348 | ||
/* After we've used one of their buffers, we tell them about it.
 *
 * Consumes @elem (it is freed even when the vring is broken), publishes
 * the completed request of @len bytes in the used ring, and advances the
 * used index.
 *
 * Stolen from linux/drivers/vhost/vhost.c.
 */
void vring_push(Vring *vring, VirtQueueElement *elem, int len)
{
    struct vring_used_elem *used;
    unsigned int head = elem->index;
    uint16_t new;

    vring_free_element(elem);

    /* Don't touch vring if a fatal error occurred */
    if (vring->broken) {
        return;
    }

    /* The virtqueue contains a ring of used buffers.  Get a pointer to the
     * next entry in that used ring. */
    used = &vring->vr.used->ring[vring->last_used_idx % vring->vr.num];
    used->id = head;
    used->len = len;

    /* Make sure buffer is written before we update index. */
    smp_wmb();

    new = vring->vr.used->idx = ++vring->last_used_idx;
    /* If the used index moved past the cached signalled_used value, that
     * cache can no longer be trusted for event-idx suppression. */
    if (unlikely((int16_t)(new - vring->signalled_used) < (uint16_t)1)) {
        vring->signalled_used_valid = false;
    }
}