/*
 * Helpers for the host side of a virtio ring.
 *
 * Since these may be in userspace, we use (inline) accessors.
 */
#include <linux/module.h>
#include <linux/vringh.h>
#include <linux/virtio_ring.h>
#include <linux/kernel.h>
#include <linux/ratelimit.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <uapi/linux/virtio_config.h>

static __printf(1,2) __cold void vringh_bad(const char *fmt, ...)
{
	static DEFINE_RATELIMIT_STATE(vringh_rs,
				      DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);
	if (__ratelimit(&vringh_rs)) {
		va_list ap;
		va_start(ap, fmt);
		printk(KERN_NOTICE "vringh:");
		vprintk(fmt, ap);
		va_end(ap);
	}
}

/* Returns vring->num if empty, -ve on error. */
static inline int __vringh_get_head(const struct vringh *vrh,
				    int (*getu16)(const struct vringh *vrh,
						  u16 *val, const __virtio16 *p),
				    u16 *last_avail_idx)
{
	u16 avail_idx, i, head;
	int err;

	err = getu16(vrh, &avail_idx, &vrh->vring.avail->idx);
	if (err) {
		vringh_bad("Failed to access avail idx at %p",
			   &vrh->vring.avail->idx);
		return err;
	}

	if (*last_avail_idx == avail_idx)
		return vrh->vring.num;

	/* Only get avail ring entries after they have been exposed by guest. */
	virtio_rmb(vrh->weak_barriers);

	i = *last_avail_idx & (vrh->vring.num - 1);

	err = getu16(vrh, &head, &vrh->vring.avail->ring[i]);
	if (err) {
		vringh_bad("Failed to read head: idx %d address %p",
			   *last_avail_idx, &vrh->vring.avail->ring[i]);
		return err;
	}

	if (head >= vrh->vring.num) {
		vringh_bad("Guest says index %u > %u is available",
			   head, vrh->vring.num);
		return -EINVAL;
	}

	(*last_avail_idx)++;
	return head;
}

/* Copy some bytes to/from the iovec. Returns num copied. */
static inline ssize_t vringh_iov_xfer(struct vringh_kiov *iov,
				      void *ptr, size_t len,
				      int (*xfer)(void *addr, void *ptr,
						  size_t len))
{
	int err, done = 0;

	while (len && iov->i < iov->used) {
		size_t partlen;

		partlen = min(iov->iov[iov->i].iov_len, len);
		err = xfer(iov->iov[iov->i].iov_base, ptr, partlen);
		if (err)
			return err;
		done += partlen;
		len -= partlen;
		ptr += partlen;
		iov->consumed += partlen;
		iov->iov[iov->i].iov_len -= partlen;
		iov->iov[iov->i].iov_base += partlen;

		if (!iov->iov[iov->i].iov_len) {
			/* Fix up old iov element then increment. */
			iov->iov[iov->i].iov_len = iov->consumed;
			iov->iov[iov->i].iov_base -= iov->consumed;

			iov->consumed = 0;
			iov->i++;
		}
	}
	return done;
}

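/*
 * range_check() below relies on a caller-supplied getrange() callback to map
 * a guest address onto a region the host may access (for the *_user API the
 * resulting pointer is a userspace address).  A minimal sketch of such a
 * callback, assuming a single flat mapping of guest memory; the container
 * struct and its fields are hypothetical and not part of this file:
 */
struct example_vring_host {
	struct vringh vrh;
	void *guest_map;	/* where guest memory is mapped for us */
	u64 guest_size;		/* bytes covered by that mapping */
};

static bool __maybe_unused example_getrange(struct vringh *vrh, u64 addr,
					    struct vringh_range *r)
{
	struct example_vring_host *h =
		container_of(vrh, struct example_vring_host, vrh);

	if (addr >= h->guest_size)
		return false;

	r->start = 0;
	r->end_incl = h->guest_size - 1;
	/* __vringh_iov() adds r->offset to guest addresses to get pointers. */
	r->offset = (u64)(unsigned long)h->guest_map;
	return true;
}
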
/* May reduce *len if range is shorter. */
static inline bool range_check(struct vringh *vrh, u64 addr, size_t *len,
			       struct vringh_range *range,
			       bool (*getrange)(struct vringh *,
						u64, struct vringh_range *))
{
	if (addr < range->start || addr > range->end_incl) {
		if (!getrange(vrh, addr, range))
			return false;
	}
	BUG_ON(addr < range->start || addr > range->end_incl);

	/* To end of memory? */
	if (unlikely(addr + *len == 0)) {
		if (range->end_incl == -1ULL)
			return true;
		goto truncate;
	}

	/* Otherwise, don't wrap. */
	if (addr + *len < addr) {
		vringh_bad("Wrapping descriptor %zu@0x%llx",
			   *len, (unsigned long long)addr);
		return false;
	}

	if (unlikely(addr + *len - 1 > range->end_incl))
		goto truncate;
	return true;

truncate:
	*len = range->end_incl + 1 - addr;
	return true;
}

static inline bool no_range_check(struct vringh *vrh, u64 addr, size_t *len,
				  struct vringh_range *range,
				  bool (*getrange)(struct vringh *,
						   u64, struct vringh_range *))
{
	return true;
}

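/*
 * Indirect descriptors: a descriptor with VRING_DESC_F_INDIRECT points at a
 * separate table of descriptors rather than at data.  move_to_indirect()
 * switches the walk over to that table, remembering in *up_next where to
 * resume in the top-level ring (or -2 if the indirect descriptor ended its
 * chain); return_from_indirect() undoes the switch.  Only one level is
 * allowed: an indirect table may not itself contain indirect descriptors.
 */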
/* No reason for this code to be inline. */
static int move_to_indirect(const struct vringh *vrh,
			    int *up_next, u16 *i, void *addr,
			    const struct vring_desc *desc,
			    struct vring_desc **descs, int *desc_max)
{
	u32 len;

	/* Indirect tables can't have indirect. */
	if (*up_next != -1) {
		vringh_bad("Multilevel indirect %u->%u", *up_next, *i);
		return -EINVAL;
	}

	len = vringh32_to_cpu(vrh, desc->len);
	if (unlikely(len % sizeof(struct vring_desc))) {
		vringh_bad("Strange indirect len %u", desc->len);
		return -EINVAL;
	}

	/* We will check this when we follow it! */
	if (desc->flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT))
		*up_next = vringh16_to_cpu(vrh, desc->next);
	else
		*up_next = -2;
	*descs = addr;
	*desc_max = len / sizeof(struct vring_desc);

	/* Now, start at the first indirect. */
	*i = 0;
	return 0;
}

static int resize_iovec(struct vringh_kiov *iov, gfp_t gfp)
{
	struct kvec *new;
	unsigned int flag, new_num = (iov->max_num & ~VRINGH_IOV_ALLOCATED) * 2;

	if (new_num < 8)
		new_num = 8;

	flag = (iov->max_num & VRINGH_IOV_ALLOCATED);
	if (flag)
		new = krealloc(iov->iov, new_num * sizeof(struct iovec), gfp);
	else {
		new = kmalloc(new_num * sizeof(struct iovec), gfp);
		if (new) {
			memcpy(new, iov->iov,
			       iov->max_num * sizeof(struct iovec));
			flag = VRINGH_IOV_ALLOCATED;
		}
	}
	if (!new)
		return -ENOMEM;
	iov->iov = new;
	iov->max_num = (new_num | flag);
	return 0;
}

static u16 __cold return_from_indirect(const struct vringh *vrh, int *up_next,
				       struct vring_desc **descs, int *desc_max)
{
	u16 i = *up_next;

	*up_next = -1;
	*descs = vrh->vring.desc;
	*desc_max = vrh->vring.num;
	return i;
}

static int slow_copy(struct vringh *vrh, void *dst, const void *src,
		     bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
				    struct vringh_range *range,
				    bool (*getrange)(struct vringh *vrh,
						     u64,
						     struct vringh_range *)),
		     bool (*getrange)(struct vringh *vrh,
				      u64 addr,
				      struct vringh_range *r),
		     struct vringh_range *range,
		     int (*copy)(void *dst, const void *src, size_t len))
{
	size_t part, len = sizeof(struct vring_desc);

	do {
		u64 addr;
		int err;

		part = len;
		addr = (u64)(unsigned long)src - range->offset;

		if (!rcheck(vrh, addr, &part, range, getrange))
			return -EINVAL;

		err = copy(dst, src, part);
		if (err)
			return err;

		dst += part;
		src += part;
		len -= part;
	} while (len);
	return 0;
}

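/*
 * __vringh_iov() walks the descriptor chain starting at head i and fills the
 * caller's riov (readable) and wiov (writable) arrays with host-usable
 * address/length pairs.  Two corner cases complicate it: an indirect table
 * that straddles a getrange() boundary forces "slow" mode, where each
 * struct vring_desc is fetched piecewise via slow_copy(); and a data buffer
 * that straddles a boundary is split across several iov entries by the
 * "again:" loop, which shortens desc.len and advances desc.addr each pass.
 */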
static inline int
__vringh_iov(struct vringh *vrh, u16 i,
	     struct vringh_kiov *riov,
	     struct vringh_kiov *wiov,
	     bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
			    struct vringh_range *range,
			    bool (*getrange)(struct vringh *, u64,
					     struct vringh_range *)),
	     bool (*getrange)(struct vringh *, u64, struct vringh_range *),
	     gfp_t gfp,
	     int (*copy)(void *dst, const void *src, size_t len))
{
	int err, count = 0, up_next, desc_max;
	struct vring_desc desc, *descs;
	struct vringh_range range = { -1ULL, 0 }, slowrange;
	bool slow = false;

	/* We start traversing vring's descriptor table. */
	descs = vrh->vring.desc;
	desc_max = vrh->vring.num;
	up_next = -1;

	if (riov)
		riov->i = riov->used = 0;
	else if (wiov)
		wiov->i = wiov->used = 0;
	else
		/* You must want something! */
		BUG();

	for (;;) {
		void *addr;
		struct vringh_kiov *iov;
		size_t len;

		if (unlikely(slow))
			err = slow_copy(vrh, &desc, &descs[i], rcheck, getrange,
					&slowrange, copy);
		else
			err = copy(&desc, &descs[i], sizeof(desc));
		if (unlikely(err))
			goto fail;

		if (unlikely(desc.flags &
			     cpu_to_vringh16(vrh, VRING_DESC_F_INDIRECT))) {
			u64 a = vringh64_to_cpu(vrh, desc.addr);

			/* Make sure it's OK, and get offset. */
			len = vringh32_to_cpu(vrh, desc.len);
			if (!rcheck(vrh, a, &len, &range, getrange)) {
				err = -EINVAL;
				goto fail;
			}

			if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
				slow = true;
				/* We need to save this range to use offset */
				slowrange = range;
			}

			addr = (void *)(long)(a + range.offset);
			err = move_to_indirect(vrh, &up_next, &i, addr, &desc,
					       &descs, &desc_max);
			if (err)
				goto fail;
			continue;
		}

		if (count++ == vrh->vring.num) {
			vringh_bad("Descriptor loop in %p", descs);
			err = -ELOOP;
			goto fail;
		}

		if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_WRITE))
			iov = wiov;
		else {
			iov = riov;
			if (unlikely(wiov && wiov->i)) {
				vringh_bad("Readable desc %p after writable",
					   &descs[i]);
				err = -EINVAL;
				goto fail;
			}
		}

		if (!iov) {
			vringh_bad("Unexpected %s desc",
				   !wiov ? "writable" : "readable");
			err = -EPROTO;
			goto fail;
		}

	again:
		/* Make sure it's OK, and get offset. */
		len = vringh32_to_cpu(vrh, desc.len);
		if (!rcheck(vrh, vringh64_to_cpu(vrh, desc.addr), &len, &range,
			    getrange)) {
			err = -EINVAL;
			goto fail;
		}
		addr = (void *)(unsigned long)(vringh64_to_cpu(vrh, desc.addr) +
					       range.offset);

		if (unlikely(iov->used == (iov->max_num & ~VRINGH_IOV_ALLOCATED))) {
			err = resize_iovec(iov, gfp);
			if (err)
				goto fail;
		}

		iov->iov[iov->used].iov_base = addr;
		iov->iov[iov->used].iov_len = len;
		iov->used++;

		if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
			desc.len = cpu_to_vringh32(vrh,
				   vringh32_to_cpu(vrh, desc.len) - len);
			desc.addr = cpu_to_vringh64(vrh,
				    vringh64_to_cpu(vrh, desc.addr) + len);
			goto again;
		}

		if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT)) {
			i = vringh16_to_cpu(vrh, desc.next);
		} else {
			/* Just in case we need to finish traversing above. */
			if (unlikely(up_next > 0)) {
				i = return_from_indirect(vrh, &up_next,
							 &descs, &desc_max);
				slow = false;
			} else
				break;
		}

		if (i >= desc_max) {
			vringh_bad("Chained index %u > %u", i, desc_max);
			err = -EINVAL;
			goto fail;
		}
	}

	return 0;

fail:
	return err;
}

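/*
 * Used-ring bookkeeping: __vringh_complete() publishes entries starting at
 * last_used_idx + completed and bumps vrh->completed, but does not touch
 * last_used_idx itself.  __vringh_need_notify() later folds completed into
 * last_used_idx and resets it, so the event-index comparison there sees
 * exactly the batch published since the last notification decision.
 */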
static inline int __vringh_complete(struct vringh *vrh,
				    const struct vring_used_elem *used,
				    unsigned int num_used,
				    int (*putu16)(const struct vringh *vrh,
						  __virtio16 *p, u16 val),
				    int (*putused)(struct vring_used_elem *dst,
						   const struct vring_used_elem
						   *src, unsigned num))
{
	struct vring_used *used_ring;
	int err;
	u16 used_idx, off;

	used_ring = vrh->vring.used;
	used_idx = vrh->last_used_idx + vrh->completed;

	off = used_idx % vrh->vring.num;

	/* Compiler knows num_used == 1 sometimes, hence extra check */
	if (num_used > 1 && unlikely(off + num_used >= vrh->vring.num)) {
		u16 part = vrh->vring.num - off;
		err = putused(&used_ring->ring[off], used, part);
		if (!err)
			err = putused(&used_ring->ring[0], used + part,
				      num_used - part);
	} else
		err = putused(&used_ring->ring[off], used, num_used);

	if (err) {
		vringh_bad("Failed to write %u used entries %u at %p",
			   num_used, off, &used_ring->ring[off]);
		return err;
	}

	/* Make sure buffer is written before we update index. */
	virtio_wmb(vrh->weak_barriers);

	err = putu16(vrh, &vrh->vring.used->idx, used_idx + num_used);
	if (err) {
		vringh_bad("Failed to update used index at %p",
			   &vrh->vring.used->idx);
		return err;
	}

	vrh->completed += num_used;
	return 0;
}


static inline int __vringh_need_notify(struct vringh *vrh,
				       int (*getu16)(const struct vringh *vrh,
						     u16 *val,
						     const __virtio16 *p))
{
	bool notify;
	u16 used_event;
	int err;

	/* Flush out used index update. This is paired with the
	 * barrier that the Guest executes when enabling
	 * interrupts. */
	virtio_mb(vrh->weak_barriers);

	/* Old-style, without event indices. */
	if (!vrh->event_indices) {
		u16 flags;
		err = getu16(vrh, &flags, &vrh->vring.avail->flags);
		if (err) {
			vringh_bad("Failed to get flags at %p",
				   &vrh->vring.avail->flags);
			return err;
		}
		return (!(flags & VRING_AVAIL_F_NO_INTERRUPT));
	}

	/* Modern: we know when other side wants to know. */
	err = getu16(vrh, &used_event, &vring_used_event(&vrh->vring));
	if (err) {
		vringh_bad("Failed to get used event idx at %p",
			   &vring_used_event(&vrh->vring));
		return err;
	}

	/* Just in case we added so many that we wrap. */
	if (unlikely(vrh->completed > 0xffff))
		notify = true;
	else
		notify = vring_need_event(used_event,
					  vrh->last_used_idx + vrh->completed,
					  vrh->last_used_idx);

	vrh->last_used_idx += vrh->completed;
	vrh->completed = 0;
	return notify;
}

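/*
 * Notification control, both flavours: without VIRTIO_RING_F_EVENT_IDX the
 * host toggles VRING_USED_F_NO_NOTIFY in used->flags; with it, the host
 * writes the avail index it wants to hear about into vring_avail_event().
 * __vringh_notify_enable() returns false if new buffers slipped in while the
 * request was being published, so the caller knows to process them instead
 * of sleeping.
 */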
static inline bool __vringh_notify_enable(struct vringh *vrh,
					  int (*getu16)(const struct vringh *vrh,
							u16 *val, const __virtio16 *p),
					  int (*putu16)(const struct vringh *vrh,
							__virtio16 *p, u16 val))
{
	u16 avail;

	if (!vrh->event_indices) {
		/* Old-school; update flags. */
		if (putu16(vrh, &vrh->vring.used->flags, 0) != 0) {
			vringh_bad("Clearing used flags %p",
				   &vrh->vring.used->flags);
			return true;
		}
	} else {
		if (putu16(vrh, &vring_avail_event(&vrh->vring),
			   vrh->last_avail_idx) != 0) {
			vringh_bad("Updating avail event index %p",
				   &vring_avail_event(&vrh->vring));
			return true;
		}
	}

	/* They could have slipped one in as we were doing that: make
	 * sure it's written, then check again. */
	virtio_mb(vrh->weak_barriers);

	if (getu16(vrh, &avail, &vrh->vring.avail->idx) != 0) {
		vringh_bad("Failed to check avail idx at %p",
			   &vrh->vring.avail->idx);
		return true;
	}

	/* This is unlikely, so we just leave notifications enabled
	 * (if we're using event_indices, we'll only get one
	 * notification anyway). */
	return avail == vrh->last_avail_idx;
}

static inline void __vringh_notify_disable(struct vringh *vrh,
					   int (*putu16)(const struct vringh *vrh,
							 __virtio16 *p, u16 val))
{
	if (!vrh->event_indices) {
		/* Old-school; update flags. */
		if (putu16(vrh, &vrh->vring.used->flags,
			   VRING_USED_F_NO_NOTIFY)) {
			vringh_bad("Setting used flags %p",
				   &vrh->vring.used->flags);
		}
	}
}

/* Userspace access helpers: in this case, addresses are really userspace. */
static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p)
{
	__virtio16 v = 0;
	int rc = get_user(v, (__force __virtio16 __user *)p);
	*val = vringh16_to_cpu(vrh, v);
	return rc;
}

static inline int putu16_user(const struct vringh *vrh, __virtio16 *p, u16 val)
{
	__virtio16 v = cpu_to_vringh16(vrh, val);
	return put_user(v, (__force __virtio16 __user *)p);
}

static inline int copydesc_user(void *dst, const void *src, size_t len)
{
	return copy_from_user(dst, (__force void __user *)src, len) ?
		-EFAULT : 0;
}

static inline int putused_user(struct vring_used_elem *dst,
			       const struct vring_used_elem *src,
			       unsigned int num)
{
	return copy_to_user((__force void __user *)dst, src,
			    sizeof(*dst) * num) ? -EFAULT : 0;
}

static inline int xfer_from_user(void *src, void *dst, size_t len)
{
	return copy_from_user(dst, (__force void __user *)src, len) ?
		-EFAULT : 0;
}

static inline int xfer_to_user(void *dst, void *src, size_t len)
{
	return copy_to_user((__force void __user *)dst, src, len) ?
		-EFAULT : 0;
}

/**
 * vringh_init_user - initialize a vringh for a userspace vring.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the userspace descriptor pointer.
 * @avail: the userspace avail pointer.
 * @used: the userspace used pointer.
 *
 * Returns an error if num is invalid: you should check pointers
 * yourself!
 */
int vringh_init_user(struct vringh *vrh, u64 features,
		     unsigned int num, bool weak_barriers,
		     struct vring_desc __user *desc,
		     struct vring_avail __user *avail,
		     struct vring_used __user *used)
{
	/* Sane power of 2 please! */
	if (!num || num > 0xffff || (num & (num - 1))) {
		vringh_bad("Bad ring size %u", num);
		return -EINVAL;
	}

	vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
	vrh->weak_barriers = weak_barriers;
	vrh->completed = 0;
	vrh->last_avail_idx = 0;
	vrh->last_used_idx = 0;
	vrh->vring.num = num;
	/* vring expects kernel addresses, but only used via accessors. */
	vrh->vring.desc = (__force struct vring_desc *)desc;
	vrh->vring.avail = (__force struct vring_avail *)avail;
	vrh->vring.used = (__force struct vring_used *)used;
	return 0;
}
EXPORT_SYMBOL(vringh_init_user);

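/*
 * Only two feature bits matter to this layer: VIRTIO_RING_F_EVENT_IDX
 * selects event-index notification suppression, and VIRTIO_F_VERSION_1
 * forces little-endian ring layout.  An illustrative helper for assembling
 * the features argument (hypothetical, not part of this file):
 */
static inline u64 example_vringh_features(bool event_idx, bool version_1)
{
	return (event_idx ? 1ULL << VIRTIO_RING_F_EVENT_IDX : 0) |
	       (version_1 ? 1ULL << VIRTIO_F_VERSION_1 : 0);
}
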
/**
 * vringh_getdesc_user - get next available descriptor from userspace ring.
 * @vrh: the userspace vring.
 * @riov: where to put the readable descriptors (or NULL)
 * @wiov: where to put the writable descriptors (or NULL)
 * @getrange: function to call to check ranges.
 * @head: head index we received, for passing to vringh_complete_user().
 *
 * Returns 0 if there was no descriptor, 1 if there was, or -errno.
 *
 * Note that on error return, you can tell the difference between an
 * invalid ring and a single invalid descriptor: in the former case,
 * *head will be vrh->vring.num.  You may be able to ignore an invalid
 * descriptor, but there's not much you can do with an invalid ring.
 *
 * Note that you may need to clean up riov and wiov, even on error!
 */
int vringh_getdesc_user(struct vringh *vrh,
			struct vringh_iov *riov,
			struct vringh_iov *wiov,
			bool (*getrange)(struct vringh *vrh,
					 u64 addr, struct vringh_range *r),
			u16 *head)
{
	int err;

	*head = vrh->vring.num;
	err = __vringh_get_head(vrh, getu16_user, &vrh->last_avail_idx);
	if (err < 0)
		return err;

	/* Empty... */
	if (err == vrh->vring.num)
		return 0;

	/* We need the layouts to be identical for this to work */
	BUILD_BUG_ON(sizeof(struct vringh_kiov) != sizeof(struct vringh_iov));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, iov) !=
		     offsetof(struct vringh_iov, iov));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, i) !=
		     offsetof(struct vringh_iov, i));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, used) !=
		     offsetof(struct vringh_iov, used));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, max_num) !=
		     offsetof(struct vringh_iov, max_num));
	BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
	BUILD_BUG_ON(offsetof(struct iovec, iov_base) !=
		     offsetof(struct kvec, iov_base));
	BUILD_BUG_ON(offsetof(struct iovec, iov_len) !=
		     offsetof(struct kvec, iov_len));
	BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_base)
		     != sizeof(((struct kvec *)NULL)->iov_base));
	BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_len)
		     != sizeof(((struct kvec *)NULL)->iov_len));

	*head = err;
	err = __vringh_iov(vrh, *head, (struct vringh_kiov *)riov,
			   (struct vringh_kiov *)wiov,
			   range_check, getrange, GFP_KERNEL, copydesc_user);
	if (err)
		return err;

	return 1;
}
EXPORT_SYMBOL(vringh_getdesc_user);

/**
 * vringh_iov_pull_user - copy bytes from vring_iov.
 * @riov: the riov as passed to vringh_getdesc_user() (updated as we consume)
 * @dst: the place to copy.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_pull_user(struct vringh_iov *riov, void *dst, size_t len)
{
	return vringh_iov_xfer((struct vringh_kiov *)riov,
			       dst, len, xfer_from_user);
}
EXPORT_SYMBOL(vringh_iov_pull_user);

/**
 * vringh_iov_push_user - copy bytes into vring_iov.
 * @wiov: the wiov as passed to vringh_getdesc_user() (updated as we consume)
 * @src: the place to copy from.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_push_user(struct vringh_iov *wiov,
			     const void *src, size_t len)
{
	return vringh_iov_xfer((struct vringh_kiov *)wiov,
			       (void *)src, len, xfer_to_user);
}
EXPORT_SYMBOL(vringh_iov_push_user);

/**
 * vringh_abandon_user - we've decided not to handle the descriptor(s).
 * @vrh: the vring.
 * @num: the number of descriptors to put back (ie. num
 *	 vringh_get_user() to undo).
 *
 * The next vringh_get_user() will return the old descriptor(s) again.
 */
void vringh_abandon_user(struct vringh *vrh, unsigned int num)
{
	/* We only update vring_avail_event(vr) when we want to be notified,
	 * so we haven't changed that yet. */
	vrh->last_avail_idx -= num;
}
EXPORT_SYMBOL(vringh_abandon_user);

/**
 * vringh_complete_user - we've finished with descriptor, publish it.
 * @vrh: the vring.
 * @head: the head as filled in by vringh_getdesc_user.
 * @len: the length of data we have written.
 *
 * You should check vringh_need_notify_user() after one or more calls
 * to this function.
 */
int vringh_complete_user(struct vringh *vrh, u16 head, u32 len)
{
	struct vring_used_elem used;

	used.id = cpu_to_vringh32(vrh, head);
	used.len = cpu_to_vringh32(vrh, len);
	return __vringh_complete(vrh, &used, 1, putu16_user, putused_user);
}
EXPORT_SYMBOL(vringh_complete_user);

/**
 * vringh_complete_multi_user - we've finished with many descriptors.
 * @vrh: the vring.
 * @used: the head, length pairs.
 * @num_used: the number of used elements.
 *
 * You should check vringh_need_notify_user() after one or more calls
 * to this function.
 */
int vringh_complete_multi_user(struct vringh *vrh,
			       const struct vring_used_elem used[],
			       unsigned num_used)
{
	return __vringh_complete(vrh, used, num_used,
				 putu16_user, putused_user);
}
EXPORT_SYMBOL(vringh_complete_multi_user);

/**
 * vringh_notify_enable_user - we want to know if something changes.
 * @vrh: the vring.
 *
 * This always enables notifications, but returns false if there are
 * now more buffers available in the vring.
 */
bool vringh_notify_enable_user(struct vringh *vrh)
{
	return __vringh_notify_enable(vrh, getu16_user, putu16_user);
}
EXPORT_SYMBOL(vringh_notify_enable_user);

/**
 * vringh_notify_disable_user - don't tell us if something changes.
 * @vrh: the vring.
 *
 * This is our normal running state: we disable and then only enable when
 * we're going to sleep.
 */
void vringh_notify_disable_user(struct vringh *vrh)
{
	__vringh_notify_disable(vrh, putu16_user);
}
EXPORT_SYMBOL(vringh_notify_disable_user);

/**
 * vringh_need_notify_user - must we tell the other side about used buffers?
 * @vrh: the vring we've called vringh_complete_user() on.
 *
 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
 */
int vringh_need_notify_user(struct vringh *vrh)
{
	return __vringh_need_notify(vrh, getu16_user);
}
EXPORT_SYMBOL(vringh_need_notify_user);

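/*
 * Putting the userspace API together: an illustrative service loop (not
 * part of this file; the actual request handling is elided).  It assumes
 * vrh was set up with vringh_init_user() and that getrange is the same
 * callback the caller would pass to vringh_getdesc_user().
 */
static int __maybe_unused
example_service_user_ring(struct vringh *vrh,
			  bool (*getrange)(struct vringh *vrh, u64 addr,
					   struct vringh_range *r))
{
	struct vringh_iov riov, wiov;
	struct iovec rstack[8], wstack[8];
	u16 head;
	int err;

	vringh_iov_init(&riov, rstack, ARRAY_SIZE(rstack));
	vringh_iov_init(&wiov, wstack, ARRAY_SIZE(wstack));

	for (;;) {
		/* 1 = got a descriptor chain, 0 = ring empty, <0 = error. */
		err = vringh_getdesc_user(vrh, &riov, &wiov, getrange, &head);
		if (err <= 0)
			break;

		/*
		 * ... vringh_iov_pull_user(&riov, buf, len) to read the
		 * request, vringh_iov_push_user(&wiov, buf, len) to write
		 * the reply, then publish how much was written ...
		 */
		err = vringh_complete_user(vrh, head, 0);
		if (err)
			break;
	}

	vringh_iov_cleanup(&riov);
	vringh_iov_cleanup(&wiov);

	if (err < 0)
		return err;

	/* >0 means the caller should now kick the guest. */
	return vringh_need_notify_user(vrh);
}
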
/* Kernelspace access helpers. */
static inline int getu16_kern(const struct vringh *vrh,
			      u16 *val, const __virtio16 *p)
{
	*val = vringh16_to_cpu(vrh, ACCESS_ONCE(*p));
	return 0;
}

static inline int putu16_kern(const struct vringh *vrh, __virtio16 *p, u16 val)
{
	ACCESS_ONCE(*p) = cpu_to_vringh16(vrh, val);
	return 0;
}

static inline int copydesc_kern(void *dst, const void *src, size_t len)
{
	memcpy(dst, src, len);
	return 0;
}

static inline int putused_kern(struct vring_used_elem *dst,
			       const struct vring_used_elem *src,
			       unsigned int num)
{
	memcpy(dst, src, num * sizeof(*dst));
	return 0;
}

static inline int xfer_kern(void *src, void *dst, size_t len)
{
	memcpy(dst, src, len);
	return 0;
}

/**
 * vringh_init_kern - initialize a vringh for a kernelspace vring.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the kernelspace descriptor pointer.
 * @avail: the kernelspace avail pointer.
 * @used: the kernelspace used pointer.
 *
 * Returns an error if num is invalid.
 */
int vringh_init_kern(struct vringh *vrh, u64 features,
		     unsigned int num, bool weak_barriers,
		     struct vring_desc *desc,
		     struct vring_avail *avail,
		     struct vring_used *used)
{
	/* Sane power of 2 please! */
	if (!num || num > 0xffff || (num & (num - 1))) {
		vringh_bad("Bad ring size %u", num);
		return -EINVAL;
	}

	vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
	vrh->weak_barriers = weak_barriers;
	vrh->completed = 0;
	vrh->last_avail_idx = 0;
	vrh->last_used_idx = 0;
	vrh->vring.num = num;
	vrh->vring.desc = desc;
	vrh->vring.avail = avail;
	vrh->vring.used = used;
	return 0;
}
EXPORT_SYMBOL(vringh_init_kern);

/**
 * vringh_getdesc_kern - get next available descriptor from kernelspace ring.
 * @vrh: the kernelspace vring.
 * @riov: where to put the readable descriptors (or NULL)
 * @wiov: where to put the writable descriptors (or NULL)
 * @head: head index we received, for passing to vringh_complete_kern().
 * @gfp: flags for allocating larger riov/wiov.
 *
 * Returns 0 if there was no descriptor, 1 if there was, or -errno.
 *
 * Note that on error return, you can tell the difference between an
 * invalid ring and a single invalid descriptor: in the former case,
 * *head will be vrh->vring.num.  You may be able to ignore an invalid
 * descriptor, but there's not much you can do with an invalid ring.
 *
 * Note that you may need to clean up riov and wiov, even on error!
 */
int vringh_getdesc_kern(struct vringh *vrh,
			struct vringh_kiov *riov,
			struct vringh_kiov *wiov,
			u16 *head,
			gfp_t gfp)
{
	int err;

	err = __vringh_get_head(vrh, getu16_kern, &vrh->last_avail_idx);
	if (err < 0)
		return err;

	/* Empty... */
	if (err == vrh->vring.num)
		return 0;

	*head = err;
	err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
			   gfp, copydesc_kern);
	if (err)
		return err;

	return 1;
}
EXPORT_SYMBOL(vringh_getdesc_kern);

/**
 * vringh_iov_pull_kern - copy bytes from vring_iov.
 * @riov: the riov as passed to vringh_getdesc_kern() (updated as we consume)
 * @dst: the place to copy.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len)
{
	return vringh_iov_xfer(riov, dst, len, xfer_kern);
}
EXPORT_SYMBOL(vringh_iov_pull_kern);

/**
 * vringh_iov_push_kern - copy bytes into vring_iov.
 * @wiov: the wiov as passed to vringh_getdesc_kern() (updated as we consume)
 * @src: the place to copy from.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov,
			     const void *src, size_t len)
{
	return vringh_iov_xfer(wiov, (void *)src, len, xfer_kern);
}
EXPORT_SYMBOL(vringh_iov_push_kern);

/**
 * vringh_abandon_kern - we've decided not to handle the descriptor(s).
 * @vrh: the vring.
 * @num: the number of descriptors to put back (ie. num
 *	 vringh_get_kern() to undo).
 *
 * The next vringh_get_kern() will return the old descriptor(s) again.
 */
void vringh_abandon_kern(struct vringh *vrh, unsigned int num)
{
	/* We only update vring_avail_event(vr) when we want to be notified,
	 * so we haven't changed that yet. */
	vrh->last_avail_idx -= num;
}
EXPORT_SYMBOL(vringh_abandon_kern);

/**
 * vringh_complete_kern - we've finished with descriptor, publish it.
 * @vrh: the vring.
 * @head: the head as filled in by vringh_getdesc_kern.
 * @len: the length of data we have written.
 *
 * You should check vringh_need_notify_kern() after one or more calls
 * to this function.
 */
int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len)
{
	struct vring_used_elem used;

	used.id = cpu_to_vringh32(vrh, head);
	used.len = cpu_to_vringh32(vrh, len);

	return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern);
}
EXPORT_SYMBOL(vringh_complete_kern);

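/*
 * Illustrative kernelspace equivalent of the userspace loop above (not part
 * of this file): no getrange() is needed because descriptor addresses are
 * already usable kernel pointers, and riov/wiov may grow via kmalloc(gfp)
 * when a chain is longer than the on-stack arrays.
 */
static int __maybe_unused example_service_kern_ring(struct vringh *vrh)
{
	struct vringh_kiov riov, wiov;
	struct kvec rstack[8], wstack[8];
	u16 head;
	int err;

	vringh_kiov_init(&riov, rstack, ARRAY_SIZE(rstack));
	vringh_kiov_init(&wiov, wstack, ARRAY_SIZE(wstack));

	while ((err = vringh_getdesc_kern(vrh, &riov, &wiov, &head,
					  GFP_KERNEL)) == 1) {
		/*
		 * ... vringh_iov_pull_kern(&riov, ...) for the request,
		 * vringh_iov_push_kern(&wiov, ...) for the reply ...
		 */
		err = vringh_complete_kern(vrh, head, 0);
		if (err)
			break;
	}

	vringh_kiov_cleanup(&riov);
	vringh_kiov_cleanup(&wiov);

	if (err < 0)
		return err;

	/* >0 means the caller should now signal the other side. */
	return vringh_need_notify_kern(vrh);
}
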
/**
 * vringh_notify_enable_kern - we want to know if something changes.
 * @vrh: the vring.
 *
 * This always enables notifications, but returns false if there are
 * now more buffers available in the vring.
 */
bool vringh_notify_enable_kern(struct vringh *vrh)
{
	return __vringh_notify_enable(vrh, getu16_kern, putu16_kern);
}
EXPORT_SYMBOL(vringh_notify_enable_kern);

/**
 * vringh_notify_disable_kern - don't tell us if something changes.
 * @vrh: the vring.
 *
 * This is our normal running state: we disable and then only enable when
 * we're going to sleep.
 */
void vringh_notify_disable_kern(struct vringh *vrh)
{
	__vringh_notify_disable(vrh, putu16_kern);
}
EXPORT_SYMBOL(vringh_notify_disable_kern);

/**
 * vringh_need_notify_kern - must we tell the other side about used buffers?
 * @vrh: the vring we've called vringh_complete_kern() on.
 *
 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
 */
int vringh_need_notify_kern(struct vringh *vrh)
{
	return __vringh_need_notify(vrh, getu16_kern);
}
EXPORT_SYMBOL(vringh_need_notify_kern);

MODULE_LICENSE("GPL");