drivers/virtio/virtio_ring.c (Linux 6.14-rc3)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* Virtio ring implementation.
3  *
4  *  Copyright 2007 Rusty Russell IBM Corporation
5  */
6 #include <linux/virtio.h>
7 #include <linux/virtio_ring.h>
8 #include <linux/virtio_config.h>
9 #include <linux/device.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/hrtimer.h>
13 #include <linux/dma-mapping.h>
14 #include <linux/kmsan.h>
15 #include <linux/spinlock.h>
16 #include <xen/xen.h>
17
18 #ifdef DEBUG
19 /* For development, we want to crash whenever the ring is screwed. */
20 #define BAD_RING(_vq, fmt, args...)                             \
21         do {                                                    \
22                 dev_err(&(_vq)->vq.vdev->dev,                   \
23                         "%s:"fmt, (_vq)->vq.name, ##args);      \
24                 BUG();                                          \
25         } while (0)
26 /* Caller is supposed to guarantee no reentry. */
27 #define START_USE(_vq)                                          \
28         do {                                                    \
29                 if ((_vq)->in_use)                              \
30                         panic("%s:in_use = %i\n",               \
31                               (_vq)->vq.name, (_vq)->in_use);   \
32                 (_vq)->in_use = __LINE__;                       \
33         } while (0)
34 #define END_USE(_vq) \
35         do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while (0)
36 #define LAST_ADD_TIME_UPDATE(_vq)                               \
37         do {                                                    \
38                 ktime_t now = ktime_get();                      \
39                                                                 \
40                 /* No kick or get, with .1 second between?  Warn. */ \
41                 if ((_vq)->last_add_time_valid)                 \
42                         WARN_ON(ktime_to_ms(ktime_sub(now,      \
43                                 (_vq)->last_add_time)) > 100);  \
44                 (_vq)->last_add_time = now;                     \
45                 (_vq)->last_add_time_valid = true;              \
46         } while (0)
47 #define LAST_ADD_TIME_CHECK(_vq)                                \
48         do {                                                    \
49                 if ((_vq)->last_add_time_valid) {               \
50                         WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
51                                       (_vq)->last_add_time)) > 100); \
52                 }                                               \
53         } while (0)
54 #define LAST_ADD_TIME_INVALID(_vq)                              \
55         ((_vq)->last_add_time_valid = false)
56 #else
57 #define BAD_RING(_vq, fmt, args...)                             \
58         do {                                                    \
59                 dev_err(&_vq->vq.vdev->dev,                     \
60                         "%s:"fmt, (_vq)->vq.name, ##args);      \
61                 (_vq)->broken = true;                           \
62         } while (0)
63 #define START_USE(vq)
64 #define END_USE(vq)
65 #define LAST_ADD_TIME_UPDATE(vq)
66 #define LAST_ADD_TIME_CHECK(vq)
67 #define LAST_ADD_TIME_INVALID(vq)
68 #endif
69
70 struct vring_desc_state_split {
71         void *data;                     /* Data for callback. */
72
73         /* Indirect desc table and extra table, if any. The two are allocated
74          * together so we don't put extra pressure on the memory allocator.
75          */
76         struct vring_desc *indir_desc;
77 };
78
79 struct vring_desc_state_packed {
80         void *data;                     /* Data for callback. */
81
82         /* Indirect desc table and extra table, if any. The two are allocated
83          * together so we don't put extra pressure on the memory allocator.
84          */
85         struct vring_packed_desc *indir_desc;
86         u16 num;                        /* Descriptor list length. */
87         u16 last;                       /* The last desc state in a list. */
88 };
89
90 struct vring_desc_extra {
91         dma_addr_t addr;                /* Descriptor DMA addr. */
92         u32 len;                        /* Descriptor length. */
93         u16 flags;                      /* Descriptor flags. */
94         u16 next;                       /* The next desc state in a list. */
95 };
96
97 struct vring_virtqueue_split {
98         /* Actual memory layout for this queue. */
99         struct vring vring;
100
101         /* Last written value to avail->flags */
102         u16 avail_flags_shadow;
103
104         /*
105          * Last written value to avail->idx in
106          * guest byte order.
107          */
108         u16 avail_idx_shadow;
109
110         /* Per-descriptor state. */
111         struct vring_desc_state_split *desc_state;
112         struct vring_desc_extra *desc_extra;
113
114         /* DMA address and size information */
115         dma_addr_t queue_dma_addr;
116         size_t queue_size_in_bytes;
117
118         /*
119          * The creation parameters are saved here so that a new vring can
120          * be created from them later (e.g. when the virtqueue is resized).
121          */
122         u32 vring_align;
123         bool may_reduce_num;
124 };
125
126 struct vring_virtqueue_packed {
127         /* Actual memory layout for this queue. */
128         struct {
129                 unsigned int num;
130                 struct vring_packed_desc *desc;
131                 struct vring_packed_desc_event *driver;
132                 struct vring_packed_desc_event *device;
133         } vring;
134
135         /* Driver ring wrap counter. */
136         bool avail_wrap_counter;
137
138         /* Avail used flags. */
139         u16 avail_used_flags;
140
141         /* Index of the next avail descriptor. */
142         u16 next_avail_idx;
143
144         /*
145          * Last written value to driver->flags in
146          * guest byte order.
147          */
148         u16 event_flags_shadow;
149
150         /* Per-descriptor state. */
151         struct vring_desc_state_packed *desc_state;
152         struct vring_desc_extra *desc_extra;
153
154         /* DMA address and size information */
155         dma_addr_t ring_dma_addr;
156         dma_addr_t driver_event_dma_addr;
157         dma_addr_t device_event_dma_addr;
158         size_t ring_size_in_bytes;
159         size_t event_size_in_bytes;
160 };
161
162 struct vring_virtqueue {
163         struct virtqueue vq;
164
165         /* Is this a packed ring? */
166         bool packed_ring;
167
168         /* Is DMA API used? */
169         bool use_dma_api;
170
171         /* Can we use weak barriers? */
172         bool weak_barriers;
173
174         /* Other side has made a mess, don't try any more. */
175         bool broken;
176
177         /* Host supports indirect buffers */
178         bool indirect;
179
180         /* Host publishes avail event idx */
181         bool event;
182
183         /* Head of free buffer list. */
184         unsigned int free_head;
185         /* Number we've added since last sync. */
186         unsigned int num_added;
187
188         /* Last used index we've seen.
189          * For the split ring it simply holds the last used index.
190          * For the packed ring:
191          * bits below VRING_PACKED_EVENT_F_WRAP_CTR hold the last used index,
192          * bits from VRING_PACKED_EVENT_F_WRAP_CTR up hold the used wrap counter.
193          */
194         u16 last_used_idx;
195
196         /* Hint for event idx: already triggered no need to disable. */
197         bool event_triggered;
198
199         union {
200                 /* Available for split ring */
201                 struct vring_virtqueue_split split;
202
203                 /* Available for packed ring */
204                 struct vring_virtqueue_packed packed;
205         };
206
207         /* How to notify other side. FIXME: commonalize hcalls! */
208         bool (*notify)(struct virtqueue *vq);
209
210         /* DMA, allocation, and size information */
211         bool we_own_ring;
212
213         /* Device used for doing DMA */
214         struct device *dma_dev;
215
216 #ifdef DEBUG
217         /* They're supposed to lock for us. */
218         unsigned int in_use;
219
220         /* Figure out if their kicks are too delayed. */
221         bool last_add_time_valid;
222         ktime_t last_add_time;
223 #endif
224 };
225
226 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
227 static void vring_free(struct virtqueue *_vq);
228
229 /*
230  * Helpers.
231  */
232
233 #define to_vvq(_vq) container_of_const(_vq, struct vring_virtqueue, vq)
234
235 static bool virtqueue_use_indirect(const struct vring_virtqueue *vq,
236                                    unsigned int total_sg)
237 {
238         /*
239          * If the host supports indirect descriptor tables, and we have multiple
240          * buffers, then go indirect. FIXME: tune this threshold
241          */
242         return (vq->indirect && total_sg > 1 && vq->vq.num_free);
243 }
244
245 /*
246  * Modern virtio devices have feature bits to specify whether they need a
247  * quirk and bypass the IOMMU. If not there, just use the DMA API.
248  *
249  * If there, the interaction between virtio and DMA API is messy.
250  *
251  * On most systems with virtio, physical addresses match bus addresses,
252  * and it doesn't particularly matter whether we use the DMA API.
253  *
254  * On some systems, including Xen and any system with a physical device
255  * that speaks virtio behind a physical IOMMU, we must use the DMA API
256  * for virtio DMA to work at all.
257  *
258  * On other systems, including SPARC and PPC64, virtio-pci devices are
259  * enumerated as though they are behind an IOMMU, but the virtio host
260  * ignores the IOMMU, so we must either pretend that the IOMMU isn't
261  * there or somehow map everything as the identity.
262  *
263  * For the time being, we preserve historic behavior and bypass the DMA
264  * API.
265  *
266  * TODO: install a per-device DMA ops structure that does the right thing
267  * taking into account all the above quirks, and use the DMA API
268  * unconditionally on data path.
269  */
270
271 static bool vring_use_dma_api(const struct virtio_device *vdev)
272 {
273         if (!virtio_has_dma_quirk(vdev))
274                 return true;
275
276         /* Otherwise, we are left to guess. */
277         /*
278          * In theory, it's possible to have a buggy QEMU-supplied
279          * emulated Q35 IOMMU and Xen enabled at the same time.  On
280          * such a configuration, virtio has never worked and will
281          * not work without an even larger kludge.  Instead, enable
282          * the DMA API if we're a Xen guest, which at least allows
283          * all of the sensible Xen configurations to work correctly.
284          */
285         if (xen_domain())
286                 return true;
287
288         return false;
289 }
290
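/*
 * Buffers that were premapped by the caller have their extra->addr set to
 * DMA_MAPPING_ERROR (see virtqueue_add_desc_split()); the core must not
 * unmap those.
 */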
291 static bool vring_need_unmap_buffer(const struct vring_virtqueue *vring,
292                                     const struct vring_desc_extra *extra)
293 {
294         return vring->use_dma_api && (extra->addr != DMA_MAPPING_ERROR);
295 }
296
297 size_t virtio_max_dma_size(const struct virtio_device *vdev)
298 {
299         size_t max_segment_size = SIZE_MAX;
300
301         if (vring_use_dma_api(vdev))
302                 max_segment_size = dma_max_mapping_size(vdev->dev.parent);
303
304         return max_segment_size;
305 }
306 EXPORT_SYMBOL_GPL(virtio_max_dma_size);
307
308 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
309                                dma_addr_t *dma_handle, gfp_t flag,
310                                struct device *dma_dev)
311 {
312         if (vring_use_dma_api(vdev)) {
313                 return dma_alloc_coherent(dma_dev, size,
314                                           dma_handle, flag);
315         } else {
316                 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
317
318                 if (queue) {
319                         phys_addr_t phys_addr = virt_to_phys(queue);
320                         *dma_handle = (dma_addr_t)phys_addr;
321
322                         /*
323                          * Sanity check: make sure we didn't truncate
324                          * the address.  The only arches I can find that
325                          * have 64-bit phys_addr_t but 32-bit dma_addr_t
326                          * are certain non-highmem MIPS and x86
327                          * configurations, but these configurations
328                          * should never allocate physical pages above 32
329                          * bits, so this is fine.  Just in case, throw a
330                          * warning and abort if we end up with an
331                          * unrepresentable address.
332                          */
333                         if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
334                                 free_pages_exact(queue, PAGE_ALIGN(size));
335                                 return NULL;
336                         }
337                 }
338                 return queue;
339         }
340 }
341
342 static void vring_free_queue(struct virtio_device *vdev, size_t size,
343                              void *queue, dma_addr_t dma_handle,
344                              struct device *dma_dev)
345 {
346         if (vring_use_dma_api(vdev))
347                 dma_free_coherent(dma_dev, size, queue, dma_handle);
348         else
349                 free_pages_exact(queue, PAGE_ALIGN(size));
350 }
351
352 /*
353  * The DMA ops on various arches are rather gnarly right now, and
354  * making all of the arch DMA ops work on the vring device itself
355  * is a mess.
356  */
357 static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
358 {
359         return vq->dma_dev;
360 }
361
362 /* Map one sg entry. */
363 static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg,
364                             enum dma_data_direction direction, dma_addr_t *addr,
365                             u32 *len, bool premapped)
366 {
367         if (premapped) {
368                 *addr = sg_dma_address(sg);
369                 *len = sg_dma_len(sg);
370                 return 0;
371         }
372
373         *len = sg->length;
374
375         if (!vq->use_dma_api) {
376                 /*
377                  * If DMA is not used, KMSAN doesn't know that the scatterlist
378                  * is initialized by the hardware. Explicitly check/unpoison it
379                  * depending on the direction.
380                  */
381                 kmsan_handle_dma(sg_page(sg), sg->offset, sg->length, direction);
382                 *addr = (dma_addr_t)sg_phys(sg);
383                 return 0;
384         }
385
386         /*
387          * We can't use dma_map_sg, because we don't use scatterlists in
388          * the way it expects (we don't guarantee that the scatterlist
389          * will exist for the lifetime of the mapping).
390          */
391         *addr = dma_map_page(vring_dma_dev(vq),
392                             sg_page(sg), sg->offset, sg->length,
393                             direction);
394
395         if (dma_mapping_error(vring_dma_dev(vq), *addr))
396                 return -ENOMEM;
397
398         return 0;
399 }
400
401 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
402                                    void *cpu_addr, size_t size,
403                                    enum dma_data_direction direction)
404 {
405         if (!vq->use_dma_api)
406                 return (dma_addr_t)virt_to_phys(cpu_addr);
407
408         return dma_map_single(vring_dma_dev(vq),
409                               cpu_addr, size, direction);
410 }
411
412 static int vring_mapping_error(const struct vring_virtqueue *vq,
413                                dma_addr_t addr)
414 {
415         if (!vq->use_dma_api)
416                 return 0;
417
418         return dma_mapping_error(vring_dma_dev(vq), addr);
419 }
420
421 static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
422 {
423         vq->vq.num_free = num;
424
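        /* The packed ring starts with its wrap counters set to 1, so seed
         * the used wrap counter bit in last_used_idx accordingly.
         */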
425         if (vq->packed_ring)
426                 vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
427         else
428                 vq->last_used_idx = 0;
429
430         vq->event_triggered = false;
431         vq->num_added = 0;
432
433 #ifdef DEBUG
434         vq->in_use = false;
435         vq->last_add_time_valid = false;
436 #endif
437 }
438
439
440 /*
441  * Split ring specific functions - *_split().
442  */
443
444 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
445                                           struct vring_desc_extra *extra)
446 {
447         u16 flags;
448
449         flags = extra->flags;
450
451         if (flags & VRING_DESC_F_INDIRECT) {
452                 if (!vq->use_dma_api)
453                         goto out;
454
455                 dma_unmap_single(vring_dma_dev(vq),
456                                  extra->addr,
457                                  extra->len,
458                                  (flags & VRING_DESC_F_WRITE) ?
459                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
460         } else {
461                 if (!vring_need_unmap_buffer(vq, extra))
462                         goto out;
463
464                 dma_unmap_page(vring_dma_dev(vq),
465                                extra->addr,
466                                extra->len,
467                                (flags & VRING_DESC_F_WRITE) ?
468                                DMA_FROM_DEVICE : DMA_TO_DEVICE);
469         }
470
471 out:
472         return extra->next;
473 }
474
475 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
476                                                unsigned int total_sg,
477                                                gfp_t gfp)
478 {
479         struct vring_desc_extra *extra;
480         struct vring_desc *desc;
481         unsigned int i, size;
482
483         /*
484          * We require lowmem mappings for the descriptors because
485          * otherwise virt_to_phys will give us bogus addresses in the
486          * virtqueue.
487          */
488         gfp &= ~__GFP_HIGHMEM;
489
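        /*
         * Allocate the descriptor table and its extra state in one chunk:
         * total_sg descriptors followed by total_sg extra entries, with the
         * extra entries chained through their ->next fields.
         */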
490         size = sizeof(*desc) * total_sg + sizeof(*extra) * total_sg;
491
492         desc = kmalloc(size, gfp);
493         if (!desc)
494                 return NULL;
495
496         extra = (struct vring_desc_extra *)&desc[total_sg];
497
498         for (i = 0; i < total_sg; i++)
499                 extra[i].next = i + 1;
500
501         return desc;
502 }
503
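/*
 * Fill one descriptor (converted to virtio endianness for the device)
 * together with its extra shadow state, and return the index of the next
 * descriptor in the chain.
 */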
504 static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
505                                                     struct vring_desc *desc,
506                                                     struct vring_desc_extra *extra,
507                                                     unsigned int i,
508                                                     dma_addr_t addr,
509                                                     unsigned int len,
510                                                     u16 flags, bool premapped)
511 {
512         u16 next;
513
514         desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
515         desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
516         desc[i].len = cpu_to_virtio32(vq->vdev, len);
517
518         extra[i].addr = premapped ? DMA_MAPPING_ERROR : addr;
519         extra[i].len = len;
520         extra[i].flags = flags;
521
522         next = extra[i].next;
523
524         desc[i].next = cpu_to_virtio16(vq->vdev, next);
525
526         return next;
527 }
528
529 static inline int virtqueue_add_split(struct virtqueue *_vq,
530                                       struct scatterlist *sgs[],
531                                       unsigned int total_sg,
532                                       unsigned int out_sgs,
533                                       unsigned int in_sgs,
534                                       void *data,
535                                       void *ctx,
536                                       bool premapped,
537                                       gfp_t gfp)
538 {
539         struct vring_virtqueue *vq = to_vvq(_vq);
540         struct vring_desc_extra *extra;
541         struct scatterlist *sg;
542         struct vring_desc *desc;
543         unsigned int i, n, avail, descs_used, prev, err_idx;
544         int head;
545         bool indirect;
546
547         START_USE(vq);
548
549         BUG_ON(data == NULL);
550         BUG_ON(ctx && vq->indirect);
551
552         if (unlikely(vq->broken)) {
553                 END_USE(vq);
554                 return -EIO;
555         }
556
557         LAST_ADD_TIME_UPDATE(vq);
558
559         BUG_ON(total_sg == 0);
560
561         head = vq->free_head;
562
563         if (virtqueue_use_indirect(vq, total_sg))
564                 desc = alloc_indirect_split(_vq, total_sg, gfp);
565         else {
566                 desc = NULL;
567                 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
568         }
569
570         if (desc) {
571                 /* Use a single buffer which doesn't continue */
572                 indirect = true;
573                 /* Set up rest to use this indirect table. */
574                 i = 0;
575                 descs_used = 1;
576                 extra = (struct vring_desc_extra *)&desc[total_sg];
577         } else {
578                 indirect = false;
579                 desc = vq->split.vring.desc;
580                 extra = vq->split.desc_extra;
581                 i = head;
582                 descs_used = total_sg;
583         }
584
585         if (unlikely(vq->vq.num_free < descs_used)) {
586                 pr_debug("Can't add buf len %i - avail = %i\n",
587                          descs_used, vq->vq.num_free);
588                 /* FIXME: for historical reasons, we force a notify here if
589                  * there are outgoing parts to the buffer.  Presumably the
590                  * host should service the ring ASAP. */
591                 if (out_sgs)
592                         vq->notify(&vq->vq);
593                 if (indirect)
594                         kfree(desc);
595                 END_USE(vq);
596                 return -ENOSPC;
597         }
598
599         for (n = 0; n < out_sgs; n++) {
600                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
601                         dma_addr_t addr;
602                         u32 len;
603
604                         if (vring_map_one_sg(vq, sg, DMA_TO_DEVICE, &addr, &len, premapped))
605                                 goto unmap_release;
606
607                         prev = i;
608                         /* Note that we trust the indirect descriptor
609                          * table since it uses streaming DMA mapping.
610                          */
611                         i = virtqueue_add_desc_split(_vq, desc, extra, i, addr, len,
612                                                      VRING_DESC_F_NEXT,
613                                                      premapped);
614                 }
615         }
616         for (; n < (out_sgs + in_sgs); n++) {
617                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
618                         dma_addr_t addr;
619                         u32 len;
620
621                         if (vring_map_one_sg(vq, sg, DMA_FROM_DEVICE, &addr, &len, premapped))
622                                 goto unmap_release;
623
624                         prev = i;
625                         /* Note that we trust the indirect descriptor
626                          * table since it uses streaming DMA mapping.
627                          */
628                         i = virtqueue_add_desc_split(_vq, desc, extra, i, addr, len,
629                                                      VRING_DESC_F_NEXT |
630                                                      VRING_DESC_F_WRITE,
631                                                      premapped);
632                 }
633         }
634         /* Last one doesn't continue. */
635         desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
636         if (!indirect && vring_need_unmap_buffer(vq, &extra[prev]))
637                 vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
638                         ~VRING_DESC_F_NEXT;
639
640         if (indirect) {
641                 /* Now that the indirect table is filled in, map it. */
642                 dma_addr_t addr = vring_map_single(
643                         vq, desc, total_sg * sizeof(struct vring_desc),
644                         DMA_TO_DEVICE);
645                 if (vring_mapping_error(vq, addr))
646                         goto unmap_release;
647
648                 virtqueue_add_desc_split(_vq, vq->split.vring.desc,
649                                          vq->split.desc_extra,
650                                          head, addr,
651                                          total_sg * sizeof(struct vring_desc),
652                                          VRING_DESC_F_INDIRECT, false);
653         }
654
655         /* We're using some buffers from the free list. */
656         vq->vq.num_free -= descs_used;
657
658         /* Update free pointer */
659         if (indirect)
660                 vq->free_head = vq->split.desc_extra[head].next;
661         else
662                 vq->free_head = i;
663
664         /* Store token and indirect buffer state. */
665         vq->split.desc_state[head].data = data;
666         if (indirect)
667                 vq->split.desc_state[head].indir_desc = desc;
668         else
669                 vq->split.desc_state[head].indir_desc = ctx;
670
671         /* Put entry in available array (but don't update avail->idx until they
672          * do sync). */
673         avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
674         vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
675
676         /* Descriptors and available array need to be set before we expose the
677          * new available array entries. */
678         virtio_wmb(vq->weak_barriers);
679         vq->split.avail_idx_shadow++;
680         vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
681                                                 vq->split.avail_idx_shadow);
682         vq->num_added++;
683
684         pr_debug("Added buffer head %i to %p\n", head, vq);
685         END_USE(vq);
686
687         /* This is very unlikely, but theoretically possible.  Kick
688          * just in case. */
689         if (unlikely(vq->num_added == (1 << 16) - 1))
690                 virtqueue_kick(_vq);
691
692         return 0;
693
694 unmap_release:
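        /* Unwind: walk the chain from its start (indirect) or from head
         * (direct) up to the entry that failed, unmapping everything that
         * was mapped so far.
         */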
695         err_idx = i;
696
697         if (indirect)
698                 i = 0;
699         else
700                 i = head;
701
702         for (n = 0; n < total_sg; n++) {
703                 if (i == err_idx)
704                         break;
705
706                 i = vring_unmap_one_split(vq, &extra[i]);
707         }
708
709         if (indirect)
710                 kfree(desc);
711
712         END_USE(vq);
713         return -ENOMEM;
714 }
715
716 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
717 {
718         struct vring_virtqueue *vq = to_vvq(_vq);
719         u16 new, old;
720         bool needs_kick;
721
722         START_USE(vq);
723         /* We need to expose available array entries before checking avail
724          * event. */
725         virtio_mb(vq->weak_barriers);
726
727         old = vq->split.avail_idx_shadow - vq->num_added;
728         new = vq->split.avail_idx_shadow;
729         vq->num_added = 0;
730
731         LAST_ADD_TIME_CHECK(vq);
732         LAST_ADD_TIME_INVALID(vq);
733
734         if (vq->event) {
735                 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
736                                         vring_avail_event(&vq->split.vring)),
737                                               new, old);
738         } else {
739                 needs_kick = !(vq->split.vring.used->flags &
740                                         cpu_to_virtio16(_vq->vdev,
741                                                 VRING_USED_F_NO_NOTIFY));
742         }
743         END_USE(vq);
744         return needs_kick;
745 }
746
747 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
748                              void **ctx)
749 {
750         struct vring_desc_extra *extra;
751         unsigned int i, j;
752         __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
753
754         /* Clear data ptr. */
755         vq->split.desc_state[head].data = NULL;
756
757         extra = vq->split.desc_extra;
758
759         /* Put back on free list: unmap first-level descriptors and find end */
760         i = head;
761
762         while (vq->split.vring.desc[i].flags & nextflag) {
763                 vring_unmap_one_split(vq, &extra[i]);
764                 i = vq->split.desc_extra[i].next;
765                 vq->vq.num_free++;
766         }
767
768         vring_unmap_one_split(vq, &extra[i]);
769         vq->split.desc_extra[i].next = vq->free_head;
770         vq->free_head = head;
771
772         /* Plus final descriptor */
773         vq->vq.num_free++;
774
775         if (vq->indirect) {
776                 struct vring_desc *indir_desc =
777                                 vq->split.desc_state[head].indir_desc;
778                 u32 len, num;
779
780                 /* Free the indirect table, if any, now that it's unmapped. */
781                 if (!indir_desc)
782                         return;
783                 len = vq->split.desc_extra[head].len;
784
785                 BUG_ON(!(vq->split.desc_extra[head].flags &
786                                 VRING_DESC_F_INDIRECT));
787                 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
788
789                 num = len / sizeof(struct vring_desc);
790
791                 extra = (struct vring_desc_extra *)&indir_desc[num];
792
793                 if (vq->use_dma_api) {
794                         for (j = 0; j < num; j++)
795                                 vring_unmap_one_split(vq, &extra[j]);
796                 }
797
798                 kfree(indir_desc);
799                 vq->split.desc_state[head].indir_desc = NULL;
800         } else if (ctx) {
801                 *ctx = vq->split.desc_state[head].indir_desc;
802         }
803 }
804
805 static bool more_used_split(const struct vring_virtqueue *vq)
806 {
807         return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
808                         vq->split.vring.used->idx);
809 }
810
811 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
812                                          unsigned int *len,
813                                          void **ctx)
814 {
815         struct vring_virtqueue *vq = to_vvq(_vq);
816         void *ret;
817         unsigned int i;
818         u16 last_used;
819
820         START_USE(vq);
821
822         if (unlikely(vq->broken)) {
823                 END_USE(vq);
824                 return NULL;
825         }
826
827         if (!more_used_split(vq)) {
828                 pr_debug("No more buffers in queue\n");
829                 END_USE(vq);
830                 return NULL;
831         }
832
833         /* Only get used array entries after they have been exposed by host. */
834         virtio_rmb(vq->weak_barriers);
835
836         last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
837         i = virtio32_to_cpu(_vq->vdev,
838                         vq->split.vring.used->ring[last_used].id);
839         *len = virtio32_to_cpu(_vq->vdev,
840                         vq->split.vring.used->ring[last_used].len);
841
842         if (unlikely(i >= vq->split.vring.num)) {
843                 BAD_RING(vq, "id %u out of range\n", i);
844                 return NULL;
845         }
846         if (unlikely(!vq->split.desc_state[i].data)) {
847                 BAD_RING(vq, "id %u is not a head!\n", i);
848                 return NULL;
849         }
850
851         /* detach_buf_split clears data, so grab it now. */
852         ret = vq->split.desc_state[i].data;
853         detach_buf_split(vq, i, ctx);
854         vq->last_used_idx++;
855         /* If we expect an interrupt for the next entry, tell host
856          * by writing event index and flush out the write before
857          * the read in the next get_buf call. */
858         if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
859                 virtio_store_mb(vq->weak_barriers,
860                                 &vring_used_event(&vq->split.vring),
861                                 cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
862
863         LAST_ADD_TIME_INVALID(vq);
864
865         END_USE(vq);
866         return ret;
867 }
868
869 static void virtqueue_disable_cb_split(struct virtqueue *_vq)
870 {
871         struct vring_virtqueue *vq = to_vvq(_vq);
872
873         if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
874                 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
875
876                 /*
877                  * If device triggered an event already it won't trigger one again:
878                  * no need to disable.
879                  */
880                 if (vq->event_triggered)
881                         return;
882
883                 if (vq->event)
884                         /* TODO: this is a hack. Figure out a cleaner value to write. */
885                         vring_used_event(&vq->split.vring) = 0x0;
886                 else
887                         vq->split.vring.avail->flags =
888                                 cpu_to_virtio16(_vq->vdev,
889                                                 vq->split.avail_flags_shadow);
890         }
891 }
892
893 static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
894 {
895         struct vring_virtqueue *vq = to_vvq(_vq);
896         u16 last_used_idx;
897
898         START_USE(vq);
899
900         /* We optimistically turn back on interrupts, then check if there was
901          * more to do. */
902         /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
903          * either clear the flags bit or point the event index at the next
904          * entry. Always do both to keep code simple. */
905         if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
906                 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
907                 if (!vq->event)
908                         vq->split.vring.avail->flags =
909                                 cpu_to_virtio16(_vq->vdev,
910                                                 vq->split.avail_flags_shadow);
911         }
912         vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
913                         last_used_idx = vq->last_used_idx);
914         END_USE(vq);
915         return last_used_idx;
916 }
917
918 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx)
919 {
920         struct vring_virtqueue *vq = to_vvq(_vq);
921
922         return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
923                         vq->split.vring.used->idx);
924 }
925
926 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
927 {
928         struct vring_virtqueue *vq = to_vvq(_vq);
929         u16 bufs;
930
931         START_USE(vq);
932
933         /* We optimistically turn back on interrupts, then check if there was
934          * more to do. */
935         /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
936          * either clear the flags bit or point the event index at the next
937          * entry. Always update the event index to keep code simple. */
938         if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
939                 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
940                 if (!vq->event)
941                         vq->split.vring.avail->flags =
942                                 cpu_to_virtio16(_vq->vdev,
943                                                 vq->split.avail_flags_shadow);
944         }
945         /* TODO: tune this threshold */
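        /* Ask the device for an event only after roughly three quarters of
         * the currently outstanding buffers have been used, so completions
         * are batched instead of interrupting for every buffer.
         */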
946         bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
947
948         virtio_store_mb(vq->weak_barriers,
949                         &vring_used_event(&vq->split.vring),
950                         cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
951
952         if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
953                                         - vq->last_used_idx) > bufs)) {
954                 END_USE(vq);
955                 return false;
956         }
957
958         END_USE(vq);
959         return true;
960 }
961
962 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
963 {
964         struct vring_virtqueue *vq = to_vvq(_vq);
965         unsigned int i;
966         void *buf;
967
968         START_USE(vq);
969
970         for (i = 0; i < vq->split.vring.num; i++) {
971                 if (!vq->split.desc_state[i].data)
972                         continue;
973                 /* detach_buf_split clears data, so grab it now. */
974                 buf = vq->split.desc_state[i].data;
975                 detach_buf_split(vq, i, NULL);
976                 vq->split.avail_idx_shadow--;
977                 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
978                                 vq->split.avail_idx_shadow);
979                 END_USE(vq);
980                 return buf;
981         }
982         /* That should have freed everything. */
983         BUG_ON(vq->vq.num_free != vq->split.vring.num);
984
985         END_USE(vq);
986         return NULL;
987 }
988
989 static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split,
990                                        struct vring_virtqueue *vq)
991 {
992         struct virtio_device *vdev;
993
994         vdev = vq->vq.vdev;
995
996         vring_split->avail_flags_shadow = 0;
997         vring_split->avail_idx_shadow = 0;
998
999         /* No callback?  Tell other side not to bother us. */
1000         if (!vq->vq.callback) {
1001                 vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
1002                 if (!vq->event)
1003                         vring_split->vring.avail->flags = cpu_to_virtio16(vdev,
1004                                         vring_split->avail_flags_shadow);
1005         }
1006 }
1007
1008 static void virtqueue_reinit_split(struct vring_virtqueue *vq)
1009 {
1010         int num;
1011
1012         num = vq->split.vring.num;
1013
1014         vq->split.vring.avail->flags = 0;
1015         vq->split.vring.avail->idx = 0;
1016
1017         /* reset avail event */
1018         vq->split.vring.avail->ring[num] = 0;
1019
1020         vq->split.vring.used->flags = 0;
1021         vq->split.vring.used->idx = 0;
1022
1023         /* reset used event */
1024         *(__virtio16 *)&(vq->split.vring.used->ring[num]) = 0;
1025
1026         virtqueue_init(vq, num);
1027
1028         virtqueue_vring_init_split(&vq->split, vq);
1029 }
1030
1031 static void virtqueue_vring_attach_split(struct vring_virtqueue *vq,
1032                                          struct vring_virtqueue_split *vring_split)
1033 {
1034         vq->split = *vring_split;
1035
1036         /* Put everything in free lists. */
1037         vq->free_head = 0;
1038 }
1039
1040 static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split)
1041 {
1042         struct vring_desc_state_split *state;
1043         struct vring_desc_extra *extra;
1044         u32 num = vring_split->vring.num;
1045
1046         state = kmalloc_array(num, sizeof(struct vring_desc_state_split), GFP_KERNEL);
1047         if (!state)
1048                 goto err_state;
1049
1050         extra = vring_alloc_desc_extra(num);
1051         if (!extra)
1052                 goto err_extra;
1053
1054         memset(state, 0, num * sizeof(struct vring_desc_state_split));
1055
1056         vring_split->desc_state = state;
1057         vring_split->desc_extra = extra;
1058         return 0;
1059
1060 err_extra:
1061         kfree(state);
1062 err_state:
1063         return -ENOMEM;
1064 }
1065
1066 static void vring_free_split(struct vring_virtqueue_split *vring_split,
1067                              struct virtio_device *vdev, struct device *dma_dev)
1068 {
1069         vring_free_queue(vdev, vring_split->queue_size_in_bytes,
1070                          vring_split->vring.desc,
1071                          vring_split->queue_dma_addr,
1072                          dma_dev);
1073
1074         kfree(vring_split->desc_state);
1075         kfree(vring_split->desc_extra);
1076 }
1077
1078 static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split,
1079                                    struct virtio_device *vdev,
1080                                    u32 num,
1081                                    unsigned int vring_align,
1082                                    bool may_reduce_num,
1083                                    struct device *dma_dev)
1084 {
1085         void *queue = NULL;
1086         dma_addr_t dma_addr;
1087
1088         /* We assume num is a power of 2. */
1089         if (!is_power_of_2(num)) {
1090                 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
1091                 return -EINVAL;
1092         }
1093
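        /*
         * The ring needs physically contiguous memory.  Try the requested
         * size first and, if the transport allows reducing the ring, keep
         * halving it on allocation failure until it fits.
         */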
1094         /* TODO: allocate each queue chunk individually */
1095         for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
1096                 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1097                                           &dma_addr,
1098                                           GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1099                                           dma_dev);
1100                 if (queue)
1101                         break;
1102                 if (!may_reduce_num)
1103                         return -ENOMEM;
1104         }
1105
1106         if (!num)
1107                 return -ENOMEM;
1108
1109         if (!queue) {
1110                 /* Try to get a single page. You are my only hope! */
1111                 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1112                                           &dma_addr, GFP_KERNEL | __GFP_ZERO,
1113                                           dma_dev);
1114         }
1115         if (!queue)
1116                 return -ENOMEM;
1117
1118         vring_init(&vring_split->vring, num, queue, vring_align);
1119
1120         vring_split->queue_dma_addr = dma_addr;
1121         vring_split->queue_size_in_bytes = vring_size(num, vring_align);
1122
1123         vring_split->vring_align = vring_align;
1124         vring_split->may_reduce_num = may_reduce_num;
1125
1126         return 0;
1127 }
1128
1129 static struct virtqueue *__vring_new_virtqueue_split(unsigned int index,
1130                                                struct vring_virtqueue_split *vring_split,
1131                                                struct virtio_device *vdev,
1132                                                bool weak_barriers,
1133                                                bool context,
1134                                                bool (*notify)(struct virtqueue *),
1135                                                void (*callback)(struct virtqueue *),
1136                                                const char *name,
1137                                                struct device *dma_dev)
1138 {
1139         struct vring_virtqueue *vq;
1140         int err;
1141
1142         vq = kmalloc(sizeof(*vq), GFP_KERNEL);
1143         if (!vq)
1144                 return NULL;
1145
1146         vq->packed_ring = false;
1147         vq->vq.callback = callback;
1148         vq->vq.vdev = vdev;
1149         vq->vq.name = name;
1150         vq->vq.index = index;
1151         vq->vq.reset = false;
1152         vq->we_own_ring = false;
1153         vq->notify = notify;
1154         vq->weak_barriers = weak_barriers;
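        /*
         * With notification hardening the queue starts out "broken" and is
         * only marked usable once the driver calls virtio_device_ready(),
         * so early (spurious) notifications are ignored.
         */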
1155 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
1156         vq->broken = true;
1157 #else
1158         vq->broken = false;
1159 #endif
1160         vq->dma_dev = dma_dev;
1161         vq->use_dma_api = vring_use_dma_api(vdev);
1162
1163         vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
1164                 !context;
1165         vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1166
1167         if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
1168                 vq->weak_barriers = false;
1169
1170         err = vring_alloc_state_extra_split(vring_split);
1171         if (err) {
1172                 kfree(vq);
1173                 return NULL;
1174         }
1175
1176         virtqueue_vring_init_split(vring_split, vq);
1177
1178         virtqueue_init(vq, vring_split->vring.num);
1179         virtqueue_vring_attach_split(vq, vring_split);
1180
1181         spin_lock(&vdev->vqs_list_lock);
1182         list_add_tail(&vq->vq.list, &vdev->vqs);
1183         spin_unlock(&vdev->vqs_list_lock);
1184         return &vq->vq;
1185 }
1186
1187 static struct virtqueue *vring_create_virtqueue_split(
1188         unsigned int index,
1189         unsigned int num,
1190         unsigned int vring_align,
1191         struct virtio_device *vdev,
1192         bool weak_barriers,
1193         bool may_reduce_num,
1194         bool context,
1195         bool (*notify)(struct virtqueue *),
1196         void (*callback)(struct virtqueue *),
1197         const char *name,
1198         struct device *dma_dev)
1199 {
1200         struct vring_virtqueue_split vring_split = {};
1201         struct virtqueue *vq;
1202         int err;
1203
1204         err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align,
1205                                       may_reduce_num, dma_dev);
1206         if (err)
1207                 return NULL;
1208
1209         vq = __vring_new_virtqueue_split(index, &vring_split, vdev, weak_barriers,
1210                                    context, notify, callback, name, dma_dev);
1211         if (!vq) {
1212                 vring_free_split(&vring_split, vdev, dma_dev);
1213                 return NULL;
1214         }
1215
1216         to_vvq(vq)->we_own_ring = true;
1217
1218         return vq;
1219 }
1220
1221 static int virtqueue_resize_split(struct virtqueue *_vq, u32 num)
1222 {
1223         struct vring_virtqueue_split vring_split = {};
1224         struct vring_virtqueue *vq = to_vvq(_vq);
1225         struct virtio_device *vdev = _vq->vdev;
1226         int err;
1227
1228         err = vring_alloc_queue_split(&vring_split, vdev, num,
1229                                       vq->split.vring_align,
1230                                       vq->split.may_reduce_num,
1231                                       vring_dma_dev(vq));
1232         if (err)
1233                 goto err;
1234
1235         err = vring_alloc_state_extra_split(&vring_split);
1236         if (err)
1237                 goto err_state_extra;
1238
1239         vring_free(&vq->vq);
1240
1241         virtqueue_vring_init_split(&vring_split, vq);
1242
1243         virtqueue_init(vq, vring_split.vring.num);
1244         virtqueue_vring_attach_split(vq, &vring_split);
1245
1246         return 0;
1247
1248 err_state_extra:
1249         vring_free_split(&vring_split, vdev, vring_dma_dev(vq));
1250 err:
1251         virtqueue_reinit_split(vq);
1252         return -ENOMEM;
1253 }
1254
1255
1256 /*
1257  * Packed ring specific functions - *_packed().
1258  */
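/*
 * For the packed ring, last_used_idx stores the used wrap counter in bit
 * VRING_PACKED_EVENT_F_WRAP_CTR and the used index in the bits below it;
 * the two helpers below extract those fields.
 */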
1259 static bool packed_used_wrap_counter(u16 last_used_idx)
1260 {
1261         return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1262 }
1263
1264 static u16 packed_last_used(u16 last_used_idx)
1265 {
1266         return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1267 }
1268
1269 static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
1270                                      const struct vring_desc_extra *extra)
1271 {
1272         u16 flags;
1273
1274         flags = extra->flags;
1275
1276         if (flags & VRING_DESC_F_INDIRECT) {
1277                 if (!vq->use_dma_api)
1278                         return;
1279
1280                 dma_unmap_single(vring_dma_dev(vq),
1281                                  extra->addr, extra->len,
1282                                  (flags & VRING_DESC_F_WRITE) ?
1283                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
1284         } else {
1285                 if (!vring_need_unmap_buffer(vq, extra))
1286                         return;
1287
1288                 dma_unmap_page(vring_dma_dev(vq),
1289                                extra->addr, extra->len,
1290                                (flags & VRING_DESC_F_WRITE) ?
1291                                DMA_FROM_DEVICE : DMA_TO_DEVICE);
1292         }
1293 }
1294
1295 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
1296                                                        gfp_t gfp)
1297 {
1298         struct vring_desc_extra *extra;
1299         struct vring_packed_desc *desc;
1300         int i, size;
1301
1302         /*
1303          * We require lowmem mappings for the descriptors because
1304          * otherwise virt_to_phys will give us bogus addresses in the
1305          * virtqueue.
1306          */
1307         gfp &= ~__GFP_HIGHMEM;
1308
1309         size = (sizeof(*desc) + sizeof(*extra)) * total_sg;
1310
1311         desc = kmalloc(size, gfp);
1312         if (!desc)
1313                 return NULL;
1314
1315         extra = (struct vring_desc_extra *)&desc[total_sg];
1316
1317         for (i = 0; i < total_sg; i++)
1318                 extra[i].next = i + 1;
1319
1320         return desc;
1321 }
1322
1323 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
1324                                          struct scatterlist *sgs[],
1325                                          unsigned int total_sg,
1326                                          unsigned int out_sgs,
1327                                          unsigned int in_sgs,
1328                                          void *data,
1329                                          bool premapped,
1330                                          gfp_t gfp)
1331 {
1332         struct vring_desc_extra *extra;
1333         struct vring_packed_desc *desc;
1334         struct scatterlist *sg;
1335         unsigned int i, n, err_idx, len;
1336         u16 head, id;
1337         dma_addr_t addr;
1338
1339         head = vq->packed.next_avail_idx;
1340         desc = alloc_indirect_packed(total_sg, gfp);
1341         if (!desc)
1342                 return -ENOMEM;
1343
1344         extra = (struct vring_desc_extra *)&desc[total_sg];
1345
1346         if (unlikely(vq->vq.num_free < 1)) {
1347                 pr_debug("Can't add buf len 1 - avail = 0\n");
1348                 kfree(desc);
1349                 END_USE(vq);
1350                 return -ENOSPC;
1351         }
1352
1353         i = 0;
1354         id = vq->free_head;
1355         BUG_ON(id == vq->packed.vring.num);
1356
1357         for (n = 0; n < out_sgs + in_sgs; n++) {
1358                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1359                         if (vring_map_one_sg(vq, sg, n < out_sgs ?
1360                                              DMA_TO_DEVICE : DMA_FROM_DEVICE,
1361                                              &addr, &len, premapped))
1362                                 goto unmap_release;
1363
1364                         desc[i].flags = cpu_to_le16(n < out_sgs ?
1365                                                 0 : VRING_DESC_F_WRITE);
1366                         desc[i].addr = cpu_to_le64(addr);
1367                         desc[i].len = cpu_to_le32(len);
1368
1369                         if (unlikely(vq->use_dma_api)) {
1370                                 extra[i].addr = premapped ? DMA_MAPPING_ERROR : addr;
1371                                 extra[i].len = len;
1372                                 extra[i].flags = n < out_sgs ?  0 : VRING_DESC_F_WRITE;
1373                         }
1374
1375                         i++;
1376                 }
1377         }
1378
1379         /* Now that the indirect table is filled in, map it. */
1380         addr = vring_map_single(vq, desc,
1381                         total_sg * sizeof(struct vring_packed_desc),
1382                         DMA_TO_DEVICE);
1383         if (vring_mapping_error(vq, addr))
1384                 goto unmap_release;
1385
1386         vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1387         vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1388                                 sizeof(struct vring_packed_desc));
1389         vq->packed.vring.desc[head].id = cpu_to_le16(id);
1390
1391         if (vq->use_dma_api) {
1392                 vq->packed.desc_extra[id].addr = addr;
1393                 vq->packed.desc_extra[id].len = total_sg *
1394                                 sizeof(struct vring_packed_desc);
1395                 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1396                                                   vq->packed.avail_used_flags;
1397         }
1398
1399         /*
1400          * A driver MUST NOT make the first descriptor in the list
1401          * available before all subsequent descriptors comprising
1402          * the list are made available.
1403          */
1404         virtio_wmb(vq->weak_barriers);
1405         vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1406                                                 vq->packed.avail_used_flags);
1407
1408         /* We're using some buffers from the free list. */
1409         vq->vq.num_free -= 1;
1410
1411         /* Update free pointer */
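        /* Advancing past the end of the ring flips the driver's wrap counter
         * and, with it, the avail/used bits that will be written into the
         * next descriptors.
         */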
1412         n = head + 1;
1413         if (n >= vq->packed.vring.num) {
1414                 n = 0;
1415                 vq->packed.avail_wrap_counter ^= 1;
1416                 vq->packed.avail_used_flags ^=
1417                                 1 << VRING_PACKED_DESC_F_AVAIL |
1418                                 1 << VRING_PACKED_DESC_F_USED;
1419         }
1420         vq->packed.next_avail_idx = n;
1421         vq->free_head = vq->packed.desc_extra[id].next;
1422
1423         /* Store token and indirect buffer state. */
1424         vq->packed.desc_state[id].num = 1;
1425         vq->packed.desc_state[id].data = data;
1426         vq->packed.desc_state[id].indir_desc = desc;
1427         vq->packed.desc_state[id].last = id;
1428
1429         vq->num_added += 1;
1430
1431         pr_debug("Added buffer head %i to %p\n", head, vq);
1432         END_USE(vq);
1433
1434         return 0;
1435
1436 unmap_release:
1437         err_idx = i;
1438
1439         for (i = 0; i < err_idx; i++)
1440                 vring_unmap_extra_packed(vq, &extra[i]);
1441
1442         kfree(desc);
1443
1444         END_USE(vq);
1445         return -ENOMEM;
1446 }
1447
1448 static inline int virtqueue_add_packed(struct virtqueue *_vq,
1449                                        struct scatterlist *sgs[],
1450                                        unsigned int total_sg,
1451                                        unsigned int out_sgs,
1452                                        unsigned int in_sgs,
1453                                        void *data,
1454                                        void *ctx,
1455                                        bool premapped,
1456                                        gfp_t gfp)
1457 {
1458         struct vring_virtqueue *vq = to_vvq(_vq);
1459         struct vring_packed_desc *desc;
1460         struct scatterlist *sg;
1461         unsigned int i, n, c, descs_used, err_idx, len;
1462         __le16 head_flags, flags;
1463         u16 head, id, prev, curr, avail_used_flags;
1464         int err;
1465
1466         START_USE(vq);
1467
1468         BUG_ON(data == NULL);
1469         BUG_ON(ctx && vq->indirect);
1470
1471         if (unlikely(vq->broken)) {
1472                 END_USE(vq);
1473                 return -EIO;
1474         }
1475
1476         LAST_ADD_TIME_UPDATE(vq);
1477
1478         BUG_ON(total_sg == 0);
1479
1480         if (virtqueue_use_indirect(vq, total_sg)) {
1481                 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
1482                                                     in_sgs, data, premapped, gfp);
1483                 if (err != -ENOMEM) {
1484                         END_USE(vq);
1485                         return err;
1486                 }
1487
1488                 /* fall back on direct */
1489         }
1490
1491         head = vq->packed.next_avail_idx;
1492         avail_used_flags = vq->packed.avail_used_flags;
1493
1494         WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1495
1496         desc = vq->packed.vring.desc;
1497         i = head;
1498         descs_used = total_sg;
1499
1500         if (unlikely(vq->vq.num_free < descs_used)) {
1501                 pr_debug("Can't add buf len %i - avail = %i\n",
1502                          descs_used, vq->vq.num_free);
1503                 END_USE(vq);
1504                 return -ENOSPC;
1505         }
1506
1507         id = vq->free_head;
1508         BUG_ON(id == vq->packed.vring.num);
1509
1510         curr = id;
1511         c = 0;
1512         for (n = 0; n < out_sgs + in_sgs; n++) {
1513                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1514                         dma_addr_t addr;
1515
1516                         if (vring_map_one_sg(vq, sg, n < out_sgs ?
1517                                              DMA_TO_DEVICE : DMA_FROM_DEVICE,
1518                                              &addr, &len, premapped))
1519                                 goto unmap_release;
1520
1521                         flags = cpu_to_le16(vq->packed.avail_used_flags |
1522                                     (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1523                                     (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1524                         if (i == head)
1525                                 head_flags = flags;
1526                         else
1527                                 desc[i].flags = flags;
1528
1529                         desc[i].addr = cpu_to_le64(addr);
1530                         desc[i].len = cpu_to_le32(len);
1531                         desc[i].id = cpu_to_le16(id);
1532
1533                         if (unlikely(vq->use_dma_api)) {
1534                                 vq->packed.desc_extra[curr].addr = premapped ?
1535                                         DMA_MAPPING_ERROR : addr;
1536                                 vq->packed.desc_extra[curr].len = len;
1537                                 vq->packed.desc_extra[curr].flags =
1538                                         le16_to_cpu(flags);
1539                         }
1540                         prev = curr;
1541                         curr = vq->packed.desc_extra[curr].next;
1542
1543                         if (unlikely(++i >= vq->packed.vring.num)) {
1544                                 i = 0;
1545                                 vq->packed.avail_used_flags ^=
1546                                         1 << VRING_PACKED_DESC_F_AVAIL |
1547                                         1 << VRING_PACKED_DESC_F_USED;
1548                         }
1549                 }
1550         }
1551
1552         if (i <= head)
1553                 vq->packed.avail_wrap_counter ^= 1;
1554
1555         /* We're using some buffers from the free list. */
1556         vq->vq.num_free -= descs_used;
1557
1558         /* Update free pointer */
1559         vq->packed.next_avail_idx = i;
1560         vq->free_head = curr;
1561
1562         /* Store token. */
1563         vq->packed.desc_state[id].num = descs_used;
1564         vq->packed.desc_state[id].data = data;
1565         vq->packed.desc_state[id].indir_desc = ctx;
1566         vq->packed.desc_state[id].last = prev;
1567
1568         /*
1569          * A driver MUST NOT make the first descriptor in the list
1570          * available before all subsequent descriptors comprising
1571          * the list are made available.
1572          */
1573         virtio_wmb(vq->weak_barriers);
1574         vq->packed.vring.desc[head].flags = head_flags;
1575         vq->num_added += descs_used;
1576
1577         pr_debug("Added buffer head %i to %p\n", head, vq);
1578         END_USE(vq);
1579
1580         return 0;
1581
1582 unmap_release:
1583         err_idx = i;
1584         i = head;
1585         curr = vq->free_head;
1586
1587         vq->packed.avail_used_flags = avail_used_flags;
1588
1589         for (n = 0; n < total_sg; n++) {
1590                 if (i == err_idx)
1591                         break;
1592                 vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
1593                 curr = vq->packed.desc_extra[curr].next;
1594                 i++;
1595                 if (i >= vq->packed.vring.num)
1596                         i = 0;
1597         }
1598
1599         END_USE(vq);
1600         return -EIO;
1601 }
1602
1603 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1604 {
1605         struct vring_virtqueue *vq = to_vvq(_vq);
1606         u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1607         bool needs_kick;
1608         union {
1609                 struct {
1610                         __le16 off_wrap;
1611                         __le16 flags;
1612                 };
1613                 u32 u32;
1614         } snapshot;
1615
1616         START_USE(vq);
1617
1618         /*
1619          * We need to expose the new flags value before checking notification
1620          * suppressions.
1621          */
1622         virtio_mb(vq->weak_barriers);
1623
1624         old = vq->packed.next_avail_idx - vq->num_added;
1625         new = vq->packed.next_avail_idx;
1626         vq->num_added = 0;
1627
1628         snapshot.u32 = *(u32 *)vq->packed.vring.device;
1629         flags = le16_to_cpu(snapshot.flags);
1630
1631         LAST_ADD_TIME_CHECK(vq);
1632         LAST_ADD_TIME_INVALID(vq);
1633
1634         if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1635                 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1636                 goto out;
1637         }
1638
1639         off_wrap = le16_to_cpu(snapshot.off_wrap);
1640
1641         wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1642         event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1643         if (wrap_counter != vq->packed.avail_wrap_counter)
1644                 event_idx -= vq->packed.vring.num;
1645
1646         needs_kick = vring_need_event(event_idx, new, old);
1647 out:
1648         END_USE(vq);
1649         return needs_kick;
1650 }
1651
1652 static void detach_buf_packed(struct vring_virtqueue *vq,
1653                               unsigned int id, void **ctx)
1654 {
1655         struct vring_desc_state_packed *state = NULL;
1656         struct vring_packed_desc *desc;
1657         unsigned int i, curr;
1658
1659         state = &vq->packed.desc_state[id];
1660
1661         /* Clear data ptr. */
1662         state->data = NULL;
1663
1664         vq->packed.desc_extra[state->last].next = vq->free_head;
1665         vq->free_head = id;
1666         vq->vq.num_free += state->num;
1667
1668         if (unlikely(vq->use_dma_api)) {
1669                 curr = id;
1670                 for (i = 0; i < state->num; i++) {
1671                         vring_unmap_extra_packed(vq,
1672                                                  &vq->packed.desc_extra[curr]);
1673                         curr = vq->packed.desc_extra[curr].next;
1674                 }
1675         }
1676
1677         if (vq->indirect) {
1678                 struct vring_desc_extra *extra;
1679                 u32 len, num;
1680
1681                 /* Free the indirect table, if any, now that it's unmapped. */
1682                 desc = state->indir_desc;
1683                 if (!desc)
1684                         return;
1685
1686                 if (vq->use_dma_api) {
1687                         len = vq->packed.desc_extra[id].len;
1688                         num = len / sizeof(struct vring_packed_desc);
1689
1690                         extra = (struct vring_desc_extra *)&desc[num];
1691
1692                         for (i = 0; i < num; i++)
1693                                 vring_unmap_extra_packed(vq, &extra[i]);
1694                 }
1695                 kfree(desc);
1696                 state->indir_desc = NULL;
1697         } else if (ctx) {
1698                 *ctx = state->indir_desc;
1699         }
1700 }
1701
1702 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1703                                        u16 idx, bool used_wrap_counter)
1704 {
1705         bool avail, used;
1706         u16 flags;
1707
1708         flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1709         avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1710         used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1711
1712         return avail == used && used == used_wrap_counter;
1713 }
1714
1715 static bool more_used_packed(const struct vring_virtqueue *vq)
1716 {
1717         u16 last_used;
1718         u16 last_used_idx;
1719         bool used_wrap_counter;
1720
1721         last_used_idx = READ_ONCE(vq->last_used_idx);
1722         last_used = packed_last_used(last_used_idx);
1723         used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1724         return is_used_desc_packed(vq, last_used, used_wrap_counter);
1725 }
1726
1727 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
1728                                           unsigned int *len,
1729                                           void **ctx)
1730 {
1731         struct vring_virtqueue *vq = to_vvq(_vq);
1732         u16 last_used, id, last_used_idx;
1733         bool used_wrap_counter;
1734         void *ret;
1735
1736         START_USE(vq);
1737
1738         if (unlikely(vq->broken)) {
1739                 END_USE(vq);
1740                 return NULL;
1741         }
1742
1743         if (!more_used_packed(vq)) {
1744                 pr_debug("No more buffers in queue\n");
1745                 END_USE(vq);
1746                 return NULL;
1747         }
1748
1749         /* Only get used elements after they have been exposed by host. */
1750         virtio_rmb(vq->weak_barriers);
1751
1752         last_used_idx = READ_ONCE(vq->last_used_idx);
1753         used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1754         last_used = packed_last_used(last_used_idx);
1755         id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1756         *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
1757
1758         if (unlikely(id >= vq->packed.vring.num)) {
1759                 BAD_RING(vq, "id %u out of range\n", id);
1760                 return NULL;
1761         }
1762         if (unlikely(!vq->packed.desc_state[id].data)) {
1763                 BAD_RING(vq, "id %u is not a head!\n", id);
1764                 return NULL;
1765         }
1766
1767         /* detach_buf_packed clears data, so grab it now. */
1768         ret = vq->packed.desc_state[id].data;
1769         detach_buf_packed(vq, id, ctx);
1770
1771         last_used += vq->packed.desc_state[id].num;
1772         if (unlikely(last_used >= vq->packed.vring.num)) {
1773                 last_used -= vq->packed.vring.num;
1774                 used_wrap_counter ^= 1;
1775         }
1776
1777         last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1778         WRITE_ONCE(vq->last_used_idx, last_used);
1779
1780         /*
1781          * If we expect an interrupt for the next entry, tell host
1782          * by writing event index and flush out the write before
1783          * the read in the next get_buf call.
1784          */
1785         if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1786                 virtio_store_mb(vq->weak_barriers,
1787                                 &vq->packed.vring.driver->off_wrap,
1788                                 cpu_to_le16(vq->last_used_idx));
1789
1790         LAST_ADD_TIME_INVALID(vq);
1791
1792         END_USE(vq);
1793         return ret;
1794 }
1795
1796 static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1797 {
1798         struct vring_virtqueue *vq = to_vvq(_vq);
1799
1800         if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1801                 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1802
1803                 /*
1804                  * If the device has already triggered an event it won't trigger one again:
1805                  * no need to disable.
1806                  */
1807                 if (vq->event_triggered)
1808                         return;
1809
1810                 vq->packed.vring.driver->flags =
1811                         cpu_to_le16(vq->packed.event_flags_shadow);
1812         }
1813 }
1814
1815 static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1816 {
1817         struct vring_virtqueue *vq = to_vvq(_vq);
1818
1819         START_USE(vq);
1820
1821         /*
1822          * We optimistically turn back on interrupts, then check if there was
1823          * more to do.
1824          */
1825
1826         if (vq->event) {
1827                 vq->packed.vring.driver->off_wrap =
1828                         cpu_to_le16(vq->last_used_idx);
1829                 /*
1830                  * We need to update event offset and event wrap
1831                  * counter first before updating event flags.
1832                  */
1833                 virtio_wmb(vq->weak_barriers);
1834         }
1835
1836         if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1837                 vq->packed.event_flags_shadow = vq->event ?
1838                                 VRING_PACKED_EVENT_FLAG_DESC :
1839                                 VRING_PACKED_EVENT_FLAG_ENABLE;
1840                 vq->packed.vring.driver->flags =
1841                                 cpu_to_le16(vq->packed.event_flags_shadow);
1842         }
1843
1844         END_USE(vq);
1845         return vq->last_used_idx;
1846 }
1847
1848 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1849 {
1850         struct vring_virtqueue *vq = to_vvq(_vq);
1851         bool wrap_counter;
1852         u16 used_idx;
1853
1854         wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1855         used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1856
1857         return is_used_desc_packed(vq, used_idx, wrap_counter);
1858 }
1859
1860 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1861 {
1862         struct vring_virtqueue *vq = to_vvq(_vq);
1863         u16 used_idx, wrap_counter, last_used_idx;
1864         u16 bufs;
1865
1866         START_USE(vq);
1867
1868         /*
1869          * We optimistically turn back on interrupts, then check if there was
1870          * more to do.
1871          */
1872
1873         if (vq->event) {
1874                 /* TODO: tune this threshold */
1875                 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
1876                 last_used_idx = READ_ONCE(vq->last_used_idx);
1877                 wrap_counter = packed_used_wrap_counter(last_used_idx);
1878
1879                 used_idx = packed_last_used(last_used_idx) + bufs;
1880                 if (used_idx >= vq->packed.vring.num) {
1881                         used_idx -= vq->packed.vring.num;
1882                         wrap_counter ^= 1;
1883                 }
1884
1885                 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1886                         (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1887
1888                 /*
1889                  * We need to update event offset and event wrap
1890                  * counter first before updating event flags.
1891                  */
1892                 virtio_wmb(vq->weak_barriers);
1893         }
1894
1895         if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1896                 vq->packed.event_flags_shadow = vq->event ?
1897                                 VRING_PACKED_EVENT_FLAG_DESC :
1898                                 VRING_PACKED_EVENT_FLAG_ENABLE;
1899                 vq->packed.vring.driver->flags =
1900                                 cpu_to_le16(vq->packed.event_flags_shadow);
1901         }
1902
1903         /*
1904          * We need to update event suppression structure first
1905          * before re-checking for more used buffers.
1906          */
1907         virtio_mb(vq->weak_barriers);
1908
1909         last_used_idx = READ_ONCE(vq->last_used_idx);
1910         wrap_counter = packed_used_wrap_counter(last_used_idx);
1911         used_idx = packed_last_used(last_used_idx);
1912         if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
1913                 END_USE(vq);
1914                 return false;
1915         }
1916
1917         END_USE(vq);
1918         return true;
1919 }
1920
1921 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1922 {
1923         struct vring_virtqueue *vq = to_vvq(_vq);
1924         unsigned int i;
1925         void *buf;
1926
1927         START_USE(vq);
1928
1929         for (i = 0; i < vq->packed.vring.num; i++) {
1930                 if (!vq->packed.desc_state[i].data)
1931                         continue;
1932                 /* detach_buf clears data, so grab it now. */
1933                 buf = vq->packed.desc_state[i].data;
1934                 detach_buf_packed(vq, i, NULL);
1935                 END_USE(vq);
1936                 return buf;
1937         }
1938         /* That should have freed everything. */
1939         BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1940
1941         END_USE(vq);
1942         return NULL;
1943 }
1944
1945 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num)
1946 {
1947         struct vring_desc_extra *desc_extra;
1948         unsigned int i;
1949
1950         desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
1951                                    GFP_KERNEL);
1952         if (!desc_extra)
1953                 return NULL;
1954
1955         memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
1956
1957         for (i = 0; i < num - 1; i++)
1958                 desc_extra[i].next = i + 1;
1959
1960         return desc_extra;
1961 }
1962
1963 static void vring_free_packed(struct vring_virtqueue_packed *vring_packed,
1964                               struct virtio_device *vdev,
1965                               struct device *dma_dev)
1966 {
1967         if (vring_packed->vring.desc)
1968                 vring_free_queue(vdev, vring_packed->ring_size_in_bytes,
1969                                  vring_packed->vring.desc,
1970                                  vring_packed->ring_dma_addr,
1971                                  dma_dev);
1972
1973         if (vring_packed->vring.driver)
1974                 vring_free_queue(vdev, vring_packed->event_size_in_bytes,
1975                                  vring_packed->vring.driver,
1976                                  vring_packed->driver_event_dma_addr,
1977                                  dma_dev);
1978
1979         if (vring_packed->vring.device)
1980                 vring_free_queue(vdev, vring_packed->event_size_in_bytes,
1981                                  vring_packed->vring.device,
1982                                  vring_packed->device_event_dma_addr,
1983                                  dma_dev);
1984
1985         kfree(vring_packed->desc_state);
1986         kfree(vring_packed->desc_extra);
1987 }
1988
1989 static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed,
1990                                     struct virtio_device *vdev,
1991                                     u32 num, struct device *dma_dev)
1992 {
1993         struct vring_packed_desc *ring;
1994         struct vring_packed_desc_event *driver, *device;
1995         dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1996         size_t ring_size_in_bytes, event_size_in_bytes;
1997
1998         ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1999
2000         ring = vring_alloc_queue(vdev, ring_size_in_bytes,
2001                                  &ring_dma_addr,
2002                                  GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
2003                                  dma_dev);
2004         if (!ring)
2005                 goto err;
2006
2007         vring_packed->vring.desc         = ring;
2008         vring_packed->ring_dma_addr      = ring_dma_addr;
2009         vring_packed->ring_size_in_bytes = ring_size_in_bytes;
2010
2011         event_size_in_bytes = sizeof(struct vring_packed_desc_event);
2012
2013         driver = vring_alloc_queue(vdev, event_size_in_bytes,
2014                                    &driver_event_dma_addr,
2015                                    GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
2016                                    dma_dev);
2017         if (!driver)
2018                 goto err;
2019
2020         vring_packed->vring.driver          = driver;
2021         vring_packed->event_size_in_bytes   = event_size_in_bytes;
2022         vring_packed->driver_event_dma_addr = driver_event_dma_addr;
2023
2024         device = vring_alloc_queue(vdev, event_size_in_bytes,
2025                                    &device_event_dma_addr,
2026                                    GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
2027                                    dma_dev);
2028         if (!device)
2029                 goto err;
2030
2031         vring_packed->vring.device          = device;
2032         vring_packed->device_event_dma_addr = device_event_dma_addr;
2033
2034         vring_packed->vring.num = num;
2035
2036         return 0;
2037
2038 err:
2039         vring_free_packed(vring_packed, vdev, dma_dev);
2040         return -ENOMEM;
2041 }
2042
2043 static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed)
2044 {
2045         struct vring_desc_state_packed *state;
2046         struct vring_desc_extra *extra;
2047         u32 num = vring_packed->vring.num;
2048
2049         state = kmalloc_array(num, sizeof(struct vring_desc_state_packed), GFP_KERNEL);
2050         if (!state)
2051                 goto err_desc_state;
2052
2053         memset(state, 0, num * sizeof(struct vring_desc_state_packed));
2054
2055         extra = vring_alloc_desc_extra(num);
2056         if (!extra)
2057                 goto err_desc_extra;
2058
2059         vring_packed->desc_state = state;
2060         vring_packed->desc_extra = extra;
2061
2062         return 0;
2063
2064 err_desc_extra:
2065         kfree(state);
2066 err_desc_state:
2067         return -ENOMEM;
2068 }
2069
2070 static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed,
2071                                         bool callback)
2072 {
2073         vring_packed->next_avail_idx = 0;
2074         vring_packed->avail_wrap_counter = 1;
2075         vring_packed->event_flags_shadow = 0;
2076         vring_packed->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
2077
2078         /* No callback?  Tell other side not to bother us. */
2079         if (!callback) {
2080                 vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
2081                 vring_packed->vring.driver->flags =
2082                         cpu_to_le16(vring_packed->event_flags_shadow);
2083         }
2084 }
2085
2086 static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq,
2087                                           struct vring_virtqueue_packed *vring_packed)
2088 {
2089         vq->packed = *vring_packed;
2090
2091         /* Put everything in free lists. */
2092         vq->free_head = 0;
2093 }
2094
2095 static void virtqueue_reinit_packed(struct vring_virtqueue *vq)
2096 {
2097         memset(vq->packed.vring.device, 0, vq->packed.event_size_in_bytes);
2098         memset(vq->packed.vring.driver, 0, vq->packed.event_size_in_bytes);
2099
2100         /* we need to reset the desc.flags. For more, see is_used_desc_packed() */
2101         memset(vq->packed.vring.desc, 0, vq->packed.ring_size_in_bytes);
2102
2103         virtqueue_init(vq, vq->packed.vring.num);
2104         virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback);
2105 }
2106
2107 static struct virtqueue *__vring_new_virtqueue_packed(unsigned int index,
2108                                                struct vring_virtqueue_packed *vring_packed,
2109                                                struct virtio_device *vdev,
2110                                                bool weak_barriers,
2111                                                bool context,
2112                                                bool (*notify)(struct virtqueue *),
2113                                                void (*callback)(struct virtqueue *),
2114                                                const char *name,
2115                                                struct device *dma_dev)
2116 {
2117         struct vring_virtqueue *vq;
2118         int err;
2119
2120         vq = kmalloc(sizeof(*vq), GFP_KERNEL);
2121         if (!vq)
2122                 return NULL;
2123
2124         vq->vq.callback = callback;
2125         vq->vq.vdev = vdev;
2126         vq->vq.name = name;
2127         vq->vq.index = index;
2128         vq->vq.reset = false;
2129         vq->we_own_ring = false;
2130         vq->notify = notify;
2131         vq->weak_barriers = weak_barriers;
2132 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2133         vq->broken = true;
2134 #else
2135         vq->broken = false;
2136 #endif
2137         vq->packed_ring = true;
2138         vq->dma_dev = dma_dev;
2139         vq->use_dma_api = vring_use_dma_api(vdev);
2140
2141         vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2142                 !context;
2143         vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2144
2145         if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2146                 vq->weak_barriers = false;
2147
2148         err = vring_alloc_state_extra_packed(vring_packed);
2149         if (err) {
2150                 kfree(vq);
2151                 return NULL;
2152         }
2153
2154         virtqueue_vring_init_packed(vring_packed, !!callback);
2155
2156         virtqueue_init(vq, vring_packed->vring.num);
2157         virtqueue_vring_attach_packed(vq, vring_packed);
2158
2159         spin_lock(&vdev->vqs_list_lock);
2160         list_add_tail(&vq->vq.list, &vdev->vqs);
2161         spin_unlock(&vdev->vqs_list_lock);
2162         return &vq->vq;
2163 }
2164
2165 static struct virtqueue *vring_create_virtqueue_packed(
2166         unsigned int index,
2167         unsigned int num,
2168         unsigned int vring_align,
2169         struct virtio_device *vdev,
2170         bool weak_barriers,
2171         bool may_reduce_num,
2172         bool context,
2173         bool (*notify)(struct virtqueue *),
2174         void (*callback)(struct virtqueue *),
2175         const char *name,
2176         struct device *dma_dev)
2177 {
2178         struct vring_virtqueue_packed vring_packed = {};
2179         struct virtqueue *vq;
2180
2181         if (vring_alloc_queue_packed(&vring_packed, vdev, num, dma_dev))
2182                 return NULL;
2183
2184         vq = __vring_new_virtqueue_packed(index, &vring_packed, vdev, weak_barriers,
2185                                         context, notify, callback, name, dma_dev);
2186         if (!vq) {
2187                 vring_free_packed(&vring_packed, vdev, dma_dev);
2188                 return NULL;
2189         }
2190
2191         to_vvq(vq)->we_own_ring = true;
2192
2193         return vq;
2194 }
2195
2196 static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num)
2197 {
2198         struct vring_virtqueue_packed vring_packed = {};
2199         struct vring_virtqueue *vq = to_vvq(_vq);
2200         struct virtio_device *vdev = _vq->vdev;
2201         int err;
2202
2203         if (vring_alloc_queue_packed(&vring_packed, vdev, num, vring_dma_dev(vq)))
2204                 goto err_ring;
2205
2206         err = vring_alloc_state_extra_packed(&vring_packed);
2207         if (err)
2208                 goto err_state_extra;
2209
2210         vring_free(&vq->vq);
2211
2212         virtqueue_vring_init_packed(&vring_packed, !!vq->vq.callback);
2213
2214         virtqueue_init(vq, vring_packed.vring.num);
2215         virtqueue_vring_attach_packed(vq, &vring_packed);
2216
2217         return 0;
2218
2219 err_state_extra:
2220         vring_free_packed(&vring_packed, vdev, vring_dma_dev(vq));
2221 err_ring:
2222         virtqueue_reinit_packed(vq);
2223         return -ENOMEM;
2224 }
2225
2226 static int virtqueue_disable_and_recycle(struct virtqueue *_vq,
2227                                          void (*recycle)(struct virtqueue *vq, void *buf))
2228 {
2229         struct vring_virtqueue *vq = to_vvq(_vq);
2230         struct virtio_device *vdev = vq->vq.vdev;
2231         void *buf;
2232         int err;
2233
2234         if (!vq->we_own_ring)
2235                 return -EPERM;
2236
2237         if (!vdev->config->disable_vq_and_reset)
2238                 return -ENOENT;
2239
2240         if (!vdev->config->enable_vq_after_reset)
2241                 return -ENOENT;
2242
2243         err = vdev->config->disable_vq_and_reset(_vq);
2244         if (err)
2245                 return err;
2246
2247         while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
2248                 recycle(_vq, buf);
2249
2250         return 0;
2251 }
2252
2253 static int virtqueue_enable_after_reset(struct virtqueue *_vq)
2254 {
2255         struct vring_virtqueue *vq = to_vvq(_vq);
2256         struct virtio_device *vdev = vq->vq.vdev;
2257
2258         if (vdev->config->enable_vq_after_reset(_vq))
2259                 return -EBUSY;
2260
2261         return 0;
2262 }
2263
2264 /*
2265  * Generic functions and exported symbols.
2266  */
2267
2268 static inline int virtqueue_add(struct virtqueue *_vq,
2269                                 struct scatterlist *sgs[],
2270                                 unsigned int total_sg,
2271                                 unsigned int out_sgs,
2272                                 unsigned int in_sgs,
2273                                 void *data,
2274                                 void *ctx,
2275                                 bool premapped,
2276                                 gfp_t gfp)
2277 {
2278         struct vring_virtqueue *vq = to_vvq(_vq);
2279
2280         return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
2281                                         out_sgs, in_sgs, data, ctx, premapped, gfp) :
2282                                  virtqueue_add_split(_vq, sgs, total_sg,
2283                                         out_sgs, in_sgs, data, ctx, premapped, gfp);
2284 }
2285
2286 /**
2287  * virtqueue_add_sgs - expose buffers to other end
2288  * @_vq: the struct virtqueue we're talking about.
2289  * @sgs: array of terminated scatterlists.
2290  * @out_sgs: the number of scatterlists readable by other side
2291  * @in_sgs: the number of scatterlists which are writable (after readable ones)
2292  * @data: the token identifying the buffer.
2293  * @gfp: how to do memory allocations (if necessary).
2294  *
2295  * Caller must ensure we don't call this with other virtqueue operations
2296  * at the same time (except where noted).
2297  *
2298  * Returns zero or a negative error (i.e. -ENOSPC, -ENOMEM, -EIO).
2299  */
2300 int virtqueue_add_sgs(struct virtqueue *_vq,
2301                       struct scatterlist *sgs[],
2302                       unsigned int out_sgs,
2303                       unsigned int in_sgs,
2304                       void *data,
2305                       gfp_t gfp)
2306 {
2307         unsigned int i, total_sg = 0;
2308
2309         /* Count them first. */
2310         for (i = 0; i < out_sgs + in_sgs; i++) {
2311                 struct scatterlist *sg;
2312
2313                 for (sg = sgs[i]; sg; sg = sg_next(sg))
2314                         total_sg++;
2315         }
2316         return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
2317                              data, NULL, false, gfp);
2318 }
2319 EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
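
/*
 * Usage sketch, not part of this file: submitting a request that the device
 * reads a header from and writes a status byte into.  "struct my_req" and
 * my_submit_req() are hypothetical; a real driver defines its own request
 * layout and must allocate it from DMA-able memory (e.g. kmalloc), never on
 * the stack.
 */
struct my_req {
        __le32 type;            /* device-readable header (example field) */
        u8 status;              /* device-writable completion status */
};

static int my_submit_req(struct virtqueue *vq, struct my_req *req)
{
        struct scatterlist hdr_sg, status_sg;
        struct scatterlist *sgs[] = { &hdr_sg, &status_sg };

        sg_init_one(&hdr_sg, &req->type, sizeof(req->type));
        sg_init_one(&status_sg, &req->status, sizeof(req->status));

        /* One readable sg list, then one writable sg list; req is the token. */
        return virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);
}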
2320
2321 /**
2322  * virtqueue_add_outbuf - expose output buffers to other end
2323  * @vq: the struct virtqueue we're talking about.
2324  * @sg: scatterlist (must be well-formed and terminated!)
2325  * @num: the number of entries in @sg readable by other side
2326  * @data: the token identifying the buffer.
2327  * @gfp: how to do memory allocations (if necessary).
2328  *
2329  * Caller must ensure we don't call this with other virtqueue operations
2330  * at the same time (except where noted).
2331  *
2332  * Returns zero or a negative error (i.e. -ENOSPC, -ENOMEM, -EIO).
2333  */
2334 int virtqueue_add_outbuf(struct virtqueue *vq,
2335                          struct scatterlist *sg, unsigned int num,
2336                          void *data,
2337                          gfp_t gfp)
2338 {
2339         return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, false, gfp);
2340 }
2341 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
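
/*
 * Usage sketch, not part of this file: queueing one kmalloc'ed output buffer
 * and kicking the device.  "buf" and "len" are assumed to come from the
 * calling driver; my_send_buf() is a hypothetical helper.
 */
static bool my_send_buf(struct virtqueue *vq, void *buf, unsigned int len)
{
        struct scatterlist sg;

        sg_init_one(&sg, buf, len);

        /* A non-zero return means the ring is full (-ENOSPC) or broken (-EIO). */
        if (virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC))
                return false;

        virtqueue_kick(vq);
        return true;
}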
2342
2343 /**
2344  * virtqueue_add_outbuf_premapped - expose output buffers to other end
2345  * @vq: the struct virtqueue we're talking about.
2346  * @sg: scatterlist (must be well-formed and terminated!)
2347  * @num: the number of entries in @sg readable by other side
2348  * @data: the token identifying the buffer.
2349  * @gfp: how to do memory allocations (if necessary).
2350  *
2351  * Caller must ensure we don't call this with other virtqueue operations
2352  * at the same time (except where noted).
2353  *
2354  * Return:
2355  * Returns zero or a negative error (i.e. -ENOSPC, -ENOMEM, -EIO).
2356  */
2357 int virtqueue_add_outbuf_premapped(struct virtqueue *vq,
2358                                    struct scatterlist *sg, unsigned int num,
2359                                    void *data,
2360                                    gfp_t gfp)
2361 {
2362         return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, true, gfp);
2363 }
2364 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf_premapped);
2365
2366 /**
2367  * virtqueue_add_inbuf - expose input buffers to other end
2368  * @vq: the struct virtqueue we're talking about.
2369  * @sg: scatterlist (must be well-formed and terminated!)
2370  * @num: the number of entries in @sg writable by other side
2371  * @data: the token identifying the buffer.
2372  * @gfp: how to do memory allocations (if necessary).
2373  *
2374  * Caller must ensure we don't call this with other virtqueue operations
2375  * at the same time (except where noted).
2376  *
2377  * Returns zero or a negative error (i.e. -ENOSPC, -ENOMEM, -EIO).
2378  */
2379 int virtqueue_add_inbuf(struct virtqueue *vq,
2380                         struct scatterlist *sg, unsigned int num,
2381                         void *data,
2382                         gfp_t gfp)
2383 {
2384         return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, false, gfp);
2385 }
2386 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
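
/*
 * Usage sketch, not part of this file: refilling a receive queue with
 * device-writable buffers until the ring is full.  my_fill_rx() is
 * hypothetical and PAGE_SIZE is only an example buffer size.
 */
static void my_fill_rx(struct virtqueue *vq)
{
        struct scatterlist sg;
        void *buf;

        for (;;) {
                buf = kmalloc(PAGE_SIZE, GFP_ATOMIC);
                if (!buf)
                        break;

                sg_init_one(&sg, buf, PAGE_SIZE);
                if (virtqueue_add_inbuf(vq, &sg, 1, buf, GFP_ATOMIC)) {
                        kfree(buf);     /* -ENOSPC: the ring is full */
                        break;
                }
        }

        virtqueue_kick(vq);
}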
2387
2388 /**
2389  * virtqueue_add_inbuf_ctx - expose input buffers to other end
2390  * @vq: the struct virtqueue we're talking about.
2391  * @sg: scatterlist (must be well-formed and terminated!)
2392  * @num: the number of entries in @sg writable by other side
2393  * @data: the token identifying the buffer.
2394  * @ctx: extra context for the token
2395  * @gfp: how to do memory allocations (if necessary).
2396  *
2397  * Caller must ensure we don't call this with other virtqueue operations
2398  * at the same time (except where noted).
2399  *
2400  * Returns zero or a negative error (i.e. -ENOSPC, -ENOMEM, -EIO).
2401  */
2402 int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
2403                         struct scatterlist *sg, unsigned int num,
2404                         void *data,
2405                         void *ctx,
2406                         gfp_t gfp)
2407 {
2408         return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, false, gfp);
2409 }
2410 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
2411
2412 /**
2413  * virtqueue_add_inbuf_premapped - expose input buffers to other end
2414  * @vq: the struct virtqueue we're talking about.
2415  * @sg: scatterlist (must be well-formed and terminated!)
2416  * @num: the number of entries in @sg writable by other side
2417  * @data: the token identifying the buffer.
2418  * @ctx: extra context for the token
2419  * @gfp: how to do memory allocations (if necessary).
2420  *
2421  * Caller must ensure we don't call this with other virtqueue operations
2422  * at the same time (except where noted).
2423  *
2424  * Return:
2425  * Returns zero or a negative error (i.e. -ENOSPC, -ENOMEM, -EIO).
2426  */
2427 int virtqueue_add_inbuf_premapped(struct virtqueue *vq,
2428                                   struct scatterlist *sg, unsigned int num,
2429                                   void *data,
2430                                   void *ctx,
2431                                   gfp_t gfp)
2432 {
2433         return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, true, gfp);
2434 }
2435 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_premapped);
2436
2437 /**
2438  * virtqueue_dma_dev - get the dma dev
2439  * @_vq: the struct virtqueue we're talking about.
2440  *
2441  * Returns the DMA device, which can be used with the DMA API.
2442  */
2443 struct device *virtqueue_dma_dev(struct virtqueue *_vq)
2444 {
2445         struct vring_virtqueue *vq = to_vvq(_vq);
2446
2447         if (vq->use_dma_api)
2448                 return vring_dma_dev(vq);
2449         else
2450                 return NULL;
2451 }
2452 EXPORT_SYMBOL_GPL(virtqueue_dma_dev);
2453
2454 /**
2455  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
2456  * @_vq: the struct virtqueue
2457  *
2458  * Instead of virtqueue_kick(), you can do:
2459  *      if (virtqueue_kick_prepare(vq))
2460  *              virtqueue_notify(vq);
2461  *
2462  * This is sometimes useful because virtqueue_kick_prepare() needs
2463  * to be serialized, but the actual virtqueue_notify() call does not.
2464  */
2465 bool virtqueue_kick_prepare(struct virtqueue *_vq)
2466 {
2467         struct vring_virtqueue *vq = to_vvq(_vq);
2468
2469         return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
2470                                  virtqueue_kick_prepare_split(_vq);
2471 }
2472 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
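
/*
 * Usage sketch, not part of this file: the two-step kick with the driver's
 * own lock dropped before the (possibly slow) notification.  "lock", "sg"
 * and "token" are assumed to be owned by the caller; my_add_and_kick() is a
 * hypothetical helper.
 */
static void my_add_and_kick(struct virtqueue *vq, spinlock_t *lock,
                            struct scatterlist *sg, void *token)
{
        bool notify = false;

        spin_lock(lock);
        if (!virtqueue_add_outbuf(vq, sg, 1, token, GFP_ATOMIC))
                notify = virtqueue_kick_prepare(vq);
        spin_unlock(lock);

        /* Notify outside the lock; only the prepare step needs serializing. */
        if (notify)
                virtqueue_notify(vq);
}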
2473
2474 /**
2475  * virtqueue_notify - second half of split virtqueue_kick call.
2476  * @_vq: the struct virtqueue
2477  *
2478  * This does not need to be serialized.
2479  *
2480  * Returns false if host notify failed or queue is broken, otherwise true.
2481  */
2482 bool virtqueue_notify(struct virtqueue *_vq)
2483 {
2484         struct vring_virtqueue *vq = to_vvq(_vq);
2485
2486         if (unlikely(vq->broken))
2487                 return false;
2488
2489         /* Prod other side to tell it about changes. */
2490         if (!vq->notify(_vq)) {
2491                 vq->broken = true;
2492                 return false;
2493         }
2494         return true;
2495 }
2496 EXPORT_SYMBOL_GPL(virtqueue_notify);
2497
2498 /**
2499  * virtqueue_kick - update after add_buf
2500  * @vq: the struct virtqueue
2501  *
2502  * After one or more virtqueue_add_* calls, invoke this to kick
2503  * the other side.
2504  *
2505  * Caller must ensure we don't call this with other virtqueue
2506  * operations at the same time (except where noted).
2507  *
2508  * Returns false if kick failed, otherwise true.
2509  */
2510 bool virtqueue_kick(struct virtqueue *vq)
2511 {
2512         if (virtqueue_kick_prepare(vq))
2513                 return virtqueue_notify(vq);
2514         return true;
2515 }
2516 EXPORT_SYMBOL_GPL(virtqueue_kick);
2517
2518 /**
2519  * virtqueue_get_buf_ctx - get the next used buffer
2520  * @_vq: the struct virtqueue we're talking about.
2521  * @len: the length written into the buffer
2522  * @ctx: extra context for the token
2523  *
2524  * If the device wrote data into the buffer, @len will be set to the
2525  * amount written.  This means you don't need to clear the buffer
2526  * beforehand to ensure there's no data leakage in the case of short
2527  * writes.
2528  *
2529  * Caller must ensure we don't call this with other virtqueue
2530  * operations at the same time (except where noted).
2531  *
2532  * Returns NULL if there are no used buffers, or the "data" token
2533  * handed to virtqueue_add_*().
2534  */
2535 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
2536                             void **ctx)
2537 {
2538         struct vring_virtqueue *vq = to_vvq(_vq);
2539
2540         return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
2541                                  virtqueue_get_buf_ctx_split(_vq, len, ctx);
2542 }
2543 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
2544
2545 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
2546 {
2547         return virtqueue_get_buf_ctx(_vq, len, NULL);
2548 }
2549 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
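
/*
 * Usage sketch, not part of this file: a virtqueue callback draining every
 * completed buffer.  my_rx_done() is hypothetical and simply frees each
 * buffer; a real driver would consume the data first.
 */
static void my_rx_done(struct virtqueue *vq)
{
        unsigned int len;
        void *buf;

        /* len is the number of bytes the device wrote into each buffer. */
        while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
                kfree(buf);
}
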
2550 /**
2551  * virtqueue_disable_cb - disable callbacks
2552  * @_vq: the struct virtqueue we're talking about.
2553  *
2554  * Note that this is not necessarily synchronous, hence unreliable and only
2555  * useful as an optimization.
2556  *
2557  * Unlike other operations, this need not be serialized.
2558  */
2559 void virtqueue_disable_cb(struct virtqueue *_vq)
2560 {
2561         struct vring_virtqueue *vq = to_vvq(_vq);
2562
2563         if (vq->packed_ring)
2564                 virtqueue_disable_cb_packed(_vq);
2565         else
2566                 virtqueue_disable_cb_split(_vq);
2567 }
2568 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
2569
2570 /**
2571  * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
2572  * @_vq: the struct virtqueue we're talking about.
2573  *
2574  * This re-enables callbacks; it returns current queue state
2575  * in an opaque unsigned value. This value should be later tested by
2576  * virtqueue_poll, to detect a possible race between the driver checking for
2577  * more work, and enabling callbacks.
2578  *
2579  * Caller must ensure we don't call this with other virtqueue
2580  * operations at the same time (except where noted).
2581  */
2582 unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq)
2583 {
2584         struct vring_virtqueue *vq = to_vvq(_vq);
2585
2586         if (vq->event_triggered)
2587                 vq->event_triggered = false;
2588
2589         return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
2590                                  virtqueue_enable_cb_prepare_split(_vq);
2591 }
2592 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
2593
2594 /**
2595  * virtqueue_poll - query pending used buffers
2596  * @_vq: the struct virtqueue we're talking about.
2597  * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
2598  *
2599  * Returns "true" if there are pending used buffers in the queue.
2600  *
2601  * This does not need to be serialized.
2602  */
2603 bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx)
2604 {
2605         struct vring_virtqueue *vq = to_vvq(_vq);
2606
2607         if (unlikely(vq->broken))
2608                 return false;
2609
2610         virtio_mb(vq->weak_barriers);
2611         return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
2612                                  virtqueue_poll_split(_vq, last_used_idx);
2613 }
2614 EXPORT_SYMBOL_GPL(virtqueue_poll);
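
/*
 * Usage sketch, not part of this file: processing used buffers, then
 * re-enabling callbacks with virtqueue_enable_cb_prepare() and closing the
 * race with virtqueue_poll().  my_drain_vq() is hypothetical and assumes the
 * driver already disabled callbacks before calling it.
 */
static void my_drain_vq(struct virtqueue *vq)
{
        unsigned int opaque, len;
        void *buf;

again:
        while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
                kfree(buf);

        opaque = virtqueue_enable_cb_prepare(vq);
        if (virtqueue_poll(vq, opaque)) {
                /* More buffers arrived while re-enabling; keep going. */
                virtqueue_disable_cb(vq);
                goto again;
        }
}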
2615
2616 /**
2617  * virtqueue_enable_cb - restart callbacks after disable_cb.
2618  * @_vq: the struct virtqueue we're talking about.
2619  *
2620  * This re-enables callbacks; it returns "false" if there are pending
2621  * buffers in the queue, to detect a possible race between the driver
2622  * checking for more work, and enabling callbacks.
2623  *
2624  * Caller must ensure we don't call this with other virtqueue
2625  * operations at the same time (except where noted).
2626  */
2627 bool virtqueue_enable_cb(struct virtqueue *_vq)
2628 {
2629         unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq);
2630
2631         return !virtqueue_poll(_vq, last_used_idx);
2632 }
2633 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
2634
2635 /**
2636  * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
2637  * @_vq: the struct virtqueue we're talking about.
2638  *
2639  * This re-enables callbacks but hints to the other side to delay
2640  * interrupts until most of the available buffers have been processed;
2641  * it returns "false" if there are many pending buffers in the queue,
2642  * to detect a possible race between the driver checking for more work,
2643  * and enabling callbacks.
2644  *
2645  * Caller must ensure we don't call this with other virtqueue
2646  * operations at the same time (except where noted).
2647  */
2648 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2649 {
2650         struct vring_virtqueue *vq = to_vvq(_vq);
2651
2652         if (vq->event_triggered)
2653                 vq->event_triggered = false;
2654
2655         return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2656                                  virtqueue_enable_cb_delayed_split(_vq);
2657 }
2658 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
2659
2660 /**
2661  * virtqueue_detach_unused_buf - detach first unused buffer
2662  * @_vq: the struct virtqueue we're talking about.
2663  *
2664  * Returns NULL or the "data" token handed to virtqueue_add_*().
2665  * This is not valid on an active queue; it is useful for device
2666  * shutdown or after a queue reset.
2667  */
2668 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
2669 {
2670         struct vring_virtqueue *vq = to_vvq(_vq);
2671
2672         return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2673                                  virtqueue_detach_unused_buf_split(_vq);
2674 }
2675 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
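
/*
 * Usage sketch, not part of this file: freeing buffers still sitting in a
 * queue during device removal, after the device has been reset.
 * my_free_unused() is a hypothetical teardown helper.
 */
static void my_free_unused(struct virtqueue *vq)
{
        void *buf;

        while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
                kfree(buf);
}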
2676
2677 static inline bool more_used(const struct vring_virtqueue *vq)
2678 {
2679         return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
2680 }
2681
2682 /**
2683  * vring_interrupt - notify a virtqueue on an interrupt
2684  * @irq: the IRQ number (ignored)
2685  * @_vq: the struct virtqueue to notify
2686  *
2687  * Calls the callback function of @_vq to process the virtqueue
2688  * notification.
2689  */
2690 irqreturn_t vring_interrupt(int irq, void *_vq)
2691 {
2692         struct vring_virtqueue *vq = to_vvq(_vq);
2693
2694         if (!more_used(vq)) {
2695                 pr_debug("virtqueue interrupt with no work for %p\n", vq);
2696                 return IRQ_NONE;
2697         }
2698
2699         if (unlikely(vq->broken)) {
2700 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2701                 dev_warn_once(&vq->vq.vdev->dev,
2702                               "virtio vring IRQ raised before DRIVER_OK");
2703                 return IRQ_NONE;
2704 #else
2705                 return IRQ_HANDLED;
2706 #endif
2707         }
2708
2709         /* Just a hint for performance: so it's ok that this can be racy! */
2710         if (vq->event)
2711                 data_race(vq->event_triggered = true);
2712
2713         pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
2714         if (vq->vq.callback)
2715                 vq->vq.callback(&vq->vq);
2716
2717         return IRQ_HANDLED;
2718 }
2719 EXPORT_SYMBOL_GPL(vring_interrupt);
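
/*
 * Usage sketch, not part of this file: a transport handing its interrupt
 * line straight to vring_interrupt().  my_request_vq_irq() is hypothetical,
 * assumes <linux/interrupt.h>, and takes "irq" and "name" from the
 * transport's probe path.
 */
static int my_request_vq_irq(unsigned int irq, const char *name,
                             struct virtqueue *vq)
{
        /* vring_interrupt() matches irq_handler_t; vq is the dev_id cookie. */
        return request_irq(irq, vring_interrupt, IRQF_SHARED, name, vq);
}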
2720
2721 struct virtqueue *vring_create_virtqueue(
2722         unsigned int index,
2723         unsigned int num,
2724         unsigned int vring_align,
2725         struct virtio_device *vdev,
2726         bool weak_barriers,
2727         bool may_reduce_num,
2728         bool context,
2729         bool (*notify)(struct virtqueue *),
2730         void (*callback)(struct virtqueue *),
2731         const char *name)
2732 {
2733
2734         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2735                 return vring_create_virtqueue_packed(index, num, vring_align,
2736                                 vdev, weak_barriers, may_reduce_num,
2737                                 context, notify, callback, name, vdev->dev.parent);
2738
2739         return vring_create_virtqueue_split(index, num, vring_align,
2740                         vdev, weak_barriers, may_reduce_num,
2741                         context, notify, callback, name, vdev->dev.parent);
2742 }
2743 EXPORT_SYMBOL_GPL(vring_create_virtqueue);
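
/*
 * Usage sketch, not part of this file: a transport allocating a 256-entry,
 * page-aligned ring.  my_notify(), my_vq_cb() and my_setup_vq() are
 * hypothetical stand-ins for the transport's kick hook, the driver callback
 * and the setup path.
 */
static bool my_notify(struct virtqueue *vq)
{
        /* e.g. write vq->index to the transport's doorbell register */
        return true;
}

static void my_vq_cb(struct virtqueue *vq)
{
        /* driver-specific completion handling */
}

static struct virtqueue *my_setup_vq(struct virtio_device *vdev,
                                     unsigned int index)
{
        return vring_create_virtqueue(index, 256, PAGE_SIZE, vdev,
                                      true /* weak_barriers */,
                                      true /* may_reduce_num */,
                                      false /* context */,
                                      my_notify, my_vq_cb, "my-vq");
}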
2744
2745 struct virtqueue *vring_create_virtqueue_dma(
2746         unsigned int index,
2747         unsigned int num,
2748         unsigned int vring_align,
2749         struct virtio_device *vdev,
2750         bool weak_barriers,
2751         bool may_reduce_num,
2752         bool context,
2753         bool (*notify)(struct virtqueue *),
2754         void (*callback)(struct virtqueue *),
2755         const char *name,
2756         struct device *dma_dev)
2757 {
2758
2759         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2760                 return vring_create_virtqueue_packed(index, num, vring_align,
2761                                 vdev, weak_barriers, may_reduce_num,
2762                                 context, notify, callback, name, dma_dev);
2763
2764         return vring_create_virtqueue_split(index, num, vring_align,
2765                         vdev, weak_barriers, may_reduce_num,
2766                         context, notify, callback, name, dma_dev);
2767 }
2768 EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma);
2769
2770 /**
2771  * virtqueue_resize - resize the vring of vq
2772  * @_vq: the struct virtqueue we're talking about.
2773  * @num: new ring size
2774  * @recycle: callback to recycle unused buffers
2775  * @recycle_done: callback invoked after all unused buffers have been recycled
2776  *
2777  * If a new vring really needs to be created, the current vq is put into the
2778  * reset state and the passed callback is invoked to recycle each buffer that
2779  * is no longer used. The old vring is released only after the new vring has
2780  * been created successfully.
2781  *
2782  * Caller must ensure we don't call this with other virtqueue operations
2783  * at the same time (except where noted).
2784  *
2785  * Returns zero or a negative error.
2786  * 0: success.
2787  * -ENOMEM: Failed to allocate a new ring; fell back to the original ring size,
2788  *  so the vq can still work normally
2789  * -EBUSY: Failed to sync with device, vq may not work properly
2790  * -ENOENT: Transport or device not supported
2791  * -E2BIG/-EINVAL: @num is invalid
2792  * -EPERM: Operation not permitted
2793  *
2794  */
2795 int virtqueue_resize(struct virtqueue *_vq, u32 num,
2796                      void (*recycle)(struct virtqueue *vq, void *buf),
2797                      void (*recycle_done)(struct virtqueue *vq))
2798 {
2799         struct vring_virtqueue *vq = to_vvq(_vq);
2800         int err;
2801
2802         if (num > vq->vq.num_max)
2803                 return -E2BIG;
2804
2805         if (!num)
2806                 return -EINVAL;
2807
2808         if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) == num)
2809                 return 0;
2810
2811         err = virtqueue_disable_and_recycle(_vq, recycle);
2812         if (err)
2813                 return err;
2814         if (recycle_done)
2815                 recycle_done(_vq);
2816
2817         if (vq->packed_ring)
2818                 err = virtqueue_resize_packed(_vq, num);
2819         else
2820                 err = virtqueue_resize_split(_vq, num);
2821
2822         return virtqueue_enable_after_reset(_vq);
2823 }
2824 EXPORT_SYMBOL_GPL(virtqueue_resize);
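
/*
 * Usage sketch, not part of this file: shrinking or growing a ring while
 * returning any still-queued buffers to the driver through the recycle hook.
 * my_recycle() and my_change_ring_size() are hypothetical; recycle_done is
 * left NULL because this sketch has no extra work to do after recycling.
 */
static void my_recycle(struct virtqueue *vq, void *buf)
{
        kfree(buf);     /* the buffer was never consumed by the device */
}

static int my_change_ring_size(struct virtqueue *vq, u32 num)
{
        return virtqueue_resize(vq, num, my_recycle, NULL);
}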
2825
2826 /**
2827  * virtqueue_reset - detach and recycle all unused buffers
2828  * @_vq: the struct virtqueue we're talking about.
2829  * @recycle: callback to recycle unused buffers
2830  * @recycle_done: callback invoked after all unused buffers have been recycled
2831  *
2832  * Caller must ensure we don't call this with other virtqueue operations
2833  * at the same time (except where noted).
2834  *
2835  * Returns zero or a negative error.
2836  * 0: success.
2837  * -EBUSY: Failed to sync with device, vq may not work properly
2838  * -ENOENT: Transport or device not supported
2839  * -EPERM: Operation not permitted
2840  */
2841 int virtqueue_reset(struct virtqueue *_vq,
2842                     void (*recycle)(struct virtqueue *vq, void *buf),
2843                     void (*recycle_done)(struct virtqueue *vq))
2844 {
2845         struct vring_virtqueue *vq = to_vvq(_vq);
2846         int err;
2847
2848         err = virtqueue_disable_and_recycle(_vq, recycle);
2849         if (err)
2850                 return err;
2851         if (recycle_done)
2852                 recycle_done(_vq);
2853
2854         if (vq->packed_ring)
2855                 virtqueue_reinit_packed(vq);
2856         else
2857                 virtqueue_reinit_split(vq);
2858
2859         return virtqueue_enable_after_reset(_vq);
2860 }
2861 EXPORT_SYMBOL_GPL(virtqueue_reset);
2862
2863 struct virtqueue *vring_new_virtqueue(unsigned int index,
2864                                       unsigned int num,
2865                                       unsigned int vring_align,
2866                                       struct virtio_device *vdev,
2867                                       bool weak_barriers,
2868                                       bool context,
2869                                       void *pages,
2870                                       bool (*notify)(struct virtqueue *vq),
2871                                       void (*callback)(struct virtqueue *vq),
2872                                       const char *name)
2873 {
2874         struct vring_virtqueue_split vring_split = {};
2875
2876         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2877                 struct vring_virtqueue_packed vring_packed = {};
2878
2879                 vring_packed.vring.num = num;
2880                 vring_packed.vring.desc = pages;
2881                 return __vring_new_virtqueue_packed(index, &vring_packed,
2882                                                     vdev, weak_barriers,
2883                                                     context, notify, callback,
2884                                                     name, vdev->dev.parent);
2885         }
2886
2887         vring_init(&vring_split.vring, num, pages, vring_align);
2888         return __vring_new_virtqueue_split(index, &vring_split, vdev, weak_barriers,
2889                                      context, notify, callback, name,
2890                                      vdev->dev.parent);
2891 }
2892 EXPORT_SYMBOL_GPL(vring_new_virtqueue);
2893
2894 static void vring_free(struct virtqueue *_vq)
2895 {
2896         struct vring_virtqueue *vq = to_vvq(_vq);
2897
2898         if (vq->we_own_ring) {
2899                 if (vq->packed_ring) {
2900                         vring_free_queue(vq->vq.vdev,
2901                                          vq->packed.ring_size_in_bytes,
2902                                          vq->packed.vring.desc,
2903                                          vq->packed.ring_dma_addr,
2904                                          vring_dma_dev(vq));
2905
2906                         vring_free_queue(vq->vq.vdev,
2907                                          vq->packed.event_size_in_bytes,
2908                                          vq->packed.vring.driver,
2909                                          vq->packed.driver_event_dma_addr,
2910                                          vring_dma_dev(vq));
2911
2912                         vring_free_queue(vq->vq.vdev,
2913                                          vq->packed.event_size_in_bytes,
2914                                          vq->packed.vring.device,
2915                                          vq->packed.device_event_dma_addr,
2916                                          vring_dma_dev(vq));
2917
2918                         kfree(vq->packed.desc_state);
2919                         kfree(vq->packed.desc_extra);
2920                 } else {
2921                         vring_free_queue(vq->vq.vdev,
2922                                          vq->split.queue_size_in_bytes,
2923                                          vq->split.vring.desc,
2924                                          vq->split.queue_dma_addr,
2925                                          vring_dma_dev(vq));
2926                 }
2927         }
2928         if (!vq->packed_ring) {
2929                 kfree(vq->split.desc_state);
2930                 kfree(vq->split.desc_extra);
2931         }
2932 }
2933
2934 void vring_del_virtqueue(struct virtqueue *_vq)
2935 {
2936         struct vring_virtqueue *vq = to_vvq(_vq);
2937
2938         spin_lock(&vq->vq.vdev->vqs_list_lock);
2939         list_del(&_vq->list);
2940         spin_unlock(&vq->vq.vdev->vqs_list_lock);
2941
2942         vring_free(_vq);
2943
2944         kfree(vq);
2945 }
2946 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
2947
2948 u32 vring_notification_data(struct virtqueue *_vq)
2949 {
2950         struct vring_virtqueue *vq = to_vvq(_vq);
2951         u16 next;
2952
2953         if (vq->packed_ring)
2954                 next = (vq->packed.next_avail_idx &
2955                                 ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR))) |
2956                         vq->packed.avail_wrap_counter <<
2957                                 VRING_PACKED_EVENT_F_WRAP_CTR;
2958         else
2959                 next = vq->split.avail_idx_shadow;
2960
2961         return next << 16 | _vq->index;
2962 }
2963 EXPORT_SYMBOL_GPL(vring_notification_data);
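
/*
 * A sketch of how a transport might decode the value returned above when
 * VIRTIO_F_NOTIFICATION_DATA is negotiated; the helper is illustrative, not
 * an API of this file. Bits 0..15 carry the virtqueue index. For a packed
 * ring, bits 16..30 carry the next available descriptor index and bit 31 the
 * avail wrap counter; for a split ring, bits 16..31 carry the 16-bit shadow
 * avail index.
 */
static inline void example_decode_notification_data(u32 data, u16 *vq_index,
						    u16 *next_idx, bool *wrap)
{
	*vq_index = data & 0xffff;
	*next_idx = (data >> 16) & 0x7fff;	/* packed-ring interpretation */
	*wrap = !!(data & BIT(31));
}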
2964
2965 /* Manipulates transport-specific feature bits. */
2966 void vring_transport_features(struct virtio_device *vdev)
2967 {
2968         unsigned int i;
2969
2970         for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2971                 switch (i) {
2972                 case VIRTIO_RING_F_INDIRECT_DESC:
2973                         break;
2974                 case VIRTIO_RING_F_EVENT_IDX:
2975                         break;
2976                 case VIRTIO_F_VERSION_1:
2977                         break;
2978                 case VIRTIO_F_ACCESS_PLATFORM:
2979                         break;
2980                 case VIRTIO_F_RING_PACKED:
2981                         break;
2982                 case VIRTIO_F_ORDER_PLATFORM:
2983                         break;
2984                 case VIRTIO_F_NOTIFICATION_DATA:
2985                         break;
2986                 default:
2987                         /* We don't understand this bit. */
2988                         __virtio_clear_bit(vdev, i);
2989                 }
2990         }
2991 }
2992 EXPORT_SYMBOL_GPL(vring_transport_features);
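
/*
 * A minimal sketch of where a transport typically calls
 * vring_transport_features(): from its finalize_features hook, before the
 * negotiated feature set is committed. The callback name is hypothetical.
 */
static int example_finalize_features(struct virtio_device *vdev)
{
	/* Clear transport feature bits this ring implementation doesn't know. */
	vring_transport_features(vdev);

	return 0;
}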
2993
2994 /**
2995  * virtqueue_get_vring_size - return the size of the virtqueue's vring
2996  * @_vq: the struct virtqueue containing the vring of interest.
2997  *
2998  * Returns the size of the vring.  This is mainly used for boasting to
2999  * userspace.  Unlike other operations, this need not be serialized.
3000  */
3001 unsigned int virtqueue_get_vring_size(const struct virtqueue *_vq)
3002 {
3003
3004         const struct vring_virtqueue *vq = to_vvq(_vq);
3005
3006         return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
3007 }
3008 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
3009
3010 /*
3011  * This function should only be called by the core, not directly by the driver.
3012  */
3013 void __virtqueue_break(struct virtqueue *_vq)
3014 {
3015         struct vring_virtqueue *vq = to_vvq(_vq);
3016
3017         /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
3018         WRITE_ONCE(vq->broken, true);
3019 }
3020 EXPORT_SYMBOL_GPL(__virtqueue_break);
3021
3022 /*
3023  * This function should only be called by the core, not directly by the driver.
3024  */
3025 void __virtqueue_unbreak(struct virtqueue *_vq)
3026 {
3027         struct vring_virtqueue *vq = to_vvq(_vq);
3028
3029         /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
3030         WRITE_ONCE(vq->broken, false);
3031 }
3032 EXPORT_SYMBOL_GPL(__virtqueue_unbreak);
3033
3034 bool virtqueue_is_broken(const struct virtqueue *_vq)
3035 {
3036         const struct vring_virtqueue *vq = to_vvq(_vq);
3037
3038         return READ_ONCE(vq->broken);
3039 }
3040 EXPORT_SYMBOL_GPL(virtqueue_is_broken);
3041
3042 /*
3043  * This should prevent the device from being used, allowing drivers to
3044  * recover.  You may need to grab appropriate locks to flush the write to vq->broken.
3045  */
3046 void virtio_break_device(struct virtio_device *dev)
3047 {
3048         struct virtqueue *_vq;
3049
3050         spin_lock(&dev->vqs_list_lock);
3051         list_for_each_entry(_vq, &dev->vqs, list) {
3052                 struct vring_virtqueue *vq = to_vvq(_vq);
3053
3054                 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
3055                 WRITE_ONCE(vq->broken, true);
3056         }
3057         spin_unlock(&dev->vqs_list_lock);
3058 }
3059 EXPORT_SYMBOL_GPL(virtio_break_device);
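
/*
 * A minimal error-path sketch for a hypothetical driver that detects an
 * unrecoverable device failure: marking every queue broken makes pending and
 * future virtqueue operations fail fast instead of waiting for completions
 * that will never arrive.
 */
static void example_handle_fatal_error(struct virtio_device *vdev)
{
	virtio_break_device(vdev);
	/* Callers can now observe virtqueue_is_broken() and bail out. */
}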
3060
3061 /*
3062  * This should allow the device to be used by the driver. You may
3063  * need to grab appropriate locks to flush the write to
3064  * vq->broken. This should only be used in specific cases, e.g.
3065  * probing and restoring. This function should only be called by the
3066  * core, not directly by the driver.
3067  */
3068 void __virtio_unbreak_device(struct virtio_device *dev)
3069 {
3070         struct virtqueue *_vq;
3071
3072         spin_lock(&dev->vqs_list_lock);
3073         list_for_each_entry(_vq, &dev->vqs, list) {
3074                 struct vring_virtqueue *vq = to_vvq(_vq);
3075
3076                 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
3077                 WRITE_ONCE(vq->broken, false);
3078         }
3079         spin_unlock(&dev->vqs_list_lock);
3080 }
3081 EXPORT_SYMBOL_GPL(__virtio_unbreak_device);
3082
3083 dma_addr_t virtqueue_get_desc_addr(const struct virtqueue *_vq)
3084 {
3085         const struct vring_virtqueue *vq = to_vvq(_vq);
3086
3087         BUG_ON(!vq->we_own_ring);
3088
3089         if (vq->packed_ring)
3090                 return vq->packed.ring_dma_addr;
3091
3092         return vq->split.queue_dma_addr;
3093 }
3094 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
3095
3096 dma_addr_t virtqueue_get_avail_addr(const struct virtqueue *_vq)
3097 {
3098         const struct vring_virtqueue *vq = to_vvq(_vq);
3099
3100         BUG_ON(!vq->we_own_ring);
3101
3102         if (vq->packed_ring)
3103                 return vq->packed.driver_event_dma_addr;
3104
3105         return vq->split.queue_dma_addr +
3106                 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
3107 }
3108 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
3109
3110 dma_addr_t virtqueue_get_used_addr(const struct virtqueue *_vq)
3111 {
3112         const struct vring_virtqueue *vq = to_vvq(_vq);
3113
3114         BUG_ON(!vq->we_own_ring);
3115
3116         if (vq->packed_ring)
3117                 return vq->packed.device_event_dma_addr;
3118
3119         return vq->split.queue_dma_addr +
3120                 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
3121 }
3122 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
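
/*
 * A sketch of how a memory-mapped transport could program these addresses
 * into the device, split into 32-bit halves as the virtio-mmio register
 * layout does; the helper name and the ioremapped "base" window are
 * hypothetical, and <linux/virtio_mmio.h> plus <linux/io.h> are assumed for
 * the register offsets and writel().
 */
static void example_program_queue_addrs(struct virtqueue *vq,
					void __iomem *base)
{
	dma_addr_t desc = virtqueue_get_desc_addr(vq);
	dma_addr_t avail = virtqueue_get_avail_addr(vq);
	dma_addr_t used = virtqueue_get_used_addr(vq);

	writel(lower_32_bits(desc), base + VIRTIO_MMIO_QUEUE_DESC_LOW);
	writel(upper_32_bits(desc), base + VIRTIO_MMIO_QUEUE_DESC_HIGH);
	writel(lower_32_bits(avail), base + VIRTIO_MMIO_QUEUE_AVAIL_LOW);
	writel(upper_32_bits(avail), base + VIRTIO_MMIO_QUEUE_AVAIL_HIGH);
	writel(lower_32_bits(used), base + VIRTIO_MMIO_QUEUE_USED_LOW);
	writel(upper_32_bits(used), base + VIRTIO_MMIO_QUEUE_USED_HIGH);
}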
3123
3124 /* Only available for split ring */
3125 const struct vring *virtqueue_get_vring(const struct virtqueue *vq)
3126 {
3127         return &to_vvq(vq)->split.vring;
3128 }
3129 EXPORT_SYMBOL_GPL(virtqueue_get_vring);
3130
3131 /**
3132  * virtqueue_dma_map_single_attrs - map DMA for _vq
3133  * @_vq: the struct virtqueue we're talking about.
3134  * @ptr: the buffer to map for DMA
3135  * @size: the size of the buffer to map
3136  * @dir: DMA direction
3137  * @attrs: DMA attributes
3138  *
3139  * The caller performs the DMA mapping in advance; the resulting DMA address
3140  * can then be passed to this _vq when it operates in premapped mode.
3141  *
3142  * Return: the DMA address; the caller should check it with virtqueue_dma_mapping_error().
3143  */
3144 dma_addr_t virtqueue_dma_map_single_attrs(struct virtqueue *_vq, void *ptr,
3145                                           size_t size,
3146                                           enum dma_data_direction dir,
3147                                           unsigned long attrs)
3148 {
3149         struct vring_virtqueue *vq = to_vvq(_vq);
3150
3151         if (!vq->use_dma_api) {
3152                 kmsan_handle_dma(virt_to_page(ptr), offset_in_page(ptr), size, dir);
3153                 return (dma_addr_t)virt_to_phys(ptr);
3154         }
3155
3156         return dma_map_single_attrs(vring_dma_dev(vq), ptr, size, dir, attrs);
3157 }
3158 EXPORT_SYMBOL_GPL(virtqueue_dma_map_single_attrs);
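
/*
 * A minimal sketch of the premapped-buffer pattern using only the helpers
 * exported here; the function name is hypothetical. The driver maps a buffer
 * once, verifies the mapping, and later releases it with
 * virtqueue_dma_unmap_single_attrs().
 */
static dma_addr_t example_premap_buf(struct virtqueue *vq, void *buf,
				     size_t len)
{
	dma_addr_t addr;

	addr = virtqueue_dma_map_single_attrs(vq, buf, len, DMA_TO_DEVICE, 0);
	if (virtqueue_dma_mapping_error(vq, addr))
		return DMA_MAPPING_ERROR;

	return addr;
}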
3159
3160 /**
3161  * virtqueue_dma_unmap_single_attrs - unmap DMA for _vq
3162  * @_vq: the struct virtqueue we're talking about.
3163  * @addr: the DMA address to unmap
3164  * @size: the size of the buffer
3165  * @dir: DMA direction
3166  * @attrs: DMA attributes
3167  *
3168  * Unmap an address previously mapped by one of the virtqueue_dma_map_* APIs.
3169  *
3170  */
3171 void virtqueue_dma_unmap_single_attrs(struct virtqueue *_vq, dma_addr_t addr,
3172                                       size_t size, enum dma_data_direction dir,
3173                                       unsigned long attrs)
3174 {
3175         struct vring_virtqueue *vq = to_vvq(_vq);
3176
3177         if (!vq->use_dma_api)
3178                 return;
3179
3180         dma_unmap_single_attrs(vring_dma_dev(vq), addr, size, dir, attrs);
3181 }
3182 EXPORT_SYMBOL_GPL(virtqueue_dma_unmap_single_attrs);
3183
3184 /**
3185  * virtqueue_dma_mapping_error - check dma address
3186  * @_vq: the struct virtqueue we're talking about.
3187  * @addr: DMA address
3188  *
3189  * Return: 0 if the DMA address is valid; any other value indicates an invalid DMA address.
3190  */
3191 int virtqueue_dma_mapping_error(struct virtqueue *_vq, dma_addr_t addr)
3192 {
3193         struct vring_virtqueue *vq = to_vvq(_vq);
3194
3195         if (!vq->use_dma_api)
3196                 return 0;
3197
3198         return dma_mapping_error(vring_dma_dev(vq), addr);
3199 }
3200 EXPORT_SYMBOL_GPL(virtqueue_dma_mapping_error);
3201
3202 /**
3203  * virtqueue_dma_need_sync - check whether a DMA address needs to be synced
3204  * @_vq: the struct virtqueue we're talking about.
3205  * @addr: DMA address
3206  *
3207  * Check whether the DMA address mapped by the virtqueue_dma_map_* APIs needs
3208  * to be synchronized.
3209  *
3210  * Return: true if the DMA address needs syncing, false otherwise.
3211  */
3212 bool virtqueue_dma_need_sync(struct virtqueue *_vq, dma_addr_t addr)
3213 {
3214         struct vring_virtqueue *vq = to_vvq(_vq);
3215
3216         if (!vq->use_dma_api)
3217                 return false;
3218
3219         return dma_need_sync(vring_dma_dev(vq), addr);
3220 }
3221 EXPORT_SYMBOL_GPL(virtqueue_dma_need_sync);
3222
3223 /**
3224  * virtqueue_dma_sync_single_range_for_cpu - dma sync for cpu
3225  * @_vq: the struct virtqueue we're talking about.
3226  * @addr: DMA address
3227  * @offset: DMA address offset
3228  * @size: buf size for sync
3229  * @dir: DMA direction
3230  *
3231  * Before calling this function, use virtqueue_dma_need_sync() to confirm that
3232  * the DMA address really needs to be synchronized.
3233  *
3234  */
3235 void virtqueue_dma_sync_single_range_for_cpu(struct virtqueue *_vq,
3236                                              dma_addr_t addr,
3237                                              unsigned long offset, size_t size,
3238                                              enum dma_data_direction dir)
3239 {
3240         struct vring_virtqueue *vq = to_vvq(_vq);
3241         struct device *dev = vring_dma_dev(vq);
3242
3243         if (!vq->use_dma_api)
3244                 return;
3245
3246         dma_sync_single_range_for_cpu(dev, addr, offset, size, dir);
3247 }
3248 EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_cpu);
3249
3250 /**
3251  * virtqueue_dma_sync_single_range_for_device - dma sync for device
3252  * @_vq: the struct virtqueue we're talking about.
3253  * @addr: DMA address
3254  * @offset: DMA address offset
3255  * @size: buf size for sync
3256  * @dir: DMA direction
3257  *
3258  * Before calling this function, use virtqueue_dma_need_sync() to confirm that
3259  * the DMA address really needs to be synchronized.
3260  */
3261 void virtqueue_dma_sync_single_range_for_device(struct virtqueue *_vq,
3262                                                 dma_addr_t addr,
3263                                                 unsigned long offset, size_t size,
3264                                                 enum dma_data_direction dir)
3265 {
3266         struct vring_virtqueue *vq = to_vvq(_vq);
3267         struct device *dev = vring_dma_dev(vq);
3268
3269         if (!vq->use_dma_api)
3270                 return;
3271
3272         dma_sync_single_range_for_device(dev, addr, offset, size, dir);
3273 }
3274 EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_device);
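
/*
 * A minimal sketch combining virtqueue_dma_need_sync() with the sync helpers
 * above, e.g. before the CPU reads a device-written premapped receive buffer;
 * the helper name is hypothetical.
 */
static void example_sync_rx_for_cpu(struct virtqueue *vq, dma_addr_t addr,
				    size_t len)
{
	if (!virtqueue_dma_need_sync(vq, addr))
		return;

	virtqueue_dma_sync_single_range_for_cpu(vq, addr, 0, len,
						DMA_FROM_DEVICE);
}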
3275
3276 MODULE_DESCRIPTION("Virtio ring implementation");
3277 MODULE_LICENSE("GPL");