[linux.git] drivers/vdpa/vdpa_user/vduse_dev.c (Linux 6.14-rc3)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * VDUSE: vDPA Device in Userspace
4  *
5  * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
6  *
7  * Author: Xie Yongji <[email protected]>
8  *
9  */
10
11 #include <linux/virtio_net.h>
12 #include <linux/init.h>
13 #include <linux/module.h>
14 #include <linux/cdev.h>
15 #include <linux/device.h>
16 #include <linux/eventfd.h>
17 #include <linux/slab.h>
18 #include <linux/wait.h>
19 #include <linux/dma-map-ops.h>
20 #include <linux/poll.h>
21 #include <linux/file.h>
22 #include <linux/uio.h>
23 #include <linux/vdpa.h>
24 #include <linux/nospec.h>
25 #include <linux/vmalloc.h>
26 #include <linux/sched/mm.h>
27 #include <uapi/linux/vduse.h>
28 #include <uapi/linux/vdpa.h>
29 #include <uapi/linux/virtio_config.h>
30 #include <uapi/linux/virtio_ids.h>
31 #include <uapi/linux/virtio_blk.h>
32 #include <uapi/linux/virtio_ring.h>
33 #include <linux/mod_devicetable.h>
34
35 #include "iova_domain.h"
36
37 #define DRV_AUTHOR   "Yongji Xie <[email protected]>"
38 #define DRV_DESC     "vDPA Device in Userspace"
39 #define DRV_LICENSE  "GPL v2"
40
41 #define VDUSE_DEV_MAX (1U << MINORBITS)
42 #define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024)
43 #define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024)
44 #define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
45 /* 128 MB reserved for virtqueue creation */
46 #define VDUSE_IOVA_SIZE (VDUSE_MAX_BOUNCE_SIZE + 128 * 1024 * 1024)
47 #define VDUSE_MSG_DEFAULT_TIMEOUT 30
48
49 #define IRQ_UNBOUND -1
50
51 struct vduse_virtqueue {
52         u16 index;
53         u16 num_max;
54         u32 num;
55         u64 desc_addr;
56         u64 driver_addr;
57         u64 device_addr;
58         struct vdpa_vq_state state;
59         bool ready;
60         bool kicked;
61         spinlock_t kick_lock;
62         spinlock_t irq_lock;
63         struct eventfd_ctx *kickfd;
64         struct vdpa_callback cb;
65         struct work_struct inject;
66         struct work_struct kick;
67         int irq_effective_cpu;
68         struct cpumask irq_affinity;
69         struct kobject kobj;
70 };
71
72 struct vduse_dev;
73
74 struct vduse_vdpa {
75         struct vdpa_device vdpa;
76         struct vduse_dev *dev;
77 };
78
79 struct vduse_umem {
80         unsigned long iova;
81         unsigned long npages;
82         struct page **pages;
83         struct mm_struct *mm;
84 };
85
86 struct vduse_dev {
87         struct vduse_vdpa *vdev;
88         struct device *dev;
89         struct vduse_virtqueue **vqs;
90         struct vduse_iova_domain *domain;
91         char *name;
92         struct mutex lock;
93         spinlock_t msg_lock;
94         u64 msg_unique;
95         u32 msg_timeout;
96         wait_queue_head_t waitq;
97         struct list_head send_list;
98         struct list_head recv_list;
99         struct vdpa_callback config_cb;
100         struct work_struct inject;
101         spinlock_t irq_lock;
102         struct rw_semaphore rwsem;
103         int minor;
104         bool broken;
105         bool connected;
106         u64 api_version;
107         u64 device_features;
108         u64 driver_features;
109         u32 device_id;
110         u32 vendor_id;
111         u32 generation;
112         u32 config_size;
113         void *config;
114         u8 status;
115         u32 vq_num;
116         u32 vq_align;
117         struct vduse_umem *umem;
118         struct mutex mem_lock;
119         unsigned int bounce_size;
120         struct mutex domain_lock;
121 };
122
123 struct vduse_dev_msg {
124         struct vduse_dev_request req;
125         struct vduse_dev_response resp;
126         struct list_head list;
127         wait_queue_head_t waitq;
128         bool completed;
129 };
130
131 struct vduse_control {
132         u64 api_version;
133 };
134
135 static DEFINE_MUTEX(vduse_lock);
136 static DEFINE_IDR(vduse_idr);
137
138 static dev_t vduse_major;
139 static struct cdev vduse_ctrl_cdev;
140 static struct cdev vduse_cdev;
141 static struct workqueue_struct *vduse_irq_wq;
142 static struct workqueue_struct *vduse_irq_bound_wq;
143
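/* Device types that are currently allowed to be emulated through VDUSE */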
144 static u32 allowed_device_id[] = {
145         VIRTIO_ID_BLOCK,
146         VIRTIO_ID_NET,
147 };
148
149 static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
150 {
151         struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);
152
153         return vdev->dev;
154 }
155
156 static inline struct vduse_dev *dev_to_vduse(struct device *dev)
157 {
158         struct vdpa_device *vdpa = dev_to_vdpa(dev);
159
160         return vdpa_to_vduse(vdpa);
161 }
162
163 static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
164                                             uint32_t request_id)
165 {
166         struct vduse_dev_msg *msg;
167
168         list_for_each_entry(msg, head, list) {
169                 if (msg->req.request_id == request_id) {
170                         list_del(&msg->list);
171                         return msg;
172                 }
173         }
174
175         return NULL;
176 }
177
178 static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
179 {
180         struct vduse_dev_msg *msg = NULL;
181
182         if (!list_empty(head)) {
183                 msg = list_first_entry(head, struct vduse_dev_msg, list);
184                 list_del(&msg->list);
185         }
186
187         return msg;
188 }
189
190 static void vduse_enqueue_msg(struct list_head *head,
191                               struct vduse_dev_msg *msg)
192 {
193         list_add_tail(&msg->list, head);
194 }
195
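/*
 * Fail every pending request and mark the device as broken. Called with
 * dev->msg_lock held; waiters on the individual messages and on dev->waitq
 * are woken up so they can observe the failure.
 */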
196 static void vduse_dev_broken(struct vduse_dev *dev)
197 {
198         struct vduse_dev_msg *msg, *tmp;
199
200         if (unlikely(dev->broken))
201                 return;
202
203         list_splice_init(&dev->recv_list, &dev->send_list);
204         list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
205                 list_del(&msg->list);
206                 msg->completed = 1;
207                 msg->resp.result = VDUSE_REQ_RESULT_FAILED;
208                 wake_up(&msg->waitq);
209         }
210         dev->broken = true;
211         wake_up(&dev->waitq);
212 }
213
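/*
 * Queue a request for the userspace daemon and wait (killable) for its
 * response. If dev->msg_timeout is non-zero and no response arrives within
 * that many seconds, the device is marked as broken.
 */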
214 static int vduse_dev_msg_sync(struct vduse_dev *dev,
215                               struct vduse_dev_msg *msg)
216 {
217         int ret;
218
219         if (unlikely(dev->broken))
220                 return -EIO;
221
222         init_waitqueue_head(&msg->waitq);
223         spin_lock(&dev->msg_lock);
224         if (unlikely(dev->broken)) {
225                 spin_unlock(&dev->msg_lock);
226                 return -EIO;
227         }
228         msg->req.request_id = dev->msg_unique++;
229         vduse_enqueue_msg(&dev->send_list, msg);
230         wake_up(&dev->waitq);
231         spin_unlock(&dev->msg_lock);
232         if (dev->msg_timeout)
233                 ret = wait_event_killable_timeout(msg->waitq, msg->completed,
234                                                   (long)dev->msg_timeout * HZ);
235         else
236                 ret = wait_event_killable(msg->waitq, msg->completed);
237
238         spin_lock(&dev->msg_lock);
239         if (!msg->completed) {
240                 list_del(&msg->list);
241                 msg->resp.result = VDUSE_REQ_RESULT_FAILED;
242                 /* Mark the device as malfunctioning when there is a timeout */
243                 if (!ret)
244                         vduse_dev_broken(dev);
245         }
246         ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
247         spin_unlock(&dev->msg_lock);
248
249         return ret;
250 }
251
252 static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
253                                          struct vduse_virtqueue *vq,
254                                          struct vdpa_vq_state_packed *packed)
255 {
256         struct vduse_dev_msg msg = { 0 };
257         int ret;
258
259         msg.req.type = VDUSE_GET_VQ_STATE;
260         msg.req.vq_state.index = vq->index;
261
262         ret = vduse_dev_msg_sync(dev, &msg);
263         if (ret)
264                 return ret;
265
266         packed->last_avail_counter =
267                         msg.resp.vq_state.packed.last_avail_counter & 0x0001;
268         packed->last_avail_idx =
269                         msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
270         packed->last_used_counter =
271                         msg.resp.vq_state.packed.last_used_counter & 0x0001;
272         packed->last_used_idx =
273                         msg.resp.vq_state.packed.last_used_idx & 0x7FFF;
274
275         return 0;
276 }
277
278 static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
279                                         struct vduse_virtqueue *vq,
280                                         struct vdpa_vq_state_split *split)
281 {
282         struct vduse_dev_msg msg = { 0 };
283         int ret;
284
285         msg.req.type = VDUSE_GET_VQ_STATE;
286         msg.req.vq_state.index = vq->index;
287
288         ret = vduse_dev_msg_sync(dev, &msg);
289         if (ret)
290                 return ret;
291
292         split->avail_index = msg.resp.vq_state.split.avail_index;
293
294         return 0;
295 }
296
297 static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
298 {
299         struct vduse_dev_msg msg = { 0 };
300
301         msg.req.type = VDUSE_SET_STATUS;
302         msg.req.s.status = status;
303
304         return vduse_dev_msg_sync(dev, &msg);
305 }
306
307 static int vduse_dev_update_iotlb(struct vduse_dev *dev,
308                                   u64 start, u64 last)
309 {
310         struct vduse_dev_msg msg = { 0 };
311
312         if (last < start)
313                 return -EINVAL;
314
315         msg.req.type = VDUSE_UPDATE_IOTLB;
316         msg.req.iova.start = start;
317         msg.req.iova.last = last;
318
319         return vduse_dev_msg_sync(dev, &msg);
320 }
321
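/*
 * read() path of the per-device char dev: the userspace daemon fetches the
 * next pending request. Blocks unless O_NONBLOCK is set; a successfully
 * copied request is moved to recv_list until the daemon writes its response.
 */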
322 static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
323 {
324         struct file *file = iocb->ki_filp;
325         struct vduse_dev *dev = file->private_data;
326         struct vduse_dev_msg *msg;
327         int size = sizeof(struct vduse_dev_request);
328         ssize_t ret;
329
330         if (iov_iter_count(to) < size)
331                 return -EINVAL;
332
333         spin_lock(&dev->msg_lock);
334         while (1) {
335                 msg = vduse_dequeue_msg(&dev->send_list);
336                 if (msg)
337                         break;
338
339                 ret = -EAGAIN;
340                 if (file->f_flags & O_NONBLOCK)
341                         goto unlock;
342
343                 spin_unlock(&dev->msg_lock);
344                 ret = wait_event_interruptible_exclusive(dev->waitq,
345                                         !list_empty(&dev->send_list));
346                 if (ret)
347                         return ret;
348
349                 spin_lock(&dev->msg_lock);
350         }
351         spin_unlock(&dev->msg_lock);
352         ret = copy_to_iter(&msg->req, size, to);
353         spin_lock(&dev->msg_lock);
354         if (ret != size) {
355                 ret = -EFAULT;
356                 vduse_enqueue_msg(&dev->send_list, msg);
357                 goto unlock;
358         }
359         vduse_enqueue_msg(&dev->recv_list, msg);
360 unlock:
361         spin_unlock(&dev->msg_lock);
362
363         return ret;
364 }
365
366 static bool is_mem_zero(const char *ptr, int size)
367 {
368         int i;
369
370         for (i = 0; i < size; i++) {
371                 if (ptr[i])
372                         return false;
373         }
374         return true;
375 }
376
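/*
 * write() path of the per-device char dev: the userspace daemon completes a
 * previously read request by writing back a struct vduse_dev_response, which
 * is matched to the pending message via its request_id.
 */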
377 static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
378 {
379         struct file *file = iocb->ki_filp;
380         struct vduse_dev *dev = file->private_data;
381         struct vduse_dev_response resp;
382         struct vduse_dev_msg *msg;
383         size_t ret;
384
385         ret = copy_from_iter(&resp, sizeof(resp), from);
386         if (ret != sizeof(resp))
387                 return -EINVAL;
388
389         if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
390                 return -EINVAL;
391
392         spin_lock(&dev->msg_lock);
393         msg = vduse_find_msg(&dev->recv_list, resp.request_id);
394         if (!msg) {
395                 ret = -ENOENT;
396                 goto unlock;
397         }
398
399         memcpy(&msg->resp, &resp, sizeof(resp));
400         msg->completed = 1;
401         wake_up(&msg->waitq);
402 unlock:
403         spin_unlock(&dev->msg_lock);
404
405         return ret;
406 }
407
408 static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
409 {
410         struct vduse_dev *dev = file->private_data;
411         __poll_t mask = 0;
412
413         poll_wait(file, &dev->waitq, wait);
414
415         spin_lock(&dev->msg_lock);
416
417         if (unlikely(dev->broken))
418                 mask |= EPOLLERR;
419         if (!list_empty(&dev->send_list))
420                 mask |= EPOLLIN | EPOLLRDNORM;
421         if (!list_empty(&dev->recv_list))
422                 mask |= EPOLLOUT | EPOLLWRNORM;
423
424         spin_unlock(&dev->msg_lock);
425
426         return mask;
427 }
428
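/*
 * Reset the device to its initial state: clear the status and negotiated
 * features, bump the config generation, drop all virtqueue addresses,
 * callbacks and kick eventfds, and flush any pending injection work.
 */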
429 static void vduse_dev_reset(struct vduse_dev *dev)
430 {
431         int i;
432         struct vduse_iova_domain *domain = dev->domain;
433
434         /* The coherent mappings are handled in vduse_dev_free_coherent() */
435         if (domain && domain->bounce_map)
436                 vduse_domain_reset_bounce_map(domain);
437
438         down_write(&dev->rwsem);
439
440         dev->status = 0;
441         dev->driver_features = 0;
442         dev->generation++;
443         spin_lock(&dev->irq_lock);
444         dev->config_cb.callback = NULL;
445         dev->config_cb.private = NULL;
446         spin_unlock(&dev->irq_lock);
447         flush_work(&dev->inject);
448
449         for (i = 0; i < dev->vq_num; i++) {
450                 struct vduse_virtqueue *vq = dev->vqs[i];
451
452                 vq->ready = false;
453                 vq->desc_addr = 0;
454                 vq->driver_addr = 0;
455                 vq->device_addr = 0;
456                 vq->num = 0;
457                 memset(&vq->state, 0, sizeof(vq->state));
458
459                 spin_lock(&vq->kick_lock);
460                 vq->kicked = false;
461                 if (vq->kickfd)
462                         eventfd_ctx_put(vq->kickfd);
463                 vq->kickfd = NULL;
464                 spin_unlock(&vq->kick_lock);
465
466                 spin_lock(&vq->irq_lock);
467                 vq->cb.callback = NULL;
468                 vq->cb.private = NULL;
469                 vq->cb.trigger = NULL;
470                 spin_unlock(&vq->irq_lock);
471                 flush_work(&vq->inject);
472                 flush_work(&vq->kick);
473         }
474
475         up_write(&dev->rwsem);
476 }
477
478 static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
479                                 u64 desc_area, u64 driver_area,
480                                 u64 device_area)
481 {
482         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
483         struct vduse_virtqueue *vq = dev->vqs[idx];
484
485         vq->desc_addr = desc_area;
486         vq->driver_addr = driver_area;
487         vq->device_addr = device_area;
488
489         return 0;
490 }
491
492 static void vduse_vq_kick(struct vduse_virtqueue *vq)
493 {
494         spin_lock(&vq->kick_lock);
495         if (!vq->ready)
496                 goto unlock;
497
498         if (vq->kickfd)
499                 eventfd_signal(vq->kickfd);
500         else
501                 vq->kicked = true;
502 unlock:
503         spin_unlock(&vq->kick_lock);
504 }
505
506 static void vduse_vq_kick_work(struct work_struct *work)
507 {
508         struct vduse_virtqueue *vq = container_of(work,
509                                         struct vduse_virtqueue, kick);
510
511         vduse_vq_kick(vq);
512 }
513
514 static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
515 {
516         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
517         struct vduse_virtqueue *vq = dev->vqs[idx];
518
519         if (!eventfd_signal_allowed()) {
520                 schedule_work(&vq->kick);
521                 return;
522         }
523         vduse_vq_kick(vq);
524 }
525
526 static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
527                               struct vdpa_callback *cb)
528 {
529         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
530         struct vduse_virtqueue *vq = dev->vqs[idx];
531
532         spin_lock(&vq->irq_lock);
533         vq->cb.callback = cb->callback;
534         vq->cb.private = cb->private;
535         vq->cb.trigger = cb->trigger;
536         spin_unlock(&vq->irq_lock);
537 }
538
539 static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
540 {
541         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
542         struct vduse_virtqueue *vq = dev->vqs[idx];
543
544         vq->num = num;
545 }
546
547 static u16 vduse_vdpa_get_vq_size(struct vdpa_device *vdpa, u16 idx)
548 {
549         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
550         struct vduse_virtqueue *vq = dev->vqs[idx];
551
552         if (vq->num)
553                 return vq->num;
554         else
555                 return vq->num_max;
556 }
557
558 static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
559                                         u16 idx, bool ready)
560 {
561         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
562         struct vduse_virtqueue *vq = dev->vqs[idx];
563
564         vq->ready = ready;
565 }
566
567 static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
568 {
569         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
570         struct vduse_virtqueue *vq = dev->vqs[idx];
571
572         return vq->ready;
573 }
574
575 static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
576                                 const struct vdpa_vq_state *state)
577 {
578         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
579         struct vduse_virtqueue *vq = dev->vqs[idx];
580
581         if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
582                 vq->state.packed.last_avail_counter =
583                                 state->packed.last_avail_counter;
584                 vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
585                 vq->state.packed.last_used_counter =
586                                 state->packed.last_used_counter;
587                 vq->state.packed.last_used_idx = state->packed.last_used_idx;
588         } else
589                 vq->state.split.avail_index = state->split.avail_index;
590
591         return 0;
592 }
593
594 static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
595                                 struct vdpa_vq_state *state)
596 {
597         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
598         struct vduse_virtqueue *vq = dev->vqs[idx];
599
600         if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
601                 return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);
602
603         return vduse_dev_get_vq_state_split(dev, vq, &state->split);
604 }
605
606 static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
607 {
608         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
609
610         return dev->vq_align;
611 }
612
613 static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
614 {
615         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
616
617         return dev->device_features;
618 }
619
620 static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
621 {
622         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
623
624         dev->driver_features = features;
625         return 0;
626 }
627
628 static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
629 {
630         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
631
632         return dev->driver_features;
633 }
634
635 static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
636                                   struct vdpa_callback *cb)
637 {
638         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
639
640         spin_lock(&dev->irq_lock);
641         dev->config_cb.callback = cb->callback;
642         dev->config_cb.private = cb->private;
643         spin_unlock(&dev->irq_lock);
644 }
645
646 static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
647 {
648         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
649         u16 num_max = 0;
650         int i;
651
652         for (i = 0; i < dev->vq_num; i++)
653                 if (num_max < dev->vqs[i]->num_max)
654                         num_max = dev->vqs[i]->num_max;
655
656         return num_max;
657 }
658
659 static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
660 {
661         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
662
663         return dev->device_id;
664 }
665
666 static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
667 {
668         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
669
670         return dev->vendor_id;
671 }
672
673 static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
674 {
675         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
676
677         return dev->status;
678 }
679
680 static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
681 {
682         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
683
684         if (vduse_dev_set_status(dev, status))
685                 return;
686
687         dev->status = status;
688 }
689
690 static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
691 {
692         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
693
694         return dev->config_size;
695 }
696
697 static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
698                                   void *buf, unsigned int len)
699 {
700         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
701
702         /* Initialize the buffer in case of partial copy. */
703         memset(buf, 0, len);
704
705         if (offset > dev->config_size)
706                 return;
707
708         if (len > dev->config_size - offset)
709                 len = dev->config_size - offset;
710
711         memcpy(buf, dev->config + offset, len);
712 }
713
714 static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
715                         const void *buf, unsigned int len)
716 {
717         /* Now we only support read-only configuration space */
718 }
719
720 static int vduse_vdpa_reset(struct vdpa_device *vdpa)
721 {
722         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
723         int ret = vduse_dev_set_status(dev, 0);
724
725         vduse_dev_reset(dev);
726
727         return ret;
728 }
729
730 static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
731 {
732         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
733
734         return dev->generation;
735 }
736
737 static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
738                                       const struct cpumask *cpu_mask)
739 {
740         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
741
742         if (cpu_mask)
743                 cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask);
744         else
745                 cpumask_setall(&dev->vqs[idx]->irq_affinity);
746
747         return 0;
748 }
749
750 static const struct cpumask *
751 vduse_vdpa_get_vq_affinity(struct vdpa_device *vdpa, u16 idx)
752 {
753         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
754
755         return &dev->vqs[idx]->irq_affinity;
756 }
757
758 static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
759                                 unsigned int asid,
760                                 struct vhost_iotlb *iotlb)
761 {
762         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
763         int ret;
764
765         ret = vduse_domain_set_map(dev->domain, iotlb);
766         if (ret)
767                 return ret;
768
769         ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
770         if (ret) {
771                 vduse_domain_clear_map(dev->domain, iotlb);
772                 return ret;
773         }
774
775         return 0;
776 }
777
778 static void vduse_vdpa_free(struct vdpa_device *vdpa)
779 {
780         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
781
782         dev->vdev = NULL;
783 }
784
785 static const struct vdpa_config_ops vduse_vdpa_config_ops = {
786         .set_vq_address         = vduse_vdpa_set_vq_address,
787         .kick_vq                = vduse_vdpa_kick_vq,
788         .set_vq_cb              = vduse_vdpa_set_vq_cb,
789         .set_vq_num             = vduse_vdpa_set_vq_num,
790         .get_vq_size            = vduse_vdpa_get_vq_size,
791         .set_vq_ready           = vduse_vdpa_set_vq_ready,
792         .get_vq_ready           = vduse_vdpa_get_vq_ready,
793         .set_vq_state           = vduse_vdpa_set_vq_state,
794         .get_vq_state           = vduse_vdpa_get_vq_state,
795         .get_vq_align           = vduse_vdpa_get_vq_align,
796         .get_device_features    = vduse_vdpa_get_device_features,
797         .set_driver_features    = vduse_vdpa_set_driver_features,
798         .get_driver_features    = vduse_vdpa_get_driver_features,
799         .set_config_cb          = vduse_vdpa_set_config_cb,
800         .get_vq_num_max         = vduse_vdpa_get_vq_num_max,
801         .get_device_id          = vduse_vdpa_get_device_id,
802         .get_vendor_id          = vduse_vdpa_get_vendor_id,
803         .get_status             = vduse_vdpa_get_status,
804         .set_status             = vduse_vdpa_set_status,
805         .get_config_size        = vduse_vdpa_get_config_size,
806         .get_config             = vduse_vdpa_get_config,
807         .set_config             = vduse_vdpa_set_config,
808         .get_generation         = vduse_vdpa_get_generation,
809         .set_vq_affinity        = vduse_vdpa_set_vq_affinity,
810         .get_vq_affinity        = vduse_vdpa_get_vq_affinity,
811         .reset                  = vduse_vdpa_reset,
812         .set_map                = vduse_vdpa_set_map,
813         .free                   = vduse_vdpa_free,
814 };
815
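/*
 * DMA ops for the emulated device. All mappings are redirected into the
 * per-device VDUSE IOVA domain so that buffers end up in IOVA space (and,
 * where needed, in the bounce area) visible to the userspace daemon.
 */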
816 static void vduse_dev_sync_single_for_device(struct device *dev,
817                                              dma_addr_t dma_addr, size_t size,
818                                              enum dma_data_direction dir)
819 {
820         struct vduse_dev *vdev = dev_to_vduse(dev);
821         struct vduse_iova_domain *domain = vdev->domain;
822
823         vduse_domain_sync_single_for_device(domain, dma_addr, size, dir);
824 }
825
826 static void vduse_dev_sync_single_for_cpu(struct device *dev,
827                                              dma_addr_t dma_addr, size_t size,
828                                              enum dma_data_direction dir)
829 {
830         struct vduse_dev *vdev = dev_to_vduse(dev);
831         struct vduse_iova_domain *domain = vdev->domain;
832
833         vduse_domain_sync_single_for_cpu(domain, dma_addr, size, dir);
834 }
835
836 static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
837                                      unsigned long offset, size_t size,
838                                      enum dma_data_direction dir,
839                                      unsigned long attrs)
840 {
841         struct vduse_dev *vdev = dev_to_vduse(dev);
842         struct vduse_iova_domain *domain = vdev->domain;
843
844         return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
845 }
846
847 static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
848                                 size_t size, enum dma_data_direction dir,
849                                 unsigned long attrs)
850 {
851         struct vduse_dev *vdev = dev_to_vduse(dev);
852         struct vduse_iova_domain *domain = vdev->domain;
853
854         return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
855 }
856
857 static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
858                                         dma_addr_t *dma_addr, gfp_t flag,
859                                         unsigned long attrs)
860 {
861         struct vduse_dev *vdev = dev_to_vduse(dev);
862         struct vduse_iova_domain *domain = vdev->domain;
863         unsigned long iova;
864         void *addr;
865
866         *dma_addr = DMA_MAPPING_ERROR;
867         addr = vduse_domain_alloc_coherent(domain, size,
868                                 (dma_addr_t *)&iova, flag, attrs);
869         if (!addr)
870                 return NULL;
871
872         *dma_addr = (dma_addr_t)iova;
873
874         return addr;
875 }
876
877 static void vduse_dev_free_coherent(struct device *dev, size_t size,
878                                         void *vaddr, dma_addr_t dma_addr,
879                                         unsigned long attrs)
880 {
881         struct vduse_dev *vdev = dev_to_vduse(dev);
882         struct vduse_iova_domain *domain = vdev->domain;
883
884         vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
885 }
886
887 static size_t vduse_dev_max_mapping_size(struct device *dev)
888 {
889         struct vduse_dev *vdev = dev_to_vduse(dev);
890         struct vduse_iova_domain *domain = vdev->domain;
891
892         return domain->bounce_size;
893 }
894
895 static const struct dma_map_ops vduse_dev_dma_ops = {
896         .sync_single_for_device = vduse_dev_sync_single_for_device,
897         .sync_single_for_cpu = vduse_dev_sync_single_for_cpu,
898         .map_page = vduse_dev_map_page,
899         .unmap_page = vduse_dev_unmap_page,
900         .alloc = vduse_dev_alloc_coherent,
901         .free = vduse_dev_free_coherent,
902         .max_mapping_size = vduse_dev_max_mapping_size,
903 };
904
905 static unsigned int perm_to_file_flags(u8 perm)
906 {
907         unsigned int flags = 0;
908
909         switch (perm) {
910         case VDUSE_ACCESS_WO:
911                 flags |= O_WRONLY;
912                 break;
913         case VDUSE_ACCESS_RO:
914                 flags |= O_RDONLY;
915                 break;
916         case VDUSE_ACCESS_RW:
917                 flags |= O_RDWR;
918                 break;
919         default:
920                 WARN(1, "invalid vhost IOTLB permission\n");
921                 break;
922         }
923
924         return flags;
925 }
926
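/*
 * Install (or, with VDUSE_EVENTFD_DEASSIGN, remove) the per-virtqueue kick
 * eventfd. If the virtqueue was kicked while no eventfd was assigned, the
 * pending kick is delivered immediately.
 */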
927 static int vduse_kickfd_setup(struct vduse_dev *dev,
928                         struct vduse_vq_eventfd *eventfd)
929 {
930         struct eventfd_ctx *ctx = NULL;
931         struct vduse_virtqueue *vq;
932         u32 index;
933
934         if (eventfd->index >= dev->vq_num)
935                 return -EINVAL;
936
937         index = array_index_nospec(eventfd->index, dev->vq_num);
938         vq = dev->vqs[index];
939         if (eventfd->fd >= 0) {
940                 ctx = eventfd_ctx_fdget(eventfd->fd);
941                 if (IS_ERR(ctx))
942                         return PTR_ERR(ctx);
943         } else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
944                 return 0;
945
946         spin_lock(&vq->kick_lock);
947         if (vq->kickfd)
948                 eventfd_ctx_put(vq->kickfd);
949         vq->kickfd = ctx;
950         if (vq->ready && vq->kicked && vq->kickfd) {
951                 eventfd_signal(vq->kickfd);
952                 vq->kicked = false;
953         }
954         spin_unlock(&vq->kick_lock);
955
956         return 0;
957 }
958
959 static bool vduse_dev_is_ready(struct vduse_dev *dev)
960 {
961         int i;
962
963         for (i = 0; i < dev->vq_num; i++)
964                 if (!dev->vqs[i]->num_max)
965                         return false;
966
967         return true;
968 }
969
970 static void vduse_dev_irq_inject(struct work_struct *work)
971 {
972         struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
973
974         spin_lock_bh(&dev->irq_lock);
975         if (dev->config_cb.callback)
976                 dev->config_cb.callback(dev->config_cb.private);
977         spin_unlock_bh(&dev->irq_lock);
978 }
979
980 static void vduse_vq_irq_inject(struct work_struct *work)
981 {
982         struct vduse_virtqueue *vq = container_of(work,
983                                         struct vduse_virtqueue, inject);
984
985         spin_lock_bh(&vq->irq_lock);
986         if (vq->ready && vq->cb.callback)
987                 vq->cb.callback(vq->cb.private);
988         spin_unlock_bh(&vq->irq_lock);
989 }
990
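/*
 * Try to signal the interrupt eventfd installed via the vq callback's
 * trigger. Returns false if no trigger is set up (or the vq is not ready),
 * in which case the caller falls back to injecting via the workqueue.
 */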
991 static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
992 {
993         bool signal = false;
994
995         if (!vq->cb.trigger)
996                 return false;
997
998         spin_lock_irq(&vq->irq_lock);
999         if (vq->ready && vq->cb.trigger) {
1000                 eventfd_signal(vq->cb.trigger);
1001                 signal = true;
1002         }
1003         spin_unlock_irq(&vq->irq_lock);
1004
1005         return signal;
1006 }
1007
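/*
 * Queue interrupt-injection work, either on the unbound workqueue or bound
 * to a specific CPU chosen from the vq's irq affinity. Injection is only
 * allowed once the driver has set VIRTIO_CONFIG_S_DRIVER_OK.
 */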
1008 static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
1009                                     struct work_struct *irq_work,
1010                                     int irq_effective_cpu)
1011 {
1012         int ret = -EINVAL;
1013
1014         down_read(&dev->rwsem);
1015         if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
1016                 goto unlock;
1017
1018         ret = 0;
1019         if (irq_effective_cpu == IRQ_UNBOUND)
1020                 queue_work(vduse_irq_wq, irq_work);
1021         else
1022                 queue_work_on(irq_effective_cpu,
1023                               vduse_irq_bound_wq, irq_work);
1024 unlock:
1025         up_read(&dev->rwsem);
1026
1027         return ret;
1028 }
1029
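/*
 * Undo VDUSE_IOTLB_REG_UMEM: switch the domain back to kernel bounce pages,
 * then unpin and unaccount the previously registered user pages.
 */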
1030 static int vduse_dev_dereg_umem(struct vduse_dev *dev,
1031                                 u64 iova, u64 size)
1032 {
1033         int ret;
1034
1035         mutex_lock(&dev->mem_lock);
1036         ret = -ENOENT;
1037         if (!dev->umem)
1038                 goto unlock;
1039
1040         ret = -EINVAL;
1041         if (!dev->domain)
1042                 goto unlock;
1043
1044         if (dev->umem->iova != iova || size != dev->domain->bounce_size)
1045                 goto unlock;
1046
1047         vduse_domain_remove_user_bounce_pages(dev->domain);
1048         unpin_user_pages_dirty_lock(dev->umem->pages,
1049                                     dev->umem->npages, true);
1050         atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
1051         mmdrop(dev->umem->mm);
1052         vfree(dev->umem->pages);
1053         kfree(dev->umem);
1054         dev->umem = NULL;
1055         ret = 0;
1056 unlock:
1057         mutex_unlock(&dev->mem_lock);
1058         return ret;
1059 }
1060
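/*
 * Register a userspace memory region as the backing store for the bounce
 * buffer. The region must be page aligned and cover exactly
 * [0, domain->bounce_size); the pinned pages are charged against
 * RLIMIT_MEMLOCK.
 */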
1061 static int vduse_dev_reg_umem(struct vduse_dev *dev,
1062                               u64 iova, u64 uaddr, u64 size)
1063 {
1064         struct page **page_list = NULL;
1065         struct vduse_umem *umem = NULL;
1066         long pinned = 0;
1067         unsigned long npages, lock_limit;
1068         int ret;
1069
1070         if (!dev->domain || !dev->domain->bounce_map ||
1071             size != dev->domain->bounce_size ||
1072             iova != 0 || uaddr & ~PAGE_MASK)
1073                 return -EINVAL;
1074
1075         mutex_lock(&dev->mem_lock);
1076         ret = -EEXIST;
1077         if (dev->umem)
1078                 goto unlock;
1079
1080         ret = -ENOMEM;
1081         npages = size >> PAGE_SHIFT;
1082         page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
1083                               GFP_KERNEL_ACCOUNT);
1084         umem = kzalloc(sizeof(*umem), GFP_KERNEL);
1085         if (!page_list || !umem)
1086                 goto unlock;
1087
1088         mmap_read_lock(current->mm);
1089
1090         lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
1091         if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
1092                 goto out;
1093
1094         pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
1095                                 page_list);
1096         if (pinned != npages) {
1097                 ret = pinned < 0 ? pinned : -ENOMEM;
1098                 goto out;
1099         }
1100
1101         ret = vduse_domain_add_user_bounce_pages(dev->domain,
1102                                                  page_list, pinned);
1103         if (ret)
1104                 goto out;
1105
1106         atomic64_add(npages, &current->mm->pinned_vm);
1107
1108         umem->pages = page_list;
1109         umem->npages = pinned;
1110         umem->iova = iova;
1111         umem->mm = current->mm;
1112         mmgrab(current->mm);
1113
1114         dev->umem = umem;
1115 out:
1116         if (ret && pinned > 0)
1117                 unpin_user_pages(page_list, pinned);
1118
1119         mmap_read_unlock(current->mm);
1120 unlock:
1121         if (ret) {
1122                 vfree(page_list);
1123                 kfree(umem);
1124         }
1125         mutex_unlock(&dev->mem_lock);
1126         return ret;
1127 }
1128
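/*
 * Rotate vq->irq_effective_cpu to the next online CPU in the vq's irq
 * affinity mask, wrapping around to the beginning of the mask when the end
 * is reached.
 */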
1129 static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
1130 {
1131         int curr_cpu = vq->irq_effective_cpu;
1132
1133         while (true) {
1134                 curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
1135                 if (cpu_online(curr_cpu))
1136                         break;
1137
1138                 if (curr_cpu >= nr_cpu_ids)
1139                         curr_cpu = IRQ_UNBOUND;
1140         }
1141
1142         vq->irq_effective_cpu = curr_cpu;
1143 }
1144
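/*
 * ioctls on the per-device char dev used by the userspace VDUSE daemon to
 * configure virtqueues, access the IOTLB and inject interrupts.
 *
 * An illustrative (not normative) flow for a daemon, using the ioctls
 * handled below:
 *
 *	ioctl(dev_fd, VDUSE_VQ_SETUP, &vq_config);	   // per-vq max size
 *	ioctl(dev_fd, VDUSE_VQ_SETUP_KICKFD, &vq_eventfd); // kick notification
 *	// ... read() requests / write() responses on dev_fd ...
 *	ioctl(dev_fd, VDUSE_VQ_INJECT_IRQ, &vq_index);	   // used-ring interrupt
 */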
1145 static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
1146                             unsigned long arg)
1147 {
1148         struct vduse_dev *dev = file->private_data;
1149         void __user *argp = (void __user *)arg;
1150         int ret;
1151
1152         if (unlikely(dev->broken))
1153                 return -EPERM;
1154
1155         switch (cmd) {
1156         case VDUSE_IOTLB_GET_FD: {
1157                 struct vduse_iotlb_entry entry;
1158                 struct vhost_iotlb_map *map;
1159                 struct vdpa_map_file *map_file;
1160                 struct file *f = NULL;
1161
1162                 ret = -EFAULT;
1163                 if (copy_from_user(&entry, argp, sizeof(entry)))
1164                         break;
1165
1166                 ret = -EINVAL;
1167                 if (entry.start > entry.last)
1168                         break;
1169
1170                 mutex_lock(&dev->domain_lock);
1171                 if (!dev->domain) {
1172                         mutex_unlock(&dev->domain_lock);
1173                         break;
1174                 }
1175                 spin_lock(&dev->domain->iotlb_lock);
1176                 map = vhost_iotlb_itree_first(dev->domain->iotlb,
1177                                               entry.start, entry.last);
1178                 if (map) {
1179                         map_file = (struct vdpa_map_file *)map->opaque;
1180                         f = get_file(map_file->file);
1181                         entry.offset = map_file->offset;
1182                         entry.start = map->start;
1183                         entry.last = map->last;
1184                         entry.perm = map->perm;
1185                 }
1186                 spin_unlock(&dev->domain->iotlb_lock);
1187                 mutex_unlock(&dev->domain_lock);
1188                 ret = -EINVAL;
1189                 if (!f)
1190                         break;
1191
1192                 ret = -EFAULT;
1193                 if (copy_to_user(argp, &entry, sizeof(entry))) {
1194                         fput(f);
1195                         break;
1196                 }
1197                 ret = receive_fd(f, NULL, perm_to_file_flags(entry.perm));
1198                 fput(f);
1199                 break;
1200         }
1201         case VDUSE_DEV_GET_FEATURES:
1202                 /*
1203                  * Just mirror what the driver wrote here.
1204                  * The driver is expected to check FEATURE_OK later.
1205                  */
1206                 ret = put_user(dev->driver_features, (u64 __user *)argp);
1207                 break;
1208         case VDUSE_DEV_SET_CONFIG: {
1209                 struct vduse_config_data config;
1210                 unsigned long size = offsetof(struct vduse_config_data,
1211                                               buffer);
1212
1213                 ret = -EFAULT;
1214                 if (copy_from_user(&config, argp, size))
1215                         break;
1216
1217                 ret = -EINVAL;
1218                 if (config.offset > dev->config_size ||
1219                     config.length == 0 ||
1220                     config.length > dev->config_size - config.offset)
1221                         break;
1222
1223                 ret = -EFAULT;
1224                 if (copy_from_user(dev->config + config.offset, argp + size,
1225                                    config.length))
1226                         break;
1227
1228                 ret = 0;
1229                 break;
1230         }
1231         case VDUSE_DEV_INJECT_CONFIG_IRQ:
1232                 ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
1233                 break;
1234         case VDUSE_VQ_SETUP: {
1235                 struct vduse_vq_config config;
1236                 u32 index;
1237
1238                 ret = -EFAULT;
1239                 if (copy_from_user(&config, argp, sizeof(config)))
1240                         break;
1241
1242                 ret = -EINVAL;
1243                 if (config.index >= dev->vq_num)
1244                         break;
1245
1246                 if (!is_mem_zero((const char *)config.reserved,
1247                                  sizeof(config.reserved)))
1248                         break;
1249
1250                 index = array_index_nospec(config.index, dev->vq_num);
1251                 dev->vqs[index]->num_max = config.max_size;
1252                 ret = 0;
1253                 break;
1254         }
1255         case VDUSE_VQ_GET_INFO: {
1256                 struct vduse_vq_info vq_info;
1257                 struct vduse_virtqueue *vq;
1258                 u32 index;
1259
1260                 ret = -EFAULT;
1261                 if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
1262                         break;
1263
1264                 ret = -EINVAL;
1265                 if (vq_info.index >= dev->vq_num)
1266                         break;
1267
1268                 index = array_index_nospec(vq_info.index, dev->vq_num);
1269                 vq = dev->vqs[index];
1270                 vq_info.desc_addr = vq->desc_addr;
1271                 vq_info.driver_addr = vq->driver_addr;
1272                 vq_info.device_addr = vq->device_addr;
1273                 vq_info.num = vq->num;
1274
1275                 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
1276                         vq_info.packed.last_avail_counter =
1277                                 vq->state.packed.last_avail_counter;
1278                         vq_info.packed.last_avail_idx =
1279                                 vq->state.packed.last_avail_idx;
1280                         vq_info.packed.last_used_counter =
1281                                 vq->state.packed.last_used_counter;
1282                         vq_info.packed.last_used_idx =
1283                                 vq->state.packed.last_used_idx;
1284                 } else
1285                         vq_info.split.avail_index =
1286                                 vq->state.split.avail_index;
1287
1288                 vq_info.ready = vq->ready;
1289
1290                 ret = -EFAULT;
1291                 if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
1292                         break;
1293
1294                 ret = 0;
1295                 break;
1296         }
1297         case VDUSE_VQ_SETUP_KICKFD: {
1298                 struct vduse_vq_eventfd eventfd;
1299
1300                 ret = -EFAULT;
1301                 if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
1302                         break;
1303
1304                 ret = vduse_kickfd_setup(dev, &eventfd);
1305                 break;
1306         }
1307         case VDUSE_VQ_INJECT_IRQ: {
1308                 u32 index;
1309
1310                 ret = -EFAULT;
1311                 if (get_user(index, (u32 __user *)argp))
1312                         break;
1313
1314                 ret = -EINVAL;
1315                 if (index >= dev->vq_num)
1316                         break;
1317
1318                 ret = 0;
1319                 index = array_index_nospec(index, dev->vq_num);
1320                 if (!vduse_vq_signal_irqfd(dev->vqs[index])) {
1321                         vduse_vq_update_effective_cpu(dev->vqs[index]);
1322                         ret = vduse_dev_queue_irq_work(dev,
1323                                                 &dev->vqs[index]->inject,
1324                                                 dev->vqs[index]->irq_effective_cpu);
1325                 }
1326                 break;
1327         }
1328         case VDUSE_IOTLB_REG_UMEM: {
1329                 struct vduse_iova_umem umem;
1330
1331                 ret = -EFAULT;
1332                 if (copy_from_user(&umem, argp, sizeof(umem)))
1333                         break;
1334
1335                 ret = -EINVAL;
1336                 if (!is_mem_zero((const char *)umem.reserved,
1337                                  sizeof(umem.reserved)))
1338                         break;
1339
1340                 mutex_lock(&dev->domain_lock);
1341                 ret = vduse_dev_reg_umem(dev, umem.iova,
1342                                          umem.uaddr, umem.size);
1343                 mutex_unlock(&dev->domain_lock);
1344                 break;
1345         }
1346         case VDUSE_IOTLB_DEREG_UMEM: {
1347                 struct vduse_iova_umem umem;
1348
1349                 ret = -EFAULT;
1350                 if (copy_from_user(&umem, argp, sizeof(umem)))
1351                         break;
1352
1353                 ret = -EINVAL;
1354                 if (!is_mem_zero((const char *)umem.reserved,
1355                                  sizeof(umem.reserved)))
1356                         break;
1357                 mutex_lock(&dev->domain_lock);
1358                 ret = vduse_dev_dereg_umem(dev, umem.iova,
1359                                            umem.size);
1360                 mutex_unlock(&dev->domain_lock);
1361                 break;
1362         }
1363         case VDUSE_IOTLB_GET_INFO: {
1364                 struct vduse_iova_info info;
1365                 struct vhost_iotlb_map *map;
1366
1367                 ret = -EFAULT;
1368                 if (copy_from_user(&info, argp, sizeof(info)))
1369                         break;
1370
1371                 ret = -EINVAL;
1372                 if (info.start > info.last)
1373                         break;
1374
1375                 if (!is_mem_zero((const char *)info.reserved,
1376                                  sizeof(info.reserved)))
1377                         break;
1378
1379                 mutex_lock(&dev->domain_lock);
1380                 if (!dev->domain) {
1381                         mutex_unlock(&dev->domain_lock);
1382                         break;
1383                 }
1384                 spin_lock(&dev->domain->iotlb_lock);
1385                 map = vhost_iotlb_itree_first(dev->domain->iotlb,
1386                                               info.start, info.last);
1387                 if (map) {
1388                         info.start = map->start;
1389                         info.last = map->last;
1390                         info.capability = 0;
1391                         if (dev->domain->bounce_map && map->start == 0 &&
1392                             map->last == dev->domain->bounce_size - 1)
1393                                 info.capability |= VDUSE_IOVA_CAP_UMEM;
1394                 }
1395                 spin_unlock(&dev->domain->iotlb_lock);
1396                 mutex_unlock(&dev->domain_lock);
1397                 if (!map)
1398                         break;
1399
1400                 ret = -EFAULT;
1401                 if (copy_to_user(argp, &info, sizeof(info)))
1402                         break;
1403
1404                 ret = 0;
1405                 break;
1406         }
1407         default:
1408                 ret = -ENOIOCTLCMD;
1409                 break;
1410         }
1411
1412         return ret;
1413 }
1414
1415 static int vduse_dev_release(struct inode *inode, struct file *file)
1416 {
1417         struct vduse_dev *dev = file->private_data;
1418
1419         mutex_lock(&dev->domain_lock);
1420         if (dev->domain)
1421                 vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
1422         mutex_unlock(&dev->domain_lock);
1423         spin_lock(&dev->msg_lock);
1424         /* Make sure the inflight messages can be processed after reconnection */
1425         list_splice_init(&dev->recv_list, &dev->send_list);
1426         spin_unlock(&dev->msg_lock);
1427         dev->connected = false;
1428
1429         return 0;
1430 }
1431
1432 static struct vduse_dev *vduse_dev_get_from_minor(int minor)
1433 {
1434         struct vduse_dev *dev;
1435
1436         mutex_lock(&vduse_lock);
1437         dev = idr_find(&vduse_idr, minor);
1438         mutex_unlock(&vduse_lock);
1439
1440         return dev;
1441 }
1442
1443 static int vduse_dev_open(struct inode *inode, struct file *file)
1444 {
1445         int ret;
1446         struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));
1447
1448         if (!dev)
1449                 return -ENODEV;
1450
1451         ret = -EBUSY;
1452         mutex_lock(&dev->lock);
1453         if (dev->connected)
1454                 goto unlock;
1455
1456         ret = 0;
1457         dev->connected = true;
1458         file->private_data = dev;
1459 unlock:
1460         mutex_unlock(&dev->lock);
1461
1462         return ret;
1463 }
1464
1465 static const struct file_operations vduse_dev_fops = {
1466         .owner          = THIS_MODULE,
1467         .open           = vduse_dev_open,
1468         .release        = vduse_dev_release,
1469         .read_iter      = vduse_dev_read_iter,
1470         .write_iter     = vduse_dev_write_iter,
1471         .poll           = vduse_dev_poll,
1472         .unlocked_ioctl = vduse_dev_ioctl,
1473         .compat_ioctl   = compat_ptr_ioctl,
1474         .llseek         = noop_llseek,
1475 };
1476
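/*
 * Per-virtqueue sysfs attribute (vqN/irq_cb_affinity) controlling which CPUs
 * the interrupt callback work may be bound to.
 */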
1477 static ssize_t irq_cb_affinity_show(struct vduse_virtqueue *vq, char *buf)
1478 {
1479         return sprintf(buf, "%*pb\n", cpumask_pr_args(&vq->irq_affinity));
1480 }
1481
1482 static ssize_t irq_cb_affinity_store(struct vduse_virtqueue *vq,
1483                                      const char *buf, size_t count)
1484 {
1485         cpumask_var_t new_value;
1486         int ret;
1487
1488         if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
1489                 return -ENOMEM;
1490
1491         ret = cpumask_parse(buf, new_value);
1492         if (ret)
1493                 goto free_mask;
1494
1495         ret = -EINVAL;
1496         if (!cpumask_intersects(new_value, cpu_online_mask))
1497                 goto free_mask;
1498
1499         cpumask_copy(&vq->irq_affinity, new_value);
1500         ret = count;
1501 free_mask:
1502         free_cpumask_var(new_value);
1503         return ret;
1504 }
1505
1506 struct vq_sysfs_entry {
1507         struct attribute attr;
1508         ssize_t (*show)(struct vduse_virtqueue *vq, char *buf);
1509         ssize_t (*store)(struct vduse_virtqueue *vq, const char *buf,
1510                          size_t count);
1511 };
1512
1513 static struct vq_sysfs_entry irq_cb_affinity_attr = __ATTR_RW(irq_cb_affinity);
1514
1515 static struct attribute *vq_attrs[] = {
1516         &irq_cb_affinity_attr.attr,
1517         NULL,
1518 };
1519 ATTRIBUTE_GROUPS(vq);
1520
1521 static ssize_t vq_attr_show(struct kobject *kobj, struct attribute *attr,
1522                             char *buf)
1523 {
1524         struct vduse_virtqueue *vq = container_of(kobj,
1525                                         struct vduse_virtqueue, kobj);
1526         struct vq_sysfs_entry *entry = container_of(attr,
1527                                         struct vq_sysfs_entry, attr);
1528
1529         if (!entry->show)
1530                 return -EIO;
1531
1532         return entry->show(vq, buf);
1533 }
1534
1535 static ssize_t vq_attr_store(struct kobject *kobj, struct attribute *attr,
1536                              const char *buf, size_t count)
1537 {
1538         struct vduse_virtqueue *vq = container_of(kobj,
1539                                         struct vduse_virtqueue, kobj);
1540         struct vq_sysfs_entry *entry = container_of(attr,
1541                                         struct vq_sysfs_entry, attr);
1542
1543         if (!entry->store)
1544                 return -EIO;
1545
1546         return entry->store(vq, buf, count);
1547 }
1548
1549 static const struct sysfs_ops vq_sysfs_ops = {
1550         .show = vq_attr_show,
1551         .store = vq_attr_store,
1552 };
1553
1554 static void vq_release(struct kobject *kobj)
1555 {
1556         struct vduse_virtqueue *vq = container_of(kobj,
1557                                         struct vduse_virtqueue, kobj);
1558         kfree(vq);
1559 }
1560
1561 static const struct kobj_type vq_type = {
1562         .release        = vq_release,
1563         .sysfs_ops      = &vq_sysfs_ops,
1564         .default_groups = vq_groups,
1565 };
1566
1567 static char *vduse_devnode(const struct device *dev, umode_t *mode)
1568 {
1569         return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
1570 }
1571
1572 static const struct class vduse_class = {
1573         .name = "vduse",
1574         .devnode = vduse_devnode,
1575 };
1576
1577 static void vduse_dev_deinit_vqs(struct vduse_dev *dev)
1578 {
1579         int i;
1580
1581         if (!dev->vqs)
1582                 return;
1583
1584         for (i = 0; i < dev->vq_num; i++)
1585                 kobject_put(&dev->vqs[i]->kobj);
1586         kfree(dev->vqs);
1587 }
1588
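/*
 * Allocate and initialize vq_num virtqueues: set up their locks and
 * injection/kick work items, default the irq affinity to all CPUs and
 * register a "vq%d" kobject for each one under the device.
 */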
1589 static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
1590 {
1591         int ret, i;
1592
1593         dev->vq_align = vq_align;
1594         dev->vq_num = vq_num;
1595         dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
1596         if (!dev->vqs)
1597                 return -ENOMEM;
1598
1599         for (i = 0; i < vq_num; i++) {
1600                 dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL);
1601                 if (!dev->vqs[i]) {
1602                         ret = -ENOMEM;
1603                         goto err;
1604                 }
1605
1606                 dev->vqs[i]->index = i;
1607                 dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
1608                 INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
1609                 INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
1610                 spin_lock_init(&dev->vqs[i]->kick_lock);
1611                 spin_lock_init(&dev->vqs[i]->irq_lock);
1612                 cpumask_setall(&dev->vqs[i]->irq_affinity);
1613
1614                 kobject_init(&dev->vqs[i]->kobj, &vq_type);
1615                 ret = kobject_add(&dev->vqs[i]->kobj,
1616                                   &dev->dev->kobj, "vq%d", i);
1617                 if (ret) {
1618                         kfree(dev->vqs[i]);
1619                         goto err;
1620                 }
1621         }
1622
1623         return 0;
1624 err:
1625         while (i--)
1626                 kobject_put(&dev->vqs[i]->kobj);
1627         kfree(dev->vqs);
1628         dev->vqs = NULL;
1629         return ret;
1630 }
1631
1632 static struct vduse_dev *vduse_dev_create(void)
1633 {
1634         struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
1635
1636         if (!dev)
1637                 return NULL;
1638
1639         mutex_init(&dev->lock);
1640         mutex_init(&dev->mem_lock);
1641         mutex_init(&dev->domain_lock);
1642         spin_lock_init(&dev->msg_lock);
1643         INIT_LIST_HEAD(&dev->send_list);
1644         INIT_LIST_HEAD(&dev->recv_list);
1645         spin_lock_init(&dev->irq_lock);
1646         init_rwsem(&dev->rwsem);
1647
1648         INIT_WORK(&dev->inject, vduse_dev_irq_inject);
1649         init_waitqueue_head(&dev->waitq);
1650
1651         return dev;
1652 }
1653
1654 static void vduse_dev_destroy(struct vduse_dev *dev)
1655 {
1656         kfree(dev);
1657 }
1658
1659 static struct vduse_dev *vduse_find_dev(const char *name)
1660 {
1661         struct vduse_dev *dev;
1662         int id;
1663
1664         idr_for_each_entry(&vduse_idr, dev, id)
1665                 if (!strcmp(dev->name, name))
1666                         return dev;
1667
1668         return NULL;
1669 }
1670
1671 static int vduse_destroy_dev(char *name)
1672 {
1673         struct vduse_dev *dev = vduse_find_dev(name);
1674
1675         if (!dev)
1676                 return -EINVAL;
1677
1678         mutex_lock(&dev->lock);
1679         if (dev->vdev || dev->connected) {
1680                 mutex_unlock(&dev->lock);
1681                 return -EBUSY;
1682         }
1683         dev->connected = true;
1684         mutex_unlock(&dev->lock);
1685
1686         vduse_dev_reset(dev);
1687         device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
1688         idr_remove(&vduse_idr, dev->minor);
1689         kvfree(dev->config);
1690         vduse_dev_deinit_vqs(dev);
1691         if (dev->domain)
1692                 vduse_domain_destroy(dev->domain);
1693         kfree(dev->name);
1694         vduse_dev_destroy(dev);
1695         module_put(THIS_MODULE);
1696
1697         return 0;
1698 }
1699
1700 static bool device_is_allowed(u32 device_id)
1701 {
1702         int i;
1703
1704         for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
1705                 if (allowed_device_id[i] == device_id)
1706                         return true;
1707
1708         return false;
1709 }
1710
1711 static bool features_is_valid(struct vduse_dev_config *config)
1712 {
1713         if (!(config->features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
1714                 return false;
1715
1716         /* Currently only a read-only configuration space is supported */
1717         if ((config->device_id == VIRTIO_ID_BLOCK) &&
1718                         (config->features & BIT_ULL(VIRTIO_BLK_F_CONFIG_WCE)))
1719                 return false;
1720         else if ((config->device_id == VIRTIO_ID_NET) &&
1721                         (config->features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
1722                 return false;
1723
1724         if ((config->device_id == VIRTIO_ID_NET) &&
1725                         !(config->features & BIT_ULL(VIRTIO_F_VERSION_1)))
1726                 return false;
1727
1728         return true;
1729 }
1730
1731 static bool vduse_validate_config(struct vduse_dev_config *config)
1732 {
1733         if (!is_mem_zero((const char *)config->reserved,
1734                          sizeof(config->reserved)))
1735                 return false;
1736
1737         if (config->vq_align > PAGE_SIZE)
1738                 return false;
1739
1740         if (config->config_size > PAGE_SIZE)
1741                 return false;
1742
1743         if (config->vq_num > 0xffff)
1744                 return false;
1745
1746         if (!config->name[0])
1747                 return false;
1748
1749         if (!device_is_allowed(config->device_id))
1750                 return false;
1751
1752         if (!features_is_valid(config))
1753                 return false;
1754
1755         return true;
1756 }
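
/*
 * Illustrative userspace sketch (not part of this file): values that satisfy
 * the checks above for a virtio-blk device.  The name, queue count, alignment
 * and config-space size are arbitrary choices for the example, not
 * requirements imposed by this driver.
 */
#include <string.h>
#include <linux/vduse.h>
#include <linux/virtio_ids.h>
#include <linux/virtio_config.h>
#include <linux/virtio_blk.h>

static void vduse_example_fill_blk_config(struct vduse_dev_config *cfg)
{
	memset(cfg, 0, sizeof(*cfg));		/* keeps cfg->reserved[] zeroed */
	strncpy(cfg->name, "vduse-blk-0", VDUSE_NAME_MAX - 1);
	cfg->device_id = VIRTIO_ID_BLOCK;	/* must be in allowed_device_id */
	cfg->features = (1ULL << VIRTIO_F_ACCESS_PLATFORM) |	/* mandatory */
			(1ULL << VIRTIO_F_VERSION_1);
	/* must not set VIRTIO_BLK_F_CONFIG_WCE (writable config space) */
	cfg->vq_num = 1;			/* <= 0xffff */
	cfg->vq_align = 4096;			/* <= PAGE_SIZE */
	cfg->config_size = sizeof(struct virtio_blk_config);	/* <= PAGE_SIZE */
}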
1757
1758 static ssize_t msg_timeout_show(struct device *device,
1759                                 struct device_attribute *attr, char *buf)
1760 {
1761         struct vduse_dev *dev = dev_get_drvdata(device);
1762
1763         return sysfs_emit(buf, "%u\n", dev->msg_timeout);
1764 }
1765
1766 static ssize_t msg_timeout_store(struct device *device,
1767                                  struct device_attribute *attr,
1768                                  const char *buf, size_t count)
1769 {
1770         struct vduse_dev *dev = dev_get_drvdata(device);
1771         int ret;
1772
1773         ret = kstrtouint(buf, 10, &dev->msg_timeout);
1774         if (ret < 0)
1775                 return ret;
1776
1777         return count;
1778 }
1779
1780 static DEVICE_ATTR_RW(msg_timeout);
1781
1782 static ssize_t bounce_size_show(struct device *device,
1783                                 struct device_attribute *attr, char *buf)
1784 {
1785         struct vduse_dev *dev = dev_get_drvdata(device);
1786
1787         return sysfs_emit(buf, "%u\n", dev->bounce_size);
1788 }
1789
1790 static ssize_t bounce_size_store(struct device *device,
1791                                  struct device_attribute *attr,
1792                                  const char *buf, size_t count)
1793 {
1794         struct vduse_dev *dev = dev_get_drvdata(device);
1795         unsigned int bounce_size;
1796         int ret;
1797
1798         ret = -EPERM;
1799         mutex_lock(&dev->domain_lock);
1800         if (dev->domain)
1801                 goto unlock;
1802
1803         ret = kstrtouint(buf, 10, &bounce_size);
1804         if (ret < 0)
1805                 goto unlock;
1806
1807         ret = -EINVAL;
1808         if (bounce_size > VDUSE_MAX_BOUNCE_SIZE ||
1809             bounce_size < VDUSE_MIN_BOUNCE_SIZE)
1810                 goto unlock;
1811
1812         dev->bounce_size = bounce_size & PAGE_MASK;
1813         ret = count;
1814 unlock:
1815         mutex_unlock(&dev->domain_lock);
1816         return ret;
1817 }
1818
1819 static DEVICE_ATTR_RW(bounce_size);
1820
1821 static struct attribute *vduse_dev_attrs[] = {
1822         &dev_attr_msg_timeout.attr,
1823         &dev_attr_bounce_size.attr,
1824         NULL
1825 };
1826
1827 ATTRIBUTE_GROUPS(vduse_dev);
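
/*
 * Illustrative userspace sketch (not part of this file): tuning the two
 * attributes above.  bounce_size only takes effect while no IOVA domain
 * exists, i.e. before the vDPA device is added; the sysfs path is assumed to
 * be the class device created in vduse_create_dev(), /sys/class/vduse/<name>/.
 */
#include <stdio.h>

static int vduse_example_sysfs_write(const char *name, const char *attr,
				     const char *val)
{
	char path[256];
	FILE *f;
	int ret;

	snprintf(path, sizeof(path), "/sys/class/vduse/%s/%s", name, attr);
	f = fopen(path, "w");
	if (!f)
		return -1;
	ret = (fputs(val, f) < 0) ? -1 : 0;	/* write errors may also surface on close */
	fclose(f);
	return ret;
}

/* e.g. vduse_example_sysfs_write("vduse-blk-0", "bounce_size", "134217728");  128 MiB */
/*      vduse_example_sysfs_write("vduse-blk-0", "msg_timeout", "60");         seconds  */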
1828
1829 static int vduse_create_dev(struct vduse_dev_config *config,
1830                             void *config_buf, u64 api_version)
1831 {
1832         int ret;
1833         struct vduse_dev *dev;
1834
1835         ret = -EPERM;
1836         if ((config->device_id == VIRTIO_ID_NET) && !capable(CAP_NET_ADMIN))
1837                 goto err;
1838
1839         ret = -EEXIST;
1840         if (vduse_find_dev(config->name))
1841                 goto err;
1842
1843         ret = -ENOMEM;
1844         dev = vduse_dev_create();
1845         if (!dev)
1846                 goto err;
1847
1848         dev->api_version = api_version;
1849         dev->device_features = config->features;
1850         dev->device_id = config->device_id;
1851         dev->vendor_id = config->vendor_id;
1852         dev->name = kstrdup(config->name, GFP_KERNEL);
1853         if (!dev->name)
1854                 goto err_str;
1855
1856         dev->bounce_size = VDUSE_BOUNCE_SIZE;
1857         dev->config = config_buf;
1858         dev->config_size = config->config_size;
1859
1860         ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
1861         if (ret < 0)
1862                 goto err_idr;
1863
1864         dev->minor = ret;
1865         dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
1866         dev->dev = device_create_with_groups(&vduse_class, NULL,
1867                                 MKDEV(MAJOR(vduse_major), dev->minor),
1868                                 dev, vduse_dev_groups, "%s", config->name);
1869         if (IS_ERR(dev->dev)) {
1870                 ret = PTR_ERR(dev->dev);
1871                 goto err_dev;
1872         }
1873
1874         ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num);
1875         if (ret)
1876                 goto err_vqs;
1877
1878         __module_get(THIS_MODULE);
1879
1880         return 0;
1881 err_vqs:
1882         device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
1883 err_dev:
1884         idr_remove(&vduse_idr, dev->minor);
1885 err_idr:
1886         kfree(dev->name);
1887 err_str:
1888         vduse_dev_destroy(dev);
1889 err:
1890         return ret;
1891 }
1892
1893 static long vduse_ioctl(struct file *file, unsigned int cmd,
1894                         unsigned long arg)
1895 {
1896         int ret;
1897         void __user *argp = (void __user *)arg;
1898         struct vduse_control *control = file->private_data;
1899
1900         mutex_lock(&vduse_lock);
1901         switch (cmd) {
1902         case VDUSE_GET_API_VERSION:
1903                 ret = put_user(control->api_version, (u64 __user *)argp);
1904                 break;
1905         case VDUSE_SET_API_VERSION: {
1906                 u64 api_version;
1907
1908                 ret = -EFAULT;
1909                 if (get_user(api_version, (u64 __user *)argp))
1910                         break;
1911
1912                 ret = -EINVAL;
1913                 if (api_version > VDUSE_API_VERSION)
1914                         break;
1915
1916                 ret = 0;
1917                 control->api_version = api_version;
1918                 break;
1919         }
1920         case VDUSE_CREATE_DEV: {
1921                 struct vduse_dev_config config;
1922                 unsigned long size = offsetof(struct vduse_dev_config, config);
1923                 void *buf;
1924
1925                 ret = -EFAULT;
1926                 if (copy_from_user(&config, argp, size))
1927                         break;
1928
1929                 ret = -EINVAL;
1930                 if (!vduse_validate_config(&config))
1931                         break;
1932
1933                 buf = vmemdup_user(argp + size, config.config_size);
1934                 if (IS_ERR(buf)) {
1935                         ret = PTR_ERR(buf);
1936                         break;
1937                 }
1938                 config.name[VDUSE_NAME_MAX - 1] = '\0';
1939                 ret = vduse_create_dev(&config, buf, control->api_version);
1940                 if (ret)
1941                         kvfree(buf);
1942                 break;
1943         }
1944         case VDUSE_DESTROY_DEV: {
1945                 char name[VDUSE_NAME_MAX];
1946
1947                 ret = -EFAULT;
1948                 if (copy_from_user(name, argp, VDUSE_NAME_MAX))
1949                         break;
1950
1951                 name[VDUSE_NAME_MAX - 1] = '\0';
1952                 ret = vduse_destroy_dev(name);
1953                 break;
1954         }
1955         default:
1956                 ret = -EINVAL;
1957                 break;
1958         }
1959         mutex_unlock(&vduse_lock);
1960
1961         return ret;
1962 }
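
/*
 * Illustrative userspace sketch (not part of this file): driving the control
 * ioctls above.  It opens /dev/vduse/control, pins the API version and
 * submits VDUSE_CREATE_DEV; the bytes following the header become the
 * device's read-only config space.  The capacity value and error handling
 * are simplified assumptions for the example (endianness conversion omitted).
 */
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/vduse.h>
#include <linux/virtio_ids.h>
#include <linux/virtio_config.h>
#include <linux/virtio_blk.h>

static int vduse_example_create_blk(const char *name)
{
	struct virtio_blk_config blk = { .capacity = 8 * 1024 * 1024 }; /* 512B sectors */
	size_t len = sizeof(struct vduse_dev_config) + sizeof(blk);
	struct vduse_dev_config *cfg = calloc(1, len);
	__u64 version = VDUSE_API_VERSION;
	int ctrl, ret = -1;

	ctrl = open("/dev/vduse/control", O_RDWR);
	if (ctrl < 0 || !cfg)
		goto out;

	/* requested version must not exceed the kernel's VDUSE_API_VERSION */
	if (ioctl(ctrl, VDUSE_SET_API_VERSION, &version))
		goto out;

	strncpy(cfg->name, name, VDUSE_NAME_MAX - 1);
	cfg->device_id = VIRTIO_ID_BLOCK;
	cfg->features = (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
			(1ULL << VIRTIO_F_VERSION_1);
	cfg->vq_num = 1;
	cfg->vq_align = 4096;
	cfg->config_size = sizeof(blk);
	memcpy(cfg->config, &blk, sizeof(blk));

	/* on success /dev/vduse/<name> and the class device appear */
	ret = ioctl(ctrl, VDUSE_CREATE_DEV, cfg);
out:
	if (ctrl >= 0)
		close(ctrl);
	free(cfg);
	return ret;
}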
1963
1964 static int vduse_release(struct inode *inode, struct file *file)
1965 {
1966         struct vduse_control *control = file->private_data;
1967
1968         kfree(control);
1969         return 0;
1970 }
1971
1972 static int vduse_open(struct inode *inode, struct file *file)
1973 {
1974         struct vduse_control *control;
1975
1976         control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
1977         if (!control)
1978                 return -ENOMEM;
1979
1980         control->api_version = VDUSE_API_VERSION;
1981         file->private_data = control;
1982
1983         return 0;
1984 }
1985
1986 static const struct file_operations vduse_ctrl_fops = {
1987         .owner          = THIS_MODULE,
1988         .open           = vduse_open,
1989         .release        = vduse_release,
1990         .unlocked_ioctl = vduse_ioctl,
1991         .compat_ioctl   = compat_ptr_ioctl,
1992         .llseek         = noop_llseek,
1993 };
1994
1995 struct vduse_mgmt_dev {
1996         struct vdpa_mgmt_dev mgmt_dev;
1997         struct device dev;
1998 };
1999
2000 static struct vduse_mgmt_dev *vduse_mgmt;
2001
2002 static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
2003 {
2004         struct vduse_vdpa *vdev;
2005         int ret;
2006
2007         if (dev->vdev)
2008                 return -EEXIST;
2009
2010         vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
2011                                  &vduse_vdpa_config_ops, 1, 1, name, true);
2012         if (IS_ERR(vdev))
2013                 return PTR_ERR(vdev);
2014
2015         dev->vdev = vdev;
2016         vdev->dev = dev;
2017         vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
2018         ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
2019         if (ret) {
2020                 put_device(&vdev->vdpa.dev);
2021                 return ret;
2022         }
2023         set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
2024         vdev->vdpa.dma_dev = &vdev->vdpa.dev;
2025         vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;
2026
2027         return 0;
2028 }
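
/*
 * Note (editorial): the vDPA device acts as its own DMA device here --
 * dma_dev points back at vdev->vdpa.dev and vduse_dev_dma_ops is installed,
 * so buffers mapped by the virtio core are routed through the VDUSE IOVA
 * domain (bounce pages or userspace-registered memory) instead of a
 * physical IOMMU.
 */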
2029
2030 static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
2031                         const struct vdpa_dev_set_config *config)
2032 {
2033         struct vduse_dev *dev;
2034         int ret;
2035
2036         mutex_lock(&vduse_lock);
2037         dev = vduse_find_dev(name);
2038         if (!dev || !vduse_dev_is_ready(dev)) {
2039                 mutex_unlock(&vduse_lock);
2040                 return -EINVAL;
2041         }
2042         ret = vduse_dev_init_vdpa(dev, name);
2043         mutex_unlock(&vduse_lock);
2044         if (ret)
2045                 return ret;
2046
2047         mutex_lock(&dev->domain_lock);
2048         if (!dev->domain)
2049                 dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
2050                                                   dev->bounce_size);
2051         mutex_unlock(&dev->domain_lock);
2052         if (!dev->domain) {
2053                 put_device(&dev->vdev->vdpa.dev);
2054                 return -ENOMEM;
2055         }
2056
2057         ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
2058         if (ret) {
2059                 put_device(&dev->vdev->vdpa.dev);
2060                 mutex_lock(&dev->domain_lock);
2061                 vduse_domain_destroy(dev->domain);
2062                 dev->domain = NULL;
2063                 mutex_unlock(&dev->domain_lock);
2064                 return ret;
2065         }
2066
2067         return 0;
2068 }
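
/*
 * Note (editorial): this callback is reached through the vdpa management
 * netlink interface once userspace has configured the device and its
 * virtqueues, e.g. via the iproute2 vdpa tool:
 *
 *	vdpa dev add name vduse-blk-0 mgmtdev vduse
 *
 * The IOVA domain is created lazily on the first add using the current
 * bounce_size, which is why bounce_size_store() above rejects changes once
 * dev->domain exists.
 */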
2069
2070 static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
2071 {
2072         _vdpa_unregister_device(dev);
2073 }
2074
2075 static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
2076         .dev_add = vdpa_dev_add,
2077         .dev_del = vdpa_dev_del,
2078 };
2079
2080 static struct virtio_device_id id_table[] = {
2081         { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
2082         { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
2083         { 0 },
2084 };
2085
2086 static void vduse_mgmtdev_release(struct device *dev)
2087 {
2088         struct vduse_mgmt_dev *mgmt_dev;
2089
2090         mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
2091         kfree(mgmt_dev);
2092 }
2093
2094 static int vduse_mgmtdev_init(void)
2095 {
2096         int ret;
2097
2098         vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
2099         if (!vduse_mgmt)
2100                 return -ENOMEM;
2101
2102         ret = dev_set_name(&vduse_mgmt->dev, "vduse");
2103         if (ret) {
2104                 kfree(vduse_mgmt);
2105                 return ret;
2106         }
2107
2108         vduse_mgmt->dev.release = vduse_mgmtdev_release;
2109
2110         ret = device_register(&vduse_mgmt->dev);
2111         if (ret)
2112                 goto dev_reg_err;
2113
2114         vduse_mgmt->mgmt_dev.id_table = id_table;
2115         vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
2116         vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
2117         ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
2118         if (ret)
2119                 device_unregister(&vduse_mgmt->dev);
2120
2121         return ret;
2122
2123 dev_reg_err:
2124         put_device(&vduse_mgmt->dev);
2125         return ret;
2126 }
2127
2128 static void vduse_mgmtdev_exit(void)
2129 {
2130         vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
2131         device_unregister(&vduse_mgmt->dev);
2132 }
2133
2134 static int vduse_init(void)
2135 {
2136         int ret;
2137         struct device *dev;
2138
2139         ret = class_register(&vduse_class);
2140         if (ret)
2141                 return ret;
2142
2143         ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
2144         if (ret)
2145                 goto err_chardev_region;
2146
2147         /* /dev/vduse/control */
2148         cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
2149         vduse_ctrl_cdev.owner = THIS_MODULE;
2150         ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
2151         if (ret)
2152                 goto err_ctrl_cdev;
2153
2154         dev = device_create(&vduse_class, NULL, vduse_major, NULL, "control");
2155         if (IS_ERR(dev)) {
2156                 ret = PTR_ERR(dev);
2157                 goto err_device;
2158         }
2159
2160         /* /dev/vduse/$DEVICE */
2161         cdev_init(&vduse_cdev, &vduse_dev_fops);
2162         vduse_cdev.owner = THIS_MODULE;
2163         ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
2164                        VDUSE_DEV_MAX - 1);
2165         if (ret)
2166                 goto err_cdev;
2167
2168         ret = -ENOMEM;
2169         vduse_irq_wq = alloc_workqueue("vduse-irq",
2170                                 WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
2171         if (!vduse_irq_wq)
2172                 goto err_wq;
2173
2174         vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
2175         if (!vduse_irq_bound_wq)
2176                 goto err_bound_wq;
2177
2178         ret = vduse_domain_init();
2179         if (ret)
2180                 goto err_domain;
2181
2182         ret = vduse_mgmtdev_init();
2183         if (ret)
2184                 goto err_mgmtdev;
2185
2186         return 0;
2187 err_mgmtdev:
2188         vduse_domain_exit();
2189 err_domain:
2190         destroy_workqueue(vduse_irq_bound_wq);
2191 err_bound_wq:
2192         destroy_workqueue(vduse_irq_wq);
2193 err_wq:
2194         cdev_del(&vduse_cdev);
2195 err_cdev:
2196         device_destroy(&vduse_class, vduse_major);
2197 err_device:
2198         cdev_del(&vduse_ctrl_cdev);
2199 err_ctrl_cdev:
2200         unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
2201 err_chardev_region:
2202         class_unregister(&vduse_class);
2203         return ret;
2204 }
2205 module_init(vduse_init);
2206
2207 static void vduse_exit(void)
2208 {
2209         vduse_mgmtdev_exit();
2210         vduse_domain_exit();
2211         destroy_workqueue(vduse_irq_bound_wq);
2212         destroy_workqueue(vduse_irq_wq);
2213         cdev_del(&vduse_cdev);
2214         device_destroy(&vduse_class, vduse_major);
2215         cdev_del(&vduse_ctrl_cdev);
2216         unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
2217         class_unregister(&vduse_class);
2218 }
2219 module_exit(vduse_exit);
2220
2221 MODULE_LICENSE(DRV_LICENSE);
2222 MODULE_AUTHOR(DRV_AUTHOR);
2223 MODULE_DESCRIPTION(DRV_DESC);