1 // SPDX-License-Identifier: GPL-2.0-only
3 * VDUSE: vDPA Device in Userspace
5 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
11 #include "linux/virtio_net.h"
12 #include <linux/init.h>
13 #include <linux/module.h>
14 #include <linux/cdev.h>
15 #include <linux/device.h>
16 #include <linux/eventfd.h>
17 #include <linux/slab.h>
18 #include <linux/wait.h>
19 #include <linux/dma-map-ops.h>
20 #include <linux/poll.h>
21 #include <linux/file.h>
22 #include <linux/uio.h>
23 #include <linux/vdpa.h>
24 #include <linux/nospec.h>
25 #include <linux/vmalloc.h>
26 #include <linux/sched/mm.h>
27 #include <uapi/linux/vduse.h>
28 #include <uapi/linux/vdpa.h>
29 #include <uapi/linux/virtio_config.h>
30 #include <uapi/linux/virtio_ids.h>
31 #include <uapi/linux/virtio_blk.h>
32 #include <uapi/linux/virtio_ring.h>
33 #include <linux/mod_devicetable.h>
35 #include "iova_domain.h"
38 #define DRV_DESC "vDPA Device in Userspace"
39 #define DRV_LICENSE "GPL v2"
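/*
 * Limits and defaults (descriptive summary of the constants below):
 * - VDUSE_DEV_MAX bounds the number of VDUSE devices by the char-dev
 *   minor space.
 * - The bounce buffer used for data-path mappings defaults to
 *   VDUSE_BOUNCE_SIZE and can be tuned between VDUSE_MIN_BOUNCE_SIZE
 *   and VDUSE_MAX_BOUNCE_SIZE via the bounce_size sysfs attribute.
 * - VDUSE_MSG_DEFAULT_TIMEOUT is the default timeout (in seconds) for
 *   kernel-to-userspace control messages.
 * - IRQ_UNBOUND means a virtqueue callback is not pinned to any CPU.
 */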
41 #define VDUSE_DEV_MAX (1U << MINORBITS)
42 #define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024)
43 #define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024)
44 #define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
45 /* 128 MB reserved for virtqueue creation */
46 #define VDUSE_IOVA_SIZE (VDUSE_MAX_BOUNCE_SIZE + 128 * 1024 * 1024)
47 #define VDUSE_MSG_DEFAULT_TIMEOUT 30
49 #define IRQ_UNBOUND -1
51 struct vduse_virtqueue {
58 struct vdpa_vq_state state;
63 struct eventfd_ctx *kickfd;
64 struct vdpa_callback cb;
65 struct work_struct inject;
66 struct work_struct kick;
67 int irq_effective_cpu;
68 struct cpumask irq_affinity;
75 struct vdpa_device vdpa;
76 struct vduse_dev *dev;
87 struct vduse_vdpa *vdev;
89 struct vduse_virtqueue **vqs;
90 struct vduse_iova_domain *domain;
96 wait_queue_head_t waitq;
97 struct list_head send_list;
98 struct list_head recv_list;
99 struct vdpa_callback config_cb;
100 struct work_struct inject;
102 struct rw_semaphore rwsem;
117 struct vduse_umem *umem;
118 struct mutex mem_lock;
119 unsigned int bounce_size;
120 struct mutex domain_lock;
123 struct vduse_dev_msg {
124 struct vduse_dev_request req;
125 struct vduse_dev_response resp;
126 struct list_head list;
127 wait_queue_head_t waitq;
131 struct vduse_control {
135 static DEFINE_MUTEX(vduse_lock);
136 static DEFINE_IDR(vduse_idr);
138 static dev_t vduse_major;
139 static struct cdev vduse_ctrl_cdev;
140 static struct cdev vduse_cdev;
141 static struct workqueue_struct *vduse_irq_wq;
142 static struct workqueue_struct *vduse_irq_bound_wq;
144 static u32 allowed_device_id[] = {
149 static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
151 struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);
156 static inline struct vduse_dev *dev_to_vduse(struct device *dev)
158 struct vdpa_device *vdpa = dev_to_vdpa(dev);
160 return vdpa_to_vduse(vdpa);
163 static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
166 struct vduse_dev_msg *msg;
168 list_for_each_entry(msg, head, list) {
169 if (msg->req.request_id == request_id) {
170 list_del(&msg->list);
178 static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
180 struct vduse_dev_msg *msg = NULL;
182 if (!list_empty(head)) {
183 msg = list_first_entry(head, struct vduse_dev_msg, list);
184 list_del(&msg->list);
190 static void vduse_enqueue_msg(struct list_head *head,
191 struct vduse_dev_msg *msg)
193 list_add_tail(&msg->list, head);
196 static void vduse_dev_broken(struct vduse_dev *dev)
198 struct vduse_dev_msg *msg, *tmp;
200 if (unlikely(dev->broken))
203 list_splice_init(&dev->recv_list, &dev->send_list);
204 list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
205 list_del(&msg->list);
207 msg->resp.result = VDUSE_REQ_RESULT_FAILED;
208 wake_up(&msg->waitq);
211 wake_up(&dev->waitq);
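/*
 * Synchronous kernel->userspace request path: the request is queued on
 * send_list and readers of the VDUSE char device are woken up.  The
 * caller then sleeps until userspace writes back a matching response,
 * or until msg_timeout expires, in which case the device is marked
 * broken and all pending messages are failed.
 */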
214 static int vduse_dev_msg_sync(struct vduse_dev *dev,
215 struct vduse_dev_msg *msg)
219 if (unlikely(dev->broken))
222 init_waitqueue_head(&msg->waitq);
223 spin_lock(&dev->msg_lock);
224 if (unlikely(dev->broken)) {
225 spin_unlock(&dev->msg_lock);
228 msg->req.request_id = dev->msg_unique++;
229 vduse_enqueue_msg(&dev->send_list, msg);
230 wake_up(&dev->waitq);
231 spin_unlock(&dev->msg_lock);
232 if (dev->msg_timeout)
233 ret = wait_event_killable_timeout(msg->waitq, msg->completed,
234 (long)dev->msg_timeout * HZ);
236 ret = wait_event_killable(msg->waitq, msg->completed);
238 spin_lock(&dev->msg_lock);
239 if (!msg->completed) {
240 list_del(&msg->list);
241 msg->resp.result = VDUSE_REQ_RESULT_FAILED;
242 /* Mark the device as malfunctioning when the request times out */
244 vduse_dev_broken(dev);
246 ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
247 spin_unlock(&dev->msg_lock);
252 static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
253 struct vduse_virtqueue *vq,
254 struct vdpa_vq_state_packed *packed)
256 struct vduse_dev_msg msg = { 0 };
259 msg.req.type = VDUSE_GET_VQ_STATE;
260 msg.req.vq_state.index = vq->index;
262 ret = vduse_dev_msg_sync(dev, &msg);
266 packed->last_avail_counter =
267 msg.resp.vq_state.packed.last_avail_counter & 0x0001;
268 packed->last_avail_idx =
269 msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
270 packed->last_used_counter =
271 msg.resp.vq_state.packed.last_used_counter & 0x0001;
272 packed->last_used_idx =
273 msg.resp.vq_state.packed.last_used_idx & 0x7FFF;
278 static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
279 struct vduse_virtqueue *vq,
280 struct vdpa_vq_state_split *split)
282 struct vduse_dev_msg msg = { 0 };
285 msg.req.type = VDUSE_GET_VQ_STATE;
286 msg.req.vq_state.index = vq->index;
288 ret = vduse_dev_msg_sync(dev, &msg);
292 split->avail_index = msg.resp.vq_state.split.avail_index;
297 static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
299 struct vduse_dev_msg msg = { 0 };
301 msg.req.type = VDUSE_SET_STATUS;
302 msg.req.s.status = status;
304 return vduse_dev_msg_sync(dev, &msg);
307 static int vduse_dev_update_iotlb(struct vduse_dev *dev,
310 struct vduse_dev_msg msg = { 0 };
315 msg.req.type = VDUSE_UPDATE_IOTLB;
316 msg.req.iova.start = start;
317 msg.req.iova.last = last;
319 return vduse_dev_msg_sync(dev, &msg);
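/*
 * read()/write() implement the message transport with the userspace
 * daemon: read_iter() hands out the next pending request from
 * send_list and parks it on recv_list, while write_iter() matches a
 * response against recv_list by request_id and completes the waiter.
 */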
322 static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
324 struct file *file = iocb->ki_filp;
325 struct vduse_dev *dev = file->private_data;
326 struct vduse_dev_msg *msg;
327 int size = sizeof(struct vduse_dev_request);
330 if (iov_iter_count(to) < size)
333 spin_lock(&dev->msg_lock);
335 msg = vduse_dequeue_msg(&dev->send_list);
340 if (file->f_flags & O_NONBLOCK)
343 spin_unlock(&dev->msg_lock);
344 ret = wait_event_interruptible_exclusive(dev->waitq,
345 !list_empty(&dev->send_list));
349 spin_lock(&dev->msg_lock);
351 spin_unlock(&dev->msg_lock);
352 ret = copy_to_iter(&msg->req, size, to);
353 spin_lock(&dev->msg_lock);
356 vduse_enqueue_msg(&dev->send_list, msg);
359 vduse_enqueue_msg(&dev->recv_list, msg);
361 spin_unlock(&dev->msg_lock);
366 static bool is_mem_zero(const char *ptr, int size)
370 for (i = 0; i < size; i++) {
377 static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
379 struct file *file = iocb->ki_filp;
380 struct vduse_dev *dev = file->private_data;
381 struct vduse_dev_response resp;
382 struct vduse_dev_msg *msg;
385 ret = copy_from_iter(&resp, sizeof(resp), from);
386 if (ret != sizeof(resp))
389 if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
392 spin_lock(&dev->msg_lock);
393 msg = vduse_find_msg(&dev->recv_list, resp.request_id);
399 memcpy(&msg->resp, &resp, sizeof(resp));
401 wake_up(&msg->waitq);
403 spin_unlock(&dev->msg_lock);
408 static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
410 struct vduse_dev *dev = file->private_data;
413 poll_wait(file, &dev->waitq, wait);
415 spin_lock(&dev->msg_lock);
417 if (unlikely(dev->broken))
419 if (!list_empty(&dev->send_list))
420 mask |= EPOLLIN | EPOLLRDNORM;
421 if (!list_empty(&dev->recv_list))
422 mask |= EPOLLOUT | EPOLLWRNORM;
424 spin_unlock(&dev->msg_lock);
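/*
 * Device reset: drop the negotiated driver features, clear the config
 * and per-virtqueue callbacks/kickfds and flush any pending injection
 * work.  Done under the rwsem write lock so that no interrupt
 * injection runs concurrently with the reset.
 */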
429 static void vduse_dev_reset(struct vduse_dev *dev)
432 struct vduse_iova_domain *domain = dev->domain;
434 /* The coherent mappings are handled in vduse_dev_free_coherent() */
435 if (domain && domain->bounce_map)
436 vduse_domain_reset_bounce_map(domain);
438 down_write(&dev->rwsem);
441 dev->driver_features = 0;
443 spin_lock(&dev->irq_lock);
444 dev->config_cb.callback = NULL;
445 dev->config_cb.private = NULL;
446 spin_unlock(&dev->irq_lock);
447 flush_work(&dev->inject);
449 for (i = 0; i < dev->vq_num; i++) {
450 struct vduse_virtqueue *vq = dev->vqs[i];
457 memset(&vq->state, 0, sizeof(vq->state));
459 spin_lock(&vq->kick_lock);
462 eventfd_ctx_put(vq->kickfd);
464 spin_unlock(&vq->kick_lock);
466 spin_lock(&vq->irq_lock);
467 vq->cb.callback = NULL;
468 vq->cb.private = NULL;
469 vq->cb.trigger = NULL;
470 spin_unlock(&vq->irq_lock);
471 flush_work(&vq->inject);
472 flush_work(&vq->kick);
475 up_write(&dev->rwsem);
478 static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
479 u64 desc_area, u64 driver_area,
482 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
483 struct vduse_virtqueue *vq = dev->vqs[idx];
485 vq->desc_addr = desc_area;
486 vq->driver_addr = driver_area;
487 vq->device_addr = device_area;
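/*
 * Virtqueue kick path: notify the userspace daemon by signalling the
 * per-virtqueue kick eventfd.  If signalling is not allowed from the
 * current context, the kick is deferred to a work item.
 */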
492 static void vduse_vq_kick(struct vduse_virtqueue *vq)
494 spin_lock(&vq->kick_lock);
499 eventfd_signal(vq->kickfd);
503 spin_unlock(&vq->kick_lock);
506 static void vduse_vq_kick_work(struct work_struct *work)
508 struct vduse_virtqueue *vq = container_of(work,
509 struct vduse_virtqueue, kick);
514 static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
516 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
517 struct vduse_virtqueue *vq = dev->vqs[idx];
519 if (!eventfd_signal_allowed()) {
520 schedule_work(&vq->kick);
526 static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
527 struct vdpa_callback *cb)
529 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
530 struct vduse_virtqueue *vq = dev->vqs[idx];
532 spin_lock(&vq->irq_lock);
533 vq->cb.callback = cb->callback;
534 vq->cb.private = cb->private;
535 vq->cb.trigger = cb->trigger;
536 spin_unlock(&vq->irq_lock);
539 static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
541 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
542 struct vduse_virtqueue *vq = dev->vqs[idx];
547 static u16 vduse_vdpa_get_vq_size(struct vdpa_device *vdpa, u16 idx)
549 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
550 struct vduse_virtqueue *vq = dev->vqs[idx];
558 static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
561 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
562 struct vduse_virtqueue *vq = dev->vqs[idx];
567 static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
569 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
570 struct vduse_virtqueue *vq = dev->vqs[idx];
575 static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
576 const struct vdpa_vq_state *state)
578 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
579 struct vduse_virtqueue *vq = dev->vqs[idx];
581 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
582 vq->state.packed.last_avail_counter =
583 state->packed.last_avail_counter;
584 vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
585 vq->state.packed.last_used_counter =
586 state->packed.last_used_counter;
587 vq->state.packed.last_used_idx = state->packed.last_used_idx;
589 vq->state.split.avail_index = state->split.avail_index;
594 static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
595 struct vdpa_vq_state *state)
597 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
598 struct vduse_virtqueue *vq = dev->vqs[idx];
600 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
601 return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);
603 return vduse_dev_get_vq_state_split(dev, vq, &state->split);
606 static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
608 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
610 return dev->vq_align;
613 static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
615 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
617 return dev->device_features;
620 static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
622 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
624 dev->driver_features = features;
628 static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
630 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
632 return dev->driver_features;
635 static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
636 struct vdpa_callback *cb)
638 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
640 spin_lock(&dev->irq_lock);
641 dev->config_cb.callback = cb->callback;
642 dev->config_cb.private = cb->private;
643 spin_unlock(&dev->irq_lock);
646 static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
648 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
652 for (i = 0; i < dev->vq_num; i++)
653 if (num_max < dev->vqs[i]->num_max)
654 num_max = dev->vqs[i]->num_max;
659 static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
661 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
663 return dev->device_id;
666 static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
668 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
670 return dev->vendor_id;
673 static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
675 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
680 static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
682 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
684 if (vduse_dev_set_status(dev, status))
687 dev->status = status;
690 static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
692 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
694 return dev->config_size;
697 static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
698 void *buf, unsigned int len)
700 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
702 /* Initialize the buffer in case of partial copy. */
705 if (offset > dev->config_size)
708 if (len > dev->config_size - offset)
709 len = dev->config_size - offset;
711 memcpy(buf, dev->config + offset, len);
714 static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
715 const void *buf, unsigned int len)
717 /* Now we only support read-only configuration space */
720 static int vduse_vdpa_reset(struct vdpa_device *vdpa)
722 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
723 int ret = vduse_dev_set_status(dev, 0);
725 vduse_dev_reset(dev);
730 static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
732 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
734 return dev->generation;
737 static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
738 const struct cpumask *cpu_mask)
740 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
743 cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask);
745 cpumask_setall(&dev->vqs[idx]->irq_affinity);
750 static const struct cpumask *
751 vduse_vdpa_get_vq_affinity(struct vdpa_device *vdpa, u16 idx)
753 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
755 return &dev->vqs[idx]->irq_affinity;
758 static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
760 struct vhost_iotlb *iotlb)
762 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
765 ret = vduse_domain_set_map(dev->domain, iotlb);
769 ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
771 vduse_domain_clear_map(dev->domain, iotlb);
778 static void vduse_vdpa_free(struct vdpa_device *vdpa)
780 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
785 static const struct vdpa_config_ops vduse_vdpa_config_ops = {
786 .set_vq_address = vduse_vdpa_set_vq_address,
787 .kick_vq = vduse_vdpa_kick_vq,
788 .set_vq_cb = vduse_vdpa_set_vq_cb,
789 .set_vq_num = vduse_vdpa_set_vq_num,
790 .get_vq_size = vduse_vdpa_get_vq_size,
791 .set_vq_ready = vduse_vdpa_set_vq_ready,
792 .get_vq_ready = vduse_vdpa_get_vq_ready,
793 .set_vq_state = vduse_vdpa_set_vq_state,
794 .get_vq_state = vduse_vdpa_get_vq_state,
795 .get_vq_align = vduse_vdpa_get_vq_align,
796 .get_device_features = vduse_vdpa_get_device_features,
797 .set_driver_features = vduse_vdpa_set_driver_features,
798 .get_driver_features = vduse_vdpa_get_driver_features,
799 .set_config_cb = vduse_vdpa_set_config_cb,
800 .get_vq_num_max = vduse_vdpa_get_vq_num_max,
801 .get_device_id = vduse_vdpa_get_device_id,
802 .get_vendor_id = vduse_vdpa_get_vendor_id,
803 .get_status = vduse_vdpa_get_status,
804 .set_status = vduse_vdpa_set_status,
805 .get_config_size = vduse_vdpa_get_config_size,
806 .get_config = vduse_vdpa_get_config,
807 .set_config = vduse_vdpa_set_config,
808 .get_generation = vduse_vdpa_get_generation,
809 .set_vq_affinity = vduse_vdpa_set_vq_affinity,
810 .get_vq_affinity = vduse_vdpa_get_vq_affinity,
811 .reset = vduse_vdpa_reset,
812 .set_map = vduse_vdpa_set_map,
813 .free = vduse_vdpa_free,
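/*
 * Custom dma_map_ops: DMA mappings performed on behalf of the vDPA
 * device are redirected into the per-device IOVA domain (see
 * set_dma_ops() in vduse_dev_init_vdpa()), typically landing in the
 * bounce buffer that the userspace daemon can access.
 */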
816 static void vduse_dev_sync_single_for_device(struct device *dev,
817 dma_addr_t dma_addr, size_t size,
818 enum dma_data_direction dir)
820 struct vduse_dev *vdev = dev_to_vduse(dev);
821 struct vduse_iova_domain *domain = vdev->domain;
823 vduse_domain_sync_single_for_device(domain, dma_addr, size, dir);
826 static void vduse_dev_sync_single_for_cpu(struct device *dev,
827 dma_addr_t dma_addr, size_t size,
828 enum dma_data_direction dir)
830 struct vduse_dev *vdev = dev_to_vduse(dev);
831 struct vduse_iova_domain *domain = vdev->domain;
833 vduse_domain_sync_single_for_cpu(domain, dma_addr, size, dir);
836 static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
837 unsigned long offset, size_t size,
838 enum dma_data_direction dir,
841 struct vduse_dev *vdev = dev_to_vduse(dev);
842 struct vduse_iova_domain *domain = vdev->domain;
844 return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
847 static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
848 size_t size, enum dma_data_direction dir,
851 struct vduse_dev *vdev = dev_to_vduse(dev);
852 struct vduse_iova_domain *domain = vdev->domain;
854 return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
857 static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
858 dma_addr_t *dma_addr, gfp_t flag,
861 struct vduse_dev *vdev = dev_to_vduse(dev);
862 struct vduse_iova_domain *domain = vdev->domain;
866 *dma_addr = DMA_MAPPING_ERROR;
867 addr = vduse_domain_alloc_coherent(domain, size,
868 (dma_addr_t *)&iova, flag, attrs);
872 *dma_addr = (dma_addr_t)iova;
877 static void vduse_dev_free_coherent(struct device *dev, size_t size,
878 void *vaddr, dma_addr_t dma_addr,
881 struct vduse_dev *vdev = dev_to_vduse(dev);
882 struct vduse_iova_domain *domain = vdev->domain;
884 vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
887 static size_t vduse_dev_max_mapping_size(struct device *dev)
889 struct vduse_dev *vdev = dev_to_vduse(dev);
890 struct vduse_iova_domain *domain = vdev->domain;
892 return domain->bounce_size;
895 static const struct dma_map_ops vduse_dev_dma_ops = {
896 .sync_single_for_device = vduse_dev_sync_single_for_device,
897 .sync_single_for_cpu = vduse_dev_sync_single_for_cpu,
898 .map_page = vduse_dev_map_page,
899 .unmap_page = vduse_dev_unmap_page,
900 .alloc = vduse_dev_alloc_coherent,
901 .free = vduse_dev_free_coherent,
902 .max_mapping_size = vduse_dev_max_mapping_size,
905 static unsigned int perm_to_file_flags(u8 perm)
907 unsigned int flags = 0;
910 case VDUSE_ACCESS_WO:
913 case VDUSE_ACCESS_RO:
916 case VDUSE_ACCESS_RW:
920 WARN(1, "invalidate vhost IOTLB permission\n");
927 static int vduse_kickfd_setup(struct vduse_dev *dev,
928 struct vduse_vq_eventfd *eventfd)
930 struct eventfd_ctx *ctx = NULL;
931 struct vduse_virtqueue *vq;
934 if (eventfd->index >= dev->vq_num)
937 index = array_index_nospec(eventfd->index, dev->vq_num);
938 vq = dev->vqs[index];
939 if (eventfd->fd >= 0) {
940 ctx = eventfd_ctx_fdget(eventfd->fd);
943 } else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
946 spin_lock(&vq->kick_lock);
948 eventfd_ctx_put(vq->kickfd);
950 if (vq->ready && vq->kicked && vq->kickfd) {
951 eventfd_signal(vq->kickfd);
954 spin_unlock(&vq->kick_lock);
959 static bool vduse_dev_is_ready(struct vduse_dev *dev)
963 for (i = 0; i < dev->vq_num; i++)
964 if (!dev->vqs[i]->num_max)
970 static void vduse_dev_irq_inject(struct work_struct *work)
972 struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
974 spin_lock_bh(&dev->irq_lock);
975 if (dev->config_cb.callback)
976 dev->config_cb.callback(dev->config_cb.private);
977 spin_unlock_bh(&dev->irq_lock);
980 static void vduse_vq_irq_inject(struct work_struct *work)
982 struct vduse_virtqueue *vq = container_of(work,
983 struct vduse_virtqueue, inject);
985 spin_lock_bh(&vq->irq_lock);
986 if (vq->ready && vq->cb.callback)
987 vq->cb.callback(vq->cb.private);
988 spin_unlock_bh(&vq->irq_lock);
991 static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
998 spin_lock_irq(&vq->irq_lock);
999 if (vq->ready && vq->cb.trigger) {
1000 eventfd_signal(vq->cb.trigger);
1003 spin_unlock_irq(&vq->irq_lock);
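/*
 * Interrupt injection is deferred to a workqueue and only performed
 * while the driver has set DRIVER_OK.  A virtqueue callback may be
 * bound to a specific CPU (vduse_irq_bound_wq) or run unbound
 * (vduse_irq_wq), depending on irq_effective_cpu.
 */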
1008 static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
1009 struct work_struct *irq_work,
1010 int irq_effective_cpu)
1014 down_read(&dev->rwsem);
1015 if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
1019 if (irq_effective_cpu == IRQ_UNBOUND)
1020 queue_work(vduse_irq_wq, irq_work);
1022 queue_work_on(irq_effective_cpu,
1023 vduse_irq_bound_wq, irq_work);
1025 up_read(&dev->rwsem);
1030 static int vduse_dev_dereg_umem(struct vduse_dev *dev,
1035 mutex_lock(&dev->mem_lock);
1044 if (dev->umem->iova != iova || size != dev->domain->bounce_size)
1047 vduse_domain_remove_user_bounce_pages(dev->domain);
1048 unpin_user_pages_dirty_lock(dev->umem->pages,
1049 dev->umem->npages, true);
1050 atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
1051 mmdrop(dev->umem->mm);
1052 vfree(dev->umem->pages);
1057 mutex_unlock(&dev->mem_lock);
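/*
 * Userspace memory registration: the daemon may supply its own buffer
 * to back the bounce pages.  The pages are pinned with FOLL_LONGTERM,
 * accounted against RLIMIT_MEMLOCK and installed into the IOVA domain
 * as user bounce pages.
 */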
1061 static int vduse_dev_reg_umem(struct vduse_dev *dev,
1062 u64 iova, u64 uaddr, u64 size)
1064 struct page **page_list = NULL;
1065 struct vduse_umem *umem = NULL;
1067 unsigned long npages, lock_limit;
1070 if (!dev->domain || !dev->domain->bounce_map ||
1071 size != dev->domain->bounce_size ||
1072 iova != 0 || uaddr & ~PAGE_MASK)
1075 mutex_lock(&dev->mem_lock);
1081 npages = size >> PAGE_SHIFT;
1082 page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
1083 GFP_KERNEL_ACCOUNT);
1084 umem = kzalloc(sizeof(*umem), GFP_KERNEL);
1085 if (!page_list || !umem)
1088 mmap_read_lock(current->mm);
1090 lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
1091 if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
1094 pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
1096 if (pinned != npages) {
1097 ret = pinned < 0 ? pinned : -ENOMEM;
1101 ret = vduse_domain_add_user_bounce_pages(dev->domain,
1106 atomic64_add(npages, &current->mm->pinned_vm);
1108 umem->pages = page_list;
1109 umem->npages = pinned;
1111 umem->mm = current->mm;
1112 mmgrab(current->mm);
1116 if (ret && pinned > 0)
1117 unpin_user_pages(page_list, pinned);
1119 mmap_read_unlock(current->mm);
1125 mutex_unlock(&dev->mem_lock);
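/*
 * Round-robin selection of the CPU used for bound interrupt injection:
 * advance to the next online CPU in the virtqueue's irq_affinity mask,
 * falling back to IRQ_UNBOUND when the end of the mask is reached.
 */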
1129 static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
1131 int curr_cpu = vq->irq_effective_cpu;
1134 curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
1135 if (cpu_online(curr_cpu))
1138 if (curr_cpu >= nr_cpu_ids)
1139 curr_cpu = IRQ_UNBOUND;
1142 vq->irq_effective_cpu = curr_cpu;
1145 static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
1148 struct vduse_dev *dev = file->private_data;
1149 void __user *argp = (void __user *)arg;
1152 if (unlikely(dev->broken))
1156 case VDUSE_IOTLB_GET_FD: {
1157 struct vduse_iotlb_entry entry;
1158 struct vhost_iotlb_map *map;
1159 struct vdpa_map_file *map_file;
1160 struct file *f = NULL;
1163 if (copy_from_user(&entry, argp, sizeof(entry)))
1167 if (entry.start > entry.last)
1170 mutex_lock(&dev->domain_lock);
1172 mutex_unlock(&dev->domain_lock);
1175 spin_lock(&dev->domain->iotlb_lock);
1176 map = vhost_iotlb_itree_first(dev->domain->iotlb,
1177 entry.start, entry.last);
1179 map_file = (struct vdpa_map_file *)map->opaque;
1180 f = get_file(map_file->file);
1181 entry.offset = map_file->offset;
1182 entry.start = map->start;
1183 entry.last = map->last;
1184 entry.perm = map->perm;
1186 spin_unlock(&dev->domain->iotlb_lock);
1187 mutex_unlock(&dev->domain_lock);
1193 if (copy_to_user(argp, &entry, sizeof(entry))) {
1197 ret = receive_fd(f, NULL, perm_to_file_flags(entry.perm));
1201 case VDUSE_DEV_GET_FEATURES:
1203 * Just mirror what the driver wrote here.
1204 * The driver is expected to check FEATURES_OK later.
1206 ret = put_user(dev->driver_features, (u64 __user *)argp);
1208 case VDUSE_DEV_SET_CONFIG: {
1209 struct vduse_config_data config;
1210 unsigned long size = offsetof(struct vduse_config_data,
1214 if (copy_from_user(&config, argp, size))
1218 if (config.offset > dev->config_size ||
1219 config.length == 0 ||
1220 config.length > dev->config_size - config.offset)
1224 if (copy_from_user(dev->config + config.offset, argp + size,
1231 case VDUSE_DEV_INJECT_CONFIG_IRQ:
1232 ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
1234 case VDUSE_VQ_SETUP: {
1235 struct vduse_vq_config config;
1239 if (copy_from_user(&config, argp, sizeof(config)))
1243 if (config.index >= dev->vq_num)
1246 if (!is_mem_zero((const char *)config.reserved,
1247 sizeof(config.reserved)))
1250 index = array_index_nospec(config.index, dev->vq_num);
1251 dev->vqs[index]->num_max = config.max_size;
1255 case VDUSE_VQ_GET_INFO: {
1256 struct vduse_vq_info vq_info;
1257 struct vduse_virtqueue *vq;
1261 if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
1265 if (vq_info.index >= dev->vq_num)
1268 index = array_index_nospec(vq_info.index, dev->vq_num);
1269 vq = dev->vqs[index];
1270 vq_info.desc_addr = vq->desc_addr;
1271 vq_info.driver_addr = vq->driver_addr;
1272 vq_info.device_addr = vq->device_addr;
1273 vq_info.num = vq->num;
1275 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
1276 vq_info.packed.last_avail_counter =
1277 vq->state.packed.last_avail_counter;
1278 vq_info.packed.last_avail_idx =
1279 vq->state.packed.last_avail_idx;
1280 vq_info.packed.last_used_counter =
1281 vq->state.packed.last_used_counter;
1282 vq_info.packed.last_used_idx =
1283 vq->state.packed.last_used_idx;
1285 vq_info.split.avail_index =
1286 vq->state.split.avail_index;
1288 vq_info.ready = vq->ready;
1291 if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
1297 case VDUSE_VQ_SETUP_KICKFD: {
1298 struct vduse_vq_eventfd eventfd;
1301 if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
1304 ret = vduse_kickfd_setup(dev, &eventfd);
1307 case VDUSE_VQ_INJECT_IRQ: {
1311 if (get_user(index, (u32 __user *)argp))
1315 if (index >= dev->vq_num)
1319 index = array_index_nospec(index, dev->vq_num);
1320 if (!vduse_vq_signal_irqfd(dev->vqs[index])) {
1321 vduse_vq_update_effective_cpu(dev->vqs[index]);
1322 ret = vduse_dev_queue_irq_work(dev,
1323 &dev->vqs[index]->inject,
1324 dev->vqs[index]->irq_effective_cpu);
1328 case VDUSE_IOTLB_REG_UMEM: {
1329 struct vduse_iova_umem umem;
1332 if (copy_from_user(&umem, argp, sizeof(umem)))
1336 if (!is_mem_zero((const char *)umem.reserved,
1337 sizeof(umem.reserved)))
1340 mutex_lock(&dev->domain_lock);
1341 ret = vduse_dev_reg_umem(dev, umem.iova,
1342 umem.uaddr, umem.size);
1343 mutex_unlock(&dev->domain_lock);
1346 case VDUSE_IOTLB_DEREG_UMEM: {
1347 struct vduse_iova_umem umem;
1350 if (copy_from_user(&umem, argp, sizeof(umem)))
1354 if (!is_mem_zero((const char *)umem.reserved,
1355 sizeof(umem.reserved)))
1357 mutex_lock(&dev->domain_lock);
1358 ret = vduse_dev_dereg_umem(dev, umem.iova,
1360 mutex_unlock(&dev->domain_lock);
1363 case VDUSE_IOTLB_GET_INFO: {
1364 struct vduse_iova_info info;
1365 struct vhost_iotlb_map *map;
1368 if (copy_from_user(&info, argp, sizeof(info)))
1372 if (info.start > info.last)
1375 if (!is_mem_zero((const char *)info.reserved,
1376 sizeof(info.reserved)))
1379 mutex_lock(&dev->domain_lock);
1381 mutex_unlock(&dev->domain_lock);
1384 spin_lock(&dev->domain->iotlb_lock);
1385 map = vhost_iotlb_itree_first(dev->domain->iotlb,
1386 info.start, info.last);
1388 info.start = map->start;
1389 info.last = map->last;
1390 info.capability = 0;
1391 if (dev->domain->bounce_map && map->start == 0 &&
1392 map->last == dev->domain->bounce_size - 1)
1393 info.capability |= VDUSE_IOVA_CAP_UMEM;
1395 spin_unlock(&dev->domain->iotlb_lock);
1396 mutex_unlock(&dev->domain_lock);
1401 if (copy_to_user(argp, &info, sizeof(info)))
1415 static int vduse_dev_release(struct inode *inode, struct file *file)
1417 struct vduse_dev *dev = file->private_data;
1419 mutex_lock(&dev->domain_lock);
1421 vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
1422 mutex_unlock(&dev->domain_lock);
1423 spin_lock(&dev->msg_lock);
1424 /* Make sure the inflight messages can be processed after reconnection */
1425 list_splice_init(&dev->recv_list, &dev->send_list);
1426 spin_unlock(&dev->msg_lock);
1427 dev->connected = false;
1432 static struct vduse_dev *vduse_dev_get_from_minor(int minor)
1434 struct vduse_dev *dev;
1436 mutex_lock(&vduse_lock);
1437 dev = idr_find(&vduse_idr, minor);
1438 mutex_unlock(&vduse_lock);
1443 static int vduse_dev_open(struct inode *inode, struct file *file)
1446 struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));
1452 mutex_lock(&dev->lock);
1457 dev->connected = true;
1458 file->private_data = dev;
1460 mutex_unlock(&dev->lock);
1465 static const struct file_operations vduse_dev_fops = {
1466 .owner = THIS_MODULE,
1467 .open = vduse_dev_open,
1468 .release = vduse_dev_release,
1469 .read_iter = vduse_dev_read_iter,
1470 .write_iter = vduse_dev_write_iter,
1471 .poll = vduse_dev_poll,
1472 .unlocked_ioctl = vduse_dev_ioctl,
1473 .compat_ioctl = compat_ptr_ioctl,
1474 .llseek = noop_llseek,
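/*
 * Per-virtqueue sysfs attributes: irq_cb_affinity exposes (and allows
 * updating) the CPU mask used when injecting virtqueue callbacks from
 * the bound workqueue.
 */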
1477 static ssize_t irq_cb_affinity_show(struct vduse_virtqueue *vq, char *buf)
1479 return sprintf(buf, "%*pb\n", cpumask_pr_args(&vq->irq_affinity));
1482 static ssize_t irq_cb_affinity_store(struct vduse_virtqueue *vq,
1483 const char *buf, size_t count)
1485 cpumask_var_t new_value;
1488 if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
1491 ret = cpumask_parse(buf, new_value);
1496 if (!cpumask_intersects(new_value, cpu_online_mask))
1499 cpumask_copy(&vq->irq_affinity, new_value);
1502 free_cpumask_var(new_value);
1506 struct vq_sysfs_entry {
1507 struct attribute attr;
1508 ssize_t (*show)(struct vduse_virtqueue *vq, char *buf);
1509 ssize_t (*store)(struct vduse_virtqueue *vq, const char *buf,
1513 static struct vq_sysfs_entry irq_cb_affinity_attr = __ATTR_RW(irq_cb_affinity);
1515 static struct attribute *vq_attrs[] = {
1516 &irq_cb_affinity_attr.attr,
1519 ATTRIBUTE_GROUPS(vq);
1521 static ssize_t vq_attr_show(struct kobject *kobj, struct attribute *attr,
1524 struct vduse_virtqueue *vq = container_of(kobj,
1525 struct vduse_virtqueue, kobj);
1526 struct vq_sysfs_entry *entry = container_of(attr,
1527 struct vq_sysfs_entry, attr);
1532 return entry->show(vq, buf);
1535 static ssize_t vq_attr_store(struct kobject *kobj, struct attribute *attr,
1536 const char *buf, size_t count)
1538 struct vduse_virtqueue *vq = container_of(kobj,
1539 struct vduse_virtqueue, kobj);
1540 struct vq_sysfs_entry *entry = container_of(attr,
1541 struct vq_sysfs_entry, attr);
1546 return entry->store(vq, buf, count);
1549 static const struct sysfs_ops vq_sysfs_ops = {
1550 .show = vq_attr_show,
1551 .store = vq_attr_store,
1554 static void vq_release(struct kobject *kobj)
1556 struct vduse_virtqueue *vq = container_of(kobj,
1557 struct vduse_virtqueue, kobj);
1561 static const struct kobj_type vq_type = {
1562 .release = vq_release,
1563 .sysfs_ops = &vq_sysfs_ops,
1564 .default_groups = vq_groups,
1567 static char *vduse_devnode(const struct device *dev, umode_t *mode)
1569 return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
1572 static const struct class vduse_class = {
1574 .devnode = vduse_devnode,
1577 static void vduse_dev_deinit_vqs(struct vduse_dev *dev)
1584 for (i = 0; i < dev->vq_num; i++)
1585 kobject_put(&dev->vqs[i]->kobj);
1589 static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
1593 dev->vq_align = vq_align;
1594 dev->vq_num = vq_num;
1595 dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
1599 for (i = 0; i < vq_num; i++) {
1600 dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL);
1606 dev->vqs[i]->index = i;
1607 dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
1608 INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
1609 INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
1610 spin_lock_init(&dev->vqs[i]->kick_lock);
1611 spin_lock_init(&dev->vqs[i]->irq_lock);
1612 cpumask_setall(&dev->vqs[i]->irq_affinity);
1614 kobject_init(&dev->vqs[i]->kobj, &vq_type);
1615 ret = kobject_add(&dev->vqs[i]->kobj,
1616 &dev->dev->kobj, "vq%d", i);
1626 kobject_put(&dev->vqs[i]->kobj);
1632 static struct vduse_dev *vduse_dev_create(void)
1634 struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
1639 mutex_init(&dev->lock);
1640 mutex_init(&dev->mem_lock);
1641 mutex_init(&dev->domain_lock);
1642 spin_lock_init(&dev->msg_lock);
1643 INIT_LIST_HEAD(&dev->send_list);
1644 INIT_LIST_HEAD(&dev->recv_list);
1645 spin_lock_init(&dev->irq_lock);
1646 init_rwsem(&dev->rwsem);
1648 INIT_WORK(&dev->inject, vduse_dev_irq_inject);
1649 init_waitqueue_head(&dev->waitq);
1654 static void vduse_dev_destroy(struct vduse_dev *dev)
1659 static struct vduse_dev *vduse_find_dev(const char *name)
1661 struct vduse_dev *dev;
1664 idr_for_each_entry(&vduse_idr, dev, id)
1665 if (!strcmp(dev->name, name))
1671 static int vduse_destroy_dev(char *name)
1673 struct vduse_dev *dev = vduse_find_dev(name);
1678 mutex_lock(&dev->lock);
1679 if (dev->vdev || dev->connected) {
1680 mutex_unlock(&dev->lock);
1683 dev->connected = true;
1684 mutex_unlock(&dev->lock);
1686 vduse_dev_reset(dev);
1687 device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
1688 idr_remove(&vduse_idr, dev->minor);
1689 kvfree(dev->config);
1690 vduse_dev_deinit_vqs(dev);
1692 vduse_domain_destroy(dev->domain);
1694 vduse_dev_destroy(dev);
1695 module_put(THIS_MODULE);
1700 static bool device_is_allowed(u32 device_id)
1704 for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
1705 if (allowed_device_id[i] == device_id)
1711 static bool features_is_valid(struct vduse_dev_config *config)
1713 if (!(config->features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
1716 /* Now we only support read-only configuration space */
1717 if ((config->device_id == VIRTIO_ID_BLOCK) &&
1718 (config->features & BIT_ULL(VIRTIO_BLK_F_CONFIG_WCE)))
1720 else if ((config->device_id == VIRTIO_ID_NET) &&
1721 (config->features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
1724 if ((config->device_id == VIRTIO_ID_NET) &&
1725 !(config->features & BIT_ULL(VIRTIO_F_VERSION_1)))
1731 static bool vduse_validate_config(struct vduse_dev_config *config)
1733 if (!is_mem_zero((const char *)config->reserved,
1734 sizeof(config->reserved)))
1737 if (config->vq_align > PAGE_SIZE)
1740 if (config->config_size > PAGE_SIZE)
1743 if (config->vq_num > 0xffff)
1746 if (!config->name[0])
1749 if (!device_is_allowed(config->device_id))
1752 if (!features_is_valid(config))
1758 static ssize_t msg_timeout_show(struct device *device,
1759 struct device_attribute *attr, char *buf)
1761 struct vduse_dev *dev = dev_get_drvdata(device);
1763 return sysfs_emit(buf, "%u\n", dev->msg_timeout);
1766 static ssize_t msg_timeout_store(struct device *device,
1767 struct device_attribute *attr,
1768 const char *buf, size_t count)
1770 struct vduse_dev *dev = dev_get_drvdata(device);
1773 ret = kstrtouint(buf, 10, &dev->msg_timeout);
1780 static DEVICE_ATTR_RW(msg_timeout);
1782 static ssize_t bounce_size_show(struct device *device,
1783 struct device_attribute *attr, char *buf)
1785 struct vduse_dev *dev = dev_get_drvdata(device);
1787 return sysfs_emit(buf, "%u\n", dev->bounce_size);
1790 static ssize_t bounce_size_store(struct device *device,
1791 struct device_attribute *attr,
1792 const char *buf, size_t count)
1794 struct vduse_dev *dev = dev_get_drvdata(device);
1795 unsigned int bounce_size;
1799 mutex_lock(&dev->domain_lock);
1803 ret = kstrtouint(buf, 10, &bounce_size);
1808 if (bounce_size > VDUSE_MAX_BOUNCE_SIZE ||
1809 bounce_size < VDUSE_MIN_BOUNCE_SIZE)
1812 dev->bounce_size = bounce_size & PAGE_MASK;
1815 mutex_unlock(&dev->domain_lock);
1819 static DEVICE_ATTR_RW(bounce_size);
1821 static struct attribute *vduse_dev_attrs[] = {
1822 &dev_attr_msg_timeout.attr,
1823 &dev_attr_bounce_size.attr,
1827 ATTRIBUTE_GROUPS(vduse_dev);
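/*
 * Create a VDUSE device on behalf of VDUSE_CREATE_DEV: allocate the
 * vduse_dev, take over the userspace-provided config space buffer,
 * allocate a minor in the IDR and create the /dev/vduse/$NAME char
 * device with the msg_timeout/bounce_size sysfs attributes attached.
 */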
1829 static int vduse_create_dev(struct vduse_dev_config *config,
1830 void *config_buf, u64 api_version)
1833 struct vduse_dev *dev;
1836 if ((config->device_id == VIRTIO_ID_NET) && !capable(CAP_NET_ADMIN))
1840 if (vduse_find_dev(config->name))
1844 dev = vduse_dev_create();
1848 dev->api_version = api_version;
1849 dev->device_features = config->features;
1850 dev->device_id = config->device_id;
1851 dev->vendor_id = config->vendor_id;
1852 dev->name = kstrdup(config->name, GFP_KERNEL);
1856 dev->bounce_size = VDUSE_BOUNCE_SIZE;
1857 dev->config = config_buf;
1858 dev->config_size = config->config_size;
1860 ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
1865 dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
1866 dev->dev = device_create_with_groups(&vduse_class, NULL,
1867 MKDEV(MAJOR(vduse_major), dev->minor),
1868 dev, vduse_dev_groups, "%s", config->name);
1869 if (IS_ERR(dev->dev)) {
1870 ret = PTR_ERR(dev->dev);
1874 ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num);
1878 __module_get(THIS_MODULE);
1882 device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
1884 idr_remove(&vduse_idr, dev->minor);
1888 vduse_dev_destroy(dev);
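/*
 * ioctls on /dev/vduse/control: query/negotiate the API version and
 * create or destroy VDUSE devices by name.
 */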
1893 static long vduse_ioctl(struct file *file, unsigned int cmd,
1897 void __user *argp = (void __user *)arg;
1898 struct vduse_control *control = file->private_data;
1900 mutex_lock(&vduse_lock);
1902 case VDUSE_GET_API_VERSION:
1903 ret = put_user(control->api_version, (u64 __user *)argp);
1905 case VDUSE_SET_API_VERSION: {
1909 if (get_user(api_version, (u64 __user *)argp))
1913 if (api_version > VDUSE_API_VERSION)
1917 control->api_version = api_version;
1920 case VDUSE_CREATE_DEV: {
1921 struct vduse_dev_config config;
1922 unsigned long size = offsetof(struct vduse_dev_config, config);
1926 if (copy_from_user(&config, argp, size))
1930 if (!vduse_validate_config(&config))
1933 buf = vmemdup_user(argp + size, config.config_size);
1938 config.name[VDUSE_NAME_MAX - 1] = '\0';
1939 ret = vduse_create_dev(&config, buf, control->api_version);
1944 case VDUSE_DESTROY_DEV: {
1945 char name[VDUSE_NAME_MAX];
1948 if (copy_from_user(name, argp, VDUSE_NAME_MAX))
1951 name[VDUSE_NAME_MAX - 1] = '\0';
1952 ret = vduse_destroy_dev(name);
1959 mutex_unlock(&vduse_lock);
1964 static int vduse_release(struct inode *inode, struct file *file)
1966 struct vduse_control *control = file->private_data;
1972 static int vduse_open(struct inode *inode, struct file *file)
1974 struct vduse_control *control;
1976 control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
1980 control->api_version = VDUSE_API_VERSION;
1981 file->private_data = control;
1986 static const struct file_operations vduse_ctrl_fops = {
1987 .owner = THIS_MODULE,
1989 .release = vduse_release,
1990 .unlocked_ioctl = vduse_ioctl,
1991 .compat_ioctl = compat_ptr_ioctl,
1992 .llseek = noop_llseek,
1995 struct vduse_mgmt_dev {
1996 struct vdpa_mgmt_dev mgmt_dev;
2000 static struct vduse_mgmt_dev *vduse_mgmt;
2002 static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
2004 struct vduse_vdpa *vdev;
2010 vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
2011 &vduse_vdpa_config_ops, 1, 1, name, true);
2013 return PTR_ERR(vdev);
2017 vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
2018 ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
2020 put_device(&vdev->vdpa.dev);
2023 set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
2024 vdev->vdpa.dma_dev = &vdev->vdpa.dev;
2025 vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;
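/*
 * Management device callback for "vdpa dev add": the named VDUSE
 * device must already exist and have all virtqueues configured.  Bind
 * it to a new vdpa device, create the per-device IOVA domain and
 * register the vdpa device with its virtqueues.
 */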
2030 static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
2031 const struct vdpa_dev_set_config *config)
2033 struct vduse_dev *dev;
2036 mutex_lock(&vduse_lock);
2037 dev = vduse_find_dev(name);
2038 if (!dev || !vduse_dev_is_ready(dev)) {
2039 mutex_unlock(&vduse_lock);
2042 ret = vduse_dev_init_vdpa(dev, name);
2043 mutex_unlock(&vduse_lock);
2047 mutex_lock(&dev->domain_lock);
2049 dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
2051 mutex_unlock(&dev->domain_lock);
2053 put_device(&dev->vdev->vdpa.dev);
2057 ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
2059 put_device(&dev->vdev->vdpa.dev);
2060 mutex_lock(&dev->domain_lock);
2061 vduse_domain_destroy(dev->domain);
2063 mutex_unlock(&dev->domain_lock);
2070 static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
2072 _vdpa_unregister_device(dev);
2075 static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
2076 .dev_add = vdpa_dev_add,
2077 .dev_del = vdpa_dev_del,
2080 static struct virtio_device_id id_table[] = {
2081 { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
2082 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
2086 static void vduse_mgmtdev_release(struct device *dev)
2088 struct vduse_mgmt_dev *mgmt_dev;
2090 mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
2094 static int vduse_mgmtdev_init(void)
2098 vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
2102 ret = dev_set_name(&vduse_mgmt->dev, "vduse");
2108 vduse_mgmt->dev.release = vduse_mgmtdev_release;
2110 ret = device_register(&vduse_mgmt->dev);
2114 vduse_mgmt->mgmt_dev.id_table = id_table;
2115 vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
2116 vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
2117 ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
2119 device_unregister(&vduse_mgmt->dev);
2124 put_device(&vduse_mgmt->dev);
2128 static void vduse_mgmtdev_exit(void)
2130 vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
2131 device_unregister(&vduse_mgmt->dev);
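/*
 * Module init: register the vduse class and char-dev region, create
 * /dev/vduse/control and the per-device cdev, allocate the unbound and
 * bound interrupt-injection workqueues, initialize the IOVA domain
 * support and finally register the "vduse" vdpa management device.
 */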
2134 static int vduse_init(void)
2139 ret = class_register(&vduse_class);
2143 ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
2145 goto err_chardev_region;
2147 /* /dev/vduse/control */
2148 cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
2149 vduse_ctrl_cdev.owner = THIS_MODULE;
2150 ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
2154 dev = device_create(&vduse_class, NULL, vduse_major, NULL, "control");
2160 /* /dev/vduse/$DEVICE */
2161 cdev_init(&vduse_cdev, &vduse_dev_fops);
2162 vduse_cdev.owner = THIS_MODULE;
2163 ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
2169 vduse_irq_wq = alloc_workqueue("vduse-irq",
2170 WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
2174 vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
2175 if (!vduse_irq_bound_wq)
2178 ret = vduse_domain_init();
2182 ret = vduse_mgmtdev_init();
2188 vduse_domain_exit();
2190 destroy_workqueue(vduse_irq_bound_wq);
2192 destroy_workqueue(vduse_irq_wq);
2194 cdev_del(&vduse_cdev);
2196 device_destroy(&vduse_class, vduse_major);
2198 cdev_del(&vduse_ctrl_cdev);
2200 unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
2202 class_unregister(&vduse_class);
2205 module_init(vduse_init);
2207 static void vduse_exit(void)
2209 vduse_mgmtdev_exit();
2210 vduse_domain_exit();
2211 destroy_workqueue(vduse_irq_bound_wq);
2212 destroy_workqueue(vduse_irq_wq);
2213 cdev_del(&vduse_cdev);
2214 device_destroy(&vduse_class, vduse_major);
2215 cdev_del(&vduse_ctrl_cdev);
2216 unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
2217 class_unregister(&vduse_class);
2219 module_exit(vduse_exit);
2221 MODULE_LICENSE(DRV_LICENSE);
2222 MODULE_AUTHOR(DRV_AUTHOR);
2223 MODULE_DESCRIPTION(DRV_DESC);