2 * bsg.c - block layer implementation of the sg v4 interface
7 * This file is subject to the terms and conditions of the GNU General Public
8 * License version 2. See the file "COPYING" in the main directory of this
9 * archive for more details.
12 #include <linux/module.h>
13 #include <linux/init.h>
14 #include <linux/file.h>
15 #include <linux/blkdev.h>
16 #include <linux/poll.h>
17 #include <linux/cdev.h>
18 #include <linux/jiffies.h>
19 #include <linux/percpu.h>
20 #include <linux/uio.h>
21 #include <linux/idr.h>
22 #include <linux/bsg.h>
23 #include <linux/slab.h>
25 #include <scsi/scsi.h>
26 #include <scsi/scsi_ioctl.h>
27 #include <scsi/scsi_cmnd.h>
28 #include <scsi/scsi_device.h>
29 #include <scsi/scsi_driver.h>
/*
 * Driver description and version strings. Both are pasted into the banner
 * printed by bsg_init(); the description is also passed to
 * MODULE_DESCRIPTION().
 */
32 #define BSG_DESCRIPTION "Block layer SCSI generic (bsg) driver"
33 #define BSG_VERSION "0.4"
/*
 * Debug print helper: forwards to pr_debug() with the message prefixed by
 * the name field of the bsg device passed as bd.
 */
35 #define bsg_dbg(bd, fmt, ...) \
36 pr_debug("%s: " fmt, (bd)->name, ##__VA_ARGS__)
/*
 * Fields of the per-device state (struct bsg_device).
 * NOTE(review): the opening line of the structure and several members
 * (lock, counters, name, flags, ref_count are used elsewhere in the file)
 * are not part of this view.
 */
/* Request queue that commands for this device are submitted to. */
39 struct request_queue *queue;
/* Commands are added here by bsg_add_command() when submitted. */
41 struct list_head busy_list;
/* bsg_rq_end_io() moves finished commands onto this list. */
42 struct list_head done_list;
/* Linkage into one bucket of bsg_device_list. */
43 struct hlist_node dev_list;
/* Woken by bsg_rq_end_io() when a command finishes. */
47 wait_queue_head_t wq_done;
/* Woken by bsg_free_command() when a command is released. */
48 wait_queue_head_t wq_free;
/* Initial value of max_queue for a freshly allocated device. */
58 #define BSG_DEFAULT_CMDS 64
/* Upper bound used for minor allocation and for the char device region. */
59 #define BSG_MAX_DEVS 32768
/* Taken around device lookup/teardown and queue (un)registration. */
61 static DEFINE_MUTEX(bsg_mutex);
/* Maps a minor number to the class device registered for it. */
62 static DEFINE_IDR(bsg_minor_idr);
/*
 * Number of hash buckets for open devices. bsg_dev_idx_hash() masks with
 * (size - 1), so this must stay a power of two.
 */
64 #define BSG_LIST_ARRAY_SIZE 8
65 static struct hlist_head bsg_device_list[BSG_LIST_ARRAY_SIZE];
/* Device class created in bsg_init(). */
67 static struct class *bsg_class;
/* Slab cache that struct bsg_command objects are allocated from. */
70 static struct kmem_cache *bsg_cmd_cachep;
/*
 * NOTE(review): only two members of struct bsg_command are visible in this
 * view; rq, bio, bidi_bio and hdr are referenced elsewhere in the file.
 */
73 * our internal command type
/* Device this command belongs to. */
76 struct bsg_device *bd;
/* Linkage on the busy_list or done_list of that device. */
77 struct list_head list;
/*
 * Release a command: return bc to the bsg_cmd_cachep slab cache and wake
 * any waiter sleeping on wq_free.
 * NOTE(review): the statement executed under bd->lock (original line 93) is
 * not part of this view; presumably it drops the queued-command count --
 * confirm against the full file.
 */
85 static void bsg_free_command(struct bsg_command *bc)
/* bd is read out of bc before bc goes back to the cache below. */
87 struct bsg_device *bd = bc->bd;
90 kmem_cache_free(bsg_cmd_cachep, bc);
92 spin_lock_irqsave(&bd->lock, flags);
94 spin_unlock_irqrestore(&bd->lock, flags);
96 wake_up(&bd->wq_free);
/*
 * Allocate a zeroed command for bd from the slab cache.
 *
 * The result starts out as ERR_PTR(-EINVAL); the queue-depth check against
 * max_queue is done under bd->lock. ERR_PTR(-ENOMEM) is set when the
 * allocation fails.
 * NOTE(review): the branch targets, the counter updates and the return
 * statement are not part of this view.
 */
99 static struct bsg_command *bsg_alloc_command(struct bsg_device *bd)
101 struct bsg_command *bc = ERR_PTR(-EINVAL);
103 spin_lock_irq(&bd->lock);
105 if (bd->queued_cmds >= bd->max_queue)
/* The lock is dropped before the GFP_KERNEL allocation. */
109 spin_unlock_irq(&bd->lock);
111 bc = kmem_cache_zalloc(bsg_cmd_cachep, GFP_KERNEL);
/* Failure path: the lock is re-taken before reporting -ENOMEM. */
113 spin_lock_irq(&bd->lock);
115 bc = ERR_PTR(-ENOMEM);
120 INIT_LIST_HEAD(&bc->list);
121 bsg_dbg(bd, "returning free cmd %p\n", bc);
124 spin_unlock_irq(&bd->lock);
/*
 * Map an index (callers pass a minor number) to its bucket in
 * bsg_device_list by masking with BSG_LIST_ARRAY_SIZE - 1.
 */
128 static inline struct hlist_head *bsg_dev_idx_hash(int index)
130 return &bsg_device_list[index & (BSG_LIST_ARRAY_SIZE - 1)];
/*
 * Convert an integer value (used on the address fields of struct sg_io_v4)
 * into a __user pointer, going through uintptr_t.
 */
133 #define uptr64(val) ((void __user *)(uintptr_t)(val))
/*
 * Protocol check for the SCSI ops: tests whether the header names
 * BSG_PROTOCOL_SCSI with subprotocol BSG_SUB_PROTOCOL_SCSI_CMD.
 * NOTE(review): the return statements are not part of this view.
 */
135 static int bsg_scsi_check_proto(struct sg_io_v4 *hdr)
137 if (hdr->protocol != BSG_PROTOCOL_SCSI ||
138 hdr->subprotocol != BSG_SUB_PROTOCOL_SCSI_CMD)
/*
 * Fill the SCSI request attached to rq from the user header: take the
 * command length from hdr->request_len, copy the command bytes in from user
 * space and pass them to blk_verify_command() together with mode.
 * NOTE(review): the remaining parameter line, the allocation failure check
 * and all return statements are not part of this view.
 */
143 static int bsg_scsi_fill_hdr(struct request *rq, struct sg_io_v4 *hdr,
146 struct scsi_request *sreq = scsi_req(rq);
148 sreq->cmd_len = hdr->request_len;
/* Commands longer than BLK_MAX_CDB get a separately allocated buffer. */
149 if (sreq->cmd_len > BLK_MAX_CDB) {
150 sreq->cmd = kzalloc(sreq->cmd_len, GFP_KERNEL);
155 if (copy_from_user(sreq->cmd, uptr64(hdr->request), sreq->cmd_len))
157 if (blk_verify_command(sreq->cmd, mode))
/*
 * Copy the outcome of a finished SCSI request into the user-visible header:
 * status bytes, sense data (when present and a response buffer was given)
 * and residual counts.
 * NOTE(review): the return value handling, the second min_t() operand and
 * the condition opening the residual if/else chain are not part of this
 * view.
 */
162 static int bsg_scsi_complete_rq(struct request *rq, struct sg_io_v4 *hdr)
164 struct scsi_request *sreq = scsi_req(rq);
168 * fill in all the output members
/* Low byte, host byte and driver byte of the result word. */
170 hdr->device_status = sreq->result & 0xff;
171 hdr->transport_status = host_byte(sreq->result);
172 hdr->driver_status = driver_byte(sreq->result);
/* Any non-zero status flags the header with SG_INFO_CHECK. */
174 if (hdr->device_status || hdr->transport_status || hdr->driver_status)
175 hdr->info |= SG_INFO_CHECK;
176 hdr->response_len = 0;
/* Sense data is copied out, bounded by hdr->max_response_len. */
178 if (sreq->sense_len && hdr->response) {
179 int len = min_t(unsigned int, hdr->max_response_len,
182 if (copy_to_user(uptr64(hdr->response), sreq->sense, len))
185 hdr->response_len = len;
/* First branch: dout residual from rq, din residual from rq->next_rq. */
189 hdr->dout_resid = sreq->resid_len;
190 hdr->din_resid = scsi_req(rq->next_rq)->resid_len;
191 } else if (rq_data_dir(rq) == READ) {
192 hdr->din_resid = sreq->resid_len;
194 hdr->dout_resid = sreq->resid_len;
/* free_rq hook of the SCSI ops: hands the SCSI request to scsi_req_free_cmd(). */
200 static void bsg_scsi_free_rq(struct request *rq)
202 scsi_req_free_cmd(scsi_req(rq));
/*
 * Operations table for SCSI passthrough queues; handed to
 * bsg_register_queue() by bsg_scsi_register_queue().
 */
205 static const struct bsg_ops bsg_scsi_ops = {
206 .check_proto = bsg_scsi_check_proto,
207 .fill_hdr = bsg_scsi_fill_hdr,
208 .complete_rq = bsg_scsi_complete_rq,
209 .free_rq = bsg_scsi_free_rq,
/*
 * Build a block request from a user-supplied sg_io_v4 header.
 *
 * Validates the header (class device present, guard byte, protocol via the
 * check_proto hook), allocates the request, lets the fill_hdr hook populate
 * it, picks a timeout and maps the user data buffers. When both dout and
 * din lengths are set, a second request is allocated, linked through
 * rq->next_rq and used for the din buffer.
 *
 * Early validation failures are returned as ERR_PTR() values.
 * NOTE(review): the if (ret) checks, the goto labels and the final return
 * statements are not part of this view.
 */
212 static struct request *
213 bsg_map_hdr(struct request_queue *q, struct sg_io_v4 *hdr, fmode_t mode)
215 struct request *rq, *next_rq = NULL;
218 if (!q->bsg_dev.class_dev)
219 return ERR_PTR(-ENXIO);
221 if (hdr->guard != 'Q')
222 return ERR_PTR(-EINVAL);
224 ret = q->bsg_dev.ops->check_proto(hdr);
/* Request direction is OUT whenever a dout length is given, else IN. */
228 rq = blk_get_request(q, hdr->dout_xfer_len ?
229 REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
233 ret = q->bsg_dev.ops->fill_hdr(rq, hdr, mode);
/*
 * Timeout candidates: header value, queue sg_timeout, then the default;
 * the result is clamped to at least BLK_MIN_SG_TIMEOUT.
 */
237 rq->timeout = msecs_to_jiffies(hdr->timeout);
239 rq->timeout = q->sg_timeout;
241 rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
242 if (rq->timeout < BLK_MIN_SG_TIMEOUT)
243 rq->timeout = BLK_MIN_SG_TIMEOUT;
/* Bidirectional transfer: the queue flag QUEUE_FLAG_BIDI is tested first. */
245 if (hdr->dout_xfer_len && hdr->din_xfer_len) {
246 if (!test_bit(QUEUE_FLAG_BIDI, &q->queue_flags)) {
251 next_rq = blk_get_request(q, REQ_OP_SCSI_IN, 0);
252 if (IS_ERR(next_rq)) {
253 ret = PTR_ERR(next_rq);
257 rq->next_rq = next_rq;
258 ret = blk_rq_map_user(q, next_rq, NULL, uptr64(hdr->din_xferp),
259 hdr->din_xfer_len, GFP_KERNEL);
261 goto out_free_nextrq;
/* Map the buffer for the primary request: dout if present, else din. */
264 if (hdr->dout_xfer_len) {
265 ret = blk_rq_map_user(q, rq, NULL, uptr64(hdr->dout_xferp),
266 hdr->dout_xfer_len, GFP_KERNEL);
267 } else if (hdr->din_xfer_len) {
268 ret = blk_rq_map_user(q, rq, NULL, uptr64(hdr->din_xferp),
269 hdr->din_xfer_len, GFP_KERNEL);
273 goto out_unmap_nextrq;
/* Error unwinding: unmap and release the second request, then free rq. */
278 blk_rq_unmap_user(rq->next_rq->bio);
281 blk_put_request(rq->next_rq);
283 q->bsg_dev.ops->free_rq(rq);
/*
 * Completion handler passed to blk_execute_rq_nowait() by
 * bsg_add_command(). It converts the start timestamp stored in
 * hdr.duration into elapsed milliseconds, moves the command to the done
 * list under bd->lock and wakes waiters on wq_done.
 * NOTE(review): the statement at original line 305 (under the lock) is not
 * part of this view.
 */
289 * async completion call-back from the block layer, when scsi/ide/whatever
290 * calls end_that_request_last() on a request
292 static void bsg_rq_end_io(struct request *rq, blk_status_t status)
/* bsg_add_command() stored the command in rq->end_io_data. */
294 struct bsg_command *bc = rq->end_io_data;
295 struct bsg_device *bd = bc->bd;
298 bsg_dbg(bd, "finished rq %p bc %p, bio %p\n",
301 bc->hdr.duration = jiffies_to_msecs(jiffies - bc->hdr.duration);
303 spin_lock_irqsave(&bd->lock, flags);
304 list_move_tail(&bc->list, &bd->done_list);
306 spin_unlock_irqrestore(&bd->lock, flags);
308 wake_up(&bd->wq_done);
/*
 * Queue bc on the busy list of bd and submit rq asynchronously;
 * bsg_rq_end_io() runs on completion.
 * NOTE(review): the assignments at original lines 323-325 are not part of
 * this view.
 */
312 * do final setup of a 'bc' and submit the matching 'rq' to the block
315 static void bsg_add_command(struct bsg_device *bd, struct request_queue *q,
316 struct bsg_command *bc, struct request *rq)
/* Insert at the head of the queue unless BSG_FLAG_Q_AT_TAIL is set. */
318 int at_head = (0 == (bc->hdr.flags & BSG_FLAG_Q_AT_TAIL));
321 * add bc command to busy queue and submit rq for io
326 bc->bidi_bio = rq->next_rq->bio;
/* Start timestamp in jiffies; converted to a duration on completion. */
327 bc->hdr.duration = jiffies;
328 spin_lock_irq(&bd->lock);
329 list_add_tail(&bc->list, &bd->busy_list);
330 spin_unlock_irq(&bd->lock);
332 bsg_dbg(bd, "queueing rq %p, bc %p\n", rq, bc);
334 rq->end_io_data = bc;
335 blk_execute_rq_nowait(q, NULL, rq, at_head, bsg_rq_end_io);
/*
 * Fetch the first command on the done list of bd, under bd->lock. The
 * result starts out as NULL.
 * NOTE(review): the guard around list_first_entry() and the removal from
 * the list are not part of this view.
 */
338 static struct bsg_command *bsg_next_done_cmd(struct bsg_device *bd)
340 struct bsg_command *bc = NULL;
342 spin_lock_irq(&bd->lock);
344 bc = list_first_entry(&bd->done_list, struct bsg_command, list);
348 spin_unlock_irq(&bd->lock);
/*
 * Obtain a completed command via bsg_next_done_cmd(). When the device is
 * not in blocking mode the result is ERR_PTR(-EAGAIN); otherwise the caller
 * sleeps interruptibly on wq_done until done_cmds is non-zero, and
 * ERR_PTR(-ERESTARTSYS) is set for an interrupted wait.
 * NOTE(review): the loop construct and the conditions guarding each branch
 * are not part of this view.
 */
354 * Get a finished command from the done list
356 static struct bsg_command *bsg_get_done_cmd(struct bsg_device *bd)
358 struct bsg_command *bc;
362 bc = bsg_next_done_cmd(bd);
366 if (!test_bit(BSG_F_BLOCK, &bd->flags)) {
367 bc = ERR_PTR(-EAGAIN);
371 ret = wait_event_interruptible(bd->wq_done, bd->done_cmds);
373 bc = ERR_PTR(-ERESTARTSYS);
378 bsg_dbg(bd, "returning done %p\n", bc);
/*
 * Finish a request: run the complete_rq hook to fill hdr, unmap the user
 * buffers behind bidi_bio and bio, release the linked next_rq and call the
 * free_rq hook.
 * NOTE(review): the condition guarding the bidi cleanup, the release of rq
 * itself and the return statement are not part of this view.
 */
383 static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
384 struct bio *bio, struct bio *bidi_bio)
388 ret = rq->q->bsg_dev.ops->complete_rq(rq, hdr);
391 blk_rq_unmap_user(bidi_bio);
392 blk_put_request(rq->next_rq);
395 blk_rq_unmap_user(bio);
396 rq->q->bsg_dev.ops->free_rq(rq);
/*
 * Wait condition used by bsg_complete_all_commands(): compares done_cmds
 * with queued_cmds under bd->lock. The check is repeated in a loop while
 * the result is false and the device is not in blocking mode.
 * NOTE(review): the assignment to ret and the return statement are not part
 * of this view.
 */
401 static bool bsg_complete(struct bsg_device *bd)
407 spin_lock_irq(&bd->lock);
/* More done than queued commands would mean corrupted accounting. */
409 BUG_ON(bd->done_cmds > bd->queued_cmds);
412 * All commands consumed.
414 if (bd->done_cmds == bd->queued_cmds)
417 spin = !test_bit(BSG_F_BLOCK, &bd->flags);
419 spin_unlock_irq(&bd->lock);
420 } while (!ret && spin);
/*
 * Drain a device: wait until bsg_complete() reports true, then fetch each
 * done command, complete its request and free the command. The queued_cmds
 * count is sampled under bd->lock on each pass.
 * NOTE(review): the loop construct, the error checks and the return
 * statement are not part of this view.
 */
425 static int bsg_complete_all_commands(struct bsg_device *bd)
427 struct bsg_command *bc;
430 bsg_dbg(bd, "entered\n");
433 * wait for all commands to complete
435 io_wait_event(bd->wq_done, bsg_complete(bd));
438 * discard done commands
442 spin_lock_irq(&bd->lock);
443 if (!bd->queued_cmds) {
444 spin_unlock_irq(&bd->lock);
447 spin_unlock_irq(&bd->lock);
449 bc = bsg_get_done_cmd(bd);
453 tret = blk_complete_sgv4_hdr_rq(bc->rq, &bc->hdr, bc->bio,
458 bsg_free_command(bc);
/*
 * Read worker: count is tested for being a multiple of
 * sizeof(struct sg_io_v4). For each header-sized slot a done command is
 * fetched, its request completed, the header copied to buf and the command
 * freed; buf and *bytes_read advance by one header per command.
 * NOTE(review): the return type line, the error checks, the loop counter
 * update and the return statements are not part of this view.
 */
465 __bsg_read(char __user *buf, size_t count, struct bsg_device *bd,
466 const struct iovec *iov, ssize_t *bytes_read)
468 struct bsg_command *bc;
469 int nr_commands, ret;
471 if (count % sizeof(struct sg_io_v4))
475 nr_commands = count / sizeof(struct sg_io_v4);
476 while (nr_commands) {
477 bc = bsg_get_done_cmd(bd);
484 * this is the only case where we need to copy data back
485 * after completing the request. so do that here,
486 * bsg_complete_work() cannot do that for us
488 ret = blk_complete_sgv4_hdr_rq(bc->rq, &bc->hdr, bc->bio,
491 if (copy_to_user(buf, &bc->hdr, sizeof(bc->hdr)))
494 bsg_free_command(bc);
499 buf += sizeof(struct sg_io_v4);
500 *bytes_read += sizeof(struct sg_io_v4);
/*
 * Mirror the O_NONBLOCK state of file into the BSG_F_BLOCK bit of
 * bd->flags: the bit is cleared for non-blocking files.
 * NOTE(review): the else line before set_bit() is not part of this view.
 */
507 static inline void bsg_set_block(struct bsg_device *bd, struct file *file)
509 if (file->f_flags & O_NONBLOCK)
510 clear_bit(BSG_F_BLOCK, &bd->flags);
512 set_bit(BSG_F_BLOCK, &bd->flags);
/*
 * Tests whether ret is non-zero and is none of -ENOSPC, -ENODATA or
 * -EAGAIN.
 * NOTE(review): the return statements are not part of this view.
 */
516 * Check if the error is a "real" error that we should return.
518 static inline int err_block_err(int ret)
520 if (ret && ret != -ENOSPC && ret != -ENODATA && ret != -EAGAIN)
/*
 * read() entry point: syncs the blocking mode with the file flags and
 * delegates to __bsg_read().
 * NOTE(review): the return type line, local declarations and return
 * statements are not part of this view.
 */
527 bsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
529 struct bsg_device *bd = file->private_data;
533 bsg_dbg(bd, "read %zd bytes\n", count);
535 bsg_set_block(bd, file);
538 ret = __bsg_read(buf, count, bd, NULL, &bytes_read);
/* Taken when nothing was read or err_block_err() flags the error. */
541 if (!bytes_read || err_block_err(ret))
/*
 * Write worker: count is tested for being a multiple of
 * sizeof(struct sg_io_v4). For each header a command is allocated, the
 * header copied in from buf, a request built with bsg_map_hdr() and the
 * pair submitted with bsg_add_command(); buf and *bytes_written advance by
 * one header per command.
 * NOTE(review): the error checks, the loop counter update and the return
 * statements are not part of this view.
 */
547 static int __bsg_write(struct bsg_device *bd, const char __user *buf,
548 size_t count, ssize_t *bytes_written, fmode_t mode)
550 struct bsg_command *bc;
552 int ret, nr_commands;
554 if (count % sizeof(struct sg_io_v4))
557 nr_commands = count / sizeof(struct sg_io_v4);
561 while (nr_commands) {
562 struct request_queue *q = bd->queue;
564 bc = bsg_alloc_command(bd);
571 if (copy_from_user(&bc->hdr, buf, sizeof(bc->hdr))) {
577 * get a request, fill in the blanks, and add to request queue
579 rq = bsg_map_hdr(bd->queue, &bc->hdr, mode);
586 bsg_add_command(bd, q, bc, rq);
590 buf += sizeof(struct sg_io_v4);
591 *bytes_written += sizeof(struct sg_io_v4);
/* A command is released here; presumably the failure path -- confirm. */
595 bsg_free_command(bc);
/*
 * write() entry point: syncs the blocking mode with the file flags,
 * delegates to __bsg_write() with the file mode, stores the byte count in
 * *ppos and returns the number of bytes written.
 * NOTE(review): the return type line, the initialisation of bytes_written
 * and the early return statements are not part of this view.
 */
601 bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
603 struct bsg_device *bd = file->private_data;
604 ssize_t bytes_written;
607 bsg_dbg(bd, "write %zd bytes\n", count);
/* Special-cases callers for which uaccess_kernel() is true. */
609 if (unlikely(uaccess_kernel()))
612 bsg_set_block(bd, file);
615 ret = __bsg_write(bd, buf, count, &bytes_written, file->f_mode);
617 *ppos = bytes_written;
620 * return bytes written on non-fatal errors
622 if (!bytes_written || err_block_err(ret))
625 bsg_dbg(bd, "returning %zd\n", bytes_written);
626 return bytes_written;
/*
 * Allocate a zeroed bsg_device and initialise its lock, queue depth
 * (BSG_DEFAULT_CMDS), lists, hash node and both wait queues.
 * NOTE(review): the allocation failure check and the return statement are
 * not part of this view.
 */
629 static struct bsg_device *bsg_alloc_device(void)
631 struct bsg_device *bd;
633 bd = kzalloc(sizeof(struct bsg_device), GFP_KERNEL);
637 spin_lock_init(&bd->lock);
639 bd->max_queue = BSG_DEFAULT_CMDS;
641 INIT_LIST_HEAD(&bd->busy_list);
642 INIT_LIST_HEAD(&bd->done_list);
643 INIT_HLIST_NODE(&bd->dev_list);
645 init_waitqueue_head(&bd->wq_free);
646 init_waitqueue_head(&bd->wq_done);
/*
 * Drop one reference on bd. The reference count is decremented under
 * bsg_mutex; the teardown path unhashes the device, forces blocking mode
 * and drains outstanding commands with bsg_complete_all_commands().
 * NOTE(review): the branch on do_free, the final freeing of bd and of the
 * queue reference, and the return statement are not part of this view.
 */
650 static int bsg_put_device(struct bsg_device *bd)
652 int ret = 0, do_free;
653 struct request_queue *q = bd->queue;
655 mutex_lock(&bsg_mutex);
657 do_free = atomic_dec_and_test(&bd->ref_count);
/* Two unlock sites follow; presumably one per branch on do_free. */
659 mutex_unlock(&bsg_mutex);
663 hlist_del(&bd->dev_list);
664 mutex_unlock(&bsg_mutex);
666 bsg_dbg(bd, "tearing down\n");
669 * close can always block
671 set_bit(BSG_F_BLOCK, &bd->flags);
674 * correct error detection baddies here again. it's the responsibility
675 * of the app to properly reap commands before close() if it wants
676 * fool-proof error detection
678 ret = bsg_complete_all_commands(bd);
/*
 * Create the bsg_device for a queue on first open. Must be called with
 * bsg_mutex held. Takes a queue reference with blk_get_queue(), allocates
 * the device, sets its blocking mode from file, starts the reference count
 * at 1 and hashes the device by the minor number of inode.
 *
 * Returns ERR_PTR(-ENXIO) when the queue reference cannot be taken, and
 * ERR_PTR(-ENOMEM) on the allocation failure path.
 * NOTE(review): the third parameter line, the bd->queue assignment and the
 * return statement are not part of this view.
 */
687 static struct bsg_device *bsg_add_device(struct inode *inode,
688 struct request_queue *rq,
691 struct bsg_device *bd;
692 unsigned char buf[32];
694 lockdep_assert_held(&bsg_mutex);
696 if (!blk_get_queue(rq))
697 return ERR_PTR(-ENXIO);
699 bd = bsg_alloc_device();
702 return ERR_PTR(-ENOMEM);
707 bsg_set_block(bd, file);
709 atomic_set(&bd->ref_count, 1);
710 hlist_add_head(&bd->dev_list, bsg_dev_idx_hash(iminor(inode)));
/*
 * At most sizeof(bd->name) - 1 bytes are copied; bd comes from kzalloc()
 * in bsg_alloc_device(), so the last byte stays zero.
 */
712 strncpy(bd->name, dev_name(rq->bsg_dev.class_dev), sizeof(bd->name) - 1);
713 bsg_dbg(bd, "bound to <%s>, max queue %d\n",
714 format_dev_t(buf, inode->i_rdev), bd->max_queue);
/*
 * Look up an already-open device: walks the hash bucket for minor and, on
 * the entry whose queue equals q, takes a reference. Must be called with
 * bsg_mutex held.
 * NOTE(review): the return statements are not part of this view.
 */
719 static struct bsg_device *__bsg_get_device(int minor, struct request_queue *q)
721 struct bsg_device *bd;
723 lockdep_assert_held(&bsg_mutex);
725 hlist_for_each_entry(bd, bsg_dev_idx_hash(minor), dev_list) {
726 if (bd->queue == q) {
727 atomic_inc(&bd->ref_count);
/*
 * Resolve the device for an open(): under bsg_mutex, look up the class
 * device for the minor of inode in bsg_minor_idr, then try
 * __bsg_get_device() and bsg_add_device(). ERR_PTR(-ENODEV) is set on the
 * lookup failure path.
 * NOTE(review): the conditions between these steps and the return
 * statement are not part of this view.
 */
736 static struct bsg_device *bsg_get_device(struct inode *inode, struct file *file)
738 struct bsg_device *bd;
739 struct bsg_class_device *bcd;
742 * find the class device
744 mutex_lock(&bsg_mutex);
745 bcd = idr_find(&bsg_minor_idr, iminor(inode));
748 bd = ERR_PTR(-ENODEV);
752 bd = __bsg_get_device(iminor(inode), bcd->queue);
754 bd = bsg_add_device(inode, bcd->queue, file);
757 mutex_unlock(&bsg_mutex);
/*
 * open() entry point: obtains the bsg_device via bsg_get_device() and
 * stores it in file->private_data.
 * NOTE(review): the error check and return statements are not part of this
 * view.
 */
761 static int bsg_open(struct inode *inode, struct file *file)
763 struct bsg_device *bd;
765 bd = bsg_get_device(inode, file);
770 file->private_data = bd;
/*
 * release() entry point: clears file->private_data and drops the device
 * reference, returning the result of bsg_put_device().
 */
774 static int bsg_release(struct inode *inode, struct file *file)
776 struct bsg_device *bd = file->private_data;
778 file->private_data = NULL;
779 return bsg_put_device(bd);
/*
 * poll() entry point: registers on both wait queues, then under bd->lock
 * reports EPOLLIN | EPOLLRDNORM when the done list is non-empty and tests
 * whether queued_cmds is below max_queue.
 * NOTE(review): the mask declaration, the bits set for the second test and
 * the return statement are not part of this view.
 */
782 static __poll_t bsg_poll(struct file *file, poll_table *wait)
784 struct bsg_device *bd = file->private_data;
787 poll_wait(file, &bd->wq_done, wait);
788 poll_wait(file, &bd->wq_free, wait);
790 spin_lock_irq(&bd->lock);
791 if (!list_empty(&bd->done_list))
792 mask |= EPOLLIN | EPOLLRDNORM;
793 if (bd->queued_cmds < bd->max_queue)
795 spin_unlock_irq(&bd->lock);
/*
 * ioctl() entry point.
 *
 * SG_GET_COMMAND_Q returns max_queue to user space; SG_SET_COMMAND_Q reads
 * a new value and stores it under bd->lock. A group of SG and SCSI ioctls
 * is forwarded to scsi_cmd_ioctl(). The last visible branch copies an
 * sg_io_v4 header from user space, builds a request with bsg_map_hdr(),
 * executes it with blk_execute_rq(), completes it and copies the header
 * back.
 * NOTE(review): the switch statement, the validation of the new queue
 * depth, the case label of the last branch and all return statements other
 * than the two shown are not part of this view.
 */
800 static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
802 struct bsg_device *bd = file->private_data;
803 int __user *uarg = (int __user *) arg;
810 case SG_GET_COMMAND_Q:
811 return put_user(bd->max_queue, uarg);
812 case SG_SET_COMMAND_Q: {
815 if (get_user(queue, uarg))
820 spin_lock_irq(&bd->lock);
821 bd->max_queue = queue;
822 spin_unlock_irq(&bd->lock);
829 case SG_GET_VERSION_NUM:
830 case SCSI_IOCTL_GET_IDLUN:
831 case SCSI_IOCTL_GET_BUS_NUMBER:
834 case SG_GET_RESERVED_SIZE:
835 case SG_SET_RESERVED_SIZE:
836 case SG_EMULATED_HOST:
837 case SCSI_IOCTL_SEND_COMMAND: {
/* This local uarg shadows the int pointer declared at function scope. */
838 void __user *uarg = (void __user *) arg;
839 return scsi_cmd_ioctl(bd->queue, NULL, file->f_mode, cmd, uarg);
843 struct bio *bio, *bidi_bio = NULL;
847 if (copy_from_user(&hdr, uarg, sizeof(hdr)))
850 rq = bsg_map_hdr(bd->queue, &hdr, file->f_mode);
856 bidi_bio = rq->next_rq->bio;
/* Insert at the head of the queue unless BSG_FLAG_Q_AT_TAIL is set. */
858 at_head = (0 == (hdr.flags & BSG_FLAG_Q_AT_TAIL));
859 blk_execute_rq(bd->queue, NULL, rq, at_head);
860 ret = blk_complete_sgv4_hdr_rq(rq, &hdr, bio, bidi_bio);
862 if (copy_to_user(uarg, &hdr, sizeof(hdr)))
/*
 * File operations installed on the bsg character device by bsg_init().
 * NOTE(review): the entries at original lines 873-876 are not part of this
 * view.
 */
872 static const struct file_operations bsg_fops = {
877 .release = bsg_release,
878 .unlocked_ioctl = bsg_ioctl,
879 .owner = THIS_MODULE,
880 .llseek = default_llseek,
/*
 * Undo bsg_register_queue() for q: under bsg_mutex, remove the minor from
 * bsg_minor_idr, remove the bsg sysfs link, unregister the class device and
 * clear the class_dev pointer. Exported to GPL modules.
 * NOTE(review): the early-exit check at original lines 887-889 is not part
 * of this view.
 */
883 void bsg_unregister_queue(struct request_queue *q)
885 struct bsg_class_device *bcd = &q->bsg_dev;
890 mutex_lock(&bsg_mutex);
891 idr_remove(&bsg_minor_idr, bcd->minor);
893 sysfs_remove_link(&q->kobj, "bsg");
894 device_unregister(bcd->class_dev);
/* bsg_map_hdr() returns -ENXIO once class_dev is NULL. */
895 bcd->class_dev = NULL;
896 mutex_unlock(&bsg_mutex);
898 EXPORT_SYMBOL_GPL(bsg_unregister_queue);
/*
 * Register q as a bsg device named name below parent, using ops.
 *
 * Under bsg_mutex: allocate a minor in [0, BSG_MAX_DEVS) from
 * bsg_minor_idr, create the class device and add a bsg sysfs link on the
 * queue kobject. Failures unwind through device_unregister() and
 * idr_remove().
 * NOTE(review): the assignment of bcd, the storing of minor, queue and ops,
 * the error checks and the return statements are not part of this view.
 */
900 int bsg_register_queue(struct request_queue *q, struct device *parent,
901 const char *name, const struct bsg_ops *ops)
903 struct bsg_class_device *bcd;
906 struct device *class_dev = NULL;
909 * we need a proper transport to send commands, not a stacked device
911 if (!queue_is_rq_based(q))
915 memset(bcd, 0, sizeof(*bcd));
917 mutex_lock(&bsg_mutex);
919 ret = idr_alloc(&bsg_minor_idr, bcd, 0, BSG_MAX_DEVS, GFP_KERNEL);
921 if (ret == -ENOSPC) {
922 printk(KERN_ERR "bsg: too many bsg devices\n");
931 dev = MKDEV(bsg_major, bcd->minor);
932 class_dev = device_create(bsg_class, parent, dev, NULL, "%s", name);
933 if (IS_ERR(class_dev)) {
934 ret = PTR_ERR(class_dev);
937 bcd->class_dev = class_dev;
940 ret = sysfs_create_link(&q->kobj, &bcd->class_dev->kobj, "bsg");
942 goto unregister_class_dev;
945 mutex_unlock(&bsg_mutex);
948 unregister_class_dev:
949 device_unregister(class_dev);
951 idr_remove(&bsg_minor_idr, bcd->minor);
953 mutex_unlock(&bsg_mutex);
/*
 * Register a SCSI passthrough queue with bsg, named after parent and using
 * bsg_scsi_ops. Warns once when q is not a SCSI passthrough queue.
 * Exported to GPL modules.
 * NOTE(review): the return value of the warning branch is not part of this
 * view.
 */
957 int bsg_scsi_register_queue(struct request_queue *q, struct device *parent)
959 if (!blk_queue_scsi_passthrough(q)) {
960 WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
964 return bsg_register_queue(q, parent, dev_name(parent), &bsg_scsi_ops);
966 EXPORT_SYMBOL_GPL(bsg_scsi_register_queue);
/* Character device covering all bsg minors; set up in bsg_init(). */
968 static struct cdev bsg_cdev;
/*
 * devnode callback of the bsg class: returns a newly allocated string
 * placing the device name under the bsg/ prefix. mode is not touched.
 */
970 static char *bsg_devnode(struct device *dev, umode_t *mode)
972 return kasprintf(GFP_KERNEL, "bsg/%s", dev_name(dev));
/*
 * Driver initialisation: create the command slab cache, initialise the
 * device hash buckets, create the bsg class and install its devnode
 * callback, reserve a char device region of BSG_MAX_DEVS minors, register
 * the cdev and print the load banner. Failures unwind in reverse order.
 * NOTE(review): local declarations, the if (ret) checks, the goto labels
 * and the return statements are not part of this view.
 */
975 static int __init bsg_init(void)
980 bsg_cmd_cachep = kmem_cache_create("bsg_cmd",
981 sizeof(struct bsg_command), 0, 0, NULL);
982 if (!bsg_cmd_cachep) {
983 printk(KERN_ERR "bsg: failed creating slab cache\n");
987 for (i = 0; i < BSG_LIST_ARRAY_SIZE; i++)
988 INIT_HLIST_HEAD(&bsg_device_list[i]);
990 bsg_class = class_create(THIS_MODULE, "bsg");
991 if (IS_ERR(bsg_class)) {
992 ret = PTR_ERR(bsg_class);
993 goto destroy_kmemcache;
995 bsg_class->devnode = bsg_devnode;
/* The major number is assigned dynamically and kept in bsg_major. */
997 ret = alloc_chrdev_region(&devid, 0, BSG_MAX_DEVS, "bsg");
999 goto destroy_bsg_class;
1001 bsg_major = MAJOR(devid);
1003 cdev_init(&bsg_cdev, &bsg_fops);
1004 ret = cdev_add(&bsg_cdev, MKDEV(bsg_major, 0), BSG_MAX_DEVS);
1006 goto unregister_chrdev;
1008 printk(KERN_INFO BSG_DESCRIPTION " version " BSG_VERSION
1009 " loaded (major %d)\n", bsg_major);
/* Error unwinding: chrdev region, then class, then slab cache. */
1012 unregister_chrdev_region(MKDEV(bsg_major, 0), BSG_MAX_DEVS);
1014 class_destroy(bsg_class);
1016 kmem_cache_destroy(bsg_cmd_cachep);
/* Module metadata; bsg_init() is registered as a device-level initcall. */
1020 MODULE_AUTHOR("Jens Axboe");
1021 MODULE_DESCRIPTION(BSG_DESCRIPTION);
1022 MODULE_LICENSE("GPL");
1024 device_initcall(bsg_init);