2 * bsg.c - block layer implementation of the sg v4 interface
7 * This file is subject to the terms and conditions of the GNU General Public
8 * License version 2. See the file "COPYING" in the main directory of this
9 * archive for more details.
12 #include <linux/module.h>
13 #include <linux/init.h>
14 #include <linux/file.h>
15 #include <linux/blkdev.h>
16 #include <linux/poll.h>
17 #include <linux/cdev.h>
18 #include <linux/jiffies.h>
19 #include <linux/percpu.h>
20 #include <linux/uio.h>
21 #include <linux/idr.h>
22 #include <linux/bsg.h>
23 #include <linux/slab.h>
25 #include <scsi/scsi.h>
26 #include <scsi/scsi_ioctl.h>
27 #include <scsi/scsi_cmnd.h>
28 #include <scsi/scsi_device.h>
29 #include <scsi/scsi_driver.h>
/*
 * Human-readable driver description and version. Both strings are
 * concatenated into the load banner printed by bsg_init(), and
 * BSG_DESCRIPTION is also handed to MODULE_DESCRIPTION().
 */
32 #define BSG_DESCRIPTION "Block layer SCSI generic (bsg) driver"
33 #define BSG_VERSION "0.4"
/*
 * Debug print helper: forwards to pr_debug() and prefixes every message
 * with the device name read from (bd)->name.
 */
35 #define bsg_dbg(bd, fmt, ...) \
36 pr_debug("%s: " fmt, (bd)->name, ##__VA_ARGS__)
/*
 * NOTE(review): the lines below look like members of struct bsg_device;
 * the struct header is not visible in this excerpt. The member roles
 * described here are taken from how the rest of the file uses them.
 */
/* request queue this device is bound to; matched in __bsg_get_device() */
39 struct request_queue *queue;
/* commands queued by bsg_add_command() that have not yet ended */
41 struct list_head busy_list;
/* commands moved here by bsg_rq_end_io() once their request has ended */
42 struct list_head done_list;
/* linkage into a bsg_device_list hash bucket, see bsg_add_device() */
43 struct hlist_node dev_list;
/* woken by bsg_rq_end_io() after a command lands on done_list */
47 wait_queue_head_t wq_done;
/* woken by bsg_free_command() after a command has been released */
48 wait_queue_head_t wq_free;
/* initial value of bd->max_queue, set in bsg_alloc_device() */
58 #define BSG_DEFAULT_CMDS 64
/*
 * Upper bound handed to idr_alloc() for minors and the number of minors
 * covered by the char device region and cdev set up in bsg_init().
 */
59 #define BSG_MAX_DEVS 32768
/*
 * bsg_mutex is taken around lookups and updates of bsg_minor_idr and of
 * the bsg_device_list hash (see bsg_get_device(), bsg_put_device(),
 * bsg_register_queue() and bsg_unregister_queue()).
 */
61 static DEFINE_MUTEX(bsg_mutex);
/* maps a minor number to its struct bsg_class_device */
62 static DEFINE_IDR(bsg_minor_idr);
/*
 * Number of hash buckets for open devices. bsg_dev_idx_hash() selects a
 * bucket by masking with BSG_LIST_ARRAY_SIZE - 1, so this value has to
 * stay a power of two.
 */
64 #define BSG_LIST_ARRAY_SIZE 8
65 static struct hlist_head bsg_device_list[BSG_LIST_ARRAY_SIZE];
/* device class created in bsg_init() and used by device_create() */
67 static struct class *bsg_class;
/* slab cache for struct bsg_command objects, created in bsg_init() */
70 static struct kmem_cache *bsg_cmd_cachep;
/*
 * NOTE(review): the lines below look like members of struct bsg_command;
 * the struct header and most members are not visible in this excerpt.
 */
73 * our internal command type
/* device the command belongs to; read back in bsg_free_command() */
76 struct bsg_device *bd;
/* linkage on bd->busy_list while in flight, then on bd->done_list */
77 struct list_head list;
/*
 * Release a command: return @bc to the bsg_cmd_cachep slab cache and
 * wake anyone sleeping on bd->wq_free.
 *
 * NOTE(review): the statement executed while bd->lock is held is not
 * visible in this excerpt; presumably it updates the per-device command
 * accounting. Confirm against the full file.
 */
85 static void bsg_free_command(struct bsg_command *bc)
/* bd is read out of bc before bc is freed, so it stays usable below */
87 struct bsg_device *bd = bc->bd;
90 kmem_cache_free(bsg_cmd_cachep, bc);
92 spin_lock_irqsave(&bd->lock, flags);
94 spin_unlock_irqrestore(&bd->lock, flags);
96 wake_up(&bd->wq_free);
/*
 * Allocate a zeroed struct bsg_command for @bd from bsg_cmd_cachep.
 *
 * The queue depth check (bd->queued_cmds against bd->max_queue) runs
 * under bd->lock. The lock is dropped around the GFP_KERNEL allocation
 * and taken again afterwards.
 *
 * Returns the new command or an ERR_PTR(); the error values visible here
 * are -EINVAL (the initial value) and -ENOMEM.
 *
 * NOTE(review): the branch bodies, the accounting update and the return
 * statement are not visible in this excerpt.
 */
99 static struct bsg_command *bsg_alloc_command(struct bsg_device *bd)
101 struct bsg_command *bc = ERR_PTR(-EINVAL);
103 spin_lock_irq(&bd->lock);
105 if (bd->queued_cmds >= bd->max_queue)
109 spin_unlock_irq(&bd->lock);
/* allocation is done with bd->lock released */
111 bc = kmem_cache_zalloc(bsg_cmd_cachep, GFP_KERNEL);
113 spin_lock_irq(&bd->lock);
115 bc = ERR_PTR(-ENOMEM);
120 INIT_LIST_HEAD(&bc->list);
121 bsg_dbg(bd, "returning free cmd %p\n", bc);
124 spin_unlock_irq(&bd->lock);
/*
 * Return the bsg_device_list bucket for @index. The bucket is chosen by
 * masking @index with BSG_LIST_ARRAY_SIZE - 1, which only covers every
 * bucket because the array size is a power of two.
 */
128 static inline struct hlist_head *bsg_dev_idx_hash(int index)
130 return &bsg_device_list[index & (BSG_LIST_ARRAY_SIZE - 1)];
/*
 * Cast an integer value to a void __user pointer, going through
 * uintptr_t. Used in this file on sg_io_v4 header members that carry
 * user space addresses (request, response, dout_xferp, din_xferp).
 */
133 #define uptr64(val) ((void __user *)(uintptr_t)(val))
/*
 * check_proto hook for SCSI queues: tests whether @hdr asks for anything
 * other than BSG_PROTOCOL_SCSI with BSG_SUB_PROTOCOL_SCSI_CMD.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably a mismatch yields a negative errno and a match yields 0.
 */
135 static int bsg_scsi_check_proto(struct sg_io_v4 *hdr)
137 if (hdr->protocol != BSG_PROTOCOL_SCSI ||
138 hdr->subprotocol != BSG_SUB_PROTOCOL_SCSI_CMD)
/*
 * fill_hdr hook for SCSI queues: loads the command bytes described by
 * @hdr into the scsi_request behind @rq.
 *
 * The command length comes from hdr->request_len. When it exceeds
 * BLK_MAX_CDB a separate zeroed buffer is allocated for sreq->cmd. The
 * bytes are then copied from the user address in hdr->request and
 * passed to blk_verify_command() together with mode.
 *
 * NOTE(review): the rest of the parameter list and all error returns are
 * not visible in this excerpt.
 */
143 static int bsg_scsi_fill_hdr(struct request *rq, struct sg_io_v4 *hdr,
146 struct scsi_request *sreq = scsi_req(rq);
148 sreq->cmd_len = hdr->request_len;
149 if (sreq->cmd_len > BLK_MAX_CDB) {
150 sreq->cmd = kzalloc(sreq->cmd_len, GFP_KERNEL);
155 if (copy_from_user(sreq->cmd, uptr64(hdr->request), sreq->cmd_len))
157 if (blk_verify_command(sreq->cmd, mode))
/*
 * complete_rq hook for SCSI queues: translates the finished
 * scsi_request behind @rq into the output members of @hdr.
 *
 *  - device, transport and driver status are decoded from sreq->result;
 *    SG_INFO_CHECK is set in hdr->info when any of them is non-zero
 *  - sense data, when present and when hdr->response is set, is copied
 *    to user space and its length stored in hdr->response_len
 *  - residual counts are stored in din_resid and/or dout_resid
 *
 * NOTE(review): the second min_t() operand, the error handling after
 * copy_to_user() and the return statement are not visible here.
 */
162 static int bsg_scsi_complete_rq(struct request *rq, struct sg_io_v4 *hdr)
164 struct scsi_request *sreq = scsi_req(rq);
168 * fill in all the output members
/* low byte of the result is reported as the device status */
170 hdr->device_status = sreq->result & 0xff;
171 hdr->transport_status = host_byte(sreq->result);
172 hdr->driver_status = driver_byte(sreq->result);
174 if (hdr->device_status || hdr->transport_status || hdr->driver_status)
175 hdr->info |= SG_INFO_CHECK;
176 hdr->response_len = 0;
/* copy length is capped by the user supplied max_response_len */
178 if (sreq->sense_len && hdr->response) {
179 int len = min_t(unsigned int, hdr->max_response_len,
182 if (copy_to_user(uptr64(hdr->response), sreq->sense, len))
185 hdr->response_len = len;
/*
 * Residuals: the first pair of assignments reads the second request via
 * rq->next_rq (the guarding condition is not visible; presumably it
 * tests rq->next_rq). Otherwise the single residual goes to din_resid
 * for a READ and to dout_resid for anything else.
 */
189 hdr->dout_resid = sreq->resid_len;
190 hdr->din_resid = scsi_req(rq->next_rq)->resid_len;
191 } else if (rq_data_dir(rq) == READ) {
192 hdr->din_resid = sreq->resid_len;
194 hdr->dout_resid = sreq->resid_len;
/*
 * free_rq hook for SCSI queues: hands the scsi_request behind @rq to
 * scsi_req_free_cmd().
 */
200 static void bsg_scsi_free_rq(struct request *rq)
202 scsi_req_free_cmd(scsi_req(rq));
/*
 * Operations table for SCSI queues. bsg_scsi_register_queue() passes it
 * to bsg_register_queue(); the hooks are later called through
 * q->bsg_dev.ops by bsg_map_hdr() and blk_complete_sgv4_hdr_rq().
 */
205 static const struct bsg_ops bsg_scsi_ops = {
206 .check_proto = bsg_scsi_check_proto,
207 .fill_hdr = bsg_scsi_fill_hdr,
208 .complete_rq = bsg_scsi_complete_rq,
209 .free_rq = bsg_scsi_free_rq,
/*
 * Build a block layer request on @q from the sg_io_v4 header @hdr and
 * map the user data buffers into it.
 *
 * Visible steps, in order:
 *  - fail with -ENXIO when the queue has no bsg class device, and with
 *    -EINVAL when the guard member of the header is not the letter Q
 *  - let the check_proto hook validate the header
 *  - get a request: REQ_OP_SCSI_OUT when dout_xfer_len is non-zero,
 *    REQ_OP_SCSI_IN otherwise
 *  - let the fill_hdr hook load the command
 *  - choose the timeout and raise it to at least BLK_MIN_SG_TIMEOUT
 *  - for a transfer with both dout and din lengths, get a second
 *    REQ_OP_SCSI_IN request, attach it as rq->next_rq and map the din
 *    buffer to it
 *  - map the dout buffer, or else the din buffer, or else a zero length
 *    buffer, to the main request
 *
 * Returns the request or an ERR_PTR().
 *
 * NOTE(review): most conditions, the error labels and the return
 * statements are not visible in this excerpt.
 */
212 static struct request *
213 bsg_map_hdr(struct request_queue *q, struct sg_io_v4 *hdr, fmode_t mode)
215 struct request *rq, *next_rq = NULL;
218 if (!q->bsg_dev.class_dev)
219 return ERR_PTR(-ENXIO);
221 if (hdr->guard != 'Q')
222 return ERR_PTR(-EINVAL);
224 ret = q->bsg_dev.ops->check_proto(hdr);
228 rq = blk_get_request(q, hdr->dout_xfer_len ?
229 REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
233 ret = q->bsg_dev.ops->fill_hdr(rq, hdr, mode);
/*
 * Timeout candidates: the header value (given in milliseconds), then
 * q->sg_timeout, then BLK_DEFAULT_SG_TIMEOUT. The conditions selecting
 * between them are not visible; presumably each is used only when the
 * previous one is zero.
 */
237 rq->timeout = msecs_to_jiffies(hdr->timeout);
239 rq->timeout = q->sg_timeout;
241 rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
242 if (rq->timeout < BLK_MIN_SG_TIMEOUT)
243 rq->timeout = BLK_MIN_SG_TIMEOUT;
/* bidirectional transfer: needs QUEUE_FLAG_BIDI and a second request */
245 if (hdr->dout_xfer_len && hdr->din_xfer_len) {
246 if (!test_bit(QUEUE_FLAG_BIDI, &q->queue_flags)) {
251 next_rq = blk_get_request(q, REQ_OP_SCSI_IN, 0);
252 if (IS_ERR(next_rq)) {
253 ret = PTR_ERR(next_rq);
257 rq->next_rq = next_rq;
258 ret = blk_rq_map_user(q, next_rq, NULL, uptr64(hdr->din_xferp),
259 hdr->din_xfer_len, GFP_KERNEL);
261 goto out_free_nextrq;
264 if (hdr->dout_xfer_len) {
265 ret = blk_rq_map_user(q, rq, NULL, uptr64(hdr->dout_xferp),
266 hdr->dout_xfer_len, GFP_KERNEL);
267 } else if (hdr->din_xfer_len) {
268 ret = blk_rq_map_user(q, rq, NULL, uptr64(hdr->din_xferp),
269 hdr->din_xfer_len, GFP_KERNEL);
271 ret = blk_rq_map_user(q, rq, NULL, NULL, 0, GFP_KERNEL);
275 goto out_unmap_nextrq;
/* error unwinding: undo the second request first, then the main one */
280 blk_rq_unmap_user(rq->next_rq->bio);
283 blk_put_request(rq->next_rq);
285 q->bsg_dev.ops->free_rq(rq);
291 * async completion call-back from the block layer, when scsi/ide/whatever
292 * calls end_that_request_last() on a request
/*
 * Passed to blk_execute_rq_nowait() by bsg_add_command(). It recovers
 * the bsg_command from rq->end_io_data, converts the start stamp kept in
 * bc->hdr.duration into elapsed milliseconds, moves the command to
 * bd->done_list under bd->lock and wakes sleepers on bd->wq_done.
 *
 * NOTE(review): the statement between list_move_tail() and the unlock
 * is not visible; presumably it bumps the done command counter.
 */
294 static void bsg_rq_end_io(struct request *rq, blk_status_t status)
296 struct bsg_command *bc = rq->end_io_data;
297 struct bsg_device *bd = bc->bd;
300 bsg_dbg(bd, "finished rq %p bc %p, bio %p\n",
/* duration held the jiffies value stored at submission time */
303 bc->hdr.duration = jiffies_to_msecs(jiffies - bc->hdr.duration);
305 spin_lock_irqsave(&bd->lock, flags);
306 list_move_tail(&bc->list, &bd->done_list);
308 spin_unlock_irqrestore(&bd->lock, flags);
310 wake_up(&bd->wq_done);
314 * do final setup of a 'bc' and submit the matching 'rq' to the block
/*
 * The command is placed at the head of the queue unless the header
 * carries BSG_FLAG_Q_AT_TAIL. The current jiffies value is stored in
 * bc->hdr.duration as a start stamp, the command is appended to
 * bd->busy_list under bd->lock, and the request is submitted without
 * waiting, with bsg_rq_end_io() as completion callback.
 *
 * NOTE(review): the assignments of bc->rq and bc->bio and the condition
 * guarding the bidi_bio assignment are not visible in this excerpt.
 */
317 static void bsg_add_command(struct bsg_device *bd, struct request_queue *q,
318 struct bsg_command *bc, struct request *rq)
320 int at_head = (0 == (bc->hdr.flags & BSG_FLAG_Q_AT_TAIL));
323 * add bc command to busy queue and submit rq for io
328 bc->bidi_bio = rq->next_rq->bio;
329 bc->hdr.duration = jiffies;
330 spin_lock_irq(&bd->lock);
331 list_add_tail(&bc->list, &bd->busy_list);
332 spin_unlock_irq(&bd->lock);
334 bsg_dbg(bd, "queueing rq %p, bc %p\n", rq, bc);
/* lets bsg_rq_end_io() find the command again from the request */
336 rq->end_io_data = bc;
337 blk_execute_rq_nowait(q, NULL, rq, at_head, bsg_rq_end_io);
/*
 * Look at bd->done_list under bd->lock and pick its first entry. bc
 * starts out as NULL.
 *
 * NOTE(review): the guarding condition, the removal from the list and
 * the return statement are not visible; presumably NULL is returned
 * when nothing has completed.
 */
340 static struct bsg_command *bsg_next_done_cmd(struct bsg_device *bd)
342 struct bsg_command *bc = NULL;
344 spin_lock_irq(&bd->lock);
346 bc = list_first_entry(&bd->done_list, struct bsg_command, list);
350 spin_unlock_irq(&bd->lock);
356 * Get a finished command from the done list
/*
 * Tries bsg_next_done_cmd() first. When BSG_F_BLOCK is not set on the
 * device the result becomes ERR_PTR(-EAGAIN); otherwise the caller
 * sleeps interruptibly on bd->wq_done until bd->done_cmds is non-zero.
 * ERR_PTR(-ERESTARTSYS) is the other error value visible here.
 *
 * NOTE(review): the loop construct and the conditions around these
 * statements are not visible in this excerpt.
 */
358 static struct bsg_command *bsg_get_done_cmd(struct bsg_device *bd)
360 struct bsg_command *bc;
364 bc = bsg_next_done_cmd(bd);
368 if (!test_bit(BSG_F_BLOCK, &bd->flags)) {
369 bc = ERR_PTR(-EAGAIN);
373 ret = wait_event_interruptible(bd->wq_done, bd->done_cmds);
375 bc = ERR_PTR(-ERESTARTSYS);
380 bsg_dbg(bd, "returning done %p\n", bc);
/*
 * Finish a request built by bsg_map_hdr(): let the complete_rq hook fill
 * in @hdr, unmap @bidi_bio and drop the second request, unmap @bio, and
 * finally call the free_rq hook for @rq.
 *
 * NOTE(review): the condition around the bidi cleanup, the release of
 * @rq itself and the return statement are not visible in this excerpt.
 */
385 static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
386 struct bio *bio, struct bio *bidi_bio)
390 ret = rq->q->bsg_dev.ops->complete_rq(rq, hdr);
393 blk_rq_unmap_user(bidi_bio);
394 blk_put_request(rq->next_rq);
397 blk_rq_unmap_user(bio);
398 rq->q->bsg_dev.ops->free_rq(rq);
/*
 * Wait condition used by bsg_complete_all_commands(). Under bd->lock it
 * compares bd->done_cmds with bd->queued_cmds; more done than queued
 * commands is treated as a fatal inconsistency (BUG_ON). The check is
 * repeated while the result is false and BSG_F_BLOCK is not set.
 *
 * NOTE(review): the opening of the do loop, the assignment made when
 * the counters are equal and the return are not visible here.
 */
403 static bool bsg_complete(struct bsg_device *bd)
409 spin_lock_irq(&bd->lock);
411 BUG_ON(bd->done_cmds > bd->queued_cmds);
414 * All commands consumed.
416 if (bd->done_cmds == bd->queued_cmds)
/* keep polling only when the device is in non-blocking mode */
419 spin = !test_bit(BSG_F_BLOCK, &bd->flags);
421 spin_unlock_irq(&bd->lock);
422 } while (!ret && spin);
/*
 * Drain a device: wait on bd->wq_done until bsg_complete() reports that
 * all commands have finished, then fetch each done command, complete
 * its request with blk_complete_sgv4_hdr_rq() and free the command.
 * bd->queued_cmds reaching zero is checked under bd->lock.
 *
 * NOTE(review): the loop construct, the handling of tret and the return
 * value are not visible in this excerpt.
 */
427 static int bsg_complete_all_commands(struct bsg_device *bd)
429 struct bsg_command *bc;
432 bsg_dbg(bd, "entered\n");
435 * wait for all commands to complete
437 io_wait_event(bd->wq_done, bsg_complete(bd));
440 * discard done commands
444 spin_lock_irq(&bd->lock);
445 if (!bd->queued_cmds) {
446 spin_unlock_irq(&bd->lock);
449 spin_unlock_irq(&bd->lock);
451 bc = bsg_get_done_cmd(bd);
455 tret = blk_complete_sgv4_hdr_rq(bc->rq, &bc->hdr, bc->bio,
460 bsg_free_command(bc);
/*
 * Copy completed commands back to user space, one struct sg_io_v4 per
 * command. @count is tested for being a whole multiple of
 * sizeof(struct sg_io_v4) and determines how many commands are handled.
 * For each one: fetch a done command, complete its request, copy the
 * header to @buf, free the command, then advance @buf and *@bytes_read
 * by one header.
 *
 * NOTE(review): the return type line, the error handling and the loop
 * counter update are not visible in this excerpt. @iov is not used by
 * any visible line.
 */
467 __bsg_read(char __user *buf, size_t count, struct bsg_device *bd,
468 const struct iovec *iov, ssize_t *bytes_read)
470 struct bsg_command *bc;
471 int nr_commands, ret;
473 if (count % sizeof(struct sg_io_v4))
477 nr_commands = count / sizeof(struct sg_io_v4);
478 while (nr_commands) {
479 bc = bsg_get_done_cmd(bd);
486 * this is the only case where we need to copy data back
487 * after completing the request. so do that here,
488 * bsg_complete_work() cannot do that for us
490 ret = blk_complete_sgv4_hdr_rq(bc->rq, &bc->hdr, bc->bio,
493 if (copy_to_user(buf, &bc->hdr, sizeof(bc->hdr)))
496 bsg_free_command(bc);
501 buf += sizeof(struct sg_io_v4);
502 *bytes_read += sizeof(struct sg_io_v4);
/*
 * Mirror the O_NONBLOCK state of @file into the BSG_F_BLOCK bit of
 * bd->flags: the bit is cleared for a non-blocking file and set in the
 * other case (the else line itself is not visible in this excerpt).
 */
509 static inline void bsg_set_block(struct bsg_device *bd, struct file *file)
511 if (file->f_flags & O_NONBLOCK)
512 clear_bit(BSG_F_BLOCK, &bd->flags);
514 set_bit(BSG_F_BLOCK, &bd->flags);
518 * Check if the error is a "real" error that we should return.
/*
 * The visible test singles out a non-zero @ret that is none of -ENOSPC,
 * -ENODATA and -EAGAIN. Callers (bsg_read() and bsg_write()) combine it
 * with a zero byte count to decide what to return.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 */
520 static inline int err_block_err(int ret)
522 if (ret && ret != -ENOSPC && ret != -ENODATA && ret != -EAGAIN)
/*
 * read() entry point. Takes the bsg_device from file->private_data,
 * updates the blocking mode from the file flags and lets __bsg_read()
 * do the work. The final test covers the cases where nothing was read
 * or err_block_err() flags the error.
 *
 * NOTE(review): the return type line, the initialisation of bytes_read
 * and both return statements are not visible in this excerpt.
 */
529 bsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
531 struct bsg_device *bd = file->private_data;
535 bsg_dbg(bd, "read %zd bytes\n", count);
537 bsg_set_block(bd, file);
540 ret = __bsg_read(buf, count, bd, NULL, &bytes_read);
543 if (!bytes_read || err_block_err(ret))
/*
 * Queue commands supplied by user space, one struct sg_io_v4 per
 * command. @count is tested for being a whole multiple of
 * sizeof(struct sg_io_v4) and determines how many commands are handled.
 * For each one: allocate a bsg_command, copy the header from @buf, build
 * the request with bsg_map_hdr() and submit it with bsg_add_command(),
 * then advance @buf and *@bytes_written by one header.
 * bsg_free_command() is the cleanup visible at the end.
 *
 * NOTE(review): the error checks, the loop counter update, the condition
 * around the final bsg_free_command() and the return are not visible.
 */
549 static int __bsg_write(struct bsg_device *bd, const char __user *buf,
550 size_t count, ssize_t *bytes_written, fmode_t mode)
552 struct bsg_command *bc;
554 int ret, nr_commands;
556 if (count % sizeof(struct sg_io_v4))
559 nr_commands = count / sizeof(struct sg_io_v4);
563 while (nr_commands) {
564 struct request_queue *q = bd->queue;
566 bc = bsg_alloc_command(bd);
573 if (copy_from_user(&bc->hdr, buf, sizeof(bc->hdr))) {
579 * get a request, fill in the blanks, and add to request queue
581 rq = bsg_map_hdr(bd->queue, &bc->hdr, mode);
588 bsg_add_command(bd, q, bc, rq);
592 buf += sizeof(struct sg_io_v4);
593 *bytes_written += sizeof(struct sg_io_v4);
597 bsg_free_command(bc);
/*
 * write() entry point. Checks uaccess_kernel() first, updates the
 * blocking mode from the file flags, then lets __bsg_write() queue the
 * commands with the open mode of the file. *@ppos is set to the number
 * of bytes written, which is also the value returned on the visible
 * success path.
 *
 * NOTE(review): the return type line, the initialisation of
 * bytes_written (which __bsg_write() only adds to) and the error returns
 * are not visible in this excerpt. Confirm bytes_written is zeroed
 * before the call.
 */
603 bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
605 struct bsg_device *bd = file->private_data;
606 ssize_t bytes_written;
609 bsg_dbg(bd, "write %zd bytes\n", count);
611 if (unlikely(uaccess_kernel()))
614 bsg_set_block(bd, file);
617 ret = __bsg_write(bd, buf, count, &bytes_written, file->f_mode);
619 *ppos = bytes_written;
622 * return bytes written on non-fatal errors
624 if (!bytes_written || err_block_err(ret))
627 bsg_dbg(bd, "returning %zd\n", bytes_written);
628 return bytes_written;
/*
 * Allocate a zero-filled struct bsg_device and initialise its lock, its
 * busy and done lists, its hash node and both wait queues. The queue
 * depth limit starts at BSG_DEFAULT_CMDS.
 *
 * NOTE(review): the allocation failure check and the return statement
 * are not visible in this excerpt.
 */
631 static struct bsg_device *bsg_alloc_device(void)
633 struct bsg_device *bd;
635 bd = kzalloc(sizeof(struct bsg_device), GFP_KERNEL);
639 spin_lock_init(&bd->lock);
641 bd->max_queue = BSG_DEFAULT_CMDS;
643 INIT_LIST_HEAD(&bd->busy_list);
644 INIT_LIST_HEAD(&bd->done_list);
645 INIT_HLIST_NODE(&bd->dev_list);
647 init_waitqueue_head(&bd->wq_free);
648 init_waitqueue_head(&bd->wq_done);
/*
 * Drop one reference on @bd. The reference count is decremented under
 * bsg_mutex. Two unlock calls are visible: presumably the first one
 * belongs to the path where references remain, and the second follows
 * the removal of the device from its hash bucket on the last put.
 *
 * On teardown the device is forced into blocking mode and
 * bsg_complete_all_commands() drains whatever is still outstanding.
 *
 * NOTE(review): the condition on do_free, the release of the device
 * memory and of the queue reference, and the return statement are not
 * visible in this excerpt.
 */
652 static int bsg_put_device(struct bsg_device *bd)
654 int ret = 0, do_free;
655 struct request_queue *q = bd->queue;
657 mutex_lock(&bsg_mutex);
659 do_free = atomic_dec_and_test(&bd->ref_count);
661 mutex_unlock(&bsg_mutex);
665 hlist_del(&bd->dev_list);
666 mutex_unlock(&bsg_mutex);
668 bsg_dbg(bd, "tearing down\n");
671 * close can always block
673 set_bit(BSG_F_BLOCK, &bd->flags);
676 * correct error detection baddies here again. it's the responsibility
677 * of the app to properly reap commands before close() if it wants
678 * fool-proof error detection
680 ret = bsg_complete_all_commands(bd);
/*
 * Create the bsg_device for a first open. The caller must hold
 * bsg_mutex (asserted through lockdep).
 *
 * A reference on the request queue is taken first (ERR_PTR(-ENXIO) when
 * that fails), then the device is allocated (ERR_PTR(-ENOMEM) on
 * failure). The new device starts with a reference count of one, gets
 * its blocking mode from the file and is added to the hash bucket
 * selected by the minor number of @inode.
 *
 * NOTE(review): the last parameter line, the assignment of bd->queue,
 * the release of the queue reference on allocation failure and the
 * return statement are not visible in this excerpt.
 */
689 static struct bsg_device *bsg_add_device(struct inode *inode,
690 struct request_queue *rq,
693 struct bsg_device *bd;
694 unsigned char buf[32];
696 lockdep_assert_held(&bsg_mutex);
698 if (!blk_get_queue(rq))
699 return ERR_PTR(-ENXIO);
701 bd = bsg_alloc_device();
704 return ERR_PTR(-ENOMEM);
709 bsg_set_block(bd, file);
711 atomic_set(&bd->ref_count, 1);
712 hlist_add_head(&bd->dev_list, bsg_dev_idx_hash(iminor(inode)));
/*
 * At most sizeof(bd->name) - 1 bytes are copied, so the last byte is
 * never written here. bsg_alloc_device() gets the struct from kzalloc(),
 * which leaves that byte zero and the name terminated.
 */
714 strncpy(bd->name, dev_name(rq->bsg_dev.class_dev), sizeof(bd->name) - 1);
715 bsg_dbg(bd, "bound to <%s>, max queue %d\n",
716 format_dev_t(buf, inode->i_rdev), bd->max_queue);
/*
 * Search the hash bucket selected by @minor for a device bound to @q and
 * take a reference on the match. The caller must hold bsg_mutex
 * (asserted through lockdep).
 *
 * NOTE(review): the return statements are not visible; presumably the
 * matching device is returned, or NULL when the bucket has no match.
 */
721 static struct bsg_device *__bsg_get_device(int minor, struct request_queue *q)
723 struct bsg_device *bd;
725 lockdep_assert_held(&bsg_mutex);
727 hlist_for_each_entry(bd, bsg_dev_idx_hash(minor), dev_list) {
728 if (bd->queue == q) {
729 atomic_inc(&bd->ref_count);
/*
 * Find or create the bsg_device for an open. Under bsg_mutex the class
 * device is looked up in bsg_minor_idr by the minor number of @inode;
 * ERR_PTR(-ENODEV) is the error value visible for that step. An
 * existing device for the queue is reused through __bsg_get_device(),
 * and bsg_add_device() creates a new one.
 *
 * NOTE(review): the conditions between these steps and the return
 * statement are not visible in this excerpt.
 */
738 static struct bsg_device *bsg_get_device(struct inode *inode, struct file *file)
740 struct bsg_device *bd;
741 struct bsg_class_device *bcd;
744 * find the class device
746 mutex_lock(&bsg_mutex);
747 bcd = idr_find(&bsg_minor_idr, iminor(inode));
750 bd = ERR_PTR(-ENODEV);
754 bd = __bsg_get_device(iminor(inode), bcd->queue);
756 bd = bsg_add_device(inode, bcd->queue, file);
759 mutex_unlock(&bsg_mutex);
/*
 * open() entry point: obtains the bsg_device through bsg_get_device()
 * and stores it in file->private_data for the other file operations.
 *
 * NOTE(review): the error check on bd and the return statements are not
 * visible in this excerpt.
 */
763 static int bsg_open(struct inode *inode, struct file *file)
765 struct bsg_device *bd;
767 bd = bsg_get_device(inode, file);
772 file->private_data = bd;
/*
 * release() entry point: clears file->private_data and drops the
 * reference on the device, returning the result of bsg_put_device().
 */
776 static int bsg_release(struct inode *inode, struct file *file)
778 struct bsg_device *bd = file->private_data;
780 file->private_data = NULL;
781 return bsg_put_device(bd);
/*
 * poll() entry point. Registers on both wait queues of the device, then
 * inspects its state under bd->lock: a non-empty done list reports
 * EPOLLIN | EPOLLRDNORM.
 *
 * NOTE(review): the mask bits set when queued_cmds is below max_queue
 * and the return statement are not visible; presumably that case
 * reports the device as writable.
 */
784 static __poll_t bsg_poll(struct file *file, poll_table *wait)
786 struct bsg_device *bd = file->private_data;
789 poll_wait(file, &bd->wq_done, wait);
790 poll_wait(file, &bd->wq_free, wait);
792 spin_lock_irq(&bd->lock);
793 if (!list_empty(&bd->done_list))
794 mask |= EPOLLIN | EPOLLRDNORM;
795 if (bd->queued_cmds < bd->max_queue)
797 spin_unlock_irq(&bd->lock);
/*
 * ioctl() entry point. Visible cases:
 *  - SG_GET_COMMAND_Q: report bd->max_queue to user space
 *  - SG_SET_COMMAND_Q: read a new limit from user space and store it in
 *    bd->max_queue under bd->lock
 *  - a group of SG and SCSI ioctls forwarded to scsi_cmd_ioctl()
 *  - a synchronous path that copies a header from user space, builds
 *    the request with bsg_map_hdr(), runs it with blk_execute_rq(),
 *    completes it and copies the header back
 *
 * NOTE(review): the switch statement, the validation of the new queue
 * limit, the case label of the synchronous path (presumably SG_IO), the
 * default case and all return values are not visible in this excerpt.
 */
802 static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
804 struct bsg_device *bd = file->private_data;
805 int __user *uarg = (int __user *) arg;
812 case SG_GET_COMMAND_Q:
813 return put_user(bd->max_queue, uarg);
814 case SG_SET_COMMAND_Q: {
817 if (get_user(queue, uarg))
822 spin_lock_irq(&bd->lock);
823 bd->max_queue = queue;
824 spin_unlock_irq(&bd->lock);
831 case SG_GET_VERSION_NUM:
832 case SCSI_IOCTL_GET_IDLUN:
833 case SCSI_IOCTL_GET_BUS_NUMBER:
836 case SG_GET_RESERVED_SIZE:
837 case SG_SET_RESERVED_SIZE:
838 case SG_EMULATED_HOST:
839 case SCSI_IOCTL_SEND_COMMAND: {
/* this uarg shadows the outer int pointer with an untyped one */
840 void __user *uarg = (void __user *) arg;
841 return scsi_cmd_ioctl(bd->queue, NULL, file->f_mode, cmd, uarg);
845 struct bio *bio, *bidi_bio = NULL;
849 if (copy_from_user(&hdr, uarg, sizeof(hdr)))
852 rq = bsg_map_hdr(bd->queue, &hdr, file->f_mode);
858 bidi_bio = rq->next_rq->bio;
/* head insertion unless the header asks for BSG_FLAG_Q_AT_TAIL */
860 at_head = (0 == (hdr.flags & BSG_FLAG_Q_AT_TAIL));
861 blk_execute_rq(bd->queue, NULL, rq, at_head);
862 ret = blk_complete_sgv4_hdr_rq(rq, &hdr, bio, bidi_bio);
864 if (copy_to_user(uarg, &hdr, sizeof(hdr)))
/*
 * File operations of the bsg character device, installed on bsg_cdev by
 * bsg_init().
 *
 * NOTE(review): only some initialisers are visible in this excerpt;
 * presumably read, write, poll and open are wired up on the missing
 * lines.
 */
874 static const struct file_operations bsg_fops = {
879 .release = bsg_release,
880 .unlocked_ioctl = bsg_ioctl,
881 .owner = THIS_MODULE,
882 .llseek = default_llseek,
/*
 * Undo bsg_register_queue() for @q. Under bsg_mutex the minor is removed
 * from bsg_minor_idr, the bsg link is removed from the sysfs directory
 * of the queue, the class device is unregistered and bcd->class_dev is
 * reset to NULL, which is the condition bsg_map_hdr() tests before
 * building new requests.
 *
 * NOTE(review): any early return before the mutex is taken is not
 * visible in this excerpt.
 */
885 void bsg_unregister_queue(struct request_queue *q)
887 struct bsg_class_device *bcd = &q->bsg_dev;
892 mutex_lock(&bsg_mutex);
893 idr_remove(&bsg_minor_idr, bcd->minor);
895 sysfs_remove_link(&q->kobj, "bsg");
896 device_unregister(bcd->class_dev);
897 bcd->class_dev = NULL;
898 mutex_unlock(&bsg_mutex);
900 EXPORT_SYMBOL_GPL(bsg_unregister_queue);
/*
 * Register a bsg class device for the request queue @q.
 *
 * Visible steps: test that the queue is request based, clear the
 * bsg_class_device, then under bsg_mutex allocate a minor below
 * BSG_MAX_DEVS from bsg_minor_idr (a message is logged on -ENOSPC),
 * create the class device under @parent with the given @name and link
 * it into the sysfs directory of the queue under the name bsg.
 *
 * Error unwinding runs in reverse: unregister the class device, remove
 * the minor from the idr, release the mutex.
 *
 * NOTE(review): the assignment of bcd, the storing of minor, queue and
 * @ops into it, most conditions and every return statement are not
 * visible in this excerpt.
 */
902 int bsg_register_queue(struct request_queue *q, struct device *parent,
903 const char *name, const struct bsg_ops *ops)
905 struct bsg_class_device *bcd;
908 struct device *class_dev = NULL;
911 * we need a proper transport to send commands, not a stacked device
913 if (!queue_is_rq_based(q))
917 memset(bcd, 0, sizeof(*bcd));
919 mutex_lock(&bsg_mutex);
921 ret = idr_alloc(&bsg_minor_idr, bcd, 0, BSG_MAX_DEVS, GFP_KERNEL);
923 if (ret == -ENOSPC) {
924 printk(KERN_ERR "bsg: too many bsg devices\n");
933 dev = MKDEV(bsg_major, bcd->minor);
934 class_dev = device_create(bsg_class, parent, dev, NULL, "%s", name);
935 if (IS_ERR(class_dev)) {
936 ret = PTR_ERR(class_dev);
939 bcd->class_dev = class_dev;
942 ret = sysfs_create_link(&q->kobj, &bcd->class_dev->kobj, "bsg");
944 goto unregister_class_dev;
947 mutex_unlock(&bsg_mutex);
950 unregister_class_dev:
951 device_unregister(class_dev);
953 idr_remove(&bsg_minor_idr, bcd->minor);
955 mutex_unlock(&bsg_mutex);
/*
 * Register @q with the SCSI operations table. A queue that does not
 * pass blk_queue_scsi_passthrough() triggers a one-time warning.
 * Otherwise the call is forwarded to bsg_register_queue() with the
 * device name of @parent and bsg_scsi_ops.
 *
 * NOTE(review): the value returned after the warning is not visible in
 * this excerpt.
 */
959 int bsg_scsi_register_queue(struct request_queue *q, struct device *parent)
961 if (!blk_queue_scsi_passthrough(q)) {
962 WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
966 return bsg_register_queue(q, parent, dev_name(parent), &bsg_scsi_ops);
968 EXPORT_SYMBOL_GPL(bsg_scsi_register_queue);
/* character device covering all bsg minors; set up in bsg_init() */
970 static struct cdev bsg_cdev;
/*
 * devnode callback installed on bsg_class by bsg_init(). Returns a
 * kasprintf() allocated string made of the bsg/ prefix followed by the
 * device name. @mode is not touched by the visible code.
 */
972 static char *bsg_devnode(struct device *dev, umode_t *mode)
974 return kasprintf(GFP_KERNEL, "bsg/%s", dev_name(dev));
/*
 * Driver initialisation. In order: create the slab cache for struct
 * bsg_command, initialise the device hash buckets, create the bsg class
 * and install its devnode callback, reserve a char device region of
 * BSG_MAX_DEVS minors, record the major number, add the cdev and print
 * the load banner.
 *
 * The cleanup calls at the end undo these steps in reverse order:
 * release the chrdev region, destroy the class, destroy the slab cache.
 *
 * NOTE(review): the local declarations, several error checks, the
 * cleanup labels and the return statements are not visible here.
 */
977 static int __init bsg_init(void)
982 bsg_cmd_cachep = kmem_cache_create("bsg_cmd",
983 sizeof(struct bsg_command), 0, 0, NULL);
984 if (!bsg_cmd_cachep) {
985 printk(KERN_ERR "bsg: failed creating slab cache\n");
989 for (i = 0; i < BSG_LIST_ARRAY_SIZE; i++)
990 INIT_HLIST_HEAD(&bsg_device_list[i]);
992 bsg_class = class_create(THIS_MODULE, "bsg");
993 if (IS_ERR(bsg_class)) {
994 ret = PTR_ERR(bsg_class);
995 goto destroy_kmemcache;
997 bsg_class->devnode = bsg_devnode;
999 ret = alloc_chrdev_region(&devid, 0, BSG_MAX_DEVS, "bsg");
1001 goto destroy_bsg_class;
1003 bsg_major = MAJOR(devid);
1005 cdev_init(&bsg_cdev, &bsg_fops);
1006 ret = cdev_add(&bsg_cdev, MKDEV(bsg_major, 0), BSG_MAX_DEVS);
1008 goto unregister_chrdev;
1010 printk(KERN_INFO BSG_DESCRIPTION " version " BSG_VERSION
1011 " loaded (major %d)\n", bsg_major);
1014 unregister_chrdev_region(MKDEV(bsg_major, 0), BSG_MAX_DEVS);
1016 class_destroy(bsg_class);
1018 kmem_cache_destroy(bsg_cmd_cachep);
/* module metadata; the description reuses BSG_DESCRIPTION */
1022 MODULE_AUTHOR("Jens Axboe");
1023 MODULE_DESCRIPTION(BSG_DESCRIPTION);
1024 MODULE_LICENSE("GPL");
/* bsg_init() is registered to run as a device initcall */
1026 device_initcall(bsg_init);