1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  MQ Deadline i/o scheduler - adaptation of the legacy deadline scheduler,
4  *  for the blk-mq scheduling framework
5  *
6  *  Copyright (C) 2016 Jens Axboe <axboe@kernel.dk>
7  */
8 #include <linux/kernel.h>
9 #include <linux/fs.h>
10 #include <linux/blkdev.h>
11 #include <linux/bio.h>
12 #include <linux/module.h>
13 #include <linux/slab.h>
14 #include <linux/init.h>
15 #include <linux/compiler.h>
16 #include <linux/rbtree.h>
17 #include <linux/sbitmap.h>
18
19 #include <trace/events/block.h>
20
21 #include "elevator.h"
22 #include "blk.h"
23 #include "blk-mq.h"
24 #include "blk-mq-debugfs.h"
25 #include "blk-mq-sched.h"
26
27 /*
28  * See Documentation/block/deadline-iosched.rst
29  */
30 static const int read_expire = HZ / 2;  /* max time before a read is submitted. */
31 static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
32 /*
33  * Time after which to dispatch lower priority requests even if higher
34  * priority requests are pending.
35  */
36 static const int prio_aging_expire = 10 * HZ;
37 static const int writes_starved = 2;    /* max times reads can starve a write */
38 static const int fifo_batch = 16;       /* # of sequential requests treated as one
39                                      by the above parameters. For throughput. */
40
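/*
 * Worked example of the defaults above (assuming HZ == 1000; HZ is a build
 * configuration detail): read_expire is 500 jiffies (500 ms), write_expire
 * is 5000 jiffies (5 s) and prio_aging_expire is 10000 jiffies (10 s). With
 * fifo_batch == 16, up to 16 requests can be dispatched in sector order
 * before the expiry times are re-checked, and with writes_starved == 2 at
 * most two consecutive read batches are selected while writes are pending
 * before a write batch is forced.
 */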
41 enum dd_data_dir {
42         DD_READ         = READ,
43         DD_WRITE        = WRITE,
44 };
45
46 enum { DD_DIR_COUNT = 2 };
47
48 enum dd_prio {
49         DD_RT_PRIO      = 0,
50         DD_BE_PRIO      = 1,
51         DD_IDLE_PRIO    = 2,
52         DD_PRIO_MAX     = 2,
53 };
54
55 enum { DD_PRIO_COUNT = 3 };
56
57 /*
58  * I/O statistics per I/O priority. It is fine if these counters overflow.
59  * What matters is that these counters are at least as wide as
60  * log2(max_outstanding_requests).
61  */
62 struct io_stats_per_prio {
63         uint32_t inserted;
64         uint32_t merged;
65         uint32_t dispatched;
66         atomic_t completed;
67 };
68
69 /*
70  * Deadline scheduler data per I/O priority (enum dd_prio). Requests are
71  * present on both sort_list[] and fifo_list[].
72  */
73 struct dd_per_prio {
74         struct list_head dispatch;
75         struct rb_root sort_list[DD_DIR_COUNT];
76         struct list_head fifo_list[DD_DIR_COUNT];
77         /* Position of the most recently dispatched request. */
78         sector_t latest_pos[DD_DIR_COUNT];
79         struct io_stats_per_prio stats;
80 };
81
82 struct deadline_data {
83         /*
84          * run time data
85          */
86
87         struct dd_per_prio per_prio[DD_PRIO_COUNT];
88
89         /* Data direction of latest dispatched request. */
90         enum dd_data_dir last_dir;
91         unsigned int batching;          /* number of sequential requests made */
92         unsigned int starved;           /* times reads have starved writes */
93
94         /*
95          * settings that change how the i/o scheduler behaves
96          */
97         int fifo_expire[DD_DIR_COUNT];
98         int fifo_batch;
99         int writes_starved;
100         int front_merges;
101         u32 async_depth;
102         int prio_aging_expire;
103
104         spinlock_t lock;
105 };
106
107 /* Maps an I/O priority class to a deadline scheduler priority. */
108 static const enum dd_prio ioprio_class_to_prio[] = {
109         [IOPRIO_CLASS_NONE]     = DD_BE_PRIO,
110         [IOPRIO_CLASS_RT]       = DD_RT_PRIO,
111         [IOPRIO_CLASS_BE]       = DD_BE_PRIO,
112         [IOPRIO_CLASS_IDLE]     = DD_IDLE_PRIO,
113 };
114
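/*
 * Example: a request submitted with I/O priority class IOPRIO_CLASS_RT is
 * queued in per_prio[DD_RT_PRIO], while a request without an explicit
 * priority (IOPRIO_CLASS_NONE) is treated like best-effort I/O and ends up
 * in per_prio[DD_BE_PRIO] together with IOPRIO_CLASS_BE requests.
 */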
115 static inline struct rb_root *
116 deadline_rb_root(struct dd_per_prio *per_prio, struct request *rq)
117 {
118         return &per_prio->sort_list[rq_data_dir(rq)];
119 }
120
121 /*
122  * Returns the I/O priority class (IOPRIO_CLASS_*) that has been assigned to a
123  * request.
124  */
125 static u8 dd_rq_ioclass(struct request *rq)
126 {
127         return IOPRIO_PRIO_CLASS(req_get_ioprio(rq));
128 }
129
130 /*
131  * Return the first request for which blk_rq_pos() >= @pos.
132  */
133 static inline struct request *deadline_from_pos(struct dd_per_prio *per_prio,
134                                 enum dd_data_dir data_dir, sector_t pos)
135 {
136         struct rb_node *node = per_prio->sort_list[data_dir].rb_node;
137         struct request *rq, *res = NULL;
138
139         if (!node)
140                 return NULL;
141
142         rq = rb_entry_rq(node);
143         while (node) {
144                 rq = rb_entry_rq(node);
145                 if (blk_rq_pos(rq) >= pos) {
146                         res = rq;
147                         node = node->rb_left;
148                 } else {
149                         node = node->rb_right;
150                 }
151         }
152         return res;
153 }
154
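/*
 * Illustration with hypothetical request positions: if the tree for
 * @data_dir holds requests starting at sectors 8, 16 and 32, then
 * deadline_from_pos(per_prio, data_dir, 10) and
 * deadline_from_pos(per_prio, data_dir, 16) both return the request at
 * sector 16, while deadline_from_pos(per_prio, data_dir, 40) returns NULL
 * because no request starts at or above sector 40. The dispatch code uses
 * this together with per_prio->latest_pos[] to keep issuing requests in
 * ascending sector order from the most recently dispatched position.
 */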
155 static void
156 deadline_add_rq_rb(struct dd_per_prio *per_prio, struct request *rq)
157 {
158         struct rb_root *root = deadline_rb_root(per_prio, rq);
159
160         elv_rb_add(root, rq);
161 }
162
163 static inline void
164 deadline_del_rq_rb(struct dd_per_prio *per_prio, struct request *rq)
165 {
166         elv_rb_del(deadline_rb_root(per_prio, rq), rq);
167 }
168
169 /*
170  * remove rq from rbtree and fifo.
171  */
172 static void deadline_remove_request(struct request_queue *q,
173                                     struct dd_per_prio *per_prio,
174                                     struct request *rq)
175 {
176         list_del_init(&rq->queuelist);
177
178         /*
179          * We might not be on the rbtree, if we are doing an insert merge
180          */
181         if (!RB_EMPTY_NODE(&rq->rb_node))
182                 deadline_del_rq_rb(per_prio, rq);
183
184         elv_rqhash_del(q, rq);
185         if (q->last_merge == rq)
186                 q->last_merge = NULL;
187 }
188
189 static void dd_request_merged(struct request_queue *q, struct request *req,
190                               enum elv_merge type)
191 {
192         struct deadline_data *dd = q->elevator->elevator_data;
193         const u8 ioprio_class = dd_rq_ioclass(req);
194         const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
195         struct dd_per_prio *per_prio = &dd->per_prio[prio];
196
197         /*
198          * if the merge was a front merge, we need to reposition request
199          */
200         if (type == ELEVATOR_FRONT_MERGE) {
201                 elv_rb_del(deadline_rb_root(per_prio, req), req);
202                 deadline_add_rq_rb(per_prio, req);
203         }
204 }
205
206 /*
207  * Callback function that is invoked after @next has been merged into @req.
208  */
209 static void dd_merged_requests(struct request_queue *q, struct request *req,
210                                struct request *next)
211 {
212         struct deadline_data *dd = q->elevator->elevator_data;
213         const u8 ioprio_class = dd_rq_ioclass(next);
214         const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
215
216         lockdep_assert_held(&dd->lock);
217
218         dd->per_prio[prio].stats.merged++;
219
220         /*
221          * if next expires before req, assign its expire time to req and
222          * move req into next's position in the fifo (next will be deleted)
223          */
224         if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
225                 if (time_before((unsigned long)next->fifo_time,
226                                 (unsigned long)req->fifo_time)) {
227                         list_move(&req->queuelist, &next->queuelist);
228                         req->fifo_time = next->fifo_time;
229                 }
230         }
231
232         /*
233          * kill knowledge of next, this one is a goner
234          */
235         deadline_remove_request(q, &dd->per_prio[prio], next);
236 }
237
238 /*
239  * move an entry to dispatch queue
240  */
241 static void
242 deadline_move_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
243                       struct request *rq)
244 {
245         /*
246          * take it off the sort and fifo list
247          */
248         deadline_remove_request(rq->q, per_prio, rq);
249 }
250
251 /* Number of requests queued for a given priority level. */
252 static u32 dd_queued(struct deadline_data *dd, enum dd_prio prio)
253 {
254         const struct io_stats_per_prio *stats = &dd->per_prio[prio].stats;
255
256         lockdep_assert_held(&dd->lock);
257
258         return stats->inserted - atomic_read(&stats->completed);
259 }
260
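/*
 * The subtraction above stays correct when the counters wrap. Example in
 * 8-bit arithmetic for brevity: after 260 insertions and 250 completions,
 * inserted == 4 and completed == 250, and (uint8_t)(4 - 250) == 10, the
 * number of requests still queued. The same holds for these 32-bit counters
 * as long as fewer than 2^32 requests are outstanding at any time, which is
 * why the comment above struct io_stats_per_prio only requires a width of
 * log2(max_outstanding_requests) bits.
 */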
261 /*
262  * deadline_check_fifo returns true if and only if there are expired requests
263  * in the FIFO list. Requires !list_empty(&per_prio->fifo_list[data_dir]).
264  */
265 static inline bool deadline_check_fifo(struct dd_per_prio *per_prio,
266                                        enum dd_data_dir data_dir)
267 {
268         struct request *rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);
269
270         return time_is_before_eq_jiffies((unsigned long)rq->fifo_time);
271 }
272
273 /*
274  * For the specified data direction, return the next request to
275  * dispatch using arrival ordered lists.
276  */
277 static struct request *
278 deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
279                       enum dd_data_dir data_dir)
280 {
281         if (list_empty(&per_prio->fifo_list[data_dir]))
282                 return NULL;
283
284         return rq_entry_fifo(per_prio->fifo_list[data_dir].next);
285 }
286
287 /*
288  * For the specified data direction, return the next request to
289  * dispatch using sector position sorted lists.
290  */
291 static struct request *
292 deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
293                       enum dd_data_dir data_dir)
294 {
295         return deadline_from_pos(per_prio, data_dir,
296                                  per_prio->latest_pos[data_dir]);
297 }
298
299 /*
300  * Returns true if and only if @rq started after @latest_start where
301  * @latest_start is in jiffies.
302  */
303 static bool started_after(struct deadline_data *dd, struct request *rq,
304                           unsigned long latest_start)
305 {
306         unsigned long start_time = (unsigned long)rq->fifo_time;
307
308         start_time -= dd->fifo_expire[rq_data_dir(rq)];
309
310         return time_after(start_time, latest_start);
311 }
312
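/*
 * Note on started_after(): for requests inserted through the FIFO path,
 * dd_insert_request() sets rq->fifo_time to jiffies + dd->fifo_expire[dir],
 * so subtracting the expiry interval again recovers the approximate
 * insertion time. Hypothetical numbers: a read inserted at jiffies == 1000
 * with fifo_expire[DD_READ] == 500 gets fifo_time == 1500; started_after()
 * then compares 1500 - 500 == 1000 against @latest_start and only reports
 * true if the request was inserted after @latest_start.
 */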
313 /*
314  * deadline_dispatch_requests selects the best request according to
315  * read/write expire, fifo_batch, etc and with a start time <= @latest_start.
316  */
317 static struct request *__dd_dispatch_request(struct deadline_data *dd,
318                                              struct dd_per_prio *per_prio,
319                                              unsigned long latest_start)
320 {
321         struct request *rq, *next_rq;
322         enum dd_data_dir data_dir;
323         enum dd_prio prio;
324         u8 ioprio_class;
325
326         lockdep_assert_held(&dd->lock);
327
328         if (!list_empty(&per_prio->dispatch)) {
329                 rq = list_first_entry(&per_prio->dispatch, struct request,
330                                       queuelist);
331                 if (started_after(dd, rq, latest_start))
332                         return NULL;
333                 list_del_init(&rq->queuelist);
334                 data_dir = rq_data_dir(rq);
335                 goto done;
336         }
337
338         /*
339          * batches are currently reads XOR writes
340          */
341         rq = deadline_next_request(dd, per_prio, dd->last_dir);
342         if (rq && dd->batching < dd->fifo_batch) {
343                 /* we have a next request and are still entitled to batch */
344                 data_dir = rq_data_dir(rq);
345                 goto dispatch_request;
346         }
347
348         /*
349          * at this point we are not running a batch. select the appropriate
350          * data direction (read / write)
351          */
352
353         if (!list_empty(&per_prio->fifo_list[DD_READ])) {
354                 BUG_ON(RB_EMPTY_ROOT(&per_prio->sort_list[DD_READ]));
355
356                 if (deadline_fifo_request(dd, per_prio, DD_WRITE) &&
357                     (dd->starved++ >= dd->writes_starved))
358                         goto dispatch_writes;
359
360                 data_dir = DD_READ;
361
362                 goto dispatch_find_request;
363         }
364
365         /*
366          * either there are no pending reads, or writes have been starved long enough
367          */
368
369         if (!list_empty(&per_prio->fifo_list[DD_WRITE])) {
370 dispatch_writes:
371                 BUG_ON(RB_EMPTY_ROOT(&per_prio->sort_list[DD_WRITE]));
372
373                 dd->starved = 0;
374
375                 data_dir = DD_WRITE;
376
377                 goto dispatch_find_request;
378         }
379
380         return NULL;
381
382 dispatch_find_request:
383         /*
384          * we are not running a batch, find best request for selected data_dir
385          */
386         next_rq = deadline_next_request(dd, per_prio, data_dir);
387         if (deadline_check_fifo(per_prio, data_dir) || !next_rq) {
388                 /*
389                  * A deadline has expired, the last request was in the other
390                  * direction, or we have run out of higher-sectored requests.
391                  * Start again from the request with the earliest expiry time.
392                  */
393                 rq = deadline_fifo_request(dd, per_prio, data_dir);
394         } else {
395                 /*
396                  * The last req was the same dir and we have a next request in
397                  * sort order. No expired requests so continue on from here.
398                  */
399                 rq = next_rq;
400         }
401
402         if (!rq)
403                 return NULL;
404
405         dd->last_dir = data_dir;
406         dd->batching = 0;
407
408 dispatch_request:
409         if (started_after(dd, rq, latest_start))
410                 return NULL;
411
412         /*
413          * rq is the request selected for dispatch.
414          */
415         dd->batching++;
416         deadline_move_request(dd, per_prio, rq);
417 done:
418         ioprio_class = dd_rq_ioclass(rq);
419         prio = ioprio_class_to_prio[ioprio_class];
420         dd->per_prio[prio].latest_pos[data_dir] = blk_rq_pos(rq);
421         dd->per_prio[prio].stats.dispatched++;
422         rq->rq_flags |= RQF_STARTED;
423         return rq;
424 }
425
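/*
 * Illustrative dispatch sequence (hypothetical workload with best-effort
 * requests only, an empty dispatch list and the default fifo_batch == 16
 * and writes_starved == 2): once a data direction has been chosen, up to 16
 * requests are dispatched in sector order from the most recently dispatched
 * position before deadlines and the direction are re-evaluated. Each time
 * the read direction is considered for a new batch while writes are
 * pending, dd->starved is incremented; after reads have been preferred
 * twice in a row, the write direction is selected and dd->starved is reset.
 * When a new batch starts and the request at the head of the chosen FIFO
 * list has an expired deadline (or there is no next request in sector
 * order), the batch starts from the FIFO head instead of continuing in
 * sector order.
 */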
426 /*
427  * Check whether there are any requests with priority other than DD_RT_PRIO
428  * that were inserted more than prio_aging_expire jiffies ago.
429  */
430 static struct request *dd_dispatch_prio_aged_requests(struct deadline_data *dd,
431                                                       unsigned long now)
432 {
433         struct request *rq;
434         enum dd_prio prio;
435         int prio_cnt;
436
437         lockdep_assert_held(&dd->lock);
438
439         prio_cnt = !!dd_queued(dd, DD_RT_PRIO) + !!dd_queued(dd, DD_BE_PRIO) +
440                    !!dd_queued(dd, DD_IDLE_PRIO);
441         if (prio_cnt < 2)
442                 return NULL;
443
444         for (prio = DD_BE_PRIO; prio <= DD_PRIO_MAX; prio++) {
445                 rq = __dd_dispatch_request(dd, &dd->per_prio[prio],
446                                            now - dd->prio_aging_expire);
447                 if (rq)
448                         return rq;
449         }
450
451         return NULL;
452 }
453
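/*
 * Example of the aging logic (default prio_aging_expire == 10 * HZ): when
 * both real-time and best-effort requests are queued, best-effort requests
 * are normally skipped, but any best-effort or idle request inserted more
 * than 10 seconds ago is dispatched anyway. When only one priority level
 * has queued requests (prio_cnt < 2) there is nothing to age against and
 * the function returns NULL immediately.
 */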
454 /*
455  * Called from blk_mq_run_hw_queue() -> __blk_mq_sched_dispatch_requests().
456  *
457  * One confusing aspect here is that we get called for a specific
458  * hardware queue, but we may return a request that is for a
459  * different hardware queue. This is because mq-deadline has shared
460  * state for all hardware queues, in terms of sorting, FIFOs, etc.
461  */
462 static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
463 {
464         struct deadline_data *dd = hctx->queue->elevator->elevator_data;
465         const unsigned long now = jiffies;
466         struct request *rq;
467         enum dd_prio prio;
468
469         spin_lock(&dd->lock);
470         rq = dd_dispatch_prio_aged_requests(dd, now);
471         if (rq)
472                 goto unlock;
473
474         /*
475          * Next, dispatch requests in priority order. Ignore lower priority
476          * requests if any higher priority requests are pending.
477          */
478         for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
479                 rq = __dd_dispatch_request(dd, &dd->per_prio[prio], now);
480                 if (rq || dd_queued(dd, prio))
481                         break;
482         }
483
484 unlock:
485         spin_unlock(&dd->lock);
486
487         return rq;
488 }
489
490 /*
491  * 'depth' is a number in the range 1..INT_MAX representing a number of
492  * requests. Scale it with a factor (1 << bt->sb.shift) / q->nr_requests since
493  * 1..(1 << bt->sb.shift) is the range expected by sbitmap_get_shallow().
494  * Values larger than q->nr_requests have the same effect as q->nr_requests.
495  */
496 static int dd_to_word_depth(struct blk_mq_hw_ctx *hctx, unsigned int qdepth)
497 {
498         struct sbitmap_queue *bt = &hctx->sched_tags->bitmap_tags;
499         const unsigned int nrr = hctx->queue->nr_requests;
500
501         return ((qdepth << bt->sb.shift) + nrr - 1) / nrr;
502 }
503
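/*
 * Worked example with illustrative values: for q->nr_requests == 256 and
 * bt->sb.shift == 6 (1 << 6 == 64), qdepth == 192 yields
 * ((192 << 6) + 255) / 256 == 48, i.e. asynchronous requests may occupy at
 * most 48 of the 64 bits of each sbitmap word. With qdepth == nr_requests,
 * which is what dd_depth_updated() assigns to dd->async_depth, the result
 * is 64 and hence no throttling occurs by default.
 */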
504 /*
505  * Called by __blk_mq_alloc_request(). The shallow_depth value set by this
506  * function is used by __blk_mq_get_tag().
507  */
508 static void dd_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
509 {
510         struct deadline_data *dd = data->q->elevator->elevator_data;
511
512         /* Do not throttle synchronous reads. */
513         if (op_is_sync(opf) && !op_is_write(opf))
514                 return;
515
516         /*
517          * Throttle asynchronous requests and writes such that these requests
518          * do not block the allocation of synchronous requests.
519          */
520         data->shallow_depth = dd_to_word_depth(data->hctx, dd->async_depth);
521 }
522
523 /* Called by blk_mq_update_nr_requests(). */
524 static void dd_depth_updated(struct blk_mq_hw_ctx *hctx)
525 {
526         struct request_queue *q = hctx->queue;
527         struct deadline_data *dd = q->elevator->elevator_data;
528         struct blk_mq_tags *tags = hctx->sched_tags;
529
530         dd->async_depth = q->nr_requests;
531
532         sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, 1);
533 }
534
535 /* Called by blk_mq_init_hctx() and blk_mq_init_sched(). */
536 static int dd_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
537 {
538         dd_depth_updated(hctx);
539         return 0;
540 }
541
542 static void dd_exit_sched(struct elevator_queue *e)
543 {
544         struct deadline_data *dd = e->elevator_data;
545         enum dd_prio prio;
546
547         for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
548                 struct dd_per_prio *per_prio = &dd->per_prio[prio];
549                 const struct io_stats_per_prio *stats = &per_prio->stats;
550                 uint32_t queued;
551
552                 WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_READ]));
553                 WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_WRITE]));
554
555                 spin_lock(&dd->lock);
556                 queued = dd_queued(dd, prio);
557                 spin_unlock(&dd->lock);
558
559                 WARN_ONCE(queued != 0,
560                           "statistics for priority %d: i %u m %u d %u c %u\n",
561                           prio, stats->inserted, stats->merged,
562                           stats->dispatched, atomic_read(&stats->completed));
563         }
564
565         kfree(dd);
566 }
567
568 /*
569  * initialize elevator private data (deadline_data).
570  */
571 static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
572 {
573         struct deadline_data *dd;
574         struct elevator_queue *eq;
575         enum dd_prio prio;
576         int ret = -ENOMEM;
577
578         eq = elevator_alloc(q, e);
579         if (!eq)
580                 return ret;
581
582         dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
583         if (!dd)
584                 goto put_eq;
585
586         eq->elevator_data = dd;
587
588         for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
589                 struct dd_per_prio *per_prio = &dd->per_prio[prio];
590
591                 INIT_LIST_HEAD(&per_prio->dispatch);
592                 INIT_LIST_HEAD(&per_prio->fifo_list[DD_READ]);
593                 INIT_LIST_HEAD(&per_prio->fifo_list[DD_WRITE]);
594                 per_prio->sort_list[DD_READ] = RB_ROOT;
595                 per_prio->sort_list[DD_WRITE] = RB_ROOT;
596         }
597         dd->fifo_expire[DD_READ] = read_expire;
598         dd->fifo_expire[DD_WRITE] = write_expire;
599         dd->writes_starved = writes_starved;
600         dd->front_merges = 1;
601         dd->last_dir = DD_WRITE;
602         dd->fifo_batch = fifo_batch;
603         dd->prio_aging_expire = prio_aging_expire;
604         spin_lock_init(&dd->lock);
605
606         /* Dispatching is done per request queue rather than per hardware queue. */
607         blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
608
609         q->elevator = eq;
610         return 0;
611
612 put_eq:
613         kobject_put(&eq->kobj);
614         return ret;
615 }
616
617 /*
618  * Try to merge @bio into an existing request. If @bio has been merged into
619  * an existing request, store the pointer to that request into *@rq.
620  */
621 static int dd_request_merge(struct request_queue *q, struct request **rq,
622                             struct bio *bio)
623 {
624         struct deadline_data *dd = q->elevator->elevator_data;
625         const u8 ioprio_class = IOPRIO_PRIO_CLASS(bio->bi_ioprio);
626         const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
627         struct dd_per_prio *per_prio = &dd->per_prio[prio];
628         sector_t sector = bio_end_sector(bio);
629         struct request *__rq;
630
631         if (!dd->front_merges)
632                 return ELEVATOR_NO_MERGE;
633
634         __rq = elv_rb_find(&per_prio->sort_list[bio_data_dir(bio)], sector);
635         if (__rq) {
636                 BUG_ON(sector != blk_rq_pos(__rq));
637
638                 if (elv_bio_merge_ok(__rq, bio)) {
639                         *rq = __rq;
640                         if (blk_discard_mergable(__rq))
641                                 return ELEVATOR_DISCARD_MERGE;
642                         return ELEVATOR_FRONT_MERGE;
643                 }
644         }
645
646         return ELEVATOR_NO_MERGE;
647 }
648
649 /*
650  * Attempt to merge a bio into an existing request. This function is called
651  * before @bio is associated with a request.
652  */
653 static bool dd_bio_merge(struct request_queue *q, struct bio *bio,
654                 unsigned int nr_segs)
655 {
656         struct deadline_data *dd = q->elevator->elevator_data;
657         struct request *free = NULL;
658         bool ret;
659
660         spin_lock(&dd->lock);
661         ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free);
662         spin_unlock(&dd->lock);
663
664         if (free)
665                 blk_mq_free_request(free);
666
667         return ret;
668 }
669
670 /*
671  * add rq to rbtree and fifo
672  */
673 static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
674                               blk_insert_t flags, struct list_head *free)
675 {
676         struct request_queue *q = hctx->queue;
677         struct deadline_data *dd = q->elevator->elevator_data;
678         const enum dd_data_dir data_dir = rq_data_dir(rq);
679         u16 ioprio = req_get_ioprio(rq);
680         u8 ioprio_class = IOPRIO_PRIO_CLASS(ioprio);
681         struct dd_per_prio *per_prio;
682         enum dd_prio prio;
683
684         lockdep_assert_held(&dd->lock);
685
686         prio = ioprio_class_to_prio[ioprio_class];
687         per_prio = &dd->per_prio[prio];
688         if (!rq->elv.priv[0]) {
689                 per_prio->stats.inserted++;
690                 rq->elv.priv[0] = (void *)(uintptr_t)1;
691         }
692
693         if (blk_mq_sched_try_insert_merge(q, rq, free))
694                 return;
695
696         trace_block_rq_insert(rq);
697
698         if (flags & BLK_MQ_INSERT_AT_HEAD) {
699                 list_add(&rq->queuelist, &per_prio->dispatch);
700                 rq->fifo_time = jiffies;
701         } else {
702                 struct list_head *insert_before;
703
704                 deadline_add_rq_rb(per_prio, rq);
705
706                 if (rq_mergeable(rq)) {
707                         elv_rqhash_add(q, rq);
708                         if (!q->last_merge)
709                                 q->last_merge = rq;
710                 }
711
712                 /*
713                  * set expire time and add to fifo list
714                  */
715                 rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
716                 insert_before = &per_prio->fifo_list[data_dir];
717                 list_add_tail(&rq->queuelist, insert_before);
718         }
719 }
720
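/*
 * Insertion example: a request inserted with BLK_MQ_INSERT_AT_HEAD bypasses
 * the sort and FIFO lists and is placed directly on the per-priority
 * dispatch list, so __dd_dispatch_request() hands it out before any batched
 * request. A regular read inserted at jiffies == 1000 with the default
 * read_expire of HZ / 2 (500 jiffies if HZ == 1000) gets fifo_time == 1500
 * and is appended to the tail of fifo_list[DD_READ], which keeps that list
 * ordered by expiry time.
 */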
721 /*
722  * Called from blk_mq_insert_request() or blk_mq_dispatch_plug_list().
723  */
724 static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
725                                struct list_head *list,
726                                blk_insert_t flags)
727 {
728         struct request_queue *q = hctx->queue;
729         struct deadline_data *dd = q->elevator->elevator_data;
730         LIST_HEAD(free);
731
732         spin_lock(&dd->lock);
733         while (!list_empty(list)) {
734                 struct request *rq;
735
736                 rq = list_first_entry(list, struct request, queuelist);
737                 list_del_init(&rq->queuelist);
738                 dd_insert_request(hctx, rq, flags, &free);
739         }
740         spin_unlock(&dd->lock);
741
742         blk_mq_free_requests(&free);
743 }
744
745 /* Callback from inside blk_mq_rq_ctx_init(). */
746 static void dd_prepare_request(struct request *rq)
747 {
748         rq->elv.priv[0] = NULL;
749 }
750
751 /*
752  * Callback from inside blk_mq_free_request().
753  */
754 static void dd_finish_request(struct request *rq)
755 {
756         struct request_queue *q = rq->q;
757         struct deadline_data *dd = q->elevator->elevator_data;
758         const u8 ioprio_class = dd_rq_ioclass(rq);
759         const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
760         struct dd_per_prio *per_prio = &dd->per_prio[prio];
761
762         /*
763          * The block layer core may call dd_finish_request() without having
764          * called dd_insert_requests(). Skip requests that bypassed I/O
765          * scheduling. See also blk_mq_request_bypass_insert().
766          */
767         if (rq->elv.priv[0])
768                 atomic_inc(&per_prio->stats.completed);
769 }
770
771 static bool dd_has_work_for_prio(struct dd_per_prio *per_prio)
772 {
773         return !list_empty_careful(&per_prio->dispatch) ||
774                 !list_empty_careful(&per_prio->fifo_list[DD_READ]) ||
775                 !list_empty_careful(&per_prio->fifo_list[DD_WRITE]);
776 }
777
778 static bool dd_has_work(struct blk_mq_hw_ctx *hctx)
779 {
780         struct deadline_data *dd = hctx->queue->elevator->elevator_data;
781         enum dd_prio prio;
782
783         for (prio = 0; prio <= DD_PRIO_MAX; prio++)
784                 if (dd_has_work_for_prio(&dd->per_prio[prio]))
785                         return true;
786
787         return false;
788 }
789
790 /*
791  * sysfs parts below
792  */
793 #define SHOW_INT(__FUNC, __VAR)                                         \
794 static ssize_t __FUNC(struct elevator_queue *e, char *page)             \
795 {                                                                       \
796         struct deadline_data *dd = e->elevator_data;                    \
797                                                                         \
798         return sysfs_emit(page, "%d\n", __VAR);                         \
799 }
800 #define SHOW_JIFFIES(__FUNC, __VAR) SHOW_INT(__FUNC, jiffies_to_msecs(__VAR))
801 SHOW_JIFFIES(deadline_read_expire_show, dd->fifo_expire[DD_READ]);
802 SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]);
803 SHOW_JIFFIES(deadline_prio_aging_expire_show, dd->prio_aging_expire);
804 SHOW_INT(deadline_writes_starved_show, dd->writes_starved);
805 SHOW_INT(deadline_front_merges_show, dd->front_merges);
806 SHOW_INT(deadline_async_depth_show, dd->async_depth);
807 SHOW_INT(deadline_fifo_batch_show, dd->fifo_batch);
808 #undef SHOW_INT
809 #undef SHOW_JIFFIES
810
811 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)                 \
812 static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
813 {                                                                       \
814         struct deadline_data *dd = e->elevator_data;                    \
815         int __data, __ret;                                              \
816                                                                         \
817         __ret = kstrtoint(page, 0, &__data);                            \
818         if (__ret < 0)                                                  \
819                 return __ret;                                           \
820         if (__data < (MIN))                                             \
821                 __data = (MIN);                                         \
822         else if (__data > (MAX))                                        \
823                 __data = (MAX);                                         \
824         *(__PTR) = __CONV(__data);                                      \
825         return count;                                                   \
826 }
827 #define STORE_INT(__FUNC, __PTR, MIN, MAX)                              \
828         STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, )
829 #define STORE_JIFFIES(__FUNC, __PTR, MIN, MAX)                          \
830         STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, msecs_to_jiffies)
831 STORE_JIFFIES(deadline_read_expire_store, &dd->fifo_expire[DD_READ], 0, INT_MAX);
832 STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MAX);
833 STORE_JIFFIES(deadline_prio_aging_expire_store, &dd->prio_aging_expire, 0, INT_MAX);
834 STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX);
835 STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1);
836 STORE_INT(deadline_async_depth_store, &dd->async_depth, 1, INT_MAX);
837 STORE_INT(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX);
838 #undef STORE_FUNCTION
839 #undef STORE_INT
840 #undef STORE_JIFFIES
841
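/*
 * The attributes defined above appear under /sys/block/<disk>/queue/iosched/.
 * Attributes declared with SHOW_JIFFIES/STORE_JIFFIES (read_expire,
 * write_expire, prio_aging_expire) are reported and accepted in milliseconds
 * and converted to and from jiffies internally; for example, writing 100 to
 * read_expire sets dd->fifo_expire[DD_READ] to msecs_to_jiffies(100). The
 * plain integer attributes are used verbatim, and out-of-range input is
 * clamped to the MIN/MAX bounds passed to each STORE_*() invocation.
 */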
842 #define DD_ATTR(name) \
843         __ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store)
844
845 static struct elv_fs_entry deadline_attrs[] = {
846         DD_ATTR(read_expire),
847         DD_ATTR(write_expire),
848         DD_ATTR(writes_starved),
849         DD_ATTR(front_merges),
850         DD_ATTR(async_depth),
851         DD_ATTR(fifo_batch),
852         DD_ATTR(prio_aging_expire),
853         __ATTR_NULL
854 };
855
856 #ifdef CONFIG_BLK_DEBUG_FS
857 #define DEADLINE_DEBUGFS_DDIR_ATTRS(prio, data_dir, name)               \
858 static void *deadline_##name##_fifo_start(struct seq_file *m,           \
859                                           loff_t *pos)                  \
860         __acquires(&dd->lock)                                           \
861 {                                                                       \
862         struct request_queue *q = m->private;                           \
863         struct deadline_data *dd = q->elevator->elevator_data;          \
864         struct dd_per_prio *per_prio = &dd->per_prio[prio];             \
865                                                                         \
866         spin_lock(&dd->lock);                                           \
867         return seq_list_start(&per_prio->fifo_list[data_dir], *pos);    \
868 }                                                                       \
869                                                                         \
870 static void *deadline_##name##_fifo_next(struct seq_file *m, void *v,   \
871                                          loff_t *pos)                   \
872 {                                                                       \
873         struct request_queue *q = m->private;                           \
874         struct deadline_data *dd = q->elevator->elevator_data;          \
875         struct dd_per_prio *per_prio = &dd->per_prio[prio];             \
876                                                                         \
877         return seq_list_next(v, &per_prio->fifo_list[data_dir], pos);   \
878 }                                                                       \
879                                                                         \
880 static void deadline_##name##_fifo_stop(struct seq_file *m, void *v)    \
881         __releases(&dd->lock)                                           \
882 {                                                                       \
883         struct request_queue *q = m->private;                           \
884         struct deadline_data *dd = q->elevator->elevator_data;          \
885                                                                         \
886         spin_unlock(&dd->lock);                                         \
887 }                                                                       \
888                                                                         \
889 static const struct seq_operations deadline_##name##_fifo_seq_ops = {   \
890         .start  = deadline_##name##_fifo_start,                         \
891         .next   = deadline_##name##_fifo_next,                          \
892         .stop   = deadline_##name##_fifo_stop,                          \
893         .show   = blk_mq_debugfs_rq_show,                               \
894 };                                                                      \
895                                                                         \
896 static int deadline_##name##_next_rq_show(void *data,                   \
897                                           struct seq_file *m)           \
898 {                                                                       \
899         struct request_queue *q = data;                                 \
900         struct deadline_data *dd = q->elevator->elevator_data;          \
901         struct dd_per_prio *per_prio = &dd->per_prio[prio];             \
902         struct request *rq;                                             \
903                                                                         \
904         rq = deadline_from_pos(per_prio, data_dir,                      \
905                                per_prio->latest_pos[data_dir]);         \
906         if (rq)                                                         \
907                 __blk_mq_debugfs_rq_show(m, rq);                        \
908         return 0;                                                       \
909 }
910
911 DEADLINE_DEBUGFS_DDIR_ATTRS(DD_RT_PRIO, DD_READ, read0);
912 DEADLINE_DEBUGFS_DDIR_ATTRS(DD_RT_PRIO, DD_WRITE, write0);
913 DEADLINE_DEBUGFS_DDIR_ATTRS(DD_BE_PRIO, DD_READ, read1);
914 DEADLINE_DEBUGFS_DDIR_ATTRS(DD_BE_PRIO, DD_WRITE, write1);
915 DEADLINE_DEBUGFS_DDIR_ATTRS(DD_IDLE_PRIO, DD_READ, read2);
916 DEADLINE_DEBUGFS_DDIR_ATTRS(DD_IDLE_PRIO, DD_WRITE, write2);
917 #undef DEADLINE_DEBUGFS_DDIR_ATTRS
918
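/*
 * Naming convention for the attributes generated above: the numeric suffix
 * is the dd_prio value, so read0/write0 are the real-time (DD_RT_PRIO)
 * FIFO lists, read1/write1 the best-effort (DD_BE_PRIO) lists and
 * read2/write2 the idle (DD_IDLE_PRIO) lists, matching the
 * dispatch0/dispatch1/dispatch2 attributes defined below.
 */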
919 static int deadline_batching_show(void *data, struct seq_file *m)
920 {
921         struct request_queue *q = data;
922         struct deadline_data *dd = q->elevator->elevator_data;
923
924         seq_printf(m, "%u\n", dd->batching);
925         return 0;
926 }
927
928 static int deadline_starved_show(void *data, struct seq_file *m)
929 {
930         struct request_queue *q = data;
931         struct deadline_data *dd = q->elevator->elevator_data;
932
933         seq_printf(m, "%u\n", dd->starved);
934         return 0;
935 }
936
937 static int dd_async_depth_show(void *data, struct seq_file *m)
938 {
939         struct request_queue *q = data;
940         struct deadline_data *dd = q->elevator->elevator_data;
941
942         seq_printf(m, "%u\n", dd->async_depth);
943         return 0;
944 }
945
946 static int dd_queued_show(void *data, struct seq_file *m)
947 {
948         struct request_queue *q = data;
949         struct deadline_data *dd = q->elevator->elevator_data;
950         u32 rt, be, idle;
951
952         spin_lock(&dd->lock);
953         rt = dd_queued(dd, DD_RT_PRIO);
954         be = dd_queued(dd, DD_BE_PRIO);
955         idle = dd_queued(dd, DD_IDLE_PRIO);
956         spin_unlock(&dd->lock);
957
958         seq_printf(m, "%u %u %u\n", rt, be, idle);
959
960         return 0;
961 }
962
963 /* Number of requests owned by the block driver for a given priority. */
964 static u32 dd_owned_by_driver(struct deadline_data *dd, enum dd_prio prio)
965 {
966         const struct io_stats_per_prio *stats = &dd->per_prio[prio].stats;
967
968         lockdep_assert_held(&dd->lock);
969
970         return stats->dispatched + stats->merged -
971                 atomic_read(&stats->completed);
972 }
973
974 static int dd_owned_by_driver_show(void *data, struct seq_file *m)
975 {
976         struct request_queue *q = data;
977         struct deadline_data *dd = q->elevator->elevator_data;
978         u32 rt, be, idle;
979
980         spin_lock(&dd->lock);
981         rt = dd_owned_by_driver(dd, DD_RT_PRIO);
982         be = dd_owned_by_driver(dd, DD_BE_PRIO);
983         idle = dd_owned_by_driver(dd, DD_IDLE_PRIO);
984         spin_unlock(&dd->lock);
985
986         seq_printf(m, "%u %u %u\n", rt, be, idle);
987
988         return 0;
989 }
990
991 #define DEADLINE_DISPATCH_ATTR(prio)                                    \
992 static void *deadline_dispatch##prio##_start(struct seq_file *m,        \
993                                              loff_t *pos)               \
994         __acquires(&dd->lock)                                           \
995 {                                                                       \
996         struct request_queue *q = m->private;                           \
997         struct deadline_data *dd = q->elevator->elevator_data;          \
998         struct dd_per_prio *per_prio = &dd->per_prio[prio];             \
999                                                                         \
1000         spin_lock(&dd->lock);                                           \
1001         return seq_list_start(&per_prio->dispatch, *pos);               \
1002 }                                                                       \
1003                                                                         \
1004 static void *deadline_dispatch##prio##_next(struct seq_file *m,         \
1005                                             void *v, loff_t *pos)       \
1006 {                                                                       \
1007         struct request_queue *q = m->private;                           \
1008         struct deadline_data *dd = q->elevator->elevator_data;          \
1009         struct dd_per_prio *per_prio = &dd->per_prio[prio];             \
1010                                                                         \
1011         return seq_list_next(v, &per_prio->dispatch, pos);              \
1012 }                                                                       \
1013                                                                         \
1014 static void deadline_dispatch##prio##_stop(struct seq_file *m, void *v) \
1015         __releases(&dd->lock)                                           \
1016 {                                                                       \
1017         struct request_queue *q = m->private;                           \
1018         struct deadline_data *dd = q->elevator->elevator_data;          \
1019                                                                         \
1020         spin_unlock(&dd->lock);                                         \
1021 }                                                                       \
1022                                                                         \
1023 static const struct seq_operations deadline_dispatch##prio##_seq_ops = { \
1024         .start  = deadline_dispatch##prio##_start,                      \
1025         .next   = deadline_dispatch##prio##_next,                       \
1026         .stop   = deadline_dispatch##prio##_stop,                       \
1027         .show   = blk_mq_debugfs_rq_show,                               \
1028 }
1029
1030 DEADLINE_DISPATCH_ATTR(0);
1031 DEADLINE_DISPATCH_ATTR(1);
1032 DEADLINE_DISPATCH_ATTR(2);
1033 #undef DEADLINE_DISPATCH_ATTR
1034
1035 #define DEADLINE_QUEUE_DDIR_ATTRS(name)                                 \
1036         {#name "_fifo_list", 0400,                                      \
1037                         .seq_ops = &deadline_##name##_fifo_seq_ops}
1038 #define DEADLINE_NEXT_RQ_ATTR(name)                                     \
1039         {#name "_next_rq", 0400, deadline_##name##_next_rq_show}
1040 static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = {
1041         DEADLINE_QUEUE_DDIR_ATTRS(read0),
1042         DEADLINE_QUEUE_DDIR_ATTRS(write0),
1043         DEADLINE_QUEUE_DDIR_ATTRS(read1),
1044         DEADLINE_QUEUE_DDIR_ATTRS(write1),
1045         DEADLINE_QUEUE_DDIR_ATTRS(read2),
1046         DEADLINE_QUEUE_DDIR_ATTRS(write2),
1047         DEADLINE_NEXT_RQ_ATTR(read0),
1048         DEADLINE_NEXT_RQ_ATTR(write0),
1049         DEADLINE_NEXT_RQ_ATTR(read1),
1050         DEADLINE_NEXT_RQ_ATTR(write1),
1051         DEADLINE_NEXT_RQ_ATTR(read2),
1052         DEADLINE_NEXT_RQ_ATTR(write2),
1053         {"batching", 0400, deadline_batching_show},
1054         {"starved", 0400, deadline_starved_show},
1055         {"async_depth", 0400, dd_async_depth_show},
1056         {"dispatch0", 0400, .seq_ops = &deadline_dispatch0_seq_ops},
1057         {"dispatch1", 0400, .seq_ops = &deadline_dispatch1_seq_ops},
1058         {"dispatch2", 0400, .seq_ops = &deadline_dispatch2_seq_ops},
1059         {"owned_by_driver", 0400, dd_owned_by_driver_show},
1060         {"queued", 0400, dd_queued_show},
1061         {},
1062 };
1063 #undef DEADLINE_QUEUE_DDIR_ATTRS
1064 #endif
1065
1066 static struct elevator_type mq_deadline = {
1067         .ops = {
1068                 .depth_updated          = dd_depth_updated,
1069                 .limit_depth            = dd_limit_depth,
1070                 .insert_requests        = dd_insert_requests,
1071                 .dispatch_request       = dd_dispatch_request,
1072                 .prepare_request        = dd_prepare_request,
1073                 .finish_request         = dd_finish_request,
1074                 .next_request           = elv_rb_latter_request,
1075                 .former_request         = elv_rb_former_request,
1076                 .bio_merge              = dd_bio_merge,
1077                 .request_merge          = dd_request_merge,
1078                 .requests_merged        = dd_merged_requests,
1079                 .request_merged         = dd_request_merged,
1080                 .has_work               = dd_has_work,
1081                 .init_sched             = dd_init_sched,
1082                 .exit_sched             = dd_exit_sched,
1083                 .init_hctx              = dd_init_hctx,
1084         },
1085
1086 #ifdef CONFIG_BLK_DEBUG_FS
1087         .queue_debugfs_attrs = deadline_queue_debugfs_attrs,
1088 #endif
1089         .elevator_attrs = deadline_attrs,
1090         .elevator_name = "mq-deadline",
1091         .elevator_alias = "deadline",
1092         .elevator_owner = THIS_MODULE,
1093 };
1094 MODULE_ALIAS("mq-deadline-iosched");
1095
1096 static int __init deadline_init(void)
1097 {
1098         return elv_register(&mq_deadline);
1099 }
1100
1101 static void __exit deadline_exit(void)
1102 {
1103         elv_unregister(&mq_deadline);
1104 }
1105
1106 module_init(deadline_init);
1107 module_exit(deadline_exit);
1108
1109 MODULE_AUTHOR("Jens Axboe, Damien Le Moal and Bart Van Assche");
1110 MODULE_LICENSE("GPL");
1111 MODULE_DESCRIPTION("MQ deadline IO scheduler");