]> Git Repo - linux.git/blob - drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
Merge tag 'clang-format-6.8' of https://github.com/ojeda/linux
[linux.git] / drivers / gpu / drm / i915 / gt / intel_breadcrumbs.c
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2015-2021 Intel Corporation
4  */
5
6 #include <linux/kthread.h>
7 #include <linux/string_helpers.h>
8 #include <trace/events/dma_fence.h>
9 #include <uapi/linux/sched/types.h>
10
11 #include "i915_drv.h"
12 #include "i915_trace.h"
13 #include "intel_breadcrumbs.h"
14 #include "intel_context.h"
15 #include "intel_engine_pm.h"
16 #include "intel_gt_pm.h"
17 #include "intel_gt_requests.h"
18
19 static bool irq_enable(struct intel_breadcrumbs *b)
20 {
21         return intel_engine_irq_enable(b->irq_engine);
22 }
23
24 static void irq_disable(struct intel_breadcrumbs *b)
25 {
26         intel_engine_irq_disable(b->irq_engine);
27 }
28
29 static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
30 {
31         /*
32          * Since we are waiting on a request, the GPU should be busy
33          * and should have its own rpm reference.
34          */
35         if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt)))
36                 return;
37
38         /*
39          * The breadcrumb irq will be disarmed on the interrupt after the
40          * waiters are signaled. This gives us a single interrupt window in
41          * which we can add a new waiter and avoid the cost of re-enabling
42          * the irq.
43          */
44         WRITE_ONCE(b->irq_armed, true);
45
46         /* Requests may have completed before we could enable the interrupt. */
47         if (!b->irq_enabled++ && b->irq_enable(b))
48                 irq_work_queue(&b->irq_work);
49 }
50
51 static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
52 {
53         if (!b->irq_engine)
54                 return;
55
56         spin_lock(&b->irq_lock);
57         if (!b->irq_armed)
58                 __intel_breadcrumbs_arm_irq(b);
59         spin_unlock(&b->irq_lock);
60 }
61
62 static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
63 {
64         GEM_BUG_ON(!b->irq_enabled);
65         if (!--b->irq_enabled)
66                 b->irq_disable(b);
67
68         WRITE_ONCE(b->irq_armed, false);
69         intel_gt_pm_put_async(b->irq_engine->gt);
70 }
71
72 static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
73 {
74         spin_lock(&b->irq_lock);
75         if (b->irq_armed)
76                 __intel_breadcrumbs_disarm_irq(b);
77         spin_unlock(&b->irq_lock);
78 }
79
80 static void add_signaling_context(struct intel_breadcrumbs *b,
81                                   struct intel_context *ce)
82 {
83         lockdep_assert_held(&ce->signal_lock);
84
85         spin_lock(&b->signalers_lock);
86         list_add_rcu(&ce->signal_link, &b->signalers);
87         spin_unlock(&b->signalers_lock);
88 }
89
90 static bool remove_signaling_context(struct intel_breadcrumbs *b,
91                                      struct intel_context *ce)
92 {
93         lockdep_assert_held(&ce->signal_lock);
94
95         if (!list_empty(&ce->signals))
96                 return false;
97
98         spin_lock(&b->signalers_lock);
99         list_del_rcu(&ce->signal_link);
100         spin_unlock(&b->signalers_lock);
101
102         return true;
103 }
104
105 __maybe_unused static bool
106 check_signal_order(struct intel_context *ce, struct i915_request *rq)
107 {
108         if (rq->context != ce)
109                 return false;
110
111         if (!list_is_last(&rq->signal_link, &ce->signals) &&
112             i915_seqno_passed(rq->fence.seqno,
113                               list_next_entry(rq, signal_link)->fence.seqno))
114                 return false;
115
116         if (!list_is_first(&rq->signal_link, &ce->signals) &&
117             i915_seqno_passed(list_prev_entry(rq, signal_link)->fence.seqno,
118                               rq->fence.seqno))
119                 return false;
120
121         return true;
122 }
123
124 static bool
125 __dma_fence_signal(struct dma_fence *fence)
126 {
127         return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
128 }
129
130 static void
131 __dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
132 {
133         fence->timestamp = timestamp;
134         set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
135         trace_dma_fence_signaled(fence);
136 }
137
138 static void
139 __dma_fence_signal__notify(struct dma_fence *fence,
140                            const struct list_head *list)
141 {
142         struct dma_fence_cb *cur, *tmp;
143
144         lockdep_assert_held(fence->lock);
145
146         list_for_each_entry_safe(cur, tmp, list, node) {
147                 INIT_LIST_HEAD(&cur->node);
148                 cur->func(fence, cur);
149         }
150 }
151
152 static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
153 {
154         if (b->irq_engine)
155                 intel_engine_add_retire(b->irq_engine, tl);
156 }
157
158 static struct llist_node *
159 slist_add(struct llist_node *node, struct llist_node *head)
160 {
161         node->next = head;
162         return node;
163 }
164
165 static void signal_irq_work(struct irq_work *work)
166 {
167         struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
168         const ktime_t timestamp = ktime_get();
169         struct llist_node *signal, *sn;
170         struct intel_context *ce;
171
172         signal = NULL;
173         if (unlikely(!llist_empty(&b->signaled_requests)))
174                 signal = llist_del_all(&b->signaled_requests);
175
176         /*
177          * Keep the irq armed until the interrupt after all listeners are gone.
178          *
179          * Enabling/disabling the interrupt is rather costly, roughly a couple
180          * of hundred microseconds. If we are proactive and enable/disable
181          * the interrupt around every request that wants a breadcrumb, we
182          * quickly drown in the extra orders of magnitude of latency imposed
183          * on request submission.
184          *
185          * So we try to be lazy, and keep the interrupts enabled until no
186          * more listeners appear within a breadcrumb interrupt interval (that
187          * is until a request completes that no one cares about). The
188          * observation is that listeners come in batches, and will often
189          * listen to a bunch of requests in succession. Though note on icl+,
190          * interrupts are always enabled due to concerns with rc6 being
191          * dysfunctional with per-engine interrupt masking.
192          *
193          * We also try to avoid raising too many interrupts, as they may
194          * be generated by userspace batches and it is unfortunately rather
195          * too easy to drown the CPU under a flood of GPU interrupts. Thus
196          * whenever no one appears to be listening, we turn off the interrupts.
197          * Fewer interrupts should conserve power -- at the very least, fewer
198          * interrupt draw less ire from other users of the system and tools
199          * like powertop.
200          */
201         if (!signal && READ_ONCE(b->irq_armed) && list_empty(&b->signalers))
202                 intel_breadcrumbs_disarm_irq(b);
203
204         rcu_read_lock();
205         atomic_inc(&b->signaler_active);
206         list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
207                 struct i915_request *rq;
208
209                 list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
210                         bool release;
211
212                         if (!__i915_request_is_complete(rq))
213                                 break;
214
215                         if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
216                                                 &rq->fence.flags))
217                                 break;
218
219                         /*
220                          * Queue for execution after dropping the signaling
221                          * spinlock as the callback chain may end up adding
222                          * more signalers to the same context or engine.
223                          */
224                         spin_lock(&ce->signal_lock);
225                         list_del_rcu(&rq->signal_link);
226                         release = remove_signaling_context(b, ce);
227                         spin_unlock(&ce->signal_lock);
228                         if (release) {
229                                 if (intel_timeline_is_last(ce->timeline, rq))
230                                         add_retire(b, ce->timeline);
231                                 intel_context_put(ce);
232                         }
233
234                         if (__dma_fence_signal(&rq->fence))
235                                 /* We own signal_node now, xfer to local list */
236                                 signal = slist_add(&rq->signal_node, signal);
237                         else
238                                 i915_request_put(rq);
239                 }
240         }
241         atomic_dec(&b->signaler_active);
242         rcu_read_unlock();
243
244         llist_for_each_safe(signal, sn, signal) {
245                 struct i915_request *rq =
246                         llist_entry(signal, typeof(*rq), signal_node);
247                 struct list_head cb_list;
248
249                 if (rq->engine->sched_engine->retire_inflight_request_prio)
250                         rq->engine->sched_engine->retire_inflight_request_prio(rq);
251
252                 spin_lock(&rq->lock);
253                 list_replace(&rq->fence.cb_list, &cb_list);
254                 __dma_fence_signal__timestamp(&rq->fence, timestamp);
255                 __dma_fence_signal__notify(&rq->fence, &cb_list);
256                 spin_unlock(&rq->lock);
257
258                 i915_request_put(rq);
259         }
260
261         if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
262                 intel_breadcrumbs_arm_irq(b);
263 }
264
265 struct intel_breadcrumbs *
266 intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
267 {
268         struct intel_breadcrumbs *b;
269
270         b = kzalloc(sizeof(*b), GFP_KERNEL);
271         if (!b)
272                 return NULL;
273
274         kref_init(&b->ref);
275
276         spin_lock_init(&b->signalers_lock);
277         INIT_LIST_HEAD(&b->signalers);
278         init_llist_head(&b->signaled_requests);
279
280         spin_lock_init(&b->irq_lock);
281         init_irq_work(&b->irq_work, signal_irq_work);
282
283         b->irq_engine = irq_engine;
284         b->irq_enable = irq_enable;
285         b->irq_disable = irq_disable;
286
287         return b;
288 }
289
290 void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
291 {
292         unsigned long flags;
293
294         if (!b->irq_engine)
295                 return;
296
297         spin_lock_irqsave(&b->irq_lock, flags);
298
299         if (b->irq_enabled)
300                 b->irq_enable(b);
301         else
302                 b->irq_disable(b);
303
304         spin_unlock_irqrestore(&b->irq_lock, flags);
305 }
306
307 void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
308 {
309         if (!READ_ONCE(b->irq_armed))
310                 return;
311
312         /* Kick the work once more to drain the signalers, and disarm the irq */
313         irq_work_sync(&b->irq_work);
314         while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
315                 local_irq_disable();
316                 signal_irq_work(&b->irq_work);
317                 local_irq_enable();
318                 cond_resched();
319         }
320 }
321
322 void intel_breadcrumbs_free(struct kref *kref)
323 {
324         struct intel_breadcrumbs *b = container_of(kref, typeof(*b), ref);
325
326         irq_work_sync(&b->irq_work);
327         GEM_BUG_ON(!list_empty(&b->signalers));
328         GEM_BUG_ON(b->irq_armed);
329
330         kfree(b);
331 }
332
333 static void irq_signal_request(struct i915_request *rq,
334                                struct intel_breadcrumbs *b)
335 {
336         if (!__dma_fence_signal(&rq->fence))
337                 return;
338
339         i915_request_get(rq);
340         if (llist_add(&rq->signal_node, &b->signaled_requests))
341                 irq_work_queue(&b->irq_work);
342 }
343
344 static void insert_breadcrumb(struct i915_request *rq)
345 {
346         struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
347         struct intel_context *ce = rq->context;
348         struct list_head *pos;
349
350         if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
351                 return;
352
353         /*
354          * If the request is already completed, we can transfer it
355          * straight onto a signaled list, and queue the irq worker for
356          * its signal completion.
357          */
358         if (__i915_request_is_complete(rq)) {
359                 irq_signal_request(rq, b);
360                 return;
361         }
362
363         if (list_empty(&ce->signals)) {
364                 intel_context_get(ce);
365                 add_signaling_context(b, ce);
366                 pos = &ce->signals;
367         } else {
368                 /*
369                  * We keep the seqno in retirement order, so we can break
370                  * inside intel_engine_signal_breadcrumbs as soon as we've
371                  * passed the last completed request (or seen a request that
372                  * hasn't event started). We could walk the timeline->requests,
373                  * but keeping a separate signalers_list has the advantage of
374                  * hopefully being much smaller than the full list and so
375                  * provides faster iteration and detection when there are no
376                  * more interrupts required for this context.
377                  *
378                  * We typically expect to add new signalers in order, so we
379                  * start looking for our insertion point from the tail of
380                  * the list.
381                  */
382                 list_for_each_prev(pos, &ce->signals) {
383                         struct i915_request *it =
384                                 list_entry(pos, typeof(*it), signal_link);
385
386                         if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
387                                 break;
388                 }
389         }
390
391         i915_request_get(rq);
392         list_add_rcu(&rq->signal_link, pos);
393         GEM_BUG_ON(!check_signal_order(ce, rq));
394         GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
395         set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
396
397         /*
398          * Defer enabling the interrupt to after HW submission and recheck
399          * the request as it may have completed and raised the interrupt as
400          * we were attaching it into the lists.
401          */
402         if (!b->irq_armed || __i915_request_is_complete(rq))
403                 irq_work_queue(&b->irq_work);
404 }
405
406 bool i915_request_enable_breadcrumb(struct i915_request *rq)
407 {
408         struct intel_context *ce = rq->context;
409
410         /* Serialises with i915_request_retire() using rq->lock */
411         if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
412                 return true;
413
414         /*
415          * Peek at i915_request_submit()/i915_request_unsubmit() status.
416          *
417          * If the request is not yet active (and not signaled), we will
418          * attach the breadcrumb later.
419          */
420         if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
421                 return true;
422
423         spin_lock(&ce->signal_lock);
424         if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
425                 insert_breadcrumb(rq);
426         spin_unlock(&ce->signal_lock);
427
428         return true;
429 }
430
431 void i915_request_cancel_breadcrumb(struct i915_request *rq)
432 {
433         struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
434         struct intel_context *ce = rq->context;
435         bool release;
436
437         spin_lock(&ce->signal_lock);
438         if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
439                 spin_unlock(&ce->signal_lock);
440                 return;
441         }
442
443         list_del_rcu(&rq->signal_link);
444         release = remove_signaling_context(b, ce);
445         spin_unlock(&ce->signal_lock);
446         if (release)
447                 intel_context_put(ce);
448
449         if (__i915_request_is_complete(rq))
450                 irq_signal_request(rq, b);
451
452         i915_request_put(rq);
453 }
454
455 void intel_context_remove_breadcrumbs(struct intel_context *ce,
456                                       struct intel_breadcrumbs *b)
457 {
458         struct i915_request *rq, *rn;
459         bool release = false;
460         unsigned long flags;
461
462         spin_lock_irqsave(&ce->signal_lock, flags);
463
464         if (list_empty(&ce->signals))
465                 goto unlock;
466
467         list_for_each_entry_safe(rq, rn, &ce->signals, signal_link) {
468                 GEM_BUG_ON(!__i915_request_is_complete(rq));
469                 if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
470                                         &rq->fence.flags))
471                         continue;
472
473                 list_del_rcu(&rq->signal_link);
474                 irq_signal_request(rq, b);
475                 i915_request_put(rq);
476         }
477         release = remove_signaling_context(b, ce);
478
479 unlock:
480         spin_unlock_irqrestore(&ce->signal_lock, flags);
481         if (release)
482                 intel_context_put(ce);
483
484         while (atomic_read(&b->signaler_active))
485                 cpu_relax();
486 }
487
488 static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
489 {
490         struct intel_context *ce;
491         struct i915_request *rq;
492
493         drm_printf(p, "Signals:\n");
494
495         rcu_read_lock();
496         list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
497                 list_for_each_entry_rcu(rq, &ce->signals, signal_link)
498                         drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
499                                    rq->fence.context, rq->fence.seqno,
500                                    __i915_request_is_complete(rq) ? "!" :
501                                    __i915_request_has_started(rq) ? "*" :
502                                    "",
503                                    jiffies_to_msecs(jiffies - rq->emitted_jiffies));
504         }
505         rcu_read_unlock();
506 }
507
508 void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
509                                     struct drm_printer *p)
510 {
511         struct intel_breadcrumbs *b;
512
513         b = engine->breadcrumbs;
514         if (!b)
515                 return;
516
517         drm_printf(p, "IRQ: %s\n", str_enabled_disabled(b->irq_armed));
518         if (!list_empty(&b->signalers))
519                 print_signals(b, p);
520 }
This page took 0.064901 seconds and 4 git commands to generate.