// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "i915_drv.h"
#include "i915_request.h"

#include "intel_context.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine.h"
#include "intel_gt.h"
#include "intel_reset.h"

/*
 * While the engine is active, we send a periodic pulse along the engine
 * to check on its health and to flush any idle-barriers. If that request
 * is stuck, and we fail to preempt it, we declare the engine hung and
 * issue a reset -- in the hope that it restores progress.
 */

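/*
 * Arm the delayed work for the next heartbeat tick. Returns false if the
 * heartbeat is disabled (an interval of 0); otherwise the work is queued
 * to fire after the configured interval, rounded up for long delays.
 */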
static bool next_heartbeat(struct intel_engine_cs *engine)
{
        long delay;

        delay = READ_ONCE(engine->props.heartbeat_interval_ms);
        if (!delay)
                return false;

        delay = msecs_to_jiffies_timeout(delay);
        if (delay >= HZ)
                delay = round_jiffies_up_relative(delay);
        mod_delayed_work(system_highpri_wq, &engine->heartbeat.work, delay + 1);

        return true;
}

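/*
 * Create an empty request on the given context, bracketed by
 * intel_context_enter/exit so the context is marked as active while the
 * request is being constructed.
 */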
static struct i915_request *
heartbeat_create(struct intel_context *ce, gfp_t gfp)
{
        struct i915_request *rq;

        intel_context_enter(ce);
        rq = __i915_request_create(ce, gfp);
        intel_context_exit(ce);

        return rq;
}

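/*
 * Fold any pending idle barriers into the pulse and advance the engine's
 * wakeref serial; if a heartbeat is expected, keep a reference to the
 * request as the current systole so its progress can be tracked.
 */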
static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
{
        engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
        i915_request_add_active_barriers(rq);
        if (!engine->heartbeat.systole && intel_engine_has_heartbeat(engine))
                engine->heartbeat.systole = i915_request_get(rq);
}

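/* Attach the idle barriers, then commit and queue the pulse for execution. */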
static void heartbeat_commit(struct i915_request *rq,
                             const struct i915_sched_attr *attr)
{
        idle_pulse(rq->engine, rq);

        __i915_request_commit(rq);
        __i915_request_queue(rq, attr);
}

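/* Dump the engine state, including the stalled heartbeat request, if any. */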
static void show_heartbeat(const struct i915_request *rq,
                           struct intel_engine_cs *engine)
{
        struct drm_printer p = drm_debug_printer("heartbeat");

        if (!rq) {
                intel_engine_dump(engine, &p,
                                  "%s heartbeat not ticking\n",
                                  engine->name);
        } else {
                intel_engine_dump(engine, &p,
                                  "%s heartbeat {seqno:%llx:%lld, prio:%d} not ticking\n",
                                  engine->name,
                                  rq->fence.context,
                                  rq->fence.seqno,
                                  rq->sched.attr.priority);
        }
}

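/*
 * The heartbeat has stopped making progress: dump the engine state on
 * debug builds and hand off to intel_gt_handle_error() to capture the
 * error and reset the engine.
 */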
static void
reset_engine(struct intel_engine_cs *engine, struct i915_request *rq)
{
        if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
                show_heartbeat(rq, engine);

        if (intel_engine_uses_guc(engine))
                /*
                 * GuC itself is toast or GuC's hang detection
                 * is disabled. Either way, need to find the
                 * hang culprit manually.
                 */
                intel_guc_find_hung_context(engine);

        intel_gt_handle_error(engine->gt, engine->mask,
                              I915_ERROR_CAPTURE,
                              "stopped heartbeat on %s",
                              engine->name);
}

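/*
 * The heartbeat worker. If the previous pulse is still outstanding, its
 * priority is raised one step at a time until, at barrier priority, a
 * stuck request triggers an engine reset. Otherwise a fresh pulse is
 * emitted whenever the engine has seen new activity, and the timer is
 * rearmed for the next interval.
 */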
static void heartbeat(struct work_struct *wrk)
{
        struct i915_sched_attr attr = { .priority = I915_PRIORITY_MIN };
        struct intel_engine_cs *engine =
                container_of(wrk, typeof(*engine), heartbeat.work.work);
        struct intel_context *ce = engine->kernel_context;
        struct i915_request *rq;
        unsigned long serial;

        /* Just in case everything has gone horribly wrong, give it a kick */
        intel_engine_flush_submission(engine);

        rq = engine->heartbeat.systole;
        if (rq && i915_request_completed(rq)) {
                i915_request_put(rq);
                engine->heartbeat.systole = NULL;
        }

        if (!intel_engine_pm_get_if_awake(engine))
                return;

        if (intel_gt_is_wedged(engine->gt))
                goto out;

        if (i915_sched_engine_disabled(engine->sched_engine)) {
                reset_engine(engine, engine->heartbeat.systole);
                goto out;
        }

        if (engine->heartbeat.systole) {
                long delay = READ_ONCE(engine->props.heartbeat_interval_ms);

                /* Safeguard against too-fast worker invocations */
                if (!time_after(jiffies,
                                rq->emitted_jiffies + msecs_to_jiffies(delay)))
                        goto out;

                if (!i915_sw_fence_signaled(&rq->submit)) {
                        /*
                         * Not yet submitted, system is stalled.
                         *
                         * This more often happens for ring submission,
                         * where all contexts are funnelled into a common
                         * ringbuffer. If one context is blocked on an
                         * external fence, not only is it not submitted,
                         * but all other contexts, including the kernel
                         * context are stuck waiting for the signal.
                         */
                } else if (engine->sched_engine->schedule &&
                           rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
                        /*
                         * Gradually raise the priority of the heartbeat to
                         * give high priority work [which presumably desires
                         * low latency and no jitter] the chance to naturally
                         * complete before being preempted.
                         */
                        attr.priority = 0;
                        if (rq->sched.attr.priority >= attr.priority)
                                attr.priority = I915_PRIORITY_HEARTBEAT;
                        if (rq->sched.attr.priority >= attr.priority)
                                attr.priority = I915_PRIORITY_BARRIER;

                        local_bh_disable();
                        engine->sched_engine->schedule(rq, &attr);
                        local_bh_enable();
                } else {
                        reset_engine(engine, rq);
                }

                rq->emitted_jiffies = jiffies;
                goto out;
        }

        serial = READ_ONCE(engine->serial);
        if (engine->wakeref_serial == serial)
                goto out;

        if (!mutex_trylock(&ce->timeline->mutex)) {
                /* Unable to lock the kernel timeline, is the engine stuck? */
                if (xchg(&engine->heartbeat.blocked, serial) == serial)
                        intel_gt_handle_error(engine->gt, engine->mask,
                                              I915_ERROR_CAPTURE,
                                              "no heartbeat on %s",
                                              engine->name);
                goto out;
        }

        rq = heartbeat_create(ce, GFP_NOWAIT | __GFP_NOWARN);
        if (IS_ERR(rq))
                goto unlock;

        heartbeat_commit(rq, &attr);

unlock:
        mutex_unlock(&ce->timeline->mutex);
out:
        if (!engine->i915->params.enable_hangcheck || !next_heartbeat(engine))
                i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
        intel_engine_pm_put(engine);
}

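/* Restart the heartbeat ticking as the engine becomes active again. */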
void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine)
{
        if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL))
                return;

        next_heartbeat(engine);
}

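/* Stop the heartbeat as the engine is parked, dropping any pending pulse. */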
void intel_engine_park_heartbeat(struct intel_engine_cs *engine)
{
        if (cancel_delayed_work(&engine->heartbeat.work))
                i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
}

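/* Resume the heartbeats of all engines on the GT that are currently awake. */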
void intel_gt_unpark_heartbeats(struct intel_gt *gt)
{
        struct intel_engine_cs *engine;
        enum intel_engine_id id;

        for_each_engine(engine, gt, id)
                if (intel_engine_pm_is_awake(engine))
                        intel_engine_unpark_heartbeat(engine);
}

void intel_gt_park_heartbeats(struct intel_gt *gt)
{
        struct intel_engine_cs *engine;
        enum intel_engine_id id;

        for_each_engine(engine, gt, id)
                intel_engine_park_heartbeat(engine);
}

void intel_engine_init_heartbeat(struct intel_engine_cs *engine)
{
        INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat);
}

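/*
 * Send a single pulse at barrier priority so that it preempts whatever is
 * currently executing on the engine. The caller must hold the kernel
 * context timeline mutex and an engine wakeref.
 */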
static int __intel_engine_pulse(struct intel_engine_cs *engine)
{
        struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
        struct intel_context *ce = engine->kernel_context;
        struct i915_request *rq;

        lockdep_assert_held(&ce->timeline->mutex);
        GEM_BUG_ON(!intel_engine_has_preemption(engine));
        GEM_BUG_ON(!intel_engine_pm_is_awake(engine));

        rq = heartbeat_create(ce, GFP_NOWAIT | __GFP_NOWARN);
        if (IS_ERR(rq))
                return PTR_ERR(rq);

        __set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);

        heartbeat_commit(rq, &attr);
        GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER);

        return 0;
}

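/*
 * Exchange the stored interval and park or unpark the heartbeat to match,
 * returning the previous value so the caller can roll back on error.
 */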
static unsigned long set_heartbeat(struct intel_engine_cs *engine,
                                   unsigned long delay)
{
        unsigned long old;

        old = xchg(&engine->props.heartbeat_interval_ms, delay);
        if (delay)
                intel_engine_unpark_heartbeat(engine);
        else
                intel_engine_park_heartbeat(engine);

        return old;
}

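/*
 * Change the heartbeat interval for an engine. Disabling the heartbeat
 * (an interval of 0) is rejected unless the engine supports preempt-to-reset.
 * On a change, a pulse is sent (where preemption is available) to re-evaluate
 * the currently executing request; if the pulse fails, the old interval is
 * restored.
 */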
int intel_engine_set_heartbeat(struct intel_engine_cs *engine,
                               unsigned long delay)
{
        struct intel_context *ce = engine->kernel_context;
        int err = 0;

        if (!delay && !intel_engine_has_preempt_reset(engine))
                return -ENODEV;

        intel_engine_pm_get(engine);

        err = mutex_lock_interruptible(&ce->timeline->mutex);
        if (err)
                goto out_rpm;

        if (delay != engine->props.heartbeat_interval_ms) {
                unsigned long saved = set_heartbeat(engine, delay);

                /* recheck current execution */
                if (intel_engine_has_preemption(engine)) {
                        err = __intel_engine_pulse(engine);
                        if (err)
                                set_heartbeat(engine, saved);
                }
        }

        mutex_unlock(&ce->timeline->mutex);

out_rpm:
        intel_engine_pm_put(engine);
        return err;
}

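/*
 * Send a single high-priority pulse along the engine to preempt the
 * currently executing workload, provided the engine supports preemption
 * and is already awake.
 */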
int intel_engine_pulse(struct intel_engine_cs *engine)
{
        struct intel_context *ce = engine->kernel_context;
        int err;

        if (!intel_engine_has_preemption(engine))
                return -ENODEV;

        if (!intel_engine_pm_get_if_awake(engine))
                return 0;

        err = -EINTR;
        if (!mutex_lock_interruptible(&ce->timeline->mutex)) {
                err = __intel_engine_pulse(engine);
                mutex_unlock(&ce->timeline->mutex);
        }

        intel_engine_flush_submission(engine);
        intel_engine_pm_put(engine);
        return err;
}

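/*
 * If any idle barriers are pending on the engine, emit a minimum-priority
 * pulse to carry them, but only while the engine is already awake.
 */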
int intel_engine_flush_barriers(struct intel_engine_cs *engine)
{
        struct i915_sched_attr attr = { .priority = I915_PRIORITY_MIN };
        struct intel_context *ce = engine->kernel_context;
        struct i915_request *rq;
        int err;

        if (llist_empty(&engine->barrier_tasks))
                return 0;

        if (!intel_engine_pm_get_if_awake(engine))
                return 0;

        if (mutex_lock_interruptible(&ce->timeline->mutex)) {
                err = -EINTR;
                goto out_rpm;
        }

        rq = heartbeat_create(ce, GFP_KERNEL);
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
                goto out_unlock;
        }

        heartbeat_commit(rq, &attr);

        err = 0;
out_unlock:
        mutex_unlock(&ce->timeline->mutex);
out_rpm:
        intel_engine_pm_put(engine);
        return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_engine_heartbeat.c"
#endif