/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _INTEL_RINGBUFFER_H_
#define _INTEL_RINGBUFFER_H_

#include <linux/hashtable.h>
#include <linux/seqlock.h>

#include "i915_gem_batch_pool.h"

#include "i915_reg.h"
#include "i915_pmu.h"
#include "i915_request.h"
#include "i915_selftest.h"
#include "i915_timeline.h"
#include "intel_gpu_commands.h"

struct drm_printer;
struct i915_sched_attr;

#define I915_CMD_HASH_ORDER 9

/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
 * but keeps the logic simple. Indeed, the whole purpose of this macro is just
 * to give some inclination as to some of the magic values used in the various
 * workarounds!
 */
#define CACHELINE_BYTES 64
#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t))

struct intel_hw_status_page {
	struct i915_vma *vma;
	u32 *page_addr;
	u32 ggtt_offset;
};

#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
#define I915_WRITE_TAIL(engine, val) I915_WRITE(RING_TAIL((engine)->mmio_base), val)

#define I915_READ_START(engine) I915_READ(RING_START((engine)->mmio_base))
#define I915_WRITE_START(engine, val) I915_WRITE(RING_START((engine)->mmio_base), val)

#define I915_READ_HEAD(engine) I915_READ(RING_HEAD((engine)->mmio_base))
#define I915_WRITE_HEAD(engine, val) I915_WRITE(RING_HEAD((engine)->mmio_base), val)

#define I915_READ_CTL(engine) I915_READ(RING_CTL((engine)->mmio_base))
#define I915_WRITE_CTL(engine, val) I915_WRITE(RING_CTL((engine)->mmio_base), val)

#define I915_READ_IMR(engine) I915_READ(RING_IMR((engine)->mmio_base))
#define I915_WRITE_IMR(engine, val) I915_WRITE(RING_IMR((engine)->mmio_base), val)

#define I915_READ_MODE(engine) I915_READ(RING_MI_MODE((engine)->mmio_base))
#define I915_WRITE_MODE(engine, val) I915_WRITE(RING_MI_MODE((engine)->mmio_base), val)
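/*
 * Illustrative sketch (not part of the original header): the accessors above
 * expand to mmio reads/writes via I915_READ/I915_WRITE, so they are used from
 * code that has the usual implicit dev_priv in scope. A plausible debug-style
 * use is peeking at the ring registers of an engine:
 *
 *	u32 head = I915_READ_HEAD(engine);
 *	u32 tail = I915_READ_TAIL(engine);
 *	u32 ctl  = I915_READ_CTL(engine);
 *
 *	if (ctl & RING_VALID)	// ring enabled, see i915_reg.h
 *		DRM_DEBUG("head=%08x, tail=%08x\n", head, tail);
 */
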
/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
 * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
 */
enum intel_engine_hangcheck_action {
	ENGINE_IDLE = 0,
	ENGINE_WAIT,
	ENGINE_ACTIVE_SEQNO,
	ENGINE_ACTIVE_HEAD,
	ENGINE_ACTIVE_SUBUNITS,
	ENGINE_WAIT_KICK,
	ENGINE_DEAD,
};

static inline const char *
hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
{
	switch (a) {
	case ENGINE_IDLE:
		return "idle";
	case ENGINE_WAIT:
		return "wait";
	case ENGINE_ACTIVE_SEQNO:
		return "active seqno";
	case ENGINE_ACTIVE_HEAD:
		return "active head";
	case ENGINE_ACTIVE_SUBUNITS:
		return "active subunits";
	case ENGINE_WAIT_KICK:
		return "wait kick";
	case ENGINE_DEAD:
		return "dead";
	}

	return "unknown";
}

#define I915_MAX_SLICES 3
#define I915_MAX_SUBSLICES 8

#define instdone_slice_mask(dev_priv__) \
	(INTEL_GEN(dev_priv__) == 7 ? \
	 1 : INTEL_INFO(dev_priv__)->sseu.slice_mask)

#define instdone_subslice_mask(dev_priv__) \
	(INTEL_GEN(dev_priv__) == 7 ? \
	 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask[0])

#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
	for ((slice__) = 0, (subslice__) = 0; \
	     (slice__) < I915_MAX_SLICES; \
	     (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
	       (slice__) += ((subslice__) == 0)) \
		for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
			    (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))
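/*
 * Illustrative sketch (not part of the original header): the iterator above
 * walks every (slice, subslice) pair present in the device's sseu masks. A
 * plausible use, assuming an already populated struct intel_instdone (defined
 * below), is dumping the per-subslice sampler/row values:
 *
 *	int slice, subslice;
 *
 *	for_each_instdone_slice_subslice(dev_priv, slice, subslice)
 *		DRM_DEBUG("sampler[%d][%d]=%08x, row[%d][%d]=%08x\n",
 *			  slice, subslice, instdone->sampler[slice][subslice],
 *			  slice, subslice, instdone->row[slice][subslice]);
 */
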
struct intel_instdone {
	u32 instdone;
	/* The following exist only in the RCS engine */
	u32 slice_common;
	u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
};

struct intel_engine_hangcheck {
	u64 acthd;
	u32 seqno;
	enum intel_engine_hangcheck_action action;
	unsigned long action_timestamp;
	int deadlock;
	struct intel_instdone instdone;
	struct i915_request *active_request;
	bool stalled:1;
	bool wedged:1;
};

struct intel_ring {
	struct i915_vma *vma;
	void *vaddr;

	struct i915_timeline *timeline;
	struct list_head request_list;
	struct list_head active_link;

	u32 head;
	u32 tail;
	u32 emit;

	u32 space;
	u32 size;
	u32 effective_size;
};
struct i915_gem_context;
struct drm_i915_reg_table;

/*
 * We use a single page to load the ctx workarounds, so all of these
 * values are referred to in terms of dwords.
 *
 * struct i915_wa_ctx_bb:
 *  offset: specifies the batch starting position, also helpful in case
 *    we want to have multiple batches at different offsets based on
 *    some criteria. It is not a requirement at the moment but provides
 *    an option for future use.
 *  size: size of the batch in DWORDS
 */
struct i915_ctx_workarounds {
	struct i915_wa_ctx_bb {
		u32 offset;
		u32 size;
	} indirect_ctx, per_ctx;
	struct i915_vma *vma;
};

struct i915_request;
#define I915_MAX_VCS	4
#define I915_MAX_VECS	2

/*
 * Engine IDs definitions.
 * Keep instances of the same type engine together.
 */
enum intel_engine_id {
	RCS = 0,
	BCS,
	VCS,
	VCS2,
	VCS3,
	VCS4,
#define _VCS(n) (VCS + (n))
	VECS,
	VECS2
#define _VECS(n) (VECS + (n))
};

struct i915_priolist {
	struct rb_node node;
	struct list_head requests;
	int priority;
};

struct st_preempt_hang {
	struct completion completion;
	bool inject_hang;
};
/**
 * struct intel_engine_execlists - execlist submission queue and port state
 *
 * The struct intel_engine_execlists represents the combined logical state of
 * the driver and the hardware state for execlist mode of submission.
 */
struct intel_engine_execlists {
	/**
	 * @tasklet: softirq tasklet for bottom handler
	 */
	struct tasklet_struct tasklet;

	/**
	 * @default_priolist: priority list for I915_PRIORITY_NORMAL
	 */
	struct i915_priolist default_priolist;

	/**
	 * @no_priolist: priority lists disabled
	 */
	bool no_priolist;

	/**
	 * @submit_reg: gen-specific execlist submission register
	 * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
	 * the ExecList Submission Queue Contents register array for Gen11+
	 */
	u32 __iomem *submit_reg;

	/**
	 * @ctrl_reg: the enhanced execlists control register, used to load the
	 * submit queue on the HW and to request preemptions to idle
	 */
	u32 __iomem *ctrl_reg;

	/**
	 * @port: execlist port states
	 *
	 * For each hardware ELSP (ExecList Submission Port) we keep
	 * track of the last request and the number of times we submitted
	 * that port to hw. We then count the number of times the hw reports
	 * a context completion or preemption. As only one context can
	 * be active on hw, we limit resubmission of a context to port[0].
	 * This is called Lite Restore of the context. (See the illustrative
	 * sketch after the port array below.)
	 */
	struct execlist_port {
		/**
		 * @request_count: combined request and submission count
		 */
		struct i915_request *request_count;
#define EXECLIST_COUNT_BITS 2
#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
#define port_set(p, packed) ((p)->request_count = (packed))
#define port_isset(p) ((p)->request_count)
#define port_index(p, execlists) ((p) - (execlists)->port)

		/**
		 * @context_id: context ID for port
		 */
		GEM_DEBUG_DECL(u32 context_id);

#define EXECLIST_MAX_PORTS 2
	} port[EXECLIST_MAX_PORTS];
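	/*
	 * Illustrative sketch (not part of the original header): the port
	 * macros above pack a submission count into the low bits of the
	 * request pointer. A plausible submit/complete sequence for port[0]
	 * looks like:
	 *
	 *	struct execlist_port *port = execlists->port;
	 *	struct i915_request *rq;
	 *	unsigned int count;
	 *
	 *	port_set(port, port_pack(rq, 1));	// submitted once
	 *	...
	 *	rq = port_unpack(port, &count);		// request and its count
	 *	if (count > 1)
	 *		;				// lite restored
	 */
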
	/**
	 * @active: is the HW active? We consider the HW as active after
	 * submitting any context for execution and until we have seen the
	 * last context completion event. After that, we do not expect any
	 * more events until we submit, and so can park the HW.
	 *
	 * As we have a small number of different sources from which we feed
	 * the HW, we track the state of each inside a single bitfield.
	 */
	unsigned int active;
#define EXECLISTS_ACTIVE_USER 0
#define EXECLISTS_ACTIVE_PREEMPT 1
#define EXECLISTS_ACTIVE_HWACK 2

	/**
	 * @port_mask: number of execlist ports - 1
	 */
	unsigned int port_mask;

	/**
	 * @queue_priority: Highest pending priority.
	 *
	 * When we add requests into the queue, or adjust the priority of
	 * executing requests, we compute the maximum priority of those
	 * pending requests. We can then use this value to determine if
	 * we need to preempt the executing requests to service the queue.
	 */
	int queue_priority;

	/**
	 * @queue: queue of requests, in priority lists
	 */
	struct rb_root_cached queue;

	/**
	 * @csb_read: control register for Context Switch buffer
	 *
	 * Note this register is always in mmio.
	 */
	u32 __iomem *csb_read;

	/**
	 * @csb_write: control register for Context Switch buffer
	 *
	 * Note this register may be either mmio or HWSP shadow.
	 */
	u32 *csb_write;

	/**
	 * @csb_status: status array for Context Switch buffer
	 *
	 * Note these registers may be either mmio or HWSP shadow.
	 */
	u32 *csb_status;

	/**
	 * @preempt_complete_status: expected CSB upon completing preemption
	 */
	u32 preempt_complete_status;

	/**
	 * @csb_write_reset: reset value for CSB write pointer
	 *
	 * As the CSB write pointer may be either in HWSP or as a field
	 * inside an mmio register, we want to reprogram it slightly
	 * differently to avoid later confusion.
	 */
	u32 csb_write_reset;

	/**
	 * @csb_head: context status buffer head
	 */
	u8 csb_head;

	I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
};
#define INTEL_ENGINE_CS_MAX_NAME 8

struct intel_engine_cs {
	struct drm_i915_private *i915;
	char name[INTEL_ENGINE_CS_MAX_NAME];

	enum intel_engine_id id;
	unsigned int hw_id;
	unsigned int guc_id;

	u8 uabi_id;
	u8 uabi_class;

	u8 class;
	u8 instance;
	u32 context_size;
	u32 mmio_base;

	struct intel_ring *buffer;

	struct i915_timeline timeline;

	struct drm_i915_gem_object *default_state;
	void *pinned_default_state;

	unsigned long irq_posted;
#define ENGINE_IRQ_BREADCRUMB 0

	/* Rather than have every client wait upon all user interrupts,
	 * with the herd waking after every interrupt and each doing the
	 * heavyweight seqno dance, we delegate the task (of being the
	 * bottom-half of the user interrupt) to the first client. After
	 * every interrupt, we wake up one client, who does the heavyweight
	 * coherent seqno read and either goes back to sleep (if incomplete),
	 * or wakes up all the completed clients in parallel, before then
	 * transferring the bottom-half status to the next client in the queue.
	 *
	 * Compared to walking the entire list of waiters in a single dedicated
	 * bottom-half, we reduce the latency of the first waiter by avoiding
	 * a context switch, but incur additional coherent seqno reads when
	 * following the chain of request breadcrumbs. Since it is most likely
	 * that we have a single client waiting on each seqno, then reducing
	 * the overhead of waking that client is much preferred.
	 */
	struct intel_breadcrumbs {
		spinlock_t irq_lock; /* protects irq_*; irqsafe */
		struct intel_wait *irq_wait; /* oldest waiter by retirement */

		spinlock_t rb_lock; /* protects the rb and wraps irq_lock */
		struct rb_root waiters; /* sorted by retirement, priority */
		struct list_head signals; /* sorted by retirement */
		struct task_struct *signaler; /* used for fence signalling */

		struct timer_list fake_irq; /* used after a missed interrupt */
		struct timer_list hangcheck; /* detect missed interrupts */

		unsigned int hangcheck_interrupts;
		unsigned int irq_enabled;
		unsigned int irq_count;

		bool irq_armed : 1;
		I915_SELFTEST_DECLARE(bool mock : 1);
	} breadcrumbs;
	struct {
		/**
		 * @enable: Bitmask of enabled sample events on this engine.
		 *
		 * Bits correspond to sample event types, for instance
		 * I915_SAMPLE_QUEUED is bit 0 etc.
		 */
		u32 enable;
		/**
		 * @enable_count: Reference count for the enabled samplers.
		 *
		 * Index number corresponds to the bit number from @enable.
		 */
		unsigned int enable_count[I915_PMU_SAMPLE_BITS];
		/**
		 * @sample: Counter values for sampling events.
		 *
		 * Our internal timer stores the current counters in this field.
		 */
#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
	} pmu;

	/*
	 * A pool of objects to use as shadow copies of client batch buffers
	 * when the command parser is enabled. Prevents the client from
	 * modifying the batch contents after software parsing.
	 */
	struct i915_gem_batch_pool batch_pool;

	struct intel_hw_status_page status_page;
	struct i915_ctx_workarounds wa_ctx;
	struct i915_vma *scratch;

	u32 irq_keep_mask; /* always keep these interrupts */
	u32 irq_enable_mask; /* bitmask to enable ring interrupt */
	void (*irq_enable)(struct intel_engine_cs *engine);
	void (*irq_disable)(struct intel_engine_cs *engine);

	int (*init_hw)(struct intel_engine_cs *engine);

	struct {
		struct i915_request *(*prepare)(struct intel_engine_cs *engine);
		void (*reset)(struct intel_engine_cs *engine,
			      struct i915_request *rq);
		void (*finish)(struct intel_engine_cs *engine);
	} reset;
	void (*park)(struct intel_engine_cs *engine);
	void (*unpark)(struct intel_engine_cs *engine);

	void (*set_default_submission)(struct intel_engine_cs *engine);

	struct intel_context *(*context_pin)(struct intel_engine_cs *engine,
					     struct i915_gem_context *ctx);

	int (*request_alloc)(struct i915_request *rq);
	int (*init_context)(struct i915_request *rq);

	int (*emit_flush)(struct i915_request *request, u32 mode);
#define EMIT_INVALIDATE BIT(0)
#define EMIT_FLUSH BIT(1)
#define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH)
	int (*emit_bb_start)(struct i915_request *rq,
			     u64 offset, u32 length,
			     unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
	void (*emit_breadcrumb)(struct i915_request *rq, u32 *cs);
	int emit_breadcrumb_sz;

	/* Pass the request to the hardware queue (e.g. directly into
	 * the legacy ringbuffer or to the end of an execlist).
	 *
	 * This is called from an atomic context with irqs disabled; must
	 * be irq safe.
	 */
	void (*submit_request)(struct i915_request *rq);

	/* Call when the priority on a request has changed and it and its
	 * dependencies may need rescheduling. Note the request itself may
	 * not be ready to run!
	 *
	 * Called under the struct_mutex.
	 */
	void (*schedule)(struct i915_request *request,
			 const struct i915_sched_attr *attr);

	/*
	 * Cancel all requests on the hardware, or queued for execution.
	 * This should only cancel the ready requests that have been
	 * submitted to the engine (via the engine->submit_request callback).
	 * This is called when marking the device as wedged.
	 */
	void (*cancel_requests)(struct intel_engine_cs *engine);

	/* Some chipsets are not quite as coherent as advertised and need
	 * an expensive kick to force a true read of the up-to-date seqno.
	 * However, the up-to-date seqno is not always required and the last
	 * seen value is good enough. Note that the seqno will always be
	 * monotonic, even if not coherent.
	 */
	void (*irq_seqno_barrier)(struct intel_engine_cs *engine);
	void (*cleanup)(struct intel_engine_cs *engine);
	/* GEN8 signal/wait table - never trust comments!
	 *	  signal to	signal to	signal to	signal to	signal to
	 *	    RCS		   VCS		   BCS		  VECS		 VCS2
	 *      --------------------------------------------------------------------
	 *  RCS | NOP (0x00) | VCS (0x08) |  BCS (0x10) | VECS (0x18) | VCS2 (0x20) |
	 *	|-------------------------------------------------------------------
	 *  VCS | RCS (0x28) | NOP (0x30) |  BCS (0x38) | VECS (0x40) | VCS2 (0x48) |
	 *	|-------------------------------------------------------------------
	 *  BCS | RCS (0x50) | VCS (0x58) |  NOP (0x60) | VECS (0x68) | VCS2 (0x70) |
	 *	|-------------------------------------------------------------------
	 * VECS | RCS (0x78) | VCS (0x80) |  BCS (0x88) |  NOP (0x90) | VCS2 (0x98) |
	 *	|-------------------------------------------------------------------
	 * VCS2 | RCS (0xa0) | VCS (0xa8) |  BCS (0xb0) | VECS (0xb8) |  NOP (0xc0) |
	 *	|-------------------------------------------------------------------
	 *
	 * Generalization:
	 *  f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id)
	 *  ie. transpose of g(x, y)
	 *
	 *	 sync from	sync from	sync from	sync from	sync from
	 *	    RCS		   VCS		   BCS		  VECS		 VCS2
	 *      --------------------------------------------------------------------
	 *  RCS | NOP (0x00) | VCS (0x28) |  BCS (0x50) | VECS (0x78) | VCS2 (0xa0) |
	 *	|-------------------------------------------------------------------
	 *  VCS | RCS (0x08) | NOP (0x30) |  BCS (0x58) | VECS (0x80) | VCS2 (0xa8) |
	 *	|-------------------------------------------------------------------
	 *  BCS | RCS (0x10) | VCS (0x38) |  NOP (0x60) | VECS (0x88) | VCS2 (0xb0) |
	 *	|-------------------------------------------------------------------
	 * VECS | RCS (0x18) | VCS (0x40) |  BCS (0x68) |  NOP (0x90) | VCS2 (0xb8) |
	 *	|-------------------------------------------------------------------
	 * VCS2 | RCS (0x20) | VCS (0x48) |  BCS (0x70) | VECS (0x98) |  NOP (0xc0) |
	 *	|-------------------------------------------------------------------
	 *
	 * Generalization:
	 *  g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id)
	 *  ie. transpose of f(x, y)
	 */
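	/*
	 * Illustrative worked example (not part of the original header),
	 * assuming seqno_size is 8 bytes and NUM_RINGS is 5 as in the table
	 * above: RCS signalling to VCS writes at
	 *
	 *	f(RCS, VCS) = (0 * 5 * 8) + (8 * 1) = 0x08
	 *
	 * and the matching wait on the VCS side reads from
	 *
	 *	g(VCS, RCS) = (0 * 5 * 8) + (8 * 1) = 0x08
	 *
	 * which is exactly the VCS row / "sync from RCS" cell (0x08) above.
	 */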
	struct {
#define GEN6_SEMAPHORE_LAST	VECS_HW
#define GEN6_NUM_SEMAPHORES	(GEN6_SEMAPHORE_LAST + 1)
#define GEN6_SEMAPHORES_MASK	GENMASK(GEN6_SEMAPHORE_LAST, 0)
		struct {
			/* our mbox written by others */
			u32 wait[GEN6_NUM_SEMAPHORES];
			/* mboxes this ring signals to */
			i915_reg_t signal[GEN6_NUM_SEMAPHORES];
		} mbox;

		/* AKA wait() */
		int (*sync_to)(struct i915_request *rq,
			       struct i915_request *signal);
		u32 *(*signal)(struct i915_request *rq, u32 *cs);
	} semaphore;

	struct intel_engine_execlists execlists;

	/* Contexts are pinned whilst they are active on the GPU. The last
	 * context executed remains active whilst the GPU is idle - the
	 * switch away and write to the context object only occurs on the
	 * next execution. Contexts are only unpinned on retirement of the
	 * following request ensuring that we can always write to the object
	 * on the context switch even after idling. Across suspend, we switch
	 * to the kernel context and trash it as the save may not happen
	 * before the hardware is powered down.
	 */
	struct intel_context *last_retired_context;

	/* status_notifier: list of callbacks for context-switch changes */
	struct atomic_notifier_head context_status_notifier;

	struct intel_engine_hangcheck hangcheck;

#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
#define I915_ENGINE_SUPPORTS_STATS   BIT(1)
#define I915_ENGINE_HAS_PREEMPTION   BIT(2)
	unsigned int flags;

	/*
	 * Table of commands the command parser needs to know about
	 * for this engine.
	 */
	DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

	/*
	 * Table of registers allowed in commands that read/write registers.
	 */
	const struct drm_i915_reg_table *reg_tables;
	int reg_table_count;

	/*
	 * Returns the bitmask for the length field of the specified command.
	 * Return 0 for an unrecognized/invalid command.
	 *
	 * If the command parser finds an entry for a command in the engine's
	 * cmd_tables, it gets the command's length based on the table entry.
	 * If not, it calls this function to determine the per-engine length
	 * field encoding for the command (i.e. different opcode ranges use
	 * certain bits to encode the command length in the header).
	 */
	u32 (*get_cmd_length_mask)(u32 cmd_header);
	struct {
		/**
		 * @lock: Lock protecting the below fields.
		 */
		seqlock_t lock;
		/**
		 * @enabled: Reference count indicating number of listeners.
		 */
		unsigned int enabled;
		/**
		 * @active: Number of contexts currently scheduled in.
		 */
		unsigned int active;
		/**
		 * @enabled_at: Timestamp when busy stats were enabled.
		 */
		ktime_t enabled_at;
		/**
		 * @start: Timestamp of the last idle to active transition.
		 *
		 * Idle is defined as active == 0, active is active > 0.
		 */
		ktime_t start;
		/**
		 * @total: Total time this engine was busy.
		 *
		 * Accumulated time not counting the most recent block in cases
		 * where engine is currently busy (active > 0).
		 */
		ktime_t total;
	} stats;
};

static inline bool
intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
}

static inline bool
intel_engine_supports_stats(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_SUPPORTS_STATS;
}

static inline bool
intel_engine_has_preemption(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
}

static inline bool __execlists_need_preempt(int prio, int last)
{
	return prio > max(0, last);
}
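/*
 * Illustrative sketch (not part of the original header): the helper above is
 * the core of the preemption decision. A plausible check, comparing the
 * highest pending priority against the priority of the request currently on
 * the hardware (last_executing_priority is an assumed name for illustration):
 *
 *	if (intel_engine_has_preemption(engine) &&
 *	    __execlists_need_preempt(engine->execlists.queue_priority,
 *				     last_executing_priority))
 *		;	// inject a preemption request / kick the tasklet
 */
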
static inline void
execlists_set_active(struct intel_engine_execlists *execlists,
		     unsigned int bit)
{
	__set_bit(bit, (unsigned long *)&execlists->active);
}

static inline bool
execlists_set_active_once(struct intel_engine_execlists *execlists,
			  unsigned int bit)
{
	return !__test_and_set_bit(bit, (unsigned long *)&execlists->active);
}

static inline void
execlists_clear_active(struct intel_engine_execlists *execlists,
		       unsigned int bit)
{
	__clear_bit(bit, (unsigned long *)&execlists->active);
}

static inline void
execlists_clear_all_active(struct intel_engine_execlists *execlists)
{
	execlists->active = 0;
}

static inline bool
execlists_is_active(const struct intel_engine_execlists *execlists,
		    unsigned int bit)
{
	return test_bit(bit, (unsigned long *)&execlists->active);
}
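/*
 * Illustrative sketch (not part of the original header): the @active bitfield
 * described in struct intel_engine_execlists is driven with the helpers
 * above, e.g. tracking user submission activity (a simplified sequence, not
 * the driver's exact code):
 *
 *	if (execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER))
 *		;	// first context submitted since idle
 *	...
 *	execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
 *			// last context completion seen, HW may be parked
 */
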
void execlists_user_begin(struct intel_engine_execlists *execlists,
			  const struct execlist_port *port);
void execlists_user_end(struct intel_engine_execlists *execlists);

void
execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);

void
execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);

static inline unsigned int
execlists_num_ports(const struct intel_engine_execlists * const execlists)
{
	return execlists->port_mask + 1;
}

static inline struct execlist_port *
execlists_port_complete(struct intel_engine_execlists * const execlists,
			struct execlist_port * const port)
{
	const unsigned int m = execlists->port_mask;

	GEM_BUG_ON(port_index(port, execlists) != 0);
	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));

	memmove(port, port + 1, m * sizeof(struct execlist_port));
	memset(port + m, 0, sizeof(struct execlist_port));

	return port;
}

static inline unsigned int
intel_engine_flag(const struct intel_engine_cs *engine)
{
	return BIT(engine->id);
}

static inline u32
intel_read_status_page(const struct intel_engine_cs *engine, int reg)
{
	/* Ensure that the compiler doesn't optimize away the load. */
	return READ_ONCE(engine->status_page.page_addr[reg]);
}

static inline void
intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
{
	/* Writing into the status page should be done sparingly. Since
	 * we do so when we are uncertain of the device state, we take a bit
	 * of extra paranoia to try and ensure that the HWS takes the value
	 * we give and that it doesn't end up trapped inside the CPU!
	 */
	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
		mb();
		clflush(&engine->status_page.page_addr[reg]);
		engine->status_page.page_addr[reg] = value;
		clflush(&engine->status_page.page_addr[reg]);
		mb();
	} else {
		WRITE_ONCE(engine->status_page.page_addr[reg], value);
	}
}
/*
 * Reads a dword out of the status page, which is written to from the command
 * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or
 * MI_STORE_DATA_IMM.
 *
 * The following dwords have a reserved meaning:
 * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes.
 * 0x04: ring 0 head pointer
 * 0x05: ring 1 head pointer (915-class)
 * 0x06: ring 2 head pointer (915-class)
 * 0x10-0x1b: Context status DWords (GM45)
 * 0x1f: Last written status offset. (GM45)
 * 0x20-0x2f: Reserved (Gen6+)
 *
 * The area from dword 0x30 to 0x3ff is available for driver usage.
 */
#define I915_GEM_HWS_INDEX		0x30
#define I915_GEM_HWS_INDEX_ADDR		(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
#define I915_GEM_HWS_PREEMPT_INDEX	0x32
#define I915_GEM_HWS_PREEMPT_ADDR	(I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
#define I915_GEM_HWS_SCRATCH_INDEX	0x40
#define I915_GEM_HWS_SCRATCH_ADDR	(I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
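/*
 * Illustrative sketch (not part of the original header): the driver-usable
 * dwords above are accessed with the status-page helpers, e.g. scribbling a
 * marker into the scratch slot and reading back the breadcrumb seqno:
 *
 *	intel_write_status_page(engine, I915_GEM_HWS_SCRATCH_INDEX, 0xdeadbeef);
 *	seqno = intel_read_status_page(engine, I915_GEM_HWS_INDEX);
 *
 * The *_ADDR forms are the corresponding byte offsets within the page (the
 * dword index shifted by MI_STORE_DWORD_INDEX_SHIFT), as used when emitting
 * MI_STORE_DWORD_INDEX from the command streamer.
 */
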
#define I915_HWS_CSB_BUF0_INDEX		0x10
#define I915_HWS_CSB_WRITE_INDEX	0x1f
#define CNL_HWS_CSB_WRITE_INDEX		0x2f

struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine,
			 struct i915_timeline *timeline,
			 int size);
int intel_ring_pin(struct intel_ring *ring);
void intel_ring_reset(struct intel_ring *ring, u32 tail);
unsigned int intel_ring_update_space(struct intel_ring *ring);
void intel_ring_unpin(struct intel_ring *ring);
void intel_ring_free(struct intel_ring *ring);

void intel_engine_stop(struct intel_engine_cs *engine);
void intel_engine_cleanup(struct intel_engine_cs *engine);

void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);

int __must_check intel_ring_cacheline_align(struct i915_request *rq);

int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes);
u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);

static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
{
	/* Dummy function.
	 *
	 * This serves as a placeholder in the code so that the reader
	 * can compare against the preceding intel_ring_begin() and
	 * check that the number of dwords emitted matches the space
	 * reserved for the command packet (i.e. the value passed to
	 * intel_ring_begin()).
	 */
	GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
}
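/*
 * Illustrative sketch (not part of the original header): the usual emission
 * pattern pairs intel_ring_begin() with intel_ring_advance(), emitting
 * exactly the number of dwords that were reserved:
 *
 *	u32 *cs;
 *
 *	cs = intel_ring_begin(rq, 4);
 *	if (IS_ERR(cs))
 *		return PTR_ERR(cs);
 *
 *	*cs++ = MI_NOOP;
 *	*cs++ = MI_NOOP;
 *	*cs++ = MI_NOOP;
 *	*cs++ = MI_NOOP;
 *
 *	intel_ring_advance(rq, cs);
 */
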
static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
{
	return pos & (ring->size - 1);
}

static inline bool
intel_ring_offset_valid(const struct intel_ring *ring,
			unsigned int pos)
{
	if (pos & -ring->size) /* must be strictly within the ring */
		return false;

	if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
		return false;

	return true;
}

static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
{
	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
	u32 offset = addr - rq->ring->vaddr;
	GEM_BUG_ON(offset > rq->ring->size);
	return intel_ring_wrap(rq->ring, offset);
}

static inline void
assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
{
	GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));

	/*
	 * "Ring Buffer Use"
	 *	Gen2 BSpec "1. Programming Environment" / 1.4.4.6
	 *	Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
	 *	Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
	 * same cacheline, the Head Pointer must not be greater than the Tail
	 * Pointer."
	 *
	 * We use ring->head as the last known location of the actual RING_HEAD;
	 * it may have advanced, but in the worst case it is equal to ring->head,
	 * and so we should never program RING_TAIL to advance into the same
	 * cacheline as ring->head.
	 */
#define cacheline(a) round_down(a, CACHELINE_BYTES)
	GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
		   tail < ring->head);
#undef cacheline
}

static inline unsigned int
intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
{
	/* Whilst writes to the tail are strictly ordered, there is no
	 * serialisation between readers and the writers. The tail may be
	 * read by i915_request_retire() just as it is being updated
	 * by execlists, as although the breadcrumb is complete, the context
	 * switch hasn't been seen.
	 */
	assert_ring_tail_valid(ring, tail);
	ring->tail = tail;
	return tail;
}
void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);

void intel_engine_setup_common(struct intel_engine_cs *engine);
int intel_engine_init_common(struct intel_engine_cs *engine);
void intel_engine_cleanup_common(struct intel_engine_cs *engine);

int intel_engine_create_scratch(struct intel_engine_cs *engine,
				unsigned int size);
void intel_engine_cleanup_scratch(struct intel_engine_cs *engine);

int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);

int intel_engine_stop_cs(struct intel_engine_cs *engine);

u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);

static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine)
{
	return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
}

static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
{
	/* We are only peeking at the tail of the submit queue (and not the
	 * queue itself) in order to gain a hint as to the current active
	 * state of the engine. Callers are not expected to be taking
	 * engine->timeline.lock, nor are they expected to be concerned
	 * with serialising this hint with anything, so document it as
	 * a hint and nothing more.
	 */
	return READ_ONCE(engine->timeline.seqno);
}

void intel_engine_get_instdone(struct intel_engine_cs *engine,
			       struct intel_instdone *instdone);

/*
 * Arbitrary size for largest possible 'add request' sequence. The code paths
 * are complex and variable. Empirical measurement shows that the worst case
 * is BDW at 192 bytes (6 + 6 + 36 dwords), then ILK at 136 bytes. However,
 * we need to allocate double the largest single packet within that emission
 * to account for tail wraparound (so 6 + 6 + 72 dwords for BDW).
 */
#define MIN_SPACE_FOR_ADD_REQUEST 336
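/*
 * Illustrative arithmetic (not part of the original header): the BDW worst
 * case above works out to 6 + 6 + 72 = 84 dwords, i.e. 84 * sizeof(u32) =
 * 336 bytes, which is where MIN_SPACE_FOR_ADD_REQUEST comes from.
 */
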
static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
{
	return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
}

static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
{
	return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR;
}

/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);

static inline void intel_wait_init(struct intel_wait *wait)
{
	wait->tsk = current;
	wait->request = NULL;
}

static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno)
{
	wait->tsk = current;
	wait->seqno = seqno;
}

static inline bool intel_wait_has_seqno(const struct intel_wait *wait)
{
	return wait->seqno;
}

static inline bool
intel_wait_update_seqno(struct intel_wait *wait, u32 seqno)
{
	wait->seqno = seqno;
	return intel_wait_has_seqno(wait);
}

static inline bool
intel_wait_update_request(struct intel_wait *wait,
			  const struct i915_request *rq)
{
	return intel_wait_update_seqno(wait, i915_request_global_seqno(rq));
}

static inline bool
intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno)
{
	return wait->seqno == seqno;
}

static inline bool
intel_wait_check_request(const struct intel_wait *wait,
			 const struct i915_request *rq)
{
	return intel_wait_check_seqno(wait, i915_request_global_seqno(rq));
}

static inline bool intel_wait_complete(const struct intel_wait *wait)
{
	return RB_EMPTY_NODE(&wait->node);
}

bool intel_engine_add_wait(struct intel_engine_cs *engine,
			   struct intel_wait *wait);
void intel_engine_remove_wait(struct intel_engine_cs *engine,
			      struct intel_wait *wait);
bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup);
void intel_engine_cancel_signaling(struct i915_request *request);
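/*
 * Illustrative sketch (not part of the original header): a seqno waiter built
 * on the breadcrumbs API declared above (simplified; real callers also manage
 * the task state, signals and timeouts):
 *
 *	struct intel_wait wait;
 *
 *	intel_wait_init_for_seqno(&wait, seqno);
 *	if (intel_engine_add_wait(engine, &wait))
 *		;	// we became the irq bottom-half (first waiter)
 *
 *	while (!i915_seqno_passed(intel_engine_get_seqno(engine), seqno))
 *		schedule();	// woken by the interrupt bottom-half
 *
 *	intel_engine_remove_wait(engine, &wait);
 */
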
static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
{
	return READ_ONCE(engine->breadcrumbs.irq_wait);
}

unsigned int intel_engine_wakeup(struct intel_engine_cs *engine);
#define ENGINE_WAKEUP_WAITER BIT(0)
#define ENGINE_WAKEUP_ASLEEP BIT(1)

void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);

void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);

void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);

static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
{
	memset(batch, 0, 6 * sizeof(u32));

	batch[0] = GFX_OP_PIPE_CONTROL(6);
	batch[1] = flags;
	batch[2] = offset;

	return batch + 6;
}

static inline u32 *
gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset)
{
	/* We're using qword write, offset should be aligned to 8 bytes. */
	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

	/* w/a for post sync ops following a GPGPU operation we
	 * need a prior CS_STALL, which is emitted by the flush
	 * following the batch.
	 */
	*cs++ = GFX_OP_PIPE_CONTROL(6);
	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
		PIPE_CONTROL_QW_WRITE;
	*cs++ = gtt_offset;
	*cs++ = 0;
	*cs++ = value;
	/* We're thrashing one dword of HWS. */
	*cs++ = 0;

	return cs;
}

static inline u32 *
gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
{
	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
	GEM_BUG_ON(gtt_offset & (1 << 5));
	/* Offset should be aligned to 8 bytes for both (QW/DW) write types */
	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

	*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
	*cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
	*cs++ = 0;
	*cs++ = value;

	return cs;
}
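/*
 * Illustrative sketch (not part of the original header): a typical use of the
 * helpers above is emitting the request breadcrumb, i.e. writing the global
 * seqno into the HWS slot read back by intel_engine_get_seqno():
 *
 *	cs = gen8_emit_ggtt_write(cs, rq->global_seqno,
 *				  intel_hws_seqno_address(rq->engine));
 *	*cs++ = MI_USER_INTERRUPT;
 */
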
void intel_engines_sanitize(struct drm_i915_private *i915);

bool intel_engine_is_idle(struct intel_engine_cs *engine);
bool intel_engines_are_idle(struct drm_i915_private *dev_priv);

bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine);
void intel_engine_lost_context(struct intel_engine_cs *engine);

void intel_engines_park(struct drm_i915_private *i915);
void intel_engines_unpark(struct drm_i915_private *i915);

void intel_engines_reset_default_submission(struct drm_i915_private *i915);
unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);

bool intel_engine_can_store_dword(struct intel_engine_cs *engine);

__printf(3, 4)
void intel_engine_dump(struct intel_engine_cs *engine,
		       struct drm_printer *m,
		       const char *header, ...);

struct intel_engine_cs *
intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);

static inline void intel_engine_context_in(struct intel_engine_cs *engine)
{
	unsigned long flags;

	if (READ_ONCE(engine->stats.enabled) == 0)
		return;

	write_seqlock_irqsave(&engine->stats.lock, flags);

	if (engine->stats.enabled > 0) {
		if (engine->stats.active++ == 0)
			engine->stats.start = ktime_get();
		GEM_BUG_ON(engine->stats.active == 0);
	}

	write_sequnlock_irqrestore(&engine->stats.lock, flags);
}

static inline void intel_engine_context_out(struct intel_engine_cs *engine)
{
	unsigned long flags;

	if (READ_ONCE(engine->stats.enabled) == 0)
		return;

	write_seqlock_irqsave(&engine->stats.lock, flags);

	if (engine->stats.enabled > 0) {
		ktime_t last;

		if (engine->stats.active && --engine->stats.active == 0) {
			/*
			 * Decrement the active context count and in case GPU
			 * is now idle add up to the running total.
			 */
			last = ktime_sub(ktime_get(), engine->stats.start);

			engine->stats.total = ktime_add(engine->stats.total,
							last);
		} else if (engine->stats.active == 0) {
			/*
			 * After turning on engine stats, context out might be
			 * the first event in which case we account from the
			 * time stats gathering was turned on.
			 */
			last = ktime_sub(ktime_get(), engine->stats.enabled_at);

			engine->stats.total = ktime_add(engine->stats.total,
							last);
		}
	}

	write_sequnlock_irqrestore(&engine->stats.lock, flags);
}

int intel_enable_engine_stats(struct intel_engine_cs *engine);
void intel_disable_engine_stats(struct intel_engine_cs *engine);

ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);
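/*
 * Illustrative sketch (not part of the original header): sampling engine
 * busyness through the stats API declared above:
 *
 *	ktime_t before, after;
 *
 *	if (intel_enable_engine_stats(engine) == 0) {
 *		before = intel_engine_get_busy_time(engine);
 *		msleep(100);		// sample window
 *		after = intel_engine_get_busy_time(engine);
 *		intel_disable_engine_stats(engine);
 *
 *		// busy time over the window: ktime_ms_delta(after, before)
 *	}
 */
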
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)

static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
{
	if (!execlists->preempt_hang.inject_hang)
		return false;

	complete(&execlists->preempt_hang.completion);
	return true;
}

#else

static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
{
	return false;
}

#endif

#endif /* _INTEL_RINGBUFFER_H_ */