/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _INTEL_RINGBUFFER_H_
#define _INTEL_RINGBUFFER_H_

#include <linux/hashtable.h>
#include <linux/seqlock.h>

#include "i915_gem_batch_pool.h"

#include "i915_reg.h"
#include "i915_pmu.h"
#include "i915_request.h"
#include "i915_selftest.h"
#include "i915_timeline.h"
#include "intel_gpu_commands.h"

struct drm_printer;
struct i915_sched_attr;

#define I915_CMD_HASH_ORDER 9

/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
 * but keeps the logic simple. Indeed, the whole purpose of this macro is just
 * to give some inclination as to some of the magic values used in the various
 * workarounds!
 */
#define CACHELINE_BYTES 64
#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t))

struct intel_hw_status_page {
	struct i915_vma *vma;
	u32 *page_addr;
	u32 ggtt_offset;
};

#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
#define I915_WRITE_TAIL(engine, val) I915_WRITE(RING_TAIL((engine)->mmio_base), val)

#define I915_READ_START(engine) I915_READ(RING_START((engine)->mmio_base))
#define I915_WRITE_START(engine, val) I915_WRITE(RING_START((engine)->mmio_base), val)

#define I915_READ_HEAD(engine) I915_READ(RING_HEAD((engine)->mmio_base))
#define I915_WRITE_HEAD(engine, val) I915_WRITE(RING_HEAD((engine)->mmio_base), val)

#define I915_READ_CTL(engine) I915_READ(RING_CTL((engine)->mmio_base))
#define I915_WRITE_CTL(engine, val) I915_WRITE(RING_CTL((engine)->mmio_base), val)

#define I915_READ_IMR(engine) I915_READ(RING_IMR((engine)->mmio_base))
#define I915_WRITE_IMR(engine, val) I915_WRITE(RING_IMR((engine)->mmio_base), val)

#define I915_READ_MODE(engine) I915_READ(RING_MI_MODE((engine)->mmio_base))
#define I915_WRITE_MODE(engine, val) I915_WRITE(RING_MI_MODE((engine)->mmio_base), val)

/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
 * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
 */
enum intel_engine_hangcheck_action {
	ENGINE_IDLE = 0,
	ENGINE_WAIT,
	ENGINE_ACTIVE_SEQNO,
	ENGINE_ACTIVE_HEAD,
	ENGINE_ACTIVE_SUBUNITS,
	ENGINE_WAIT_KICK,
	ENGINE_DEAD,
};

static inline const char *
hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
{
	switch (a) {
	case ENGINE_IDLE:
		return "idle";
	case ENGINE_WAIT:
		return "wait";
	case ENGINE_ACTIVE_SEQNO:
		return "active seqno";
	case ENGINE_ACTIVE_HEAD:
		return "active head";
	case ENGINE_ACTIVE_SUBUNITS:
		return "active subunits";
	case ENGINE_WAIT_KICK:
		return "wait kick";
	case ENGINE_DEAD:
		return "dead";
	}

	return "unknown";
}

#define I915_MAX_SLICES	3
#define I915_MAX_SUBSLICES 8

#define instdone_slice_mask(dev_priv__) \
	(INTEL_GEN(dev_priv__) == 7 ? \
	 1 : INTEL_INFO(dev_priv__)->sseu.slice_mask)

#define instdone_subslice_mask(dev_priv__) \
	(INTEL_GEN(dev_priv__) == 7 ? \
	 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask[0])

#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
	for ((slice__) = 0, (subslice__) = 0; \
	     (slice__) < I915_MAX_SLICES; \
	     (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
	       (slice__) += ((subslice__) == 0)) \
		for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
			    (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))

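/*
 * Illustrative sketch (editor's example, not driver code): walking every
 * populated slice/subslice pair, e.g. when capturing the per-subslice
 * INSTDONE registers into a struct intel_instdone. "dev_priv" and "instdone"
 * are assumed locals; the register reads are placeholders.
 *
 *	int slice, subslice;
 *
 *	for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
 *		instdone.sampler[slice][subslice] = ...;
 *		instdone.row[slice][subslice] = ...;
 *	}
 */
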
struct intel_instdone {
	u32 instdone;
	/* The following exist only in the RCS engine */
	u32 slice_common;
	u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
};

struct intel_engine_hangcheck {
	u64 acthd;
	u32 seqno;
	enum intel_engine_hangcheck_action action;
	unsigned long action_timestamp;
	int deadlock;
	struct intel_instdone instdone;
	struct i915_request *active_request;
	bool stalled:1;
	bool wedged:1;
};

struct intel_ring {
	struct i915_vma *vma;
	void *vaddr;

	struct i915_timeline *timeline;
	struct list_head request_list;
	struct list_head active_link;

	u32 head;
	u32 tail;
	u32 emit;

	u32 space;
	u32 size;
	u32 effective_size;
};

struct i915_gem_context;
struct drm_i915_reg_table;

/*
 * we use a single page to load ctx workarounds so all of these
 * values are referred in terms of dwords
 *
 * struct i915_wa_ctx_bb:
 *  offset: specifies batch starting position, also helpful in case
 *    if we want to have multiple batches at different offsets based on
 *    some criteria. It is not a requirement at the moment but provides
 *    an option for future use.
 *  size: size of the batch in DWORDS
 */
struct i915_ctx_workarounds {
	struct i915_wa_ctx_bb {
		u32 offset;
		u32 size;
	} indirect_ctx, per_ctx;
	struct i915_vma *vma;
};

struct i915_request;

#define I915_MAX_VCS	4
#define I915_MAX_VECS	2

/*
 * Engine IDs definitions.
 * Keep instances of the same type engine together.
 */
enum intel_engine_id {
	RCS = 0,
	BCS,
	VCS,
	VCS2,
	VCS3,
	VCS4,
#define _VCS(n) (VCS + (n))
	VECS,
	VECS2
#define _VECS(n) (VECS + (n))
};

struct i915_priolist {
	struct rb_node node;
	struct list_head requests;
	int priority;
};

/**
 * struct intel_engine_execlists - execlist submission queue and port state
 *
 * The struct intel_engine_execlists represents the combined logical state of
 * the driver and the hardware state for execlist mode of submission.
 */
struct intel_engine_execlists {
	/**
	 * @tasklet: softirq tasklet for bottom handler
	 */
	struct tasklet_struct tasklet;

	/**
	 * @default_priolist: priority list for I915_PRIORITY_NORMAL
	 */
	struct i915_priolist default_priolist;

	/**
	 * @no_priolist: priority lists disabled
	 */
	bool no_priolist;

	/**
	 * @submit_reg: gen-specific execlist submission register
	 * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
	 * the ExecList Submission Queue Contents register array for Gen11+
	 */
	u32 __iomem *submit_reg;

	/**
	 * @ctrl_reg: the enhanced execlists control register, used to load the
	 * submit queue on the HW and to request preemptions to idle
	 */
	u32 __iomem *ctrl_reg;

	/**
	 * @port: execlist port states
	 *
	 * For each hardware ELSP (ExecList Submission Port) we keep
	 * track of the last request and the number of times we submitted
	 * that port to hw. We then count the number of times the hw reports
	 * a context completion or preemption. As only one context can
	 * be active on hw, we limit resubmission of context to port[0]. This
	 * is called Lite Restore, of the context.
	 */
	struct execlist_port {
		/**
		 * @request_count: combined request and submission count
		 */
		struct i915_request *request_count;
#define EXECLIST_COUNT_BITS 2
#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
#define port_set(p, packed) ((p)->request_count = (packed))
#define port_isset(p) ((p)->request_count)
#define port_index(p, execlists) ((p) - (execlists)->port)

		/**
		 * @context_id: context ID for port
		 */
		GEM_DEBUG_DECL(u32 context_id);

#define EXECLIST_MAX_PORTS 2
	} port[EXECLIST_MAX_PORTS];

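	/*
	 * Illustrative sketch (editor's example, not driver code): the
	 * port_*() macros above stash a small submission count in the low
	 * bits of the request pointer, so bumping the count on a
	 * resubmission looks roughly like ("port" is an assumed
	 * struct execlist_port *):
	 *
	 *	unsigned int count;
	 *	struct i915_request *rq;
	 *
	 *	rq = port_unpack(port, &count);
	 *	port_set(port, port_pack(rq, count + 1));
	 */
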
	/**
	 * @active: is the HW active? We consider the HW as active after
	 * submitting any context for execution and until we have seen the
	 * last context completion event. After that, we do not expect any
	 * more events until we submit, and so can park the HW.
	 *
	 * As we have a small number of different sources from which we feed
	 * the HW, we track the state of each inside a single bitfield.
	 */
	unsigned int active;
#define EXECLISTS_ACTIVE_USER 0
#define EXECLISTS_ACTIVE_PREEMPT 1
#define EXECLISTS_ACTIVE_HWACK 2

	/**
	 * @port_mask: number of execlist ports - 1
	 */
	unsigned int port_mask;

	/**
	 * @queue_priority: Highest pending priority.
	 *
	 * When we add requests into the queue, or adjust the priority of
	 * executing requests, we compute the maximum priority of those
	 * pending requests. We can then use this value to determine if
	 * we need to preempt the executing requests to service the queue.
	 */
	int queue_priority;

	/**
	 * @queue: queue of requests, in priority lists
	 */
	struct rb_root queue;

	/**
	 * @first: leftmost level in priority @queue
	 */
	struct rb_node *first;

	/**
	 * @fw_domains: forcewake domains for irq tasklet
	 */
	unsigned int fw_domains;

	/**
	 * @csb_head: context status buffer head
	 */
	unsigned int csb_head;

	/**
	 * @csb_use_mmio: access csb through mmio, instead of hwsp
	 */
	bool csb_use_mmio;

	/**
	 * @preempt_complete_status: expected CSB upon completing preemption
	 */
	u32 preempt_complete_status;
};

#define INTEL_ENGINE_CS_MAX_NAME 8

struct intel_engine_cs {
	struct drm_i915_private *i915;
	char name[INTEL_ENGINE_CS_MAX_NAME];

	enum intel_engine_id id;
	unsigned int hw_id;
	unsigned int guc_id;

	u8 uabi_id;
	u8 uabi_class;

	u8 class;
	u8 instance;
	u32 context_size;
	u32 mmio_base;

	struct intel_ring *buffer;

	struct i915_timeline timeline;

	struct drm_i915_gem_object *default_state;
	void *pinned_default_state;

	atomic_t irq_count;
	unsigned long irq_posted;
#define ENGINE_IRQ_BREADCRUMB 0
#define ENGINE_IRQ_EXECLIST 1

	/* Rather than have every client wait upon all user interrupts,
	 * with the herd waking after every interrupt and each doing the
	 * heavyweight seqno dance, we delegate the task (of being the
	 * bottom-half of the user interrupt) to the first client. After
	 * every interrupt, we wake up one client, who does the heavyweight
	 * coherent seqno read and either goes back to sleep (if incomplete),
	 * or wakes up all the completed clients in parallel, before then
	 * transferring the bottom-half status to the next client in the queue.
	 *
	 * Compared to walking the entire list of waiters in a single dedicated
	 * bottom-half, we reduce the latency of the first waiter by avoiding
	 * a context switch, but incur additional coherent seqno reads when
	 * following the chain of request breadcrumbs. Since it is most likely
	 * that we have a single client waiting on each seqno, then reducing
	 * the overhead of waking that client is much preferred.
	 */
	struct intel_breadcrumbs {
		spinlock_t irq_lock; /* protects irq_*; irqsafe */
		struct intel_wait *irq_wait; /* oldest waiter by retirement */

		spinlock_t rb_lock; /* protects the rb and wraps irq_lock */
		struct rb_root waiters; /* sorted by retirement, priority */
		struct list_head signals; /* sorted by retirement */
		struct task_struct *signaler; /* used for fence signalling */

		struct timer_list fake_irq; /* used after a missed interrupt */
		struct timer_list hangcheck; /* detect missed interrupts */

		unsigned int hangcheck_interrupts;
		unsigned int irq_enabled;

		bool irq_armed : 1;
		I915_SELFTEST_DECLARE(bool mock : 1);
	} breadcrumbs;

	struct {
		/**
		 * @enable: Bitmask of enable sample events on this engine.
		 *
		 * Bits correspond to sample event types, for instance
		 * I915_SAMPLE_QUEUED is bit 0 etc.
		 */
		u32 enable;
		/**
		 * @enable_count: Reference count for the enabled samplers.
		 *
		 * Index number corresponds to the bit number from @enable.
		 */
		unsigned int enable_count[I915_PMU_SAMPLE_BITS];
		/**
		 * @sample: Counter values for sampling events.
		 *
		 * Our internal timer stores the current counters in this field.
		 */
#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
	} pmu;

	/*
	 * A pool of objects to use as shadow copies of client batch buffers
	 * when the command parser is enabled. Prevents the client from
	 * modifying the batch contents after software parsing.
	 */
	struct i915_gem_batch_pool batch_pool;

	struct intel_hw_status_page status_page;
	struct i915_ctx_workarounds wa_ctx;
	struct i915_vma *scratch;

	u32 irq_keep_mask; /* always keep these interrupts */
	u32 irq_enable_mask; /* bitmask to enable ring interrupt */
	void (*irq_enable)(struct intel_engine_cs *engine);
	void (*irq_disable)(struct intel_engine_cs *engine);

	int (*init_hw)(struct intel_engine_cs *engine);

	struct {
		struct i915_request *(*prepare)(struct intel_engine_cs *engine);
		void (*reset)(struct intel_engine_cs *engine,
			      struct i915_request *rq);
		void (*finish)(struct intel_engine_cs *engine);
	} reset;

	void (*park)(struct intel_engine_cs *engine);
	void (*unpark)(struct intel_engine_cs *engine);

	void (*set_default_submission)(struct intel_engine_cs *engine);

	struct intel_context *(*context_pin)(struct intel_engine_cs *engine,
					     struct i915_gem_context *ctx);

	int (*request_alloc)(struct i915_request *rq);
	int (*init_context)(struct i915_request *rq);

	int (*emit_flush)(struct i915_request *request, u32 mode);
#define EMIT_INVALIDATE BIT(0)
#define EMIT_FLUSH BIT(1)
#define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH)
	int (*emit_bb_start)(struct i915_request *rq,
			     u64 offset, u32 length,
			     unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
#define I915_DISPATCH_RS BIT(2)
	void (*emit_breadcrumb)(struct i915_request *rq, u32 *cs);
	int emit_breadcrumb_sz;

	/* Pass the request to the hardware queue (e.g. directly into
	 * the legacy ringbuffer or to the end of an execlist).
	 *
	 * This is called from an atomic context with irqs disabled; must
	 * be irq safe.
	 */
	void (*submit_request)(struct i915_request *rq);

	/* Call when the priority on a request has changed and it and its
	 * dependencies may need rescheduling. Note the request itself may
	 * not be ready to run!
	 *
	 * Called under the struct_mutex.
	 */
	void (*schedule)(struct i915_request *request,
			 const struct i915_sched_attr *attr);

	/*
	 * Cancel all requests on the hardware, or queued for execution.
	 * This should only cancel the ready requests that have been
	 * submitted to the engine (via the engine->submit_request callback).
	 * This is called when marking the device as wedged.
	 */
	void (*cancel_requests)(struct intel_engine_cs *engine);

	/* Some chipsets are not quite as coherent as advertised and need
	 * an expensive kick to force a true read of the up-to-date seqno.
	 * However, the up-to-date seqno is not always required and the last
	 * seen value is good enough. Note that the seqno will always be
	 * monotonic, even if not coherent.
	 */
	void (*irq_seqno_barrier)(struct intel_engine_cs *engine);
	void (*cleanup)(struct intel_engine_cs *engine);

	/* GEN8 signal/wait table - never trust comments!
	 *	  signal to	signal to	signal to	signal to	signal to
	 *	    RCS		   VCS		   BCS		  VECS		 VCS2
	 *      --------------------------------------------------------------------
	 *  RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) | VCS2 (0x20) |
	 *	|-------------------------------------------------------------------
	 *  VCS | RCS (0x28) | NOP (0x30) | BCS (0x38) | VECS (0x40) | VCS2 (0x48) |
	 *	|-------------------------------------------------------------------
	 *  BCS | RCS (0x50) | VCS (0x58) | NOP (0x60) | VECS (0x68) | VCS2 (0x70) |
	 *	|-------------------------------------------------------------------
	 * VECS | RCS (0x78) | VCS (0x80) | BCS (0x88) |  NOP (0x90) | VCS2 (0x98) |
	 *	|-------------------------------------------------------------------
	 * VCS2 | RCS (0xa0) | VCS (0xa8) | BCS (0xb0) | VECS (0xb8) |  NOP (0xc0) |
	 *	|-------------------------------------------------------------------
	 *
	 * Generalization:
	 *  f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id)
	 *  ie. transpose of g(x, y)
	 *
	 *	 sync from	sync from	sync from	sync from	sync from
	 *	    RCS		   VCS		   BCS		  VECS		 VCS2
	 *      --------------------------------------------------------------------
	 *  RCS | NOP (0x00) | VCS (0x28) | BCS (0x50) | VECS (0x78) | VCS2 (0xa0) |
	 *	|-------------------------------------------------------------------
	 *  VCS | RCS (0x08) | NOP (0x30) | BCS (0x58) | VECS (0x80) | VCS2 (0xa8) |
	 *	|-------------------------------------------------------------------
	 *  BCS | RCS (0x10) | VCS (0x38) | NOP (0x60) | VECS (0x88) | VCS2 (0xb0) |
	 *	|-------------------------------------------------------------------
	 * VECS | RCS (0x18) | VCS (0x40) | BCS (0x68) |  NOP (0x90) | VCS2 (0xb8) |
	 *	|-------------------------------------------------------------------
	 * VCS2 | RCS (0x20) | VCS (0x48) | BCS (0x70) | VECS (0x98) |  NOP (0xc0) |
	 *	|-------------------------------------------------------------------
	 *
	 * Generalization:
	 *  g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id)
	 *  ie. transpose of f(x, y)
	 */
	struct {
#define GEN6_SEMAPHORE_LAST	VECS_HW
#define GEN6_NUM_SEMAPHORES	(GEN6_SEMAPHORE_LAST + 1)
#define GEN6_SEMAPHORES_MASK	GENMASK(GEN6_SEMAPHORE_LAST, 0)
		struct {
			/* our mbox written by others */
			u32 wait[GEN6_NUM_SEMAPHORES];
			/* mboxes this ring signals to */
			i915_reg_t signal[GEN6_NUM_SEMAPHORES];
		} mbox;

		/* AKA wait() */
		int (*sync_to)(struct i915_request *rq,
			       struct i915_request *signal);
		u32 *(*signal)(struct i915_request *rq, u32 *cs);
	} semaphore;

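	/*
	 * Worked instance of the table above (editor's note): with
	 * NUM_RINGS = 5 and seqno_size = 8, the slot at which VCS signals
	 * for RCS to wait on is
	 * f(VCS, RCS) = (1 * 5 * 8) + (8 * 0) = 0x28,
	 * matching the "VCS | RCS (0x28)" entry in the signal table.
	 */
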
	struct intel_engine_execlists execlists;

	/* Contexts are pinned whilst they are active on the GPU. The last
	 * context executed remains active whilst the GPU is idle - the
	 * switch away and write to the context object only occurs on the
	 * next execution. Contexts are only unpinned on retirement of the
	 * following request ensuring that we can always write to the object
	 * on the context switch even after idling. Across suspend, we switch
	 * to the kernel context and trash it as the save may not happen
	 * before the hardware is powered down.
	 */
	struct intel_context *last_retired_context;

	/* status_notifier: list of callbacks for context-switch changes */
	struct atomic_notifier_head context_status_notifier;

	struct intel_engine_hangcheck hangcheck;

#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
#define I915_ENGINE_SUPPORTS_STATS   BIT(1)
#define I915_ENGINE_HAS_PREEMPTION   BIT(2)
	unsigned int flags;

	/*
	 * Table of commands the command parser needs to know about
	 * for this engine.
	 */
	DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

	/*
	 * Table of registers allowed in commands that read/write registers.
	 */
	const struct drm_i915_reg_table *reg_tables;
	int reg_table_count;

	/*
	 * Returns the bitmask for the length field of the specified command.
	 * Return 0 for an unrecognized/invalid command.
	 *
	 * If the command parser finds an entry for a command in the engine's
	 * cmd_tables, it gets the command's length based on the table entry.
	 * If not, it calls this function to determine the per-engine length
	 * field encoding for the command (i.e. different opcode ranges use
	 * certain bits to encode the command length in the header).
	 */
	u32 (*get_cmd_length_mask)(u32 cmd_header);

	struct {
		/**
		 * @lock: Lock protecting the below fields.
		 */
		seqlock_t lock;
		/**
		 * @enabled: Reference count indicating number of listeners.
		 */
		unsigned int enabled;
		/**
		 * @active: Number of contexts currently scheduled in.
		 */
		unsigned int active;
		/**
		 * @enabled_at: Timestamp when busy stats were enabled.
		 */
		ktime_t enabled_at;
		/**
		 * @start: Timestamp of the last idle to active transition.
		 *
		 * Idle is defined as active == 0, active is active > 0.
		 */
		ktime_t start;
		/**
		 * @total: Total time this engine was busy.
		 *
		 * Accumulated time not counting the most recent block in cases
		 * where engine is currently busy (active > 0).
		 */
		ktime_t total;
	} stats;
};

static inline bool
intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
}

static inline bool
intel_engine_supports_stats(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_SUPPORTS_STATS;
}

static inline bool
intel_engine_has_preemption(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
}

static inline bool __execlists_need_preempt(int prio, int last)
{
	return prio > max(0, last);
}

static inline void
execlists_set_active(struct intel_engine_execlists *execlists,
		     unsigned int bit)
{
	__set_bit(bit, (unsigned long *)&execlists->active);
}

static inline bool
execlists_set_active_once(struct intel_engine_execlists *execlists,
			  unsigned int bit)
{
	return !__test_and_set_bit(bit, (unsigned long *)&execlists->active);
}

static inline void
execlists_clear_active(struct intel_engine_execlists *execlists,
		       unsigned int bit)
{
	__clear_bit(bit, (unsigned long *)&execlists->active);
}

static inline bool
execlists_is_active(const struct intel_engine_execlists *execlists,
		    unsigned int bit)
{
	return test_bit(bit, (unsigned long *)&execlists->active);
}

void execlists_user_begin(struct intel_engine_execlists *execlists,
			  const struct execlist_port *port);
void execlists_user_end(struct intel_engine_execlists *execlists);

void
execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);

void
execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);

static inline unsigned int
execlists_num_ports(const struct intel_engine_execlists * const execlists)
{
	return execlists->port_mask + 1;
}

static inline struct execlist_port *
execlists_port_complete(struct intel_engine_execlists * const execlists,
			struct execlist_port * const port)
{
	const unsigned int m = execlists->port_mask;

	GEM_BUG_ON(port_index(port, execlists) != 0);
	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));

	memmove(port, port + 1, m * sizeof(struct execlist_port));
	memset(port + m, 0, sizeof(struct execlist_port));

	return port;
}

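/*
 * Illustrative sketch (editor's example, an assumption about typical caller
 * usage rather than a quote of the submission code): on a context-complete
 * event the caller drops the reference held in port[0] and shifts the
 * remaining ports down. "execlists" is an assumed pointer to the engine's
 * execlist state.
 *
 *	struct execlist_port *port = execlists->port;
 *
 *	i915_request_put(port_request(port));
 *	port = execlists_port_complete(execlists, port);
 */
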
static inline unsigned int
intel_engine_flag(const struct intel_engine_cs *engine)
{
	return BIT(engine->id);
}

static inline u32
intel_read_status_page(const struct intel_engine_cs *engine, int reg)
{
	/* Ensure that the compiler doesn't optimize away the load. */
	return READ_ONCE(engine->status_page.page_addr[reg]);
}

static inline void
intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
{
	/* Writing into the status page should be done sparingly. Since
	 * we do when we are uncertain of the device state, we take a bit
	 * of extra paranoia to try and ensure that the HWS takes the value
	 * we give and that it doesn't end up trapped inside the CPU!
	 */
	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
		mb();
		clflush(&engine->status_page.page_addr[reg]);
		engine->status_page.page_addr[reg] = value;
		clflush(&engine->status_page.page_addr[reg]);
		mb();
	} else {
		WRITE_ONCE(engine->status_page.page_addr[reg], value);
	}
}

/*
 * Reads a dword out of the status page, which is written to from the command
 * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or
 * MI_STORE_DATA_IMM.
 *
 * The following dwords have a reserved meaning:
 * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes.
 * 0x04: ring 0 head pointer
 * 0x05: ring 1 head pointer (915-class)
 * 0x06: ring 2 head pointer (915-class)
 * 0x10-0x1b: Context status DWords (GM45)
 * 0x1f: Last written status offset. (GM45)
 * 0x20-0x2f: Reserved (Gen6+)
 *
 * The area from dword 0x30 to 0x3ff is available for driver usage.
 */
#define I915_GEM_HWS_INDEX		0x30
#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
#define I915_GEM_HWS_PREEMPT_INDEX	0x32
#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
#define I915_GEM_HWS_SCRATCH_INDEX	0x40
#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)

#define I915_HWS_CSB_BUF0_INDEX		0x10
#define I915_HWS_CSB_WRITE_INDEX	0x1f
#define CNL_HWS_CSB_WRITE_INDEX		0x2f

struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine,
			 struct i915_timeline *timeline,
			 int size);
int intel_ring_pin(struct intel_ring *ring,
		   struct drm_i915_private *i915,
		   unsigned int offset_bias);
void intel_ring_reset(struct intel_ring *ring, u32 tail);
unsigned int intel_ring_update_space(struct intel_ring *ring);
void intel_ring_unpin(struct intel_ring *ring);
void intel_ring_free(struct intel_ring *ring);

void intel_engine_stop(struct intel_engine_cs *engine);
void intel_engine_cleanup(struct intel_engine_cs *engine);

void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);

int __must_check intel_ring_cacheline_align(struct i915_request *rq);

int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes);
u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);

static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
{
	/* Dummy function.
	 *
	 * This serves as a placeholder in the code so that the reader
	 * can compare against the preceding intel_ring_begin() and
	 * check that the number of dwords emitted matches the space
	 * reserved for the command packet (i.e. the value passed to
	 * intel_ring_begin()).
	 */
	GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
}

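/*
 * Illustrative sketch (editor's example) of the begin/emit/advance contract
 * described above: reserve space with intel_ring_begin(), write exactly that
 * many dwords, then close with intel_ring_advance(). The emitted commands
 * are placeholders.
 *
 *	u32 *cs;
 *
 *	cs = intel_ring_begin(rq, 2);
 *	if (IS_ERR(cs))
 *		return PTR_ERR(cs);
 *
 *	*cs++ = MI_NOOP;
 *	*cs++ = MI_NOOP;
 *	intel_ring_advance(rq, cs);
 */
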
static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
{
	return pos & (ring->size - 1);
}

static inline bool
intel_ring_offset_valid(const struct intel_ring *ring,
			unsigned int pos)
{
	if (pos & -ring->size) /* must be strictly within the ring */
		return false;

	if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
		return false;

	return true;
}

static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
{
	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
	u32 offset = addr - rq->ring->vaddr;
	GEM_BUG_ON(offset > rq->ring->size);
	return intel_ring_wrap(rq->ring, offset);
}

static inline void
assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
{
	GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));

	/*
	 * "Ring Buffer Use"
	 *	Gen2 BSpec "1. Programming Environment" / 1.4.4.6
	 *	Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
	 *	Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
	 * same cacheline, the Head Pointer must not be greater than the Tail
	 * Pointer."
	 *
	 * We use ring->head as the last known location of the actual RING_HEAD,
	 * it may have advanced but in the worst case it is equally the same
	 * as ring->head and so we should never program RING_TAIL to advance
	 * into the same cacheline as ring->head.
	 */
#define cacheline(a) round_down(a, CACHELINE_BYTES)
	GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
		   tail < ring->head);
#undef cacheline
}

static inline unsigned int
intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
{
	/* Whilst writes to the tail are strictly ordered, there is no
	 * serialisation between readers and the writers. The tail may be
	 * read by i915_request_retire() just as it is being updated
	 * by execlists, as although the breadcrumb is complete, the context
	 * switch hasn't been seen.
	 */
	assert_ring_tail_valid(ring, tail);
	ring->tail = tail;
	return tail;
}

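/*
 * Concrete instance of the cacheline rule checked above (editor's note):
 * with CACHELINE_BYTES == 64, if ring->head is 0x38 then programming a tail
 * of 0x08 would place head and tail in the same 64-byte cacheline with
 * head greater than tail, which is exactly the case assert_ring_tail_valid()
 * rejects.
 */
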
void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);

void intel_engine_setup_common(struct intel_engine_cs *engine);
int intel_engine_init_common(struct intel_engine_cs *engine);
void intel_engine_cleanup_common(struct intel_engine_cs *engine);

int intel_engine_create_scratch(struct intel_engine_cs *engine,
				unsigned int size);
void intel_engine_cleanup_scratch(struct intel_engine_cs *engine);

int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);

int intel_engine_stop_cs(struct intel_engine_cs *engine);

u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);

static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine)
{
	return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
}

static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
{
	/* We are only peeking at the tail of the submit queue (and not the
	 * queue itself) in order to gain a hint as to the current active
	 * state of the engine. Callers are not expected to be taking
	 * engine->timeline->lock, nor are they expected to be concerned
	 * with serialising this hint with anything, so document it as
	 * a hint and nothing more.
	 */
	return READ_ONCE(engine->timeline.seqno);
}

void intel_engine_get_instdone(struct intel_engine_cs *engine,
			       struct intel_instdone *instdone);

/*
 * Arbitrary size for largest possible 'add request' sequence. The code paths
 * are complex and variable. Empirical measurement shows that the worst case
 * is BDW at 192 bytes (6 + 6 + 36 dwords), then ILK at 136 bytes. However,
 * we need to allocate double the largest single packet within that emission
 * to account for tail wraparound (so 6 + 6 + 72 dwords for BDW).
 */
#define MIN_SPACE_FOR_ADD_REQUEST 336

static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
{
	return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
}

static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
{
	return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR;
}

/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);

static inline void intel_wait_init(struct intel_wait *wait,
				   struct i915_request *rq)
{
	wait->tsk = current;
	wait->request = rq;
}

static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno)
{
	wait->tsk = current;
	wait->seqno = seqno;
}

static inline bool intel_wait_has_seqno(const struct intel_wait *wait)
{
	return wait->seqno;
}

static inline bool
intel_wait_update_seqno(struct intel_wait *wait, u32 seqno)
{
	wait->seqno = seqno;
	return intel_wait_has_seqno(wait);
}

static inline bool
intel_wait_update_request(struct intel_wait *wait,
			  const struct i915_request *rq)
{
	return intel_wait_update_seqno(wait, i915_request_global_seqno(rq));
}

static inline bool
intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno)
{
	return wait->seqno == seqno;
}

static inline bool
intel_wait_check_request(const struct intel_wait *wait,
			 const struct i915_request *rq)
{
	return intel_wait_check_seqno(wait, i915_request_global_seqno(rq));
}

static inline bool intel_wait_complete(const struct intel_wait *wait)
{
	return RB_EMPTY_NODE(&wait->node);
}

bool intel_engine_add_wait(struct intel_engine_cs *engine,
			   struct intel_wait *wait);
void intel_engine_remove_wait(struct intel_engine_cs *engine,
			      struct intel_wait *wait);
bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup);
void intel_engine_cancel_signaling(struct i915_request *request);

static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
{
	return READ_ONCE(engine->breadcrumbs.irq_wait);
}

unsigned int intel_engine_wakeup(struct intel_engine_cs *engine);
#define ENGINE_WAKEUP_WAITER BIT(0)
#define ENGINE_WAKEUP_ASLEEP BIT(1)

void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);

void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);

void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);

static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
{
	memset(batch, 0, 6 * sizeof(u32));

	batch[0] = GFX_OP_PIPE_CONTROL(6);
	batch[1] = flags;
	batch[2] = offset;

	return batch + 6;
}

static inline u32 *
gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset)
{
	/* We're using qword write, offset should be aligned to 8 bytes. */
	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

	/* w/a for post sync ops following a GPGPU operation we
	 * need a prior CS_STALL, which is emitted by the flush
	 * following the batch.
	 */
	*cs++ = GFX_OP_PIPE_CONTROL(6);
	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
		PIPE_CONTROL_QW_WRITE;
	*cs++ = gtt_offset;
	*cs++ = 0;
	*cs++ = value;
	/* We're thrashing one dword of HWS. */
	*cs++ = 0;

	return cs;
}

static inline u32 *
gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
{
	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
	GEM_BUG_ON(gtt_offset & (1 << 5));
	/* Offset should be aligned to 8 bytes for both (QW/DW) write types */
	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

	*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
	*cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
	*cs++ = 0;
	*cs++ = value;

	return cs;
}

bool intel_engine_is_idle(struct intel_engine_cs *engine);
bool intel_engines_are_idle(struct drm_i915_private *dev_priv);

bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine);
void intel_engine_lost_context(struct intel_engine_cs *engine);

void intel_engines_park(struct drm_i915_private *i915);
void intel_engines_unpark(struct drm_i915_private *i915);

void intel_engines_reset_default_submission(struct drm_i915_private *i915);
unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);

bool intel_engine_can_store_dword(struct intel_engine_cs *engine);

__printf(3, 4)
void intel_engine_dump(struct intel_engine_cs *engine,
		       struct drm_printer *m,
		       const char *header, ...);

struct intel_engine_cs *
intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);

static inline void intel_engine_context_in(struct intel_engine_cs *engine)
{
	unsigned long flags;

	if (READ_ONCE(engine->stats.enabled) == 0)
		return;

	write_seqlock_irqsave(&engine->stats.lock, flags);

	if (engine->stats.enabled > 0) {
		if (engine->stats.active++ == 0)
			engine->stats.start = ktime_get();
		GEM_BUG_ON(engine->stats.active == 0);
	}

	write_sequnlock_irqrestore(&engine->stats.lock, flags);
}

static inline void intel_engine_context_out(struct intel_engine_cs *engine)
{
	unsigned long flags;

	if (READ_ONCE(engine->stats.enabled) == 0)
		return;

	write_seqlock_irqsave(&engine->stats.lock, flags);

	if (engine->stats.enabled > 0) {
		ktime_t last;

		if (engine->stats.active && --engine->stats.active == 0) {
			/*
			 * Decrement the active context count and in case GPU
			 * is now idle add up to the running total.
			 */
			last = ktime_sub(ktime_get(), engine->stats.start);

			engine->stats.total = ktime_add(engine->stats.total,
							last);
		} else if (engine->stats.active == 0) {
			/*
			 * After turning on engine stats, context out might be
			 * the first event in which case we account from the
			 * time stats gathering was turned on.
			 */
			last = ktime_sub(ktime_get(), engine->stats.enabled_at);

			engine->stats.total = ktime_add(engine->stats.total,
							last);
		}
	}

	write_sequnlock_irqrestore(&engine->stats.lock, flags);
}

int intel_enable_engine_stats(struct intel_engine_cs *engine);
void intel_disable_engine_stats(struct intel_engine_cs *engine);

ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);

#endif /* _INTEL_RINGBUFFER_H_ */