// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include "gen8_engine_cs.h"
#include "i915_drv.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_lrc.h"
#include "intel_ring.h"

int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
        bool vf_flush_wa = false, dc_flush_wa = false;
        u32 *cs, flags = 0;
        int len;

        flags |= PIPE_CONTROL_CS_STALL;

        if (mode & EMIT_FLUSH) {
                flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
                flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
                flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
                flags |= PIPE_CONTROL_FLUSH_ENABLE;
        }

        if (mode & EMIT_INVALIDATE) {
                flags |= PIPE_CONTROL_TLB_INVALIDATE;
                flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_QW_WRITE;
                flags |= PIPE_CONTROL_STORE_DATA_INDEX;

                /*
                 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
                 * pipe control.
                 */
                if (GRAPHICS_VER(rq->engine->i915) == 9)
                        vf_flush_wa = true;

                /* WaForGAMHang:kbl */
                if (IS_KBL_GRAPHICS_STEP(rq->engine->i915, 0, STEP_C0))
                        dc_flush_wa = true;
        }

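        /*
         * Each gen8_emit_pipe_control() below is a 6-dword packet: 6 for
         * the main flush, 6 more for the preceding null PIPE_CONTROL of
         * the VF-cache w/a, and 12 more when the GAM-hang w/a brackets
         * the flush with an extra DC flush before and a CS stall after.
         */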
        len = 6;

        if (vf_flush_wa)
                len += 6;

        if (dc_flush_wa)
                len += 12;

        cs = intel_ring_begin(rq, len);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        if (vf_flush_wa)
                cs = gen8_emit_pipe_control(cs, 0, 0);

        if (dc_flush_wa)
                cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
                                            0);

        cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);

        if (dc_flush_wa)
                cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);

        intel_ring_advance(rq, cs);

        return 0;
}

int gen8_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
        u32 cmd, *cs;

        cs = intel_ring_begin(rq, 4);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

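        /*
         * The low bits of an MI command encode its dword length, so the
         * +1 below extends MI_FLUSH_DW by one dword to cover the full
         * post-sync write built at the end of this function (address
         * low/high plus immediate value).
         */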
        cmd = MI_FLUSH_DW + 1;

        /*
         * We always require a command barrier so that subsequent
         * commands, such as breadcrumb interrupts, are strictly ordered
         * wrt the contents of the write cache being flushed to memory
         * (and thus being coherent from the CPU).
         */
        cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

        if (mode & EMIT_INVALIDATE) {
                cmd |= MI_INVALIDATE_TLB;
                if (rq->engine->class == VIDEO_DECODE_CLASS)
                        cmd |= MI_INVALIDATE_BSD;
        }

        *cs++ = cmd;
        *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
        *cs++ = 0; /* upper addr */
        *cs++ = 0; /* value */
        intel_ring_advance(rq, cs);

        return 0;
}

int gen11_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
        if (mode & EMIT_FLUSH) {
                u32 *cs;
                u32 flags = 0;

                flags |= PIPE_CONTROL_CS_STALL;

                flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
                flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
                flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
                flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
                flags |= PIPE_CONTROL_FLUSH_ENABLE;
                flags |= PIPE_CONTROL_QW_WRITE;
                flags |= PIPE_CONTROL_STORE_DATA_INDEX;

                cs = intel_ring_begin(rq, 6);
                if (IS_ERR(cs))
                        return PTR_ERR(cs);

                cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
                intel_ring_advance(rq, cs);
        }

        if (mode & EMIT_INVALIDATE) {
                u32 *cs;
                u32 flags = 0;

                flags |= PIPE_CONTROL_CS_STALL;

                flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_TLB_INVALIDATE;
                flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_QW_WRITE;
                flags |= PIPE_CONTROL_STORE_DATA_INDEX;

                cs = intel_ring_begin(rq, 6);
                if (IS_ERR(cs))
                        return PTR_ERR(cs);

                cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
                intel_ring_advance(rq, cs);
        }

        return 0;
}

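/*
 * On gen12 the MI_ARB_CHECK instruction doubles as the pre-parser control:
 * bit 8 is the mask that latches the new value and bit 0 requests the
 * pre-fetch disable, so this builds a single dword that switches the
 * command pre-parser off (state == true) or back on (state == false).
 */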
static u32 preparser_disable(bool state)
{
        return MI_ARB_CHECK | 1 << 8 | state;
}

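/*
 * Invalidate an engine's AUX table (compression metadata) TLB by writing
 * AUX_INV into its *_AUX_NV register with a single MI_LOAD_REGISTER_IMM;
 * the trailing MI_NOOP keeps the emitted dword count even.
 */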
u32 *gen12_emit_aux_table_inv(u32 *cs, const i915_reg_t inv_reg)
{
        *cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
        *cs++ = i915_mmio_reg_offset(inv_reg);
        *cs++ = AUX_INV;
        *cs++ = MI_NOOP;

        return cs;
}

int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
        struct intel_engine_cs *engine = rq->engine;

        if (mode & EMIT_FLUSH) {
                u32 flags = 0;
                u32 *cs;

                flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
                flags |= PIPE_CONTROL_FLUSH_L3;
                flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
                flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
                /* Wa_1409600907:tgl,adl-p */
                flags |= PIPE_CONTROL_DEPTH_STALL;
                flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
                flags |= PIPE_CONTROL_FLUSH_ENABLE;

                flags |= PIPE_CONTROL_STORE_DATA_INDEX;
                flags |= PIPE_CONTROL_QW_WRITE;

                flags |= PIPE_CONTROL_CS_STALL;

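                /*
                 * Not every PIPE_CONTROL bit exists on every part: with
                 * no 3D pipeline at all, drop the 3D-architecture flags;
                 * on a compute engine, drop the flags that only the
                 * render (3D) engine implements.
                 */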
                if (!HAS_3D_PIPELINE(engine->i915))
                        flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
                else if (engine->class == COMPUTE_CLASS)
                        flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;

                cs = intel_ring_begin(rq, 6);
                if (IS_ERR(cs))
                        return PTR_ERR(cs);

                cs = gen12_emit_pipe_control(cs,
                                             PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
                                             flags, LRC_PPHWSP_SCRATCH_ADDR);
                intel_ring_advance(rq, cs);
        }

        if (mode & EMIT_INVALIDATE) {
                u32 flags = 0;
                u32 *cs, count;

                flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_TLB_INVALIDATE;
                flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;

                flags |= PIPE_CONTROL_STORE_DATA_INDEX;
                flags |= PIPE_CONTROL_QW_WRITE;

                flags |= PIPE_CONTROL_CS_STALL;

                if (!HAS_3D_PIPELINE(engine->i915))
                        flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
                else if (engine->class == COMPUTE_CLASS)
                        flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;

                if (!HAS_FLAT_CCS(rq->engine->i915))
                        count = 8 + 4;
                else
                        count = 8;

                cs = intel_ring_begin(rq, count);
                if (IS_ERR(cs))
                        return PTR_ERR(cs);

                /*
                 * Prevent the pre-parser from skipping past the TLB
                 * invalidate and loading a stale page for the batch
                 * buffer / request payload.
                 */
                *cs++ = preparser_disable(true);

                cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);

                if (!HAS_FLAT_CCS(rq->engine->i915)) {
                        /* hsdes: 1809175790 */
                        cs = gen12_emit_aux_table_inv(cs, GEN12_GFX_CCS_AUX_NV);
                }

                *cs++ = preparser_disable(false);
                intel_ring_advance(rq, cs);
        }

        return 0;
}

int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
        intel_engine_mask_t aux_inv = 0;
        u32 cmd, *cs;

        cmd = 4;
        if (mode & EMIT_INVALIDATE) {
                cmd += 2;

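                /*
                 * Without flat CCS, the video decode/enhancement engines
                 * also need their AUX table invalidated; the LRI sequence
                 * emitted by gen12_emit_aux_table_inv() adds 4 dwords.
                 */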
                if (!HAS_FLAT_CCS(rq->engine->i915) &&
                    (rq->engine->class == VIDEO_DECODE_CLASS ||
                     rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
                        aux_inv = rq->engine->mask &
                                ~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
                        if (aux_inv)
                                cmd += 4;
                }
        }

        cs = intel_ring_begin(rq, cmd);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        if (mode & EMIT_INVALIDATE)
                *cs++ = preparser_disable(true);

        cmd = MI_FLUSH_DW + 1;

        /*
         * We always require a command barrier so that subsequent
         * commands, such as breadcrumb interrupts, are strictly ordered
         * wrt the contents of the write cache being flushed to memory
         * (and thus being coherent from the CPU).
         */
        cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

        if (mode & EMIT_INVALIDATE) {
                cmd |= MI_INVALIDATE_TLB;
                if (rq->engine->class == VIDEO_DECODE_CLASS)
                        cmd |= MI_INVALIDATE_BSD;
        }

        *cs++ = cmd;
        *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
        *cs++ = 0; /* upper addr */
        *cs++ = 0; /* value */

        if (aux_inv) { /* hsdes: 1809175790 */
                if (rq->engine->class == VIDEO_DECODE_CLASS)
                        cs = gen12_emit_aux_table_inv(cs, GEN12_VD0_AUX_NV);
                else
                        cs = gen12_emit_aux_table_inv(cs, GEN12_VE0_AUX_NV);
        }

        if (mode & EMIT_INVALIDATE)
                *cs++ = preparser_disable(false);

        intel_ring_advance(rq, cs);

        return 0;
}

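/*
 * GGTT address of the preemption semaphore slot in the engine's status
 * page; the busywait breadcrumbs below poll it.
 */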
static u32 preempt_address(struct intel_engine_cs *engine)
{
        return (i915_ggtt_offset(engine->status_page.vma) +
                I915_GEM_HWS_PREEMPT_ADDR);
}

static u32 hwsp_offset(const struct i915_request *rq)
{
        const struct intel_timeline *tl;

        /* Before the request is executed, the timeline is fixed */
        tl = rcu_dereference_protected(rq->timeline,
                                       !i915_request_signaled(rq));

        /* See the comment in i915_request_active_seqno(). */
        return page_mask_bits(tl->hwsp_offset) + offset_in_page(rq->hwsp_seqno);
}

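/*
 * Emit the "initial breadcrumb" that marks the boundary between a
 * request's semaphore/wait preamble and its user payload: write
 * seqno - 1 into the timeline's HWSP (after which i915_request_started()
 * reports true) and follow it with an arbitration point before the
 * payload begins.
 */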
int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
        u32 *cs;

        GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq));
        if (!i915_request_timeline(rq)->has_initial_breadcrumb)
                return 0;

        cs = intel_ring_begin(rq, 6);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
        *cs++ = hwsp_offset(rq);
        *cs++ = 0;
        *cs++ = rq->fence.seqno - 1;

        /*
         * Check if we have been preempted before we even get started.
         *
         * After this point i915_request_started() reports true, even if
         * we get preempted and so are no longer running.
         *
         * i915_request_started() is used during preemption processing
         * to decide if the request is currently inside the user payload
         * or spinning on a kernel semaphore (or earlier). For no-preemption
         * requests, we do allow preemption on the semaphore before the user
         * payload, but do not allow preemption once the request is started.
         *
         * i915_request_started() is similarly used during GPU hangs to
         * determine if the user's payload was guilty, and if so, the
         * request is banned. Before the request is started, it is assumed
         * to be unharmed and an innocent victim of another's hang.
         */
        *cs++ = MI_NOOP;
        *cs++ = MI_ARB_CHECK;

        intel_ring_advance(rq, cs);

        /* Record the updated position of the request's payload */
        rq->infix = intel_ring_offset(rq, cs);

        __set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);

        return 0;
}

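/*
 * Gen12.50+ batch buffer start: reload RING_PREDICATE_RESULT from a
 * scratch slot in the context's indirect w/a batch, jump to the user
 * batch, then chain into the small w/a batch that fixes up a stray
 * MI_SET_PREDICATE which would otherwise stop the ring.
 */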
static int __gen125_emit_bb_start(struct i915_request *rq,
                                  u64 offset, u32 len,
                                  const unsigned int flags,
                                  u32 arb)
{
        struct intel_context *ce = rq->context;
        u32 wa_offset = lrc_indirect_bb(ce);
        u32 *cs;

        cs = intel_ring_begin(rq, 12);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_ARB_ON_OFF | arb;

        *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
                MI_SRM_LRM_GLOBAL_GTT |
                MI_LRI_LRM_CS_MMIO;
        *cs++ = i915_mmio_reg_offset(RING_PREDICATE_RESULT(0));
        *cs++ = wa_offset + DG2_PREDICATE_RESULT_WA;
        *cs++ = 0;

        *cs++ = MI_BATCH_BUFFER_START_GEN8 |
                (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
        *cs++ = lower_32_bits(offset);
        *cs++ = upper_32_bits(offset);

        /* Fixup stray MI_SET_PREDICATE as it prevents us executing the ring */
        *cs++ = MI_BATCH_BUFFER_START_GEN8;
        *cs++ = wa_offset + DG2_PREDICATE_RESULT_BB;
        *cs++ = 0;

        *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

        intel_ring_advance(rq, cs);

        return 0;
}

int gen125_emit_bb_start_noarb(struct i915_request *rq,
                               u64 offset, u32 len,
                               const unsigned int flags)
{
        return __gen125_emit_bb_start(rq, offset, len, flags, MI_ARB_DISABLE);
}

int gen125_emit_bb_start(struct i915_request *rq,
                         u64 offset, u32 len,
                         const unsigned int flags)
{
        return __gen125_emit_bb_start(rq, offset, len, flags, MI_ARB_ENABLE);
}

int gen8_emit_bb_start_noarb(struct i915_request *rq,
                             u64 offset, u32 len,
                             const unsigned int flags)
{
        u32 *cs;

        cs = intel_ring_begin(rq, 4);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        /*
         * WaDisableCtxRestoreArbitration:bdw,chv
         *
         * We would not need to emit MI_ARB_ENABLE as often as we do (in
         * particular on all the gens that do not need the w/a at all!) if
         * we made sure that arbitration was enabled on every switch into
         * this context, both ordinary and for preemption. However, for
         * gen8 there is another w/a that requires us to not preempt inside
         * GPGPU execution, so we keep arbitration disabled for gen8
         * batches. Arbitration will be re-enabled before we close the
         * request (engine->emit_fini_breadcrumb).
         */
        *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

        /* FIXME(BDW+): Address space and security selectors. */
        *cs++ = MI_BATCH_BUFFER_START_GEN8 |
                (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
        *cs++ = lower_32_bits(offset);
        *cs++ = upper_32_bits(offset);

        intel_ring_advance(rq, cs);

        return 0;
}

int gen8_emit_bb_start(struct i915_request *rq,
                       u64 offset, u32 len,
                       const unsigned int flags)
{
        u32 *cs;

        if (unlikely(i915_request_has_nopreempt(rq)))
                return gen8_emit_bb_start_noarb(rq, offset, len, flags);

        cs = intel_ring_begin(rq, 6);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

        *cs++ = MI_BATCH_BUFFER_START_GEN8 |
                (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
        *cs++ = lower_32_bits(offset);
        *cs++ = upper_32_bits(offset);

        *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
        *cs++ = MI_NOOP;

        intel_ring_advance(rq, cs);

        return 0;
}

static void assert_request_valid(struct i915_request *rq)
{
        struct intel_ring *ring __maybe_unused = rq->ring;

        /* Can we unwind this request without appearing to go forwards? */
        GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0);
}

/*
 * Reserve space for 2 NOOPs at the end of each request to be
 * used as a workaround for not being allowed to do lite
 * restore with HEAD==TAIL (WaIdleLiteRestore).
 */
static u32 *gen8_emit_wa_tail(struct i915_request *rq, u32 *cs)
{
        /* Ensure there's always at least one preemption point per-request. */
        *cs++ = MI_ARB_CHECK;
        *cs++ = MI_NOOP;
        rq->wa_tail = intel_ring_offset(rq, cs);

        /* Check that entire request is less than half the ring */
        assert_request_valid(rq);

        return cs;
}

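/*
 * Spin on the engine's preemption semaphore: MI_SEMAPHORE_WAIT polls the
 * dword at preempt_address() until it reads zero, so the submission side
 * can hold the CS here simply by leaving a non-zero value in that slot.
 * The leading MI_ARB_CHECK kicks the engine from IDLE to ACTIVE so the
 * poll takes effect immediately.
 */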
static u32 *emit_preempt_busywait(struct i915_request *rq, u32 *cs)
{
        *cs++ = MI_ARB_CHECK; /* trigger IDLE->ACTIVE first */
        *cs++ = MI_SEMAPHORE_WAIT |
                MI_SEMAPHORE_GLOBAL_GTT |
                MI_SEMAPHORE_POLL |
                MI_SEMAPHORE_SAD_EQ_SDD;
        *cs++ = 0;
        *cs++ = preempt_address(rq->engine);
        *cs++ = 0;
        *cs++ = MI_NOOP;

        return cs;
}

static __always_inline u32*
gen8_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
{
        *cs++ = MI_USER_INTERRUPT;

        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
        if (intel_engine_has_semaphores(rq->engine) &&
            !intel_uc_uses_guc_submission(&rq->engine->gt->uc))
                cs = emit_preempt_busywait(rq, cs);

        rq->tail = intel_ring_offset(rq, cs);
        assert_ring_tail_valid(rq->ring, rq->tail);

        return gen8_emit_wa_tail(rq, cs);
}

static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
{
        return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
}

u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
        return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
}

u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
        cs = gen8_emit_pipe_control(cs,
                                    PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
                                    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                    PIPE_CONTROL_DC_FLUSH_ENABLE,
                                    0);

        /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
        cs = gen8_emit_ggtt_write_rcs(cs,
                                      rq->fence.seqno,
                                      hwsp_offset(rq),
                                      PIPE_CONTROL_FLUSH_ENABLE |
                                      PIPE_CONTROL_CS_STALL);

        return gen8_emit_fini_breadcrumb_tail(rq, cs);
}

u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
        cs = gen8_emit_ggtt_write_rcs(cs,
                                      rq->fence.seqno,
                                      hwsp_offset(rq),
                                      PIPE_CONTROL_CS_STALL |
                                      PIPE_CONTROL_TILE_CACHE_FLUSH |
                                      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
                                      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                      PIPE_CONTROL_DC_FLUSH_ENABLE |
                                      PIPE_CONTROL_FLUSH_ENABLE);

        return gen8_emit_fini_breadcrumb_tail(rq, cs);
}

/*
 * Note that the CS instruction pre-parser will not stall on the breadcrumb
 * flush and will continue pre-fetching the instructions after it before the
 * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
 * BB_START/END instructions, so, even though we might pre-fetch the pre-amble
 * of the next request before the memory has been flushed, we're guaranteed that
 * we won't access the batch itself too early.
 * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
 * so, if the current request is modifying an instruction in the next request on
 * the same intel_context, we might pre-fetch and then execute the pre-update
 * instruction. To avoid this, the users of self-modifying code should either
 * disable the parser around the code emitting the memory writes, via a new flag
 * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
 * the in-kernel use-cases we've opted to use a separate context, see
 * reloc_gpu() as an example.
 * All the above applies only to the instructions themselves. Non-inline data
 * used by the instructions is not pre-fetched.
 */

static u32 *gen12_emit_preempt_busywait(struct i915_request *rq, u32 *cs)
{
        *cs++ = MI_ARB_CHECK; /* trigger IDLE->ACTIVE first */
        *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
                MI_SEMAPHORE_GLOBAL_GTT |
                MI_SEMAPHORE_POLL |
                MI_SEMAPHORE_SAD_EQ_SDD;
        *cs++ = 0;
        *cs++ = preempt_address(rq->engine);
        *cs++ = 0;
        *cs++ = 0;

        return cs;
}

/* Wa_14014475959:dg2 */
#define CCS_SEMAPHORE_PPHWSP_OFFSET     0x540
static u32 ccs_semaphore_offset(struct i915_request *rq)
{
        return i915_ggtt_offset(rq->context->state) +
                (LRC_PPHWSP_PN * PAGE_SIZE) + CCS_SEMAPHORE_PPHWSP_OFFSET;
}

/* Wa_14014475959:dg2 */
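/*
 * Hold the context on the CCS engine at the end of the request: the
 * MI_ATOMIC below writes 1 into a semaphore slot in the context's own
 * PPHWSP and the following MI_SEMAPHORE_WAIT spins until that slot reads
 * zero again, so the context cannot be switched out until the other side
 * of the workaround clears the semaphore.
 */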
static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs)
{
        int i;

        *cs++ = MI_ATOMIC_INLINE | MI_ATOMIC_GLOBAL_GTT | MI_ATOMIC_CS_STALL |
                MI_ATOMIC_MOVE;
        *cs++ = ccs_semaphore_offset(rq);
        *cs++ = 0;
        *cs++ = 1;

        /*
         * When MI_ATOMIC_INLINE_DATA is set, this command must be 11 DW
         * (+ 1 NOP to align): the 4 DWs above plus the 8 filler DWs here.
         */
        for (i = 0; i < 8; ++i)
                *cs++ = 0;

        *cs++ = MI_SEMAPHORE_WAIT |
                MI_SEMAPHORE_GLOBAL_GTT |
                MI_SEMAPHORE_POLL |
                MI_SEMAPHORE_SAD_EQ_SDD;
        *cs++ = 0;
        *cs++ = ccs_semaphore_offset(rq);
        *cs++ = 0;

        return cs;
}

static __always_inline u32*
gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
{
        *cs++ = MI_USER_INTERRUPT;

        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
        if (intel_engine_has_semaphores(rq->engine) &&
            !intel_uc_uses_guc_submission(&rq->engine->gt->uc))
                cs = gen12_emit_preempt_busywait(rq, cs);

        /* Wa_14014475959:dg2 */
        if (intel_engine_uses_wa_hold_ccs_switchout(rq->engine))
                cs = ccs_emit_wa_busywait(rq, cs);

        rq->tail = intel_ring_offset(rq, cs);
        assert_ring_tail_valid(rq->ring, rq->tail);

        return gen8_emit_wa_tail(rq, cs);
}

u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
        /* XXX Stalling flush before seqno write; post-sync not */
        cs = emit_xcs_breadcrumb(rq, __gen8_emit_flush_dw(cs, 0, 0, 0));
        return gen12_emit_fini_breadcrumb_tail(rq, cs);
}

u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
        struct drm_i915_private *i915 = rq->engine->i915;
        u32 flags = (PIPE_CONTROL_CS_STALL |
                     PIPE_CONTROL_TILE_CACHE_FLUSH |
                     PIPE_CONTROL_FLUSH_L3 |
                     PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                     PIPE_CONTROL_DC_FLUSH_ENABLE |
                     PIPE_CONTROL_FLUSH_ENABLE);

        if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
                /* Wa_1409600907 */
                flags |= PIPE_CONTROL_DEPTH_STALL;

        if (!HAS_3D_PIPELINE(rq->engine->i915))
                flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
        else if (rq->engine->class == COMPUTE_CLASS)
                flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;

        cs = gen12_emit_ggtt_write_rcs(cs,
                                       rq->fence.seqno,
                                       hwsp_offset(rq),
                                       PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
                                       flags);

        return gen12_emit_fini_breadcrumb_tail(rq, cs);
}