// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include "gen8_engine_cs.h"
#include "i915_drv.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_lrc.h"
#include "intel_ring.h"

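/*
 * Emit a PIPE_CONTROL based flush for the Gen8 render engine. EMIT_FLUSH
 * writes back the render caches; EMIT_INVALIDATE drops the read-only caches
 * and TLBs, with the Gen9 VF-cache and KBL GAM-hang workarounds applied
 * where required.
 */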
int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
        bool vf_flush_wa = false, dc_flush_wa = false;
        u32 *cs, flags = 0;
        int len;

        flags |= PIPE_CONTROL_CS_STALL;

        if (mode & EMIT_FLUSH) {
                flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
                flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
                flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
                flags |= PIPE_CONTROL_FLUSH_ENABLE;
        }

        if (mode & EMIT_INVALIDATE) {
                flags |= PIPE_CONTROL_TLB_INVALIDATE;
                flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_QW_WRITE;
                flags |= PIPE_CONTROL_STORE_DATA_INDEX;

                /*
                 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
                 * pipe control.
                 */
                if (GRAPHICS_VER(rq->engine->i915) == 9)
                        vf_flush_wa = true;

                /* WaForGAMHang:kbl */
                if (IS_KBL_GRAPHICS_STEP(rq->engine->i915, 0, STEP_C0))
                        dc_flush_wa = true;
        }

        len = 6;

        if (vf_flush_wa)
                len += 6;

        if (dc_flush_wa)
                len += 12;

        cs = intel_ring_begin(rq, len);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        if (vf_flush_wa)
                cs = gen8_emit_pipe_control(cs, 0, 0);

        if (dc_flush_wa)
                cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
                                            0);

        cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);

        if (dc_flush_wa)
                cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);

        intel_ring_advance(rq, cs);

        return 0;
}

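/*
 * Emit an MI_FLUSH_DW based flush for the non-render engines. The post-sync
 * write to the per-context scratch page serves as the command barrier, and
 * video decode engines additionally get MI_INVALIDATE_BSD when invalidating.
 */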
int gen8_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
        u32 cmd, *cs;

        cs = intel_ring_begin(rq, 4);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        cmd = MI_FLUSH_DW + 1;

        /*
         * We always require a command barrier so that subsequent
         * commands, such as breadcrumb interrupts, are strictly ordered
         * wrt the contents of the write cache being flushed to memory
         * (and thus being coherent from the CPU).
         */
        cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

        if (mode & EMIT_INVALIDATE) {
                cmd |= MI_INVALIDATE_TLB;
                if (rq->engine->class == VIDEO_DECODE_CLASS)
                        cmd |= MI_INVALIDATE_BSD;
        }

        *cs++ = cmd;
        *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
        *cs++ = 0; /* upper addr */
        *cs++ = 0; /* value */
        intel_ring_advance(rq, cs);

        return 0;
}

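/*
 * Gen11 render flush: unlike the Gen8 path, flush and invalidate are emitted
 * as separate PIPE_CONTROLs, the flush also covering the tile cache and the
 * invalidate also covering the command cache.
 */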
int gen11_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
        if (mode & EMIT_FLUSH) {
                u32 *cs;
                u32 flags = 0;

                flags |= PIPE_CONTROL_CS_STALL;

                flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
                flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
                flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
                flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
                flags |= PIPE_CONTROL_FLUSH_ENABLE;
                flags |= PIPE_CONTROL_QW_WRITE;
                flags |= PIPE_CONTROL_STORE_DATA_INDEX;

                cs = intel_ring_begin(rq, 6);
                if (IS_ERR(cs))
                        return PTR_ERR(cs);

                cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
                intel_ring_advance(rq, cs);
        }

        if (mode & EMIT_INVALIDATE) {
                u32 *cs;
                u32 flags = 0;

                flags |= PIPE_CONTROL_CS_STALL;

                flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_TLB_INVALIDATE;
                flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_QW_WRITE;
                flags |= PIPE_CONTROL_STORE_DATA_INDEX;

                cs = intel_ring_begin(rq, 6);
                if (IS_ERR(cs))
                        return PTR_ERR(cs);

                cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
                intel_ring_advance(rq, cs);
        }

        return 0;
}

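/*
 * Gen12 command pre-parser control, encoded in the MI_ARB_CHECK dword:
 * bit 8 is the mask enable for the pre-fetch disable state carried in bit 0.
 * Used to bracket the TLB/AUX invalidations in the flush emitters below.
 */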
static u32 preparser_disable(bool state)
{
        return MI_ARB_CHECK | 1 << 8 | state;
}

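/*
 * Emit an LRI write of AUX_INV to @inv_reg to invalidate the AUX
 * (compression metadata) table, adding the GT's GSI offset and the LRI
 * MMIO-remap bit so the write hits the right register on multi-GT parts.
 */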
u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg)
{
        u32 gsi_offset = gt->uncore->gsi_offset;

        *cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
        *cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset;
        *cs++ = AUX_INV;
        *cs++ = MI_NOOP;

        return cs;
}

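/*
 * Wa_14016712196: on MTL A0 steppings, emit a dummy PIPE_CONTROL with a
 * depth-cache flush ahead of the main flush; a no-op on other platforms.
 */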
static int mtl_dummy_pipe_control(struct i915_request *rq)
{
        /* Wa_14016712196 */
        if (IS_MTL_GRAPHICS_STEP(rq->engine->i915, M, STEP_A0, STEP_B0) ||
            IS_MTL_GRAPHICS_STEP(rq->engine->i915, P, STEP_A0, STEP_B0)) {
                u32 *cs;

                /* dummy PIPE_CONTROL + depth flush */
                cs = intel_ring_begin(rq, 6);
                if (IS_ERR(cs))
                        return PTR_ERR(cs);
                cs = gen12_emit_pipe_control(cs,
                                             0,
                                             PIPE_CONTROL_DEPTH_CACHE_FLUSH,
                                             LRC_PPHWSP_SCRATCH_ADDR);
                intel_ring_advance(rq, cs);
        }

        return 0;
}

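/*
 * Gen12 render/compute flush. The flush path adds L3 and tile-cache flushes
 * and the Wa_1409600907 depth stall; the invalidate path is bracketed by
 * pre-parser disable/enable and, without flat CCS, an AUX table invalidation
 * (hsdes: 1809175790). 3D-only PIPE_CONTROL bits are masked off on compute
 * engines and on parts without a 3D pipeline.
 */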
int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
        struct intel_engine_cs *engine = rq->engine;

        if (mode & EMIT_FLUSH) {
                u32 flags = 0;
                int err;
                u32 *cs;

                err = mtl_dummy_pipe_control(rq);
                if (err)
                        return err;

                flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
                flags |= PIPE_CONTROL_FLUSH_L3;
                flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
                flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
                /* Wa_1409600907:tgl,adl-p */
                flags |= PIPE_CONTROL_DEPTH_STALL;
                flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
                flags |= PIPE_CONTROL_FLUSH_ENABLE;

                flags |= PIPE_CONTROL_STORE_DATA_INDEX;
                flags |= PIPE_CONTROL_QW_WRITE;

                flags |= PIPE_CONTROL_CS_STALL;

                if (!HAS_3D_PIPELINE(engine->i915))
                        flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
                else if (engine->class == COMPUTE_CLASS)
                        flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;

                cs = intel_ring_begin(rq, 6);
                if (IS_ERR(cs))
                        return PTR_ERR(cs);

                cs = gen12_emit_pipe_control(cs,
                                             PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
                                             flags, LRC_PPHWSP_SCRATCH_ADDR);
                intel_ring_advance(rq, cs);
        }

        if (mode & EMIT_INVALIDATE) {
                u32 flags = 0;
                u32 *cs, count;
                int err;

                err = mtl_dummy_pipe_control(rq);
                if (err)
                        return err;

                flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_TLB_INVALIDATE;
                flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;

                flags |= PIPE_CONTROL_STORE_DATA_INDEX;
                flags |= PIPE_CONTROL_QW_WRITE;

                flags |= PIPE_CONTROL_CS_STALL;

                if (!HAS_3D_PIPELINE(engine->i915))
                        flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
                else if (engine->class == COMPUTE_CLASS)
                        flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;

                if (!HAS_FLAT_CCS(rq->engine->i915))
                        count = 8 + 4;
                else
                        count = 8;

                cs = intel_ring_begin(rq, count);
                if (IS_ERR(cs))
                        return PTR_ERR(cs);

                /*
                 * Prevent the pre-parser from skipping past the TLB
                 * invalidate and loading a stale page for the batch
                 * buffer / request payload.
                 */
                *cs++ = preparser_disable(true);

                cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);

                if (!HAS_FLAT_CCS(rq->engine->i915)) {
                        /* hsdes: 1809175790 */
                        cs = gen12_emit_aux_table_inv(rq->engine->gt,
                                                      cs, GEN12_GFX_CCS_AUX_NV);
                }

                *cs++ = preparser_disable(false);
                intel_ring_advance(rq, cs);
        }

        return 0;
}

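/*
 * Gen12 flush for the non-render engines: MI_FLUSH_DW plus, when
 * invalidating, a pre-parser disable bracket and an AUX table invalidation
 * for video engines on parts without flat CCS.
 */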
int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
        intel_engine_mask_t aux_inv = 0;
        u32 cmd, *cs;

        cmd = 4;
        if (mode & EMIT_INVALIDATE) {
                cmd += 2;

                if (!HAS_FLAT_CCS(rq->engine->i915) &&
                    (rq->engine->class == VIDEO_DECODE_CLASS ||
                     rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
                        aux_inv = rq->engine->mask &
                                ~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
                        if (aux_inv)
                                cmd += 4;
                }
        }

        cs = intel_ring_begin(rq, cmd);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        if (mode & EMIT_INVALIDATE)
                *cs++ = preparser_disable(true);

        cmd = MI_FLUSH_DW + 1;

        /*
         * We always require a command barrier so that subsequent
         * commands, such as breadcrumb interrupts, are strictly ordered
         * wrt the contents of the write cache being flushed to memory
         * (and thus being coherent from the CPU).
         */
        cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

        if (mode & EMIT_INVALIDATE) {
                cmd |= MI_INVALIDATE_TLB;
                if (rq->engine->class == VIDEO_DECODE_CLASS)
                        cmd |= MI_INVALIDATE_BSD;
        }

        *cs++ = cmd;
        *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
        *cs++ = 0; /* upper addr */
        *cs++ = 0; /* value */

        if (aux_inv) { /* hsdes: 1809175790 */
                if (rq->engine->class == VIDEO_DECODE_CLASS)
                        cs = gen12_emit_aux_table_inv(rq->engine->gt,
                                                      cs, GEN12_VD0_AUX_NV);
                else
                        cs = gen12_emit_aux_table_inv(rq->engine->gt,
                                                      cs, GEN12_VE0_AUX_NV);
        }

        if (mode & EMIT_INVALIDATE)
                *cs++ = preparser_disable(false);

        intel_ring_advance(rq, cs);

        return 0;
}

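/* GGTT address of the preemption semaphore in the engine's status page. */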
static u32 preempt_address(struct intel_engine_cs *engine)
{
        return (i915_ggtt_offset(engine->status_page.vma) +
                I915_GEM_HWS_PREEMPT_ADDR);
}

static u32 hwsp_offset(const struct i915_request *rq)
{
        const struct intel_timeline *tl;

        /* Before the request is executed, the timeline is fixed */
        tl = rcu_dereference_protected(rq->timeline,
                                       !i915_request_signaled(rq));

        /* See the comment in i915_request_active_seqno(). */
        return page_mask_bits(tl->hwsp_offset) + offset_in_page(rq->hwsp_seqno);
}

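/*
 * Write the initial breadcrumb (seqno - 1) to the timeline's HWSP, followed
 * by an arbitration point that marks the boundary between the semaphore
 * waits and the user payload; see the comment in the body for how this
 * interacts with preemption and hang detection.
 */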
int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
        u32 *cs;

        GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq));
        if (!i915_request_timeline(rq)->has_initial_breadcrumb)
                return 0;

        cs = intel_ring_begin(rq, 6);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
        *cs++ = hwsp_offset(rq);
        *cs++ = 0;
        *cs++ = rq->fence.seqno - 1;

        /*
         * Check if we have been preempted before we even get started.
         *
         * After this point i915_request_started() reports true, even if
         * we get preempted and so are no longer running.
         *
         * i915_request_started() is used during preemption processing
         * to decide if the request is currently inside the user payload
         * or spinning on a kernel semaphore (or earlier). For no-preemption
         * requests, we do allow preemption on the semaphore before the user
         * payload, but do not allow preemption once the request is started.
         *
         * i915_request_started() is similarly used during GPU hangs to
         * determine if the user's payload was guilty, and if so, the
         * request is banned. Before the request is started, it is assumed
         * to be unharmed and an innocent victim of another's hang.
         */
        *cs++ = MI_NOOP;
        *cs++ = MI_ARB_CHECK;

        intel_ring_advance(rq, cs);

        /* Record the updated position of the request's payload */
        rq->infix = intel_ring_offset(rq, cs);

        __set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);

        return 0;
}

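/*
 * Xe_HP batchbuffer start: reload RING_PREDICATE_RESULT from the context's
 * workaround page before jumping to the user batch, then chain into the
 * indirect wa_bb afterwards to clear any stray MI_SET_PREDICATE, leaving
 * arbitration disabled again once the batch has run.
 */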
static int __xehp_emit_bb_start(struct i915_request *rq,
                                u64 offset, u32 len,
                                const unsigned int flags,
                                u32 arb)
{
        struct intel_context *ce = rq->context;
        u32 wa_offset = lrc_indirect_bb(ce);
        u32 *cs;

        GEM_BUG_ON(!ce->wa_bb_page);

        cs = intel_ring_begin(rq, 12);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_ARB_ON_OFF | arb;

        *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
                MI_SRM_LRM_GLOBAL_GTT |
                MI_LRI_LRM_CS_MMIO;
        *cs++ = i915_mmio_reg_offset(RING_PREDICATE_RESULT(0));
        *cs++ = wa_offset + DG2_PREDICATE_RESULT_WA;
        *cs++ = 0;

        *cs++ = MI_BATCH_BUFFER_START_GEN8 |
                (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
        *cs++ = lower_32_bits(offset);
        *cs++ = upper_32_bits(offset);

        /* Fixup stray MI_SET_PREDICATE as it prevents us executing the ring */
        *cs++ = MI_BATCH_BUFFER_START_GEN8;
        *cs++ = wa_offset + DG2_PREDICATE_RESULT_BB;
        *cs++ = 0;

        *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

        intel_ring_advance(rq, cs);

        return 0;
}

int xehp_emit_bb_start_noarb(struct i915_request *rq,
                             u64 offset, u32 len,
                             const unsigned int flags)
{
        return __xehp_emit_bb_start(rq, offset, len, flags, MI_ARB_DISABLE);
}

int xehp_emit_bb_start(struct i915_request *rq,
                       u64 offset, u32 len,
                       const unsigned int flags)
{
        return __xehp_emit_bb_start(rq, offset, len, flags, MI_ARB_ENABLE);
}

int gen8_emit_bb_start_noarb(struct i915_request *rq,
                             u64 offset, u32 len,
                             const unsigned int flags)
{
        u32 *cs;

        cs = intel_ring_begin(rq, 4);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        /*
         * WaDisableCtxRestoreArbitration:bdw,chv
         *
         * We wouldn't need to perform MI_ARB_ENABLE as often as we do (in
         * particular on all the gens that do not need the w/a at all) if we
         * took care to make sure that on every switch into this context
         * (both ordinary and for preemption) arbitration was enabled.
         * However, for gen8 there is another w/a that requires us not to
         * preempt inside GPGPU execution, so we keep arbitration disabled
         * for gen8 batches. Arbitration will be re-enabled before we close
         * the request (engine->emit_fini_breadcrumb).
         */
        *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

        /* FIXME(BDW+): Address space and security selectors. */
        *cs++ = MI_BATCH_BUFFER_START_GEN8 |
                (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
        *cs++ = lower_32_bits(offset);
        *cs++ = upper_32_bits(offset);

        intel_ring_advance(rq, cs);

        return 0;
}

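/*
 * As above, but with arbitration enabled around the batchbuffer start so the
 * request remains preemptible while the batch runs; no-preempt requests fall
 * back to the noarb variant.
 */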
int gen8_emit_bb_start(struct i915_request *rq,
                       u64 offset, u32 len,
                       const unsigned int flags)
{
        u32 *cs;

        if (unlikely(i915_request_has_nopreempt(rq)))
                return gen8_emit_bb_start_noarb(rq, offset, len, flags);

        cs = intel_ring_begin(rq, 6);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

        *cs++ = MI_BATCH_BUFFER_START_GEN8 |
                (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
        *cs++ = lower_32_bits(offset);
        *cs++ = upper_32_bits(offset);

        *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
        *cs++ = MI_NOOP;

        intel_ring_advance(rq, cs);

        return 0;
}

static void assert_request_valid(struct i915_request *rq)
{
        struct intel_ring *ring __maybe_unused = rq->ring;

        /* Can we unwind this request without appearing to go forwards? */
        GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0);
}

/*
 * Reserve space for 2 NOOPs at the end of each request to be
 * used as a workaround for not being allowed to do lite
 * restore with HEAD==TAIL (WaIdleLiteRestore).
 */
static u32 *gen8_emit_wa_tail(struct i915_request *rq, u32 *cs)
{
        /* Ensure there's always at least one preemption point per-request. */
        *cs++ = MI_ARB_CHECK;
        *cs++ = MI_NOOP;
        rq->wa_tail = intel_ring_offset(rq, cs);

        /* Check that entire request is less than half the ring */
        assert_request_valid(rq);

        return cs;
}

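/*
 * Poll the preemption semaphore in the status page until it reads zero,
 * giving the execlists backend a point at which it can hold a completed
 * request while preemption is processed. Skipped under GuC submission.
 */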
static u32 *emit_preempt_busywait(struct i915_request *rq, u32 *cs)
{
        *cs++ = MI_ARB_CHECK; /* trigger IDLE->ACTIVE first */
        *cs++ = MI_SEMAPHORE_WAIT |
                MI_SEMAPHORE_GLOBAL_GTT |
                MI_SEMAPHORE_POLL |
                MI_SEMAPHORE_SAD_EQ_SDD;
        *cs++ = 0;
        *cs++ = preempt_address(rq->engine);
        *cs++ = 0;
        *cs++ = MI_NOOP;

        return cs;
}

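/*
 * Common tail for the final breadcrumb: raise the user interrupt, re-enable
 * arbitration, optionally emit the execlists preemption busy-wait, and close
 * the request with the WaIdleLiteRestore tail.
 */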
static __always_inline u32*
gen8_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
{
        *cs++ = MI_USER_INTERRUPT;

        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
        if (intel_engine_has_semaphores(rq->engine) &&
            !intel_uc_uses_guc_submission(&rq->engine->gt->uc))
                cs = emit_preempt_busywait(rq, cs);

        rq->tail = intel_ring_offset(rq, cs);
        assert_ring_tail_valid(rq->ring, rq->tail);

        return gen8_emit_wa_tail(rq, cs);
}

static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
{
        return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
}

u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
        return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
}

u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
        cs = gen8_emit_pipe_control(cs,
                                    PIPE_CONTROL_CS_STALL |
                                    PIPE_CONTROL_TLB_INVALIDATE |
                                    PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
                                    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                    PIPE_CONTROL_DC_FLUSH_ENABLE,
                                    0);

        /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
        cs = gen8_emit_ggtt_write_rcs(cs,
                                      rq->fence.seqno,
                                      hwsp_offset(rq),
                                      PIPE_CONTROL_FLUSH_ENABLE |
                                      PIPE_CONTROL_CS_STALL);

        return gen8_emit_fini_breadcrumb_tail(rq, cs);
}

u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
        cs = gen8_emit_pipe_control(cs,
                                    PIPE_CONTROL_CS_STALL |
                                    PIPE_CONTROL_TLB_INVALIDATE |
                                    PIPE_CONTROL_TILE_CACHE_FLUSH |
                                    PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
                                    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                    PIPE_CONTROL_DC_FLUSH_ENABLE,
                                    0);

        /*XXX: Look at gen8_emit_fini_breadcrumb_rcs */
        cs = gen8_emit_ggtt_write_rcs(cs,
                                      rq->fence.seqno,
                                      hwsp_offset(rq),
                                      PIPE_CONTROL_FLUSH_ENABLE |
                                      PIPE_CONTROL_CS_STALL);

        return gen8_emit_fini_breadcrumb_tail(rq, cs);
}

/*
 * Note that the CS instruction pre-parser will not stall on the breadcrumb
 * flush and will continue pre-fetching the instructions after it before the
 * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
 * BB_START/END instructions, so, even though we might pre-fetch the pre-amble
 * of the next request before the memory has been flushed, we're guaranteed that
 * we won't access the batch itself too early.
 * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
 * so, if the current request is modifying an instruction in the next request on
 * the same intel_context, we might pre-fetch and then execute the pre-update
 * instruction. To avoid this, the users of self-modifying code should either
 * disable the parser around the code emitting the memory writes, via a new flag
 * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
 * the in-kernel use-cases we've opted to use a separate context, see
 * reloc_gpu() as an example.
 * All the above applies only to the instructions themselves. Non-inline data
 * used by the instructions is not pre-fetched.
 */

static u32 *gen12_emit_preempt_busywait(struct i915_request *rq, u32 *cs)
{
        *cs++ = MI_ARB_CHECK; /* trigger IDLE->ACTIVE first */
        *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
                MI_SEMAPHORE_GLOBAL_GTT |
                MI_SEMAPHORE_POLL |
                MI_SEMAPHORE_SAD_EQ_SDD;
        *cs++ = 0;
        *cs++ = preempt_address(rq->engine);
        *cs++ = 0;
        *cs++ = 0;

        return cs;
}

/* Wa_14014475959:dg2 */
#define CCS_SEMAPHORE_PPHWSP_OFFSET     0x540
static u32 ccs_semaphore_offset(struct i915_request *rq)
{
        return i915_ggtt_offset(rq->context->state) +
                (LRC_PPHWSP_PN * PAGE_SIZE) + CCS_SEMAPHORE_PPHWSP_OFFSET;
}

/* Wa_14014475959:dg2 */
static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs)
{
        int i;

        *cs++ = MI_ATOMIC_INLINE | MI_ATOMIC_GLOBAL_GTT | MI_ATOMIC_CS_STALL |
                MI_ATOMIC_MOVE;
        *cs++ = ccs_semaphore_offset(rq);
        *cs++ = 0;
        *cs++ = 1;

        /*
         * When MI_ATOMIC_INLINE_DATA set this command must be 11 DW + (1 NOP)
         * to align. 4 DWs above + 8 filler DWs here.
         */
        for (i = 0; i < 8; ++i)
                *cs++ = 0;

        *cs++ = MI_SEMAPHORE_WAIT |
                MI_SEMAPHORE_GLOBAL_GTT |
                MI_SEMAPHORE_POLL |
                MI_SEMAPHORE_SAD_EQ_SDD;
        *cs++ = 0;
        *cs++ = ccs_semaphore_offset(rq);
        *cs++ = 0;

        return cs;
}

static __always_inline u32*
gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
{
        *cs++ = MI_USER_INTERRUPT;

        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
        if (intel_engine_has_semaphores(rq->engine) &&
            !intel_uc_uses_guc_submission(&rq->engine->gt->uc))
                cs = gen12_emit_preempt_busywait(rq, cs);

        /* Wa_14014475959:dg2 */
        if (intel_engine_uses_wa_hold_ccs_switchout(rq->engine))
                cs = ccs_emit_wa_busywait(rq, cs);

        rq->tail = intel_ring_offset(rq, cs);
        assert_ring_tail_valid(rq->ring, rq->tail);

        return gen8_emit_wa_tail(rq, cs);
}

u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
        /* XXX Stalling flush before seqno write; post-sync not */
        cs = emit_xcs_breadcrumb(rq, __gen8_emit_flush_dw(cs, 0, 0, 0));
        return gen12_emit_fini_breadcrumb_tail(rq, cs);
}

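/*
 * Final breadcrumb for Gen12 render/compute engines: a full flush +
 * invalidate PIPE_CONTROL (with the Wa_14016712196 dummy flush and the
 * Wa_1409600907 depth stall where applicable), the seqno write, then the
 * common Gen12 tail.
 */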
u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
        struct drm_i915_private *i915 = rq->engine->i915;
        u32 flags = (PIPE_CONTROL_CS_STALL |
                     PIPE_CONTROL_TLB_INVALIDATE |
                     PIPE_CONTROL_TILE_CACHE_FLUSH |
                     PIPE_CONTROL_FLUSH_L3 |
                     PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                     PIPE_CONTROL_DC_FLUSH_ENABLE |
                     PIPE_CONTROL_FLUSH_ENABLE);

        /* Wa_14016712196 */
        if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
            IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
                /* dummy PIPE_CONTROL + depth flush */
                cs = gen12_emit_pipe_control(cs, 0,
                                             PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0);

        if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
                /* Wa_1409600907 */
                flags |= PIPE_CONTROL_DEPTH_STALL;

        if (!HAS_3D_PIPELINE(rq->engine->i915))
                flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
        else if (rq->engine->class == COMPUTE_CLASS)
                flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;

        cs = gen12_emit_pipe_control(cs, PIPE_CONTROL0_HDC_PIPELINE_FLUSH, flags, 0);

        /*XXX: Look at gen8_emit_fini_breadcrumb_rcs */
        cs = gen12_emit_ggtt_write_rcs(cs,
                                       rq->fence.seqno,
                                       hwsp_offset(rq),
                                       0,
                                       PIPE_CONTROL_FLUSH_ENABLE |
                                       PIPE_CONTROL_CS_STALL);

        return gen12_emit_fini_breadcrumb_tail(rq, cs);
}