drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
1 /*
2  * Copyright 2019 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28
29 #include "amdgpu.h"
30 #include "amdgpu_ucode.h"
31 #include "amdgpu_trace.h"
32
33 #include "gc/gc_10_3_0_offset.h"
34 #include "gc/gc_10_3_0_sh_mask.h"
35 #include "ivsrcid/sdma0/irqsrcs_sdma0_5_0.h"
36 #include "ivsrcid/sdma1/irqsrcs_sdma1_5_0.h"
37 #include "ivsrcid/sdma2/irqsrcs_sdma2_5_0.h"
38 #include "ivsrcid/sdma3/irqsrcs_sdma3_5_0.h"
39
40 #include "soc15_common.h"
41 #include "soc15.h"
42 #include "navi10_sdma_pkt_open.h"
43 #include "nbio_v2_3.h"
44 #include "sdma_common.h"
45 #include "sdma_v5_2.h"
46
47 MODULE_FIRMWARE("amdgpu/sienna_cichlid_sdma.bin");
48 MODULE_FIRMWARE("amdgpu/navy_flounder_sdma.bin");
49
50 #define SDMA1_REG_OFFSET 0x600
51 #define SDMA3_REG_OFFSET 0x400
52 #define SDMA0_HYP_DEC_REG_START 0x5880
53 #define SDMA0_HYP_DEC_REG_END 0x5893
54 #define SDMA1_HYP_DEC_REG_OFFSET 0x20
55
56 static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev);
57 static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev);
58 static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev);
59 static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev);
60
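   /*
    * Summary of the mapping implemented below: SDMA instances 0/1 share one
    * GC register aperture (instance 1 offset by SDMA1_REG_OFFSET), instances
    * 2/3 share a second aperture (instance 3 offset by SDMA3_REG_OFFSET), and
    * registers in the HYP_DEC range use their own base with instances spaced
    * SDMA1_HYP_DEC_REG_OFFSET apart.
    */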
61 static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
62 {
63         u32 base;
64
65         if (internal_offset >= SDMA0_HYP_DEC_REG_START &&
66             internal_offset <= SDMA0_HYP_DEC_REG_END) {
67                 base = adev->reg_offset[GC_HWIP][0][1];
68                 if (instance != 0)
69                         internal_offset += SDMA1_HYP_DEC_REG_OFFSET * instance;
70         } else {
71                 if (instance < 2) {
72                         base = adev->reg_offset[GC_HWIP][0][0];
73                         if (instance == 1)
74                                 internal_offset += SDMA1_REG_OFFSET;
75                 } else {
76                         base = adev->reg_offset[GC_HWIP][0][2];
77                         if (instance == 3)
78                                 internal_offset += SDMA3_REG_OFFSET;
79                 }
80         }
81
82         return base + internal_offset;
83 }
84
85 static void sdma_v5_2_init_golden_registers(struct amdgpu_device *adev)
86 {
87         switch (adev->asic_type) {
88         case CHIP_SIENNA_CICHLID:
89         case CHIP_NAVY_FLOUNDER:
90                 break;
91         default:
92                 break;
93         }
94 }
95
96 static int sdma_v5_2_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
97 {
98         int err = 0;
99         const struct sdma_firmware_header_v1_0 *hdr;
100
101         err = amdgpu_ucode_validate(sdma_inst->fw);
102         if (err)
103                 return err;
104
105         hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data;
106         sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version);
107         sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version);
108
109         if (sdma_inst->feature_version >= 20)
110                 sdma_inst->burst_nop = true;
111
112         return 0;
113 }
114
115 static void sdma_v5_2_destroy_inst_ctx(struct amdgpu_device *adev)
116 {
117         int i;
118
119         for (i = 0; i < adev->sdma.num_instances; i++) {
120                 release_firmware(adev->sdma.instance[i].fw);
121                 adev->sdma.instance[i].fw = NULL;
122
                     /* all instances share instance 0's firmware image, so release it only once */
 123                 if (adev->asic_type == CHIP_SIENNA_CICHLID ||
                         adev->asic_type == CHIP_NAVY_FLOUNDER)
 124                         break;
125         }
126
127         memset((void*)adev->sdma.instance, 0,
128                sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);
129 }
130
131 /**
132  * sdma_v5_2_init_microcode - load ucode images from disk
133  *
134  * @adev: amdgpu_device pointer
135  *
136  * Use the firmware interface to load the ucode images into
137  * the driver (not loaded into hw).
138  * Returns 0 on success, error on failure.
139  */
140
 141 // Direct loading is for emulation only and won't work on a real chip;
 142 // a real chip needs to use the PSP to load the firmware.
143 static int sdma_v5_2_init_microcode(struct amdgpu_device *adev)
144 {
145         const char *chip_name;
146         char fw_name[40];
147         int err = 0, i;
148         struct amdgpu_firmware_info *info = NULL;
149         const struct common_firmware_header *header = NULL;
150
151         if (amdgpu_sriov_vf(adev))
152                 return 0;
153
154         DRM_DEBUG("\n");
155
156         switch (adev->asic_type) {
157         case CHIP_SIENNA_CICHLID:
158                 chip_name = "sienna_cichlid";
159                 break;
160         case CHIP_NAVY_FLOUNDER:
161                 chip_name = "navy_flounder";
162                 break;
163         default:
164                 BUG();
165         }
166
167         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
168
169         err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev);
170         if (err)
171                 goto out;
172
173         err = sdma_v5_2_init_inst_ctx(&adev->sdma.instance[0]);
174         if (err)
175                 goto out;
176
177         for (i = 1; i < adev->sdma.num_instances; i++) {
178                 if (adev->asic_type == CHIP_SIENNA_CICHLID ||
179                     adev->asic_type == CHIP_NAVY_FLOUNDER) {
180                         memcpy((void*)&adev->sdma.instance[i],
181                                (void*)&adev->sdma.instance[0],
182                                sizeof(struct amdgpu_sdma_instance));
183                 } else {
184                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i);
185                         err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
186                         if (err)
187                                 goto out;
188
 189                         err = sdma_v5_2_init_inst_ctx(&adev->sdma.instance[i]);
190                         if (err)
191                                 goto out;
192                 }
193         }
194
195         DRM_DEBUG("psp_load == '%s'\n",
196                   adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
197
198         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
199                 for (i = 0; i < adev->sdma.num_instances; i++) {
200                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
201                         info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
202                         info->fw = adev->sdma.instance[i].fw;
203                         header = (const struct common_firmware_header *)info->fw->data;
204                         adev->firmware.fw_size +=
205                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
206                 }
207         }
208
209 out:
210         if (err) {
211                 DRM_ERROR("sdma_v5_2: Failed to load firmware \"%s\"\n", fw_name);
212                 sdma_v5_2_destroy_inst_ctx(adev);
213         }
214         return err;
215 }
216
217 static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring)
218 {
219         unsigned ret;
220
221         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE));
222         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
223         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
224         amdgpu_ring_write(ring, 1);
225         ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
226         amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
227
228         return ret;
229 }
230
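    /*
     * Patch the 0x55aa55aa placeholder emitted by init_cond_exec above with
     * the number of dwords between that slot and the current wptr (taking
     * ring wrap-around into account), i.e. the length of the command block
     * the COND_EXE packet applies to.
     */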
231 static void sdma_v5_2_ring_patch_cond_exec(struct amdgpu_ring *ring,
232                                            unsigned offset)
233 {
234         unsigned cur;
235
236         BUG_ON(offset > ring->buf_mask);
237         BUG_ON(ring->ring[offset] != 0x55aa55aa);
238
239         cur = (ring->wptr - 1) & ring->buf_mask;
240         if (cur > offset)
241                 ring->ring[offset] = cur - offset;
242         else
243                 ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
244 }
245
246 /**
247  * sdma_v5_2_ring_get_rptr - get the current read pointer
248  *
249  * @ring: amdgpu ring pointer
250  *
251  * Get the current rptr from the hardware (NAVI10+).
252  */
253 static uint64_t sdma_v5_2_ring_get_rptr(struct amdgpu_ring *ring)
254 {
255         u64 *rptr;
256
257         /* XXX check if swapping is necessary on BE */
258         rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]);
259
260         DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
261         return ((*rptr) >> 2);
262 }
263
264 /**
265  * sdma_v5_2_ring_get_wptr - get the current write pointer
266  *
267  * @ring: amdgpu ring pointer
268  *
269  * Get the current wptr from the hardware (NAVI10+).
270  */
271 static uint64_t sdma_v5_2_ring_get_wptr(struct amdgpu_ring *ring)
272 {
273         struct amdgpu_device *adev = ring->adev;
274         u64 wptr;
275
276         if (ring->use_doorbell) {
277                 /* XXX check if swapping is necessary on BE */
278                 wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
279                 DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
280         } else {
281                 wptr = RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI));
282                 wptr = wptr << 32;
283                 wptr |= RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR));
284                 DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n", ring->me, wptr);
285         }
286
287         return wptr >> 2;
288 }
289
290 /**
291  * sdma_v5_2_ring_set_wptr - commit the write pointer
292  *
293  * @ring: amdgpu ring pointer
294  *
295  * Write the wptr back to the hardware (NAVI10+).
296  */
297 static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring)
298 {
299         struct amdgpu_device *adev = ring->adev;
300
301         DRM_DEBUG("Setting write pointer\n");
302         if (ring->use_doorbell) {
303                 DRM_DEBUG("Using doorbell -- "
304                                 "wptr_offs == 0x%08x "
305                                 "lower_32_bits(ring->wptr) << 2 == 0x%08x "
306                                 "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
307                                 ring->wptr_offs,
308                                 lower_32_bits(ring->wptr << 2),
309                                 upper_32_bits(ring->wptr << 2));
310                 /* XXX check if swapping is necessary on BE */
311                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr << 2);
312                 adev->wb.wb[ring->wptr_offs + 1] = upper_32_bits(ring->wptr << 2);
313                 DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
314                                 ring->doorbell_index, ring->wptr << 2);
315                 WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
316         } else {
317                 DRM_DEBUG("Not using doorbell -- "
318                                 "mmSDMA%i_GFX_RB_WPTR == 0x%08x "
319                                 "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
320                                 ring->me,
321                                 lower_32_bits(ring->wptr << 2),
322                                 ring->me,
323                                 upper_32_bits(ring->wptr << 2));
324                 WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR),
325                         lower_32_bits(ring->wptr << 2));
326                 WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI),
327                         upper_32_bits(ring->wptr << 2));
328         }
329 }
330
331 static void sdma_v5_2_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
332 {
333         struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
334         int i;
335
336         for (i = 0; i < count; i++)
337                 if (sdma && sdma->burst_nop && (i == 0))
338                         amdgpu_ring_write(ring, ring->funcs->nop |
339                                 SDMA_PKT_NOP_HEADER_COUNT(count - 1));
340                 else
341                         amdgpu_ring_write(ring, ring->funcs->nop);
342 }
343
344 /**
345  * sdma_v5_2_ring_emit_ib - Schedule an IB on the DMA engine
346  *
 347  * @ring: amdgpu ring pointer
      * @job: job to retrieve vmid from
 348  * @ib: IB object to schedule
      * @flags: unused in this function
349  *
350  * Schedule an IB in the DMA ring.
351  */
352 static void sdma_v5_2_ring_emit_ib(struct amdgpu_ring *ring,
353                                    struct amdgpu_job *job,
354                                    struct amdgpu_ib *ib,
355                                    uint32_t flags)
356 {
357         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
358         uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
359
 360         /* An IB packet must end on an 8-DW boundary; the next dword
 361          * must be on an 8-dword boundary. Our IB packet below is 6
 362          * dwords long, so add x NOPs such that, in
 363          * modular arithmetic,
 364          * wptr + 6 + x = 8k, k >= 0, which in C is
 365          * (wptr + 6 + x) % 8 = 0.
 366          * The expression below is a solution for x.
 367          */
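             /* e.g. if wptr % 8 == 5, then x = (2 - 5) & 7 = 5, and
              * 5 + 6 + 5 = 16, a multiple of 8
              */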
368         sdma_v5_2_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
369
370         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
371                           SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
372         /* base must be 32 byte aligned */
373         amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
374         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
375         amdgpu_ring_write(ring, ib->length_dw);
376         amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
377         amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
378 }
379
380 /**
381  * sdma_v5_2_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
382  *
383  * @ring: amdgpu ring pointer
384  *
385  * Emit an hdp flush packet on the requested DMA ring.
386  */
387 static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
388 {
389         struct amdgpu_device *adev = ring->adev;
390         u32 ref_and_mask = 0;
391         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
392
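             /* each SDMA instance has its own request/done bit in the HDP
              * flush registers; the shift below relies on those per-instance
              * bits being laid out consecutively
              */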
393         ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
394
395         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
396                           SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
397                           SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
398         amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
399         amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
400         amdgpu_ring_write(ring, ref_and_mask); /* reference */
401         amdgpu_ring_write(ring, ref_and_mask); /* mask */
402         amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
403                           SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
404 }
405
406 /**
407  * sdma_v5_2_ring_emit_fence - emit a fence on the DMA ring
408  *
409  * @ring: amdgpu ring pointer
 410  * @addr: GPU address where the fence sequence number is written
      * @seq: fence sequence number to write
      * @flags: fence flags (AMDGPU_FENCE_FLAG_64BIT, AMDGPU_FENCE_FLAG_INT)
411  *
412  * Add a DMA fence packet to the ring to write
413  * the fence seq number and DMA trap packet to generate
414  * an interrupt if needed.
415  */
416 static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
417                                       unsigned flags)
418 {
419         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
420         /* write the fence */
421         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) |
422                           SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */
423         /* zero in first two bits */
424         BUG_ON(addr & 0x3);
425         amdgpu_ring_write(ring, lower_32_bits(addr));
426         amdgpu_ring_write(ring, upper_32_bits(addr));
427         amdgpu_ring_write(ring, lower_32_bits(seq));
428
429         /* optionally write high bits as well */
430         if (write64bit) {
431                 addr += 4;
432                 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) |
433                                   SDMA_PKT_FENCE_HEADER_MTYPE(0x3));
434                 /* zero in first two bits */
435                 BUG_ON(addr & 0x3);
436                 amdgpu_ring_write(ring, lower_32_bits(addr));
437                 amdgpu_ring_write(ring, upper_32_bits(addr));
438                 amdgpu_ring_write(ring, upper_32_bits(seq));
439         }
440
441         if (flags & AMDGPU_FENCE_FLAG_INT) {
442                 /* generate an interrupt */
443                 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
444                 amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
445         }
446 }
447
448
449 /**
450  * sdma_v5_2_gfx_stop - stop the gfx async dma engines
451  *
452  * @adev: amdgpu_device pointer
453  *
454  * Stop the gfx async dma ring buffers.
455  */
456 static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev)
457 {
458         struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
459         struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
460         struct amdgpu_ring *sdma2 = &adev->sdma.instance[2].ring;
461         struct amdgpu_ring *sdma3 = &adev->sdma.instance[3].ring;
462         u32 rb_cntl, ib_cntl;
463         int i;
464
465         if ((adev->mman.buffer_funcs_ring == sdma0) ||
466             (adev->mman.buffer_funcs_ring == sdma1) ||
467             (adev->mman.buffer_funcs_ring == sdma2) ||
468             (adev->mman.buffer_funcs_ring == sdma3))
469                 amdgpu_ttm_set_buffer_funcs_status(adev, false);
470
471         for (i = 0; i < adev->sdma.num_instances; i++) {
472                 rb_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
473                 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
474                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
475                 ib_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
476                 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
477                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
478         }
479
480         sdma0->sched.ready = false;
481         sdma1->sched.ready = false;
482         sdma2->sched.ready = false;
483         sdma3->sched.ready = false;
484 }
485
486 /**
487  * sdma_v5_2_rlc_stop - stop the compute async dma engines
488  *
489  * @adev: amdgpu_device pointer
490  *
491  * Stop the compute async dma queues.
492  */
493 static void sdma_v5_2_rlc_stop(struct amdgpu_device *adev)
494 {
495         /* XXX todo */
496 }
497
498 /**
 499  * sdma_v5_2_ctx_switch_enable - enable/disable the async dma engines context switch
500  *
501  * @adev: amdgpu_device pointer
502  * @enable: enable/disable the DMA MEs context switch.
503  *
504  * Halt or unhalt the async dma engines context switch.
505  */
506 static void sdma_v5_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
507 {
508         u32 f32_cntl, phase_quantum = 0;
509         int i;
510
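             /*
              * The loop below splits the requested quantum into a value/unit
              * pair (quantum ~= value << unit) so it fits the PHASE*_QUANTUM
              * register fields. As a rough sketch, assuming an 8-bit VALUE
              * field, a requested quantum of 1000 would become value = 250,
              * unit = 2, i.e. 250 << 2 = 1000.
              */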
511         if (amdgpu_sdma_phase_quantum) {
512                 unsigned value = amdgpu_sdma_phase_quantum;
513                 unsigned unit = 0;
514
515                 while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
516                                 SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
517                         value = (value + 1) >> 1;
518                         unit++;
519                 }
520                 if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
521                             SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
522                         value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
523                                  SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
524                         unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
525                                 SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
526                         WARN_ONCE(1,
527                         "clamping sdma_phase_quantum to %uK clock cycles\n",
528                                   value << unit);
529                 }
530                 phase_quantum =
531                         value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
532                         unit  << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
533         }
534
535         for (i = 0; i < adev->sdma.num_instances; i++) {
536                 f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
537                 f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
538                                 AUTO_CTXSW_ENABLE, enable ? 1 : 0);
539                 if (enable && amdgpu_sdma_phase_quantum) {
540                         WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
541                                phase_quantum);
542                         WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM),
543                                phase_quantum);
544                         WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
545                                phase_quantum);
546                 }
547                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
548         }
549
550 }
551
552 /**
 553  * sdma_v5_2_enable - enable/disable the async dma engines
554  *
555  * @adev: amdgpu_device pointer
556  * @enable: enable/disable the DMA MEs.
557  *
558  * Halt or unhalt the async dma engines.
559  */
560 static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable)
561 {
562         u32 f32_cntl;
563         int i;
564
565         if (!enable) {
566                 sdma_v5_2_gfx_stop(adev);
567                 sdma_v5_2_rlc_stop(adev);
568         }
569
570         for (i = 0; i < adev->sdma.num_instances; i++) {
571                 f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
572                 f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
573                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
574         }
575 }
576
577 /**
578  * sdma_v5_2_gfx_resume - setup and start the async dma engines
579  *
580  * @adev: amdgpu_device pointer
581  *
582  * Set up the gfx DMA ring buffers and enable them.
583  * Returns 0 for success, error for failure.
584  */
585 static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
586 {
587         struct amdgpu_ring *ring;
588         u32 rb_cntl, ib_cntl;
589         u32 rb_bufsz;
590         u32 wb_offset;
591         u32 doorbell;
592         u32 doorbell_offset;
593         u32 temp;
594         u32 wptr_poll_cntl;
595         u64 wptr_gpu_addr;
596         int i, r;
597
598         for (i = 0; i < adev->sdma.num_instances; i++) {
599                 ring = &adev->sdma.instance[i].ring;
600                 wb_offset = (ring->rptr_offs * 4);
601
602                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
603
604                 /* Set ring buffer size in dwords */
605                 rb_bufsz = order_base_2(ring->ring_size / 4);
606                 rb_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
607                 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
608 #ifdef __BIG_ENDIAN
609                 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
610                 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
611                                         RPTR_WRITEBACK_SWAP_ENABLE, 1);
612 #endif
613                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
614
615                 /* Initialize the ring buffer's read and write pointers */
616                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
617                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
618                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
619                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
620
621                 /* setup the wptr shadow polling */
622                 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
623                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
624                        lower_32_bits(wptr_gpu_addr));
625                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
626                        upper_32_bits(wptr_gpu_addr));
627                 wptr_poll_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i,
628                                                          mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
629                 wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
630                                                SDMA0_GFX_RB_WPTR_POLL_CNTL,
631                                                F32_POLL_ENABLE, 1);
632                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL),
633                        wptr_poll_cntl);
634
635                 /* set the wb address whether it's enabled or not */
636                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
637                        upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
638                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
639                        lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
640
641                 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
642
643                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
644                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
645
646                 ring->wptr = 0;
647
 648                 /* before programming wptr to a smaller value, minor_ptr_update must be set first */
649                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
650
 651                 if (!amdgpu_sriov_vf(adev)) { /* only bare-metal uses register writes for wptr */
652                         WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
653                         WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
654                 }
655
656                 doorbell = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
657                 doorbell_offset = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
658
659                 if (ring->use_doorbell) {
660                         doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
661                         doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
662                                         OFFSET, ring->doorbell_index);
663                 } else {
664                         doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
665                 }
666                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
667                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
668
669                 adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
670                                                       ring->doorbell_index,
671                                                       adev->doorbell_index.sdma_doorbell_range);
672
673                 if (amdgpu_sriov_vf(adev))
674                         sdma_v5_2_ring_set_wptr(ring);
675
 676                 /* set minor_ptr_update to 0 after wptr is programmed */
677                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
678
679                 /* set utc l1 enable flag always to 1 */
680                 temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
681                 temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
682
683                 /* enable MCBP */
684                 temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1);
685                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
686
687                 /* Set up RESP_MODE to non-copy addresses */
688                 temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
689                 temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
690                 temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
691                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);
692
693                 /* program default cache read and write policy */
694                 temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
695                 /* clean read policy and write policy bits */
696                 temp &= 0xFF0FFF;
697                 temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
698                          (CACHE_WRITE_POLICY_L2__DEFAULT << 14) |
699                          0x01000000);
700                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
701
702                 if (!amdgpu_sriov_vf(adev)) {
703                         /* unhalt engine */
704                         temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
705                         temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
706                         WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
707                 }
708
709                 /* enable DMA RB */
710                 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
711                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
712
713                 ib_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
714                 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
715 #ifdef __BIG_ENDIAN
716                 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
717 #endif
718                 /* enable DMA IBs */
719                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
720
721                 ring->sched.ready = true;
722
 723                 if (amdgpu_sriov_vf(adev)) { /* the bare-metal sequence doesn't need the two lines below */
724                         sdma_v5_2_ctx_switch_enable(adev, true);
725                         sdma_v5_2_enable(adev, true);
726                 }
727
728                 r = amdgpu_ring_test_ring(ring);
729                 if (r) {
730                         ring->sched.ready = false;
731                         return r;
732                 }
733
734                 if (adev->mman.buffer_funcs_ring == ring)
735                         amdgpu_ttm_set_buffer_funcs_status(adev, true);
736         }
737
738         return 0;
739 }
740
741 /**
742  * sdma_v5_2_rlc_resume - setup and start the async dma engines
743  *
744  * @adev: amdgpu_device pointer
745  *
746  * Set up the compute DMA queues and enable them.
747  * Returns 0 for success, error for failure.
748  */
749 static int sdma_v5_2_rlc_resume(struct amdgpu_device *adev)
750 {
751         return 0;
752 }
753
754 /**
755  * sdma_v5_2_load_microcode - load the sDMA ME ucode
756  *
757  * @adev: amdgpu_device pointer
758  *
759  * Loads the sDMA0/1/2/3 ucode.
760  * Returns 0 for success, -EINVAL if the ucode is not available.
761  */
762 static int sdma_v5_2_load_microcode(struct amdgpu_device *adev)
763 {
764         const struct sdma_firmware_header_v1_0 *hdr;
765         const __le32 *fw_data;
766         u32 fw_size;
767         int i, j;
768
769         /* halt the MEs */
770         sdma_v5_2_enable(adev, false);
771
772         for (i = 0; i < adev->sdma.num_instances; i++) {
773                 if (!adev->sdma.instance[i].fw)
774                         return -EINVAL;
775
776                 hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
777                 amdgpu_ucode_print_sdma_hdr(&hdr->header);
778                 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
779
780                 fw_data = (const __le32 *)
781                         (adev->sdma.instance[i].fw->data +
782                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
783
784                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0);
785
786                 for (j = 0; j < fw_size; j++) {
787                         if (amdgpu_emu_mode == 1 && j % 500 == 0)
788                                 msleep(1);
789                         WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
790                 }
791
792                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version);
793         }
794
795         return 0;
796 }
797
798 /**
799  * sdma_v5_2_start - setup and start the async dma engines
800  *
801  * @adev: amdgpu_device pointer
802  *
803  * Set up the DMA engines and enable them.
804  * Returns 0 for success, error for failure.
805  */
806 static int sdma_v5_2_start(struct amdgpu_device *adev)
807 {
808         int r = 0;
809
810         if (amdgpu_sriov_vf(adev)) {
811                 sdma_v5_2_ctx_switch_enable(adev, false);
812                 sdma_v5_2_enable(adev, false);
813
814                 /* set RB registers */
815                 r = sdma_v5_2_gfx_resume(adev);
816                 return r;
817         }
818
819         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
820                 r = sdma_v5_2_load_microcode(adev);
821                 if (r)
822                         return r;
823
824                 /* The value of mmSDMA_F32_CNTL is invalid the moment after loading fw */
825                 if (amdgpu_emu_mode == 1)
826                         msleep(1000);
827         }
828
829         /* unhalt the MEs */
830         sdma_v5_2_enable(adev, true);
831         /* enable sdma ring preemption */
832         sdma_v5_2_ctx_switch_enable(adev, true);
833
834         /* start the gfx rings and rlc compute queues */
835         r = sdma_v5_2_gfx_resume(adev);
836         if (r)
837                 return r;
838         r = sdma_v5_2_rlc_resume(adev);
839
840         return r;
841 }
842
843 /**
844  * sdma_v5_2_ring_test_ring - simple async dma engine test
845  *
846  * @ring: amdgpu_ring structure holding ring information
847  *
 848  * Test the DMA engine by using it to write a
 849  * value to memory.
850  * Returns 0 for success, error for failure.
851  */
852 static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring)
853 {
854         struct amdgpu_device *adev = ring->adev;
855         unsigned i;
856         unsigned index;
857         int r;
858         u32 tmp;
859         u64 gpu_addr;
860
861         r = amdgpu_device_wb_get(adev, &index);
862         if (r) {
863                 dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
864                 return r;
865         }
866
867         gpu_addr = adev->wb.gpu_addr + (index * 4);
868         tmp = 0xCAFEDEAD;
869         adev->wb.wb[index] = cpu_to_le32(tmp);
870
871         r = amdgpu_ring_alloc(ring, 5);
872         if (r) {
873                 DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
874                 amdgpu_device_wb_free(adev, index);
875                 return r;
876         }
877
878         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
879                           SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
880         amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
881         amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
882         amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
883         amdgpu_ring_write(ring, 0xDEADBEEF);
884         amdgpu_ring_commit(ring);
885
886         for (i = 0; i < adev->usec_timeout; i++) {
887                 tmp = le32_to_cpu(adev->wb.wb[index]);
888                 if (tmp == 0xDEADBEEF)
889                         break;
890                 if (amdgpu_emu_mode == 1)
891                         msleep(1);
892                 else
893                         udelay(1);
894         }
895
896         if (i >= adev->usec_timeout)
897                 r = -ETIMEDOUT;
898
899         amdgpu_device_wb_free(adev, index);
900
901         return r;
902 }
903
904 /**
905  * sdma_v5_2_ring_test_ib - test an IB on the DMA engine
906  *
 907  * @ring: amdgpu_ring structure holding ring information
      * @timeout: timeout to wait for the IB test to complete, in jiffies
908  *
909  * Test a simple IB in the DMA ring.
910  * Returns 0 on success, error on failure.
911  */
912 static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
913 {
914         struct amdgpu_device *adev = ring->adev;
915         struct amdgpu_ib ib;
916         struct dma_fence *f = NULL;
917         unsigned index;
918         long r;
919         u32 tmp = 0;
920         u64 gpu_addr;
921
922         r = amdgpu_device_wb_get(adev, &index);
923         if (r) {
924                 dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
925                 return r;
926         }
927
928         gpu_addr = adev->wb.gpu_addr + (index * 4);
929         tmp = 0xCAFEDEAD;
930         adev->wb.wb[index] = cpu_to_le32(tmp);
931         memset(&ib, 0, sizeof(ib));
932         r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
933         if (r) {
934                 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
935                 goto err0;
936         }
937
938         ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
939                 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
940         ib.ptr[1] = lower_32_bits(gpu_addr);
941         ib.ptr[2] = upper_32_bits(gpu_addr);
942         ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
943         ib.ptr[4] = 0xDEADBEEF;
944         ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
945         ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
946         ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
947         ib.length_dw = 8;
948
949         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
950         if (r)
951                 goto err1;
952
953         r = dma_fence_wait_timeout(f, false, timeout);
954         if (r == 0) {
955                 DRM_ERROR("amdgpu: IB test timed out\n");
956                 r = -ETIMEDOUT;
957                 goto err1;
958         } else if (r < 0) {
959                 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
960                 goto err1;
961         }
962         tmp = le32_to_cpu(adev->wb.wb[index]);
963         if (tmp == 0xDEADBEEF)
964                 r = 0;
965         else
966                 r = -EINVAL;
967
968 err1:
969         amdgpu_ib_free(adev, &ib, NULL);
970         dma_fence_put(f);
971 err0:
972         amdgpu_device_wb_free(adev, index);
973         return r;
974 }
975
976
977 /**
978  * sdma_v5_2_vm_copy_pte - update PTEs by copying them from the GART
979  *
980  * @ib: indirect buffer to fill with commands
981  * @pe: addr of the page entry
982  * @src: src addr to copy from
983  * @count: number of page entries to update
984  *
985  * Update PTEs by copying them from the GART using sDMA.
986  */
987 static void sdma_v5_2_vm_copy_pte(struct amdgpu_ib *ib,
988                                   uint64_t pe, uint64_t src,
989                                   unsigned count)
990 {
991         unsigned bytes = count * 8;
992
993         ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
994                 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
995         ib->ptr[ib->length_dw++] = bytes - 1;
996         ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
997         ib->ptr[ib->length_dw++] = lower_32_bits(src);
998         ib->ptr[ib->length_dw++] = upper_32_bits(src);
999         ib->ptr[ib->length_dw++] = lower_32_bits(pe);
1000         ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1001
1002 }
1003
1004 /**
1005  * sdma_v5_2_vm_write_pte - update PTEs by writing them manually
1006  *
1007  * @ib: indirect buffer to fill with commands
1008  * @pe: addr of the page entry
 1009  * @value: value to write into the page table entries
 1010  * @count: number of page entries to update
 1011  * @incr: increase next addr by incr bytes
1013  *
1014  * Update PTEs by writing them manually using sDMA.
1015  */
1016 static void sdma_v5_2_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
1017                                    uint64_t value, unsigned count,
1018                                    uint32_t incr)
1019 {
1020         unsigned ndw = count * 2;
1021
1022         ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
1023                 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
1024         ib->ptr[ib->length_dw++] = lower_32_bits(pe);
1025         ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1026         ib->ptr[ib->length_dw++] = ndw - 1;
1027         for (; ndw > 0; ndw -= 2) {
1028                 ib->ptr[ib->length_dw++] = lower_32_bits(value);
1029                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
1030                 value += incr;
1031         }
1032 }
1033
1034 /**
1035  * sdma_v5_2_vm_set_pte_pde - update the page tables using sDMA
1036  *
1037  * @ib: indirect buffer to fill with commands
1038  * @pe: addr of the page entry
1039  * @addr: dst addr to write into pe
1040  * @count: number of page entries to update
1041  * @incr: increase next addr by incr bytes
1042  * @flags: access flags
1043  *
1044  * Update the page tables using sDMA.
1045  */
1046 static void sdma_v5_2_vm_set_pte_pde(struct amdgpu_ib *ib,
1047                                      uint64_t pe,
1048                                      uint64_t addr, unsigned count,
1049                                      uint32_t incr, uint64_t flags)
1050 {
1051         /* for physically contiguous pages (vram) */
1052         ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_PTEPDE);
1053         ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
1054         ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1055         ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
1056         ib->ptr[ib->length_dw++] = upper_32_bits(flags);
1057         ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
1058         ib->ptr[ib->length_dw++] = upper_32_bits(addr);
1059         ib->ptr[ib->length_dw++] = incr; /* increment size */
1060         ib->ptr[ib->length_dw++] = 0;
1061         ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
1062 }
1063
1064 /**
1065  * sdma_v5_2_ring_pad_ib - pad the IB
1066  *
 1067  * @ring: amdgpu_ring structure holding ring information
       * @ib: indirect buffer to fill with padding
1068  *
1069  * Pad the IB with NOPs to a boundary multiple of 8.
1070  */
1071 static void sdma_v5_2_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
1072 {
1073         struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
1074         u32 pad_count;
1075         int i;
1076
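              /* pad to the next multiple of 8 dwords: e.g. length_dw == 13
               * gives pad_count == (-13) & 7 == 3, for a 16-dword IB
               */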
1077         pad_count = (-ib->length_dw) & 0x7;
1078         for (i = 0; i < pad_count; i++)
1079                 if (sdma && sdma->burst_nop && (i == 0))
1080                         ib->ptr[ib->length_dw++] =
1081                                 SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
1082                                 SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
1083                 else
1084                         ib->ptr[ib->length_dw++] =
1085                                 SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
1086 }
1087
1088
1089 /**
1090  * sdma_v5_2_ring_emit_pipeline_sync - sync the pipeline
1091  *
1092  * @ring: amdgpu_ring pointer
1093  *
 1094  * Make sure all previous operations are completed.
1095  */
1096 static void sdma_v5_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
1097 {
1098         uint32_t seq = ring->fence_drv.sync_seq;
1099         uint64_t addr = ring->fence_drv.gpu_addr;
1100
1101         /* wait for idle */
1102         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
1103                           SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
1104                           SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
1105                           SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
1106         amdgpu_ring_write(ring, addr & 0xfffffffc);
1107         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
1108         amdgpu_ring_write(ring, seq); /* reference */
1109         amdgpu_ring_write(ring, 0xffffffff); /* mask */
1110         amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
1111                           SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
1112 }
1113
1114
1115 /**
1116  * sdma_v5_2_ring_emit_vm_flush - vm flush using sDMA
1117  *
1118  * @ring: amdgpu_ring pointer
 1119  * @vmid: vmid number to use
       * @pd_addr: address of the page directory
1120  *
1121  * Update the page table base and flush the VM TLB
1122  * using sDMA.
1123  */
1124 static void sdma_v5_2_ring_emit_vm_flush(struct amdgpu_ring *ring,
1125                                          unsigned vmid, uint64_t pd_addr)
1126 {
1127         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
1128 }
1129
1130 static void sdma_v5_2_ring_emit_wreg(struct amdgpu_ring *ring,
1131                                      uint32_t reg, uint32_t val)
1132 {
1133         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
1134                           SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
1135         amdgpu_ring_write(ring, reg);
1136         amdgpu_ring_write(ring, val);
1137 }
1138
1139 static void sdma_v5_2_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
1140                                          uint32_t val, uint32_t mask)
1141 {
1142         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
1143                           SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
1144                           SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
1145         amdgpu_ring_write(ring, reg << 2);
1146         amdgpu_ring_write(ring, 0);
1147         amdgpu_ring_write(ring, val); /* reference */
1148         amdgpu_ring_write(ring, mask); /* mask */
1149         amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
1150                           SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
1151 }
1152
1153 static void sdma_v5_2_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
1154                                                    uint32_t reg0, uint32_t reg1,
1155                                                    uint32_t ref, uint32_t mask)
1156 {
1157         amdgpu_ring_emit_wreg(ring, reg0, ref);
1158         /* wait for a cycle to reset vm_inv_eng*_ack */
1159         amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
1160         amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
1161 }
1162
1163 static int sdma_v5_2_early_init(void *handle)
1164 {
1165         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1166
1167         switch (adev->asic_type) {
1168         case CHIP_SIENNA_CICHLID:
1169                 adev->sdma.num_instances = 4;
1170                 break;
1171         case CHIP_NAVY_FLOUNDER:
1172                 adev->sdma.num_instances = 2;
1173                 break;
1174         default:
1175                 break;
1176         }
1177
1178         sdma_v5_2_set_ring_funcs(adev);
1179         sdma_v5_2_set_buffer_funcs(adev);
1180         sdma_v5_2_set_vm_pte_funcs(adev);
1181         sdma_v5_2_set_irq_funcs(adev);
1182
1183         return 0;
1184 }
1185
1186 static unsigned sdma_v5_2_seq_to_irq_id(int seq_num)
1187 {
1188         switch (seq_num) {
1189         case 0:
1190                 return SOC15_IH_CLIENTID_SDMA0;
1191         case 1:
1192                 return SOC15_IH_CLIENTID_SDMA1;
1193         case 2:
1194                 return SOC15_IH_CLIENTID_SDMA2;
1195         case 3:
1196                 return SOC15_IH_CLIENTID_SDMA3_Sienna_Cichlid;
1197         default:
1198                 break;
1199         }
1200         return -EINVAL;
1201 }
1202
1203 static unsigned sdma_v5_2_seq_to_trap_id(int seq_num)
1204 {
1205         switch (seq_num) {
1206         case 0:
1207                 return SDMA0_5_0__SRCID__SDMA_TRAP;
1208         case 1:
1209                 return SDMA1_5_0__SRCID__SDMA_TRAP;
1210         case 2:
1211                 return SDMA2_5_0__SRCID__SDMA_TRAP;
1212         case 3:
1213                 return SDMA3_5_0__SRCID__SDMA_TRAP;
1214         default:
1215                 break;
1216         }
1217         return -EINVAL;
1218 }
1219
1220 static int sdma_v5_2_sw_init(void *handle)
1221 {
1222         struct amdgpu_ring *ring;
1223         int r, i;
1224         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1225
1226         /* SDMA trap event */
1227         for (i = 0; i < adev->sdma.num_instances; i++) {
1228                 r = amdgpu_irq_add_id(adev, sdma_v5_2_seq_to_irq_id(i),
1229                                       sdma_v5_2_seq_to_trap_id(i),
1230                                       &adev->sdma.trap_irq);
1231                 if (r)
1232                         return r;
1233         }
1234
1235         r = sdma_v5_2_init_microcode(adev);
1236         if (r) {
1237                 DRM_ERROR("Failed to load sdma firmware!\n");
1238                 return r;
1239         }
1240
1241         for (i = 0; i < adev->sdma.num_instances; i++) {
1242                 ring = &adev->sdma.instance[i].ring;
1243                 ring->ring_obj = NULL;
1244                 ring->use_doorbell = true;
1245                 ring->me = i;
1246
1247                 DRM_INFO("use_doorbell being set to: [%s]\n",
1248                                 ring->use_doorbell?"true":"false");
1249
1250                 ring->doorbell_index =
1251                         (adev->doorbell_index.sdma_engine[i] << 1); //get DWORD offset
1252
1253                 sprintf(ring->name, "sdma%d", i);
1254                 r = amdgpu_ring_init(adev, ring, 1024,
1255                                      &adev->sdma.trap_irq,
1256                                      AMDGPU_SDMA_IRQ_INSTANCE0 + i,
1257                                      AMDGPU_RING_PRIO_DEFAULT);
1258                 if (r)
1259                         return r;
1260         }
1261
1262         return r;
1263 }
1264
1265 static int sdma_v5_2_sw_fini(void *handle)
1266 {
1267         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1268         int i;
1269
1270         for (i = 0; i < adev->sdma.num_instances; i++)
1271                 amdgpu_ring_fini(&adev->sdma.instance[i].ring);
1272
1273         sdma_v5_2_destroy_inst_ctx(adev);
1274
1275         return 0;
1276 }
1277
1278 static int sdma_v5_2_hw_init(void *handle)
1279 {
1280         int r;
1281         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1282
1283         sdma_v5_2_init_golden_registers(adev);
1284
1285         r = sdma_v5_2_start(adev);
1286
1287         return r;
1288 }
1289
1290 static int sdma_v5_2_hw_fini(void *handle)
1291 {
1292         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1293
1294         if (amdgpu_sriov_vf(adev))
1295                 return 0;
1296
1297         sdma_v5_2_ctx_switch_enable(adev, false);
1298         sdma_v5_2_enable(adev, false);
1299
1300         return 0;
1301 }
1302
1303 static int sdma_v5_2_suspend(void *handle)
1304 {
1305         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1306
1307         return sdma_v5_2_hw_fini(adev);
1308 }
1309
1310 static int sdma_v5_2_resume(void *handle)
1311 {
1312         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1313
1314         return sdma_v5_2_hw_init(adev);
1315 }
1316
1317 static bool sdma_v5_2_is_idle(void *handle)
1318 {
1319         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1320         u32 i;
1321
1322         for (i = 0; i < adev->sdma.num_instances; i++) {
1323                 u32 tmp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_STATUS_REG));
1324
1325                 if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
1326                         return false;
1327         }
1328
1329         return true;
1330 }
1331
1332 static int sdma_v5_2_wait_for_idle(void *handle)
1333 {
1334         unsigned i;
1335         u32 sdma0, sdma1, sdma2, sdma3;
1336         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1337
1338         for (i = 0; i < adev->usec_timeout; i++) {
1339                 sdma0 = RREG32(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG));
1340                 sdma1 = RREG32(sdma_v5_2_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG));
1341                 sdma2 = RREG32(sdma_v5_2_get_reg_offset(adev, 2, mmSDMA0_STATUS_REG));
1342                 sdma3 = RREG32(sdma_v5_2_get_reg_offset(adev, 3, mmSDMA0_STATUS_REG));
1343
1344                 if (sdma0 & sdma1 & sdma2 & sdma3 & SDMA0_STATUS_REG__IDLE_MASK)
1345                         return 0;
1346                 udelay(1);
1347         }
1348         return -ETIMEDOUT;
1349 }
1350
1351 static int sdma_v5_2_soft_reset(void *handle)
1352 {
1353         /* todo */
1354
1355         return 0;
1356 }
1357
1358 static int sdma_v5_2_ring_preempt_ib(struct amdgpu_ring *ring)
1359 {
1360         int i, r = 0;
1361         struct amdgpu_device *adev = ring->adev;
1362         u32 index = 0;
1363         u64 sdma_gfx_preempt;
1364
1365         amdgpu_sdma_get_index_from_ring(ring, &index);
1366         sdma_gfx_preempt =
1367                 sdma_v5_2_get_reg_offset(adev, index, mmSDMA0_GFX_PREEMPT);
1368
1369         /* assert preemption condition */
1370         amdgpu_ring_set_preempt_cond_exec(ring, false);
1371
1372         /* emit the trailing fence */
1373         ring->trail_seq += 1;
1374         amdgpu_ring_alloc(ring, 10);
1375         sdma_v5_2_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
1376                                   ring->trail_seq, 0);
1377         amdgpu_ring_commit(ring);
1378
1379         /* assert IB preemption */
1380         WREG32(sdma_gfx_preempt, 1);
1381
1382         /* poll the trailing fence */
1383         for (i = 0; i < adev->usec_timeout; i++) {
1384                 if (ring->trail_seq ==
1385                     le32_to_cpu(*(ring->trail_fence_cpu_addr)))
1386                         break;
1387                 udelay(1);
1388         }
1389
1390         if (i >= adev->usec_timeout) {
1391                 r = -EINVAL;
1392                 DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
1393         }
1394
1395         /* deassert IB preemption */
1396         WREG32(sdma_gfx_preempt, 0);
1397
1398         /* deassert the preemption condition */
1399         amdgpu_ring_set_preempt_cond_exec(ring, true);
1400         return r;
1401 }
1402
1403 static int sdma_v5_2_set_trap_irq_state(struct amdgpu_device *adev,
1404                                         struct amdgpu_irq_src *source,
1405                                         unsigned type,
1406                                         enum amdgpu_interrupt_state state)
1407 {
1408         u32 sdma_cntl;
1409
1410         u32 reg_offset = sdma_v5_2_get_reg_offset(adev, type, mmSDMA0_CNTL);
1411
1412         sdma_cntl = RREG32(reg_offset);
1413         sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
1414                        state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
1415         WREG32(reg_offset, sdma_cntl);
1416
1417         return 0;
1418 }
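/*
 * Illustrative sketch (not part of the driver): REG_SET_FIELD() is a
 * mask-and-shift read-modify-write. With hypothetical values for the
 * TRAP_ENABLE mask and shift, the update above expands to roughly:
 */
#include <stdint.h>

#define TRAP_ENABLE_MASK   0x00000001u   /* hypothetical field mask */
#define TRAP_ENABLE_SHIFT  0u            /* hypothetical field shift */

uint32_t set_trap_enable(uint32_t sdma_cntl, int enable)
{
        sdma_cntl &= ~TRAP_ENABLE_MASK;                         /* clear the field */
        sdma_cntl |= ((uint32_t)!!enable << TRAP_ENABLE_SHIFT)  /* insert the new value */
                     & TRAP_ENABLE_MASK;
        return sdma_cntl;                                       /* written back via WREG32 */
}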
1419
1420 static int sdma_v5_2_process_trap_irq(struct amdgpu_device *adev,
1421                                       struct amdgpu_irq_src *source,
1422                                       struct amdgpu_iv_entry *entry)
1423 {
1424         DRM_DEBUG("IH: SDMA trap\n");
1425         switch (entry->client_id) {
1426         case SOC15_IH_CLIENTID_SDMA0:
1427                 switch (entry->ring_id) {
1428                 case 0:
1429                         amdgpu_fence_process(&adev->sdma.instance[0].ring);
1430                         break;
1431                 case 1:
1432                         /* XXX compute */
1433                         break;
1434                 case 2:
1435                         /* XXX compute */
1436                         break;
1437                 case 3:
1438                         /* XXX page queue */
1439                         break;
1440                 }
1441                 break;
1442         case SOC15_IH_CLIENTID_SDMA1:
1443                 switch (entry->ring_id) {
1444                 case 0:
1445                         amdgpu_fence_process(&adev->sdma.instance[1].ring);
1446                         break;
1447                 case 1:
1448                         /* XXX compute */
1449                         break;
1450                 case 2:
1451                         /* XXX compute */
1452                         break;
1453                 case 3:
1454                         /* XXX page queue */
1455                         break;
1456                 }
1457                 break;
1458         case SOC15_IH_CLIENTID_SDMA2:
1459                 switch (entry->ring_id) {
1460                 case 0:
1461                         amdgpu_fence_process(&adev->sdma.instance[2].ring);
1462                         break;
1463                 case 1:
1464                         /* XXX compute */
1465                         break;
1466                 case 2:
1467                         /* XXX compute */
1468                         break;
1469                 case 3:
1470                         /* XXX page queue */
1471                         break;
1472                 }
1473                 break;
1474         case SOC15_IH_CLIENTID_SDMA3_Sienna_Cichlid:
1475                 switch (entry->ring_id) {
1476                 case 0:
1477                         amdgpu_fence_process(&adev->sdma.instance[3].ring);
1478                         break;
1479                 case 1:
1480                         /* XXX compute */
1481                         break;
1482                 case 2:
1483                         /* XXX compute */
1484                         break;
1485                 case 3:
1486                         /* XXX page queue */
1487                         break;
1488                 }
1489                 break;
1490         }
1491         return 0;
1492 }
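/*
 * Illustrative sketch (not part of the driver): the four client_id cases
 * above differ only in which SDMA instance's ring gets its fences
 * processed, and only ring_id 0 (the gfx queue) is handled; the compute
 * and page queues are still stubbed out. A standalone model of that
 * dispatch, with hypothetical client_id values and a hypothetical
 * process_fence():
 */
enum sdma_client { CLIENT_SDMA0, CLIENT_SDMA1, CLIENT_SDMA2, CLIENT_SDMA3, CLIENT_MAX };

extern void process_fence(int instance);   /* hypothetical amdgpu_fence_process() stand-in */

void dispatch_sdma_trap(unsigned int client_id, unsigned int ring_id)
{
        /* ring 0 is the gfx queue; rings 1-3 (compute/page) are not handled yet */
        if (client_id < CLIENT_MAX && ring_id == 0)
                process_fence((int)client_id);
}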
1493
1494 static int sdma_v5_2_process_illegal_inst_irq(struct amdgpu_device *adev,
1495                                               struct amdgpu_irq_src *source,
1496                                               struct amdgpu_iv_entry *entry)
1497 {
1498         return 0;
1499 }
1500
1501 static void sdma_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *adev,
1502                                                        bool enable)
1503 {
1504         uint32_t data, def;
1505         int i;
1506
1507         for (i = 0; i < adev->sdma.num_instances; i++) {
1508                 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
1509                         /* Enable sdma clock gating */
1510                         def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL));
1511                         data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
1512                                   SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
1513                                   SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
1514                                   SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
1515                                   SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK |
1516                                   SDMA0_CLK_CTRL__SOFT_OVERRIDER_REG_MASK);
1517                         if (def != data)
1518                                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL), data);
1519                 } else {
1520                         /* Disable sdma clock gating */
1521                         def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL));
1522                         data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
1523                                  SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
1524                                  SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
1525                                  SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
1526                                  SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK |
1527                                  SDMA0_CLK_CTRL__SOFT_OVERRIDER_REG_MASK);
1528                         if (def != data)
1529                                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL), data);
1530                 }
1531         }
1532 }
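/*
 * Illustrative sketch (not part of the driver): both clock-gating helpers
 * follow the same "read, edit a copy, write back only if it changed"
 * pattern, which avoids redundant register writes on already-configured
 * hardware. In isolation, with hypothetical read_reg()/write_reg():
 */
#include <stdint.h>
#include <stdbool.h>

extern uint32_t read_reg(uint32_t offset);              /* hypothetical RREG32 */
extern void write_reg(uint32_t offset, uint32_t val);   /* hypothetical WREG32 */

void update_reg_bits(uint32_t offset, uint32_t mask, bool set)
{
        uint32_t def, data;

        def = data = read_reg(offset);
        if (set)
                data |= mask;    /* e.g. assert the SOFT_OVERRIDE bits (gating disabled) */
        else
                data &= ~mask;   /* e.g. clear them so the clocks may gate */

        if (def != data)         /* skip the write when nothing changed */
                write_reg(offset, data);
}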
1533
1534 static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device *adev,
1535                                                       bool enable)
1536 {
1537         uint32_t data, def;
1538         int i;
1539
1540         for (i = 0; i < adev->sdma.num_instances; i++) {
1541                 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
1542                         /* Enable sdma mem light sleep */
1543                         def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL));
1544                         data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
1545                         if (def != data)
1546                                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL), data);
1547
1548                 } else {
1549                         /* Disable sdma mem light sleep */
1550                         def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL));
1551                         data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
1552                         if (def != data)
1553                                 WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL), data);
1554
1555                 }
1556         }
1557 }
1558
1559 static int sdma_v5_2_set_clockgating_state(void *handle,
1560                                            enum amd_clockgating_state state)
1561 {
1562         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1563
1564         if (amdgpu_sriov_vf(adev))
1565                 return 0;
1566
1567         switch (adev->asic_type) {
1568         case CHIP_SIENNA_CICHLID:
1569         case CHIP_NAVY_FLOUNDER:
1570                 sdma_v5_2_update_medium_grain_clock_gating(adev,
1571                                 state == AMD_CG_STATE_GATE);
1572                 sdma_v5_2_update_medium_grain_light_sleep(adev,
1573                                 state == AMD_CG_STATE_GATE);
1574                 break;
1575         default:
1576                 break;
1577         }
1578
1579         return 0;
1580 }
1581
1582 static int sdma_v5_2_set_powergating_state(void *handle,
1583                                           enum amd_powergating_state state)
1584 {
1585         return 0;
1586 }
1587
1588 static void sdma_v5_2_get_clockgating_state(void *handle, u32 *flags)
1589 {
1590         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1591         int data;
1592
1593         if (amdgpu_sriov_vf(adev))
1594                 *flags = 0;
1595
1596         /* AMD_CG_SUPPORT_SDMA_LS */
1597         data = RREG32_KIQ(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_POWER_CNTL));
1598         if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
1599                 *flags |= AMD_CG_SUPPORT_SDMA_LS;
1600 }
1601
1602 const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
1603         .name = "sdma_v5_2",
1604         .early_init = sdma_v5_2_early_init,
1605         .late_init = NULL,
1606         .sw_init = sdma_v5_2_sw_init,
1607         .sw_fini = sdma_v5_2_sw_fini,
1608         .hw_init = sdma_v5_2_hw_init,
1609         .hw_fini = sdma_v5_2_hw_fini,
1610         .suspend = sdma_v5_2_suspend,
1611         .resume = sdma_v5_2_resume,
1612         .is_idle = sdma_v5_2_is_idle,
1613         .wait_for_idle = sdma_v5_2_wait_for_idle,
1614         .soft_reset = sdma_v5_2_soft_reset,
1615         .set_clockgating_state = sdma_v5_2_set_clockgating_state,
1616         .set_powergating_state = sdma_v5_2_set_powergating_state,
1617         .get_clockgating_state = sdma_v5_2_get_clockgating_state,
1618 };
1619
1620 static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
1621         .type = AMDGPU_RING_TYPE_SDMA,
1622         .align_mask = 0xf,
1623         .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
1624         .support_64bit_ptrs = true,
1625         .vmhub = AMDGPU_GFXHUB_0,
1626         .get_rptr = sdma_v5_2_ring_get_rptr,
1627         .get_wptr = sdma_v5_2_ring_get_wptr,
1628         .set_wptr = sdma_v5_2_ring_set_wptr,
1629         .emit_frame_size =
1630                 5 + /* sdma_v5_2_ring_init_cond_exec */
1631                 6 + /* sdma_v5_2_ring_emit_hdp_flush */
1632                 3 + /* hdp_invalidate */
1633                 6 + /* sdma_v5_2_ring_emit_pipeline_sync */
1634                 /* sdma_v5_2_ring_emit_vm_flush */
1635                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1636                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
1637                 10 + 10 + 10, /* sdma_v5_2_ring_emit_fence x3 for user fence, vm fence */
1638         .emit_ib_size = 7 + 6, /* sdma_v5_2_ring_emit_ib */
1639         .emit_ib = sdma_v5_2_ring_emit_ib,
1640         .emit_fence = sdma_v5_2_ring_emit_fence,
1641         .emit_pipeline_sync = sdma_v5_2_ring_emit_pipeline_sync,
1642         .emit_vm_flush = sdma_v5_2_ring_emit_vm_flush,
1643         .emit_hdp_flush = sdma_v5_2_ring_emit_hdp_flush,
1644         .test_ring = sdma_v5_2_ring_test_ring,
1645         .test_ib = sdma_v5_2_ring_test_ib,
1646         .insert_nop = sdma_v5_2_ring_insert_nop,
1647         .pad_ib = sdma_v5_2_ring_pad_ib,
1648         .emit_wreg = sdma_v5_2_ring_emit_wreg,
1649         .emit_reg_wait = sdma_v5_2_ring_emit_reg_wait,
1650         .emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait,
1651         .init_cond_exec = sdma_v5_2_ring_init_cond_exec,
1652         .patch_cond_exec = sdma_v5_2_ring_patch_cond_exec,
1653         .preempt_ib = sdma_v5_2_ring_preempt_ib,
1654 };
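/*
 * Illustrative sketch (not part of the driver): .emit_frame_size is the
 * worst-case number of ring dwords one submission may need around its IBs,
 * so space can be reserved up front. Spelled out as a constant, using
 * placeholder values for the two SOC15 flush constants (their real values
 * come from soc15.h):
 */
#include <stdint.h>

#define NUM_WREG      6u   /* placeholder for SOC15_FLUSH_GPU_TLB_NUM_WREG */
#define NUM_REG_WAIT  3u   /* placeholder for SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT */

static const uint32_t sdma_frame_dw =
        5 +                  /* init_cond_exec */
        6 +                  /* hdp_flush */
        3 +                  /* hdp_invalidate */
        6 +                  /* pipeline_sync */
        NUM_WREG * 3 +       /* vm_flush: register writes */
        NUM_REG_WAIT * 6 +   /* vm_flush: register waits */
        10 * 3;              /* up to three 10-dword fences (user + VM) */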
1655
1656 static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev)
1657 {
1658         int i;
1659
1660         for (i = 0; i < adev->sdma.num_instances; i++) {
1661                 adev->sdma.instance[i].ring.funcs = &sdma_v5_2_ring_funcs;
1662                 adev->sdma.instance[i].ring.me = i;
1663         }
1664 }
1665
1666 static const struct amdgpu_irq_src_funcs sdma_v5_2_trap_irq_funcs = {
1667         .set = sdma_v5_2_set_trap_irq_state,
1668         .process = sdma_v5_2_process_trap_irq,
1669 };
1670
1671 static const struct amdgpu_irq_src_funcs sdma_v5_2_illegal_inst_irq_funcs = {
1672         .process = sdma_v5_2_process_illegal_inst_irq,
1673 };
1674
1675 static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev)
1676 {
1677         adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
1678                                         adev->sdma.num_instances;
1679         adev->sdma.trap_irq.funcs = &sdma_v5_2_trap_irq_funcs;
1680         adev->sdma.illegal_inst_irq.funcs = &sdma_v5_2_illegal_inst_irq_funcs;
1681 }
1682
1683 /**
1684  * sdma_v5_2_emit_copy_buffer - copy buffer using the sDMA engine
1685  *
1686  * @ib: indirect buffer to copy to
1687  * @src_offset: src GPU address
1688  * @dst_offset: dst GPU address
1689  * @byte_count: number of bytes to xfer
1690  *
1691  * Copy GPU buffers using the DMA engine.
1692  * Used by the amdgpu ttm implementation to move pages if
1693  * registered as the asic copy callback.
1694  */
1695 static void sdma_v5_2_emit_copy_buffer(struct amdgpu_ib *ib,
1696                                        uint64_t src_offset,
1697                                        uint64_t dst_offset,
1698                                        uint32_t byte_count,
1699                                        bool tmz)
1700 {
1701         ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
1702                 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
1703                 SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
1704         ib->ptr[ib->length_dw++] = byte_count - 1;
1705         ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
1706         ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
1707         ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
1708         ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
1709         ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
1710 }
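/*
 * Illustrative sketch (not part of the driver): the linear-copy packet
 * built above is exactly seven dwords, which is where .copy_num_dw = 7 in
 * the buffer funcs below comes from. A standalone model of the layout,
 * with the header dword taken as an opaque argument instead of the
 * SDMA_PKT_* macros:
 */
#include <stdint.h>

unsigned int build_copy_packet(uint32_t *out, uint64_t src, uint64_t dst,
                               uint32_t byte_count, uint32_t header)
{
        unsigned int n = 0;

        out[n++] = header;                   /* OP_COPY | SUBOP_COPY_LINEAR | TMZ bit */
        out[n++] = byte_count - 1;           /* count field is "bytes minus one" */
        out[n++] = 0;                        /* src/dst endian-swap parameters */
        out[n++] = (uint32_t)src;            /* src address, low 32 bits */
        out[n++] = (uint32_t)(src >> 32);    /* src address, high 32 bits */
        out[n++] = (uint32_t)dst;            /* dst address, low 32 bits */
        out[n++] = (uint32_t)(dst >> 32);    /* dst address, high 32 bits */

        return n;                            /* 7 dwords, matching copy_num_dw */
}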
1711
1712 /**
1713  * sdma_v5_2_emit_fill_buffer - fill buffer using the sDMA engine
1714  *
1715  * @ib: indirect buffer to fill
1716  * @src_data: value to write to buffer
1717  * @dst_offset: dst GPU address
1718  * @byte_count: number of bytes to xfer
1719  *
1720  * Fill GPU buffers using the DMA engine.
1721  */
1722 static void sdma_v5_2_emit_fill_buffer(struct amdgpu_ib *ib,
1723                                        uint32_t src_data,
1724                                        uint64_t dst_offset,
1725                                        uint32_t byte_count)
1726 {
1727         ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
1728         ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
1729         ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
1730         ib->ptr[ib->length_dw++] = src_data;
1731         ib->ptr[ib->length_dw++] = byte_count - 1;
1732 }
1733
1734 static const struct amdgpu_buffer_funcs sdma_v5_2_buffer_funcs = {
1735         .copy_max_bytes = 0x400000,
1736         .copy_num_dw = 7,
1737         .emit_copy_buffer = sdma_v5_2_emit_copy_buffer,
1738
1739         .fill_max_bytes = 0x400000,
1740         .fill_num_dw = 5,
1741         .emit_fill_buffer = sdma_v5_2_emit_fill_buffer,
1742 };
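/*
 * Illustrative sketch (not part of the driver): copy_max_bytes and
 * fill_max_bytes cap a single packet at 0x400000 bytes (4 MiB), so the
 * buffer-move code above this layer splits larger transfers into several
 * packets of copy_num_dw/fill_num_dw dwords each. A standalone model,
 * reusing the hypothetical build_copy_packet() from the earlier sketch:
 */
#include <stddef.h>
#include <stdint.h>

#define COPY_MAX_BYTES 0x400000u

extern unsigned int build_copy_packet(uint32_t *out, uint64_t src, uint64_t dst,
                                      uint32_t byte_count, uint32_t header);

size_t emit_copy_chunks(uint32_t *ib, uint64_t src, uint64_t dst, uint64_t bytes)
{
        size_t dw = 0;

        while (bytes) {
                uint32_t chunk = bytes > COPY_MAX_BYTES ? COPY_MAX_BYTES : (uint32_t)bytes;

                dw    += build_copy_packet(ib + dw, src, dst, chunk, 0 /* header */);
                src   += chunk;
                dst   += chunk;
                bytes -= chunk;
        }
        return dw;      /* total dwords written; each chunk contributes 7 */
}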
1743
1744 static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev)
1745 {
1746         if (adev->mman.buffer_funcs == NULL) {
1747                 adev->mman.buffer_funcs = &sdma_v5_2_buffer_funcs;
1748                 adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
1749         }
1750 }
1751
1752 static const struct amdgpu_vm_pte_funcs sdma_v5_2_vm_pte_funcs = {
1753         .copy_pte_num_dw = 7,
1754         .copy_pte = sdma_v5_2_vm_copy_pte,
1755         .write_pte = sdma_v5_2_vm_write_pte,
1756         .set_pte_pde = sdma_v5_2_vm_set_pte_pde,
1757 };
1758
1759 static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev)
1760 {
1761         unsigned i;
1762
1763         if (adev->vm_manager.vm_pte_funcs == NULL) {
1764                 adev->vm_manager.vm_pte_funcs = &sdma_v5_2_vm_pte_funcs;
1765                 for (i = 0; i < adev->sdma.num_instances; i++) {
1766                         adev->vm_manager.vm_pte_scheds[i] =
1767                                 &adev->sdma.instance[i].ring.sched;
1768                 }
1769                 adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
1770         }
1771 }
1772
1773 const struct amdgpu_ip_block_version sdma_v5_2_ip_block = {
1774         .type = AMD_IP_BLOCK_TYPE_SDMA,
1775         .major = 5,
1776         .minor = 2,
1777         .rev = 0,
1778         .funcs = &sdma_v5_2_ip_funcs,
1779 };