drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <drm/drmP.h>
26 #include "amdgpu.h"
27 #include "amdgpu_ucode.h"
28 #include "amdgpu_trace.h"
29 #include "vi.h"
30 #include "vid.h"
31
32 #include "oss/oss_3_0_d.h"
33 #include "oss/oss_3_0_sh_mask.h"
34
35 #include "gmc/gmc_8_1_d.h"
36 #include "gmc/gmc_8_1_sh_mask.h"
37
38 #include "gca/gfx_8_0_d.h"
39 #include "gca/gfx_8_0_enum.h"
40 #include "gca/gfx_8_0_sh_mask.h"
41
42 #include "bif/bif_5_0_d.h"
43 #include "bif/bif_5_0_sh_mask.h"
44
45 #include "tonga_sdma_pkt_open.h"
46
47 static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev);
48 static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev);
49 static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev);
50 static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev);
51
52 MODULE_FIRMWARE("amdgpu/tonga_sdma.bin");
53 MODULE_FIRMWARE("amdgpu/tonga_sdma1.bin");
54 MODULE_FIRMWARE("amdgpu/carrizo_sdma.bin");
55 MODULE_FIRMWARE("amdgpu/carrizo_sdma1.bin");
56
57 static const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
58 {
59         SDMA0_REGISTER_OFFSET,
60         SDMA1_REGISTER_OFFSET
61 };
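/* Register accesses below use mmSDMA0_<reg> + sdma_offsets[i]: SDMA1's
 * register block sits at a fixed offset from SDMA0's, so the same code
 * drives either engine instance.
 */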
62
63 static const u32 golden_settings_tonga_a11[] =
64 {
65         mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
66         mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000,
67         mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100,
68         mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
69         mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
70         mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
71         mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000,
72         mmSDMA1_GFX_IB_CNTL, 0x800f0111, 0x00000100,
73         mmSDMA1_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
74         mmSDMA1_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
75 };
76
77 static const u32 tonga_mgcg_cgcg_init[] =
78 {
79         mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100,
80         mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100
81 };
82
83 static const u32 cz_golden_settings_a11[] =
84 {
85         mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
86         mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000,
87         mmSDMA0_GFX_IB_CNTL, 0x00000100, 0x00000100,
88         mmSDMA0_POWER_CNTL, 0x00000800, 0x0003c800,
89         mmSDMA0_RLC0_IB_CNTL, 0x00000100, 0x00000100,
90         mmSDMA0_RLC1_IB_CNTL, 0x00000100, 0x00000100,
91         mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
92         mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000,
93         mmSDMA1_GFX_IB_CNTL, 0x00000100, 0x00000100,
94         mmSDMA1_POWER_CNTL, 0x00000800, 0x0003c800,
95         mmSDMA1_RLC0_IB_CNTL, 0x00000100, 0x00000100,
96         mmSDMA1_RLC1_IB_CNTL, 0x00000100, 0x00000100,
97 };
98
99 static const u32 cz_mgcg_cgcg_init[] =
100 {
101         mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100,
102         mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100
103 };
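/* Each golden-register table entry is a {register, and-mask, or-value}
 * triplet consumed by amdgpu_program_register_sequence(): the masked bits
 * are cleared and the or-value is written on top of the current contents.
 */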
104
105 /*
106  * sDMA - System DMA
107  * Starting with CIK, the GPU has new asynchronous
108  * DMA engines.  These engines are used for compute
109  * and gfx.  There are two DMA engines (SDMA0, SDMA1)
110  * and each one supports 1 ring buffer used for gfx
111  * and 2 queues used for compute.
112  *
113  * The programming model is very similar to the CP
114  * (ring buffer, IBs, etc.), but sDMA has its own
115  * packet format that is different from the PM4 format
116  * used by the CP. sDMA supports copying data, writing
117  * embedded data, solid fills, and a number of other
118  * things.  It also has support for tiling/detiling of
119  * buffers.
120  */
121
122 static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev)
123 {
124         switch (adev->asic_type) {
125         case CHIP_TONGA:
126                 amdgpu_program_register_sequence(adev,
127                                                  tonga_mgcg_cgcg_init,
128                                                  (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
129                 amdgpu_program_register_sequence(adev,
130                                                  golden_settings_tonga_a11,
131                                                  (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
132                 break;
133         case CHIP_CARRIZO:
134                 amdgpu_program_register_sequence(adev,
135                                                  cz_mgcg_cgcg_init,
136                                                  (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
137                 amdgpu_program_register_sequence(adev,
138                                                  cz_golden_settings_a11,
139                                                  (const u32)ARRAY_SIZE(cz_golden_settings_a11));
140                 break;
141         default:
142                 break;
143         }
144 }
145
146 /**
147  * sdma_v3_0_init_microcode - load ucode images from disk
148  *
149  * @adev: amdgpu_device pointer
150  *
151  * Use the firmware interface to load the ucode images into
152  * the driver (not loaded into hw).
153  * Returns 0 on success, error on failure.
154  */
155 static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
156 {
157         const char *chip_name;
158         char fw_name[30];
159         int err, i;
160         struct amdgpu_firmware_info *info = NULL;
161         const struct common_firmware_header *header = NULL;
162
163         DRM_DEBUG("\n");
164
165         switch (adev->asic_type) {
166         case CHIP_TONGA:
167                 chip_name = "tonga";
168                 break;
169         case CHIP_CARRIZO:
170                 chip_name = "carrizo";
171                 break;
172         default: BUG();
173         }
174
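        /* Each instance has its own firmware image: <chip>_sdma.bin for
         * engine 0 and <chip>_sdma1.bin for engine 1, matching the
         * MODULE_FIRMWARE() declarations above.
         */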
175         for (i = 0; i < SDMA_MAX_INSTANCE; i++) {
176                 if (i == 0)
177                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
178                 else
179                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
180                 err = request_firmware(&adev->sdma[i].fw, fw_name, adev->dev);
181                 if (err)
182                         goto out;
183                 err = amdgpu_ucode_validate(adev->sdma[i].fw);
184                 if (err)
185                         goto out;
186
187                 if (adev->firmware.smu_load) {
188                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
189                         info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
190                         info->fw = adev->sdma[i].fw;
191                         header = (const struct common_firmware_header *)info->fw->data;
192                         adev->firmware.fw_size +=
193                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
194                 }
195         }
196 out:
197         if (err) {
198                 printk(KERN_ERR
199                        "sdma_v3_0: Failed to load firmware \"%s\"\n",
200                        fw_name);
201                 for (i = 0; i < SDMA_MAX_INSTANCE; i++) {
202                         release_firmware(adev->sdma[i].fw);
203                         adev->sdma[i].fw = NULL;
204                 }
205         }
206         return err;
207 }
208
209 /**
210  * sdma_v3_0_ring_get_rptr - get the current read pointer
211  *
212  * @ring: amdgpu ring pointer
213  *
214  * Get the current rptr from the hardware (VI+).
215  */
216 static uint32_t sdma_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
217 {
218         u32 rptr;
219
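        /* The write-back slot mirrors the hardware rptr, which is a byte
         * offset; the ring code counts in dwords, hence the >> 2.
         */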
220         /* XXX check if swapping is necessary on BE */
221         rptr = ring->adev->wb.wb[ring->rptr_offs] >> 2;
222
223         return rptr;
224 }
225
226 /**
227  * sdma_v3_0_ring_get_wptr - get the current write pointer
228  *
229  * @ring: amdgpu ring pointer
230  *
231  * Get the current wptr from the hardware (VI+).
232  */
233 static uint32_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
234 {
235         struct amdgpu_device *adev = ring->adev;
236         u32 wptr;
237
238         if (ring->use_doorbell) {
239                 /* XXX check if swapping is necessary on BE */
240                 wptr = ring->adev->wb.wb[ring->wptr_offs] >> 2;
241         } else {
242                 int me = (ring == &ring->adev->sdma[0].ring) ? 0 : 1;
243
244                 wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me]) >> 2;
245         }
246
247         return wptr;
248 }
249
250 /**
251  * sdma_v3_0_ring_set_wptr - commit the write pointer
252  *
253  * @ring: amdgpu ring pointer
254  *
255  * Write the wptr back to the hardware (VI+).
256  */
257 static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
258 {
259         struct amdgpu_device *adev = ring->adev;
260
261         if (ring->use_doorbell) {
262                 /* XXX check if swapping is necessary on BE */
263                 adev->wb.wb[ring->wptr_offs] = ring->wptr << 2;
264                 WDOORBELL32(ring->doorbell_index, ring->wptr << 2);
265         } else {
266                 int me = (ring == &ring->adev->sdma[0].ring) ? 0 : 1;
267
268                 WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2);
269         }
270 }
271
272 /**
273  * sdma_v3_0_ring_emit_ib - Schedule an IB on the DMA engine
274  *
275  * @ring: amdgpu ring pointer
276  * @ib: IB object to schedule
277  *
278  * Schedule an IB in the DMA ring (VI).
279  */
280 static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
281                                    struct amdgpu_ib *ib)
282 {
283         u32 vmid = (ib->vm ? ib->vm->ids[ring->idx].id : 0) & 0xf;
284         u32 next_rptr = ring->wptr + 5;
285
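        /* next_rptr predicts where wptr lands after this submission: 5 dwords
         * for the WRITE packet below, padding until the INDIRECT packet starts
         * at (wptr & 7) == 2, then 6 dwords for the INDIRECT packet itself.
         */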
286         while ((next_rptr & 7) != 2)
287                 next_rptr++;
288         next_rptr += 6;
289
290         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
291                           SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
292         amdgpu_ring_write(ring, lower_32_bits(ring->next_rptr_gpu_addr) & 0xfffffffc);
293         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
294         amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
295         amdgpu_ring_write(ring, next_rptr);
296
297         /* IB packet must end on an 8 DW boundary */
298         while ((ring->wptr & 7) != 2)
299                 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_NOP));
300
301         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
302                           SDMA_PKT_INDIRECT_HEADER_VMID(vmid));
303         /* base must be 32 byte aligned */
304         amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
305         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
306         amdgpu_ring_write(ring, ib->length_dw);
307         amdgpu_ring_write(ring, 0);
308         amdgpu_ring_write(ring, 0);
309
310 }
311
312 /**
313  * sdma_v3_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
314  *
315  * @ring: amdgpu ring pointer
316  *
317  * Emit an hdp flush packet on the requested DMA ring.
318  */
319 static void sdma_v3_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
320 {
321         u32 ref_and_mask = 0;
322
323         if (ring == &ring->adev->sdma[0].ring)
324                 ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA0, 1);
325         else
326                 ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA1, 1);
327
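        /* POLL_REGMEM in hdp_flush form writes the reference value to the
         * second register (GPU_HDP_FLUSH_REQ) and polls the first
         * (GPU_HDP_FLUSH_DONE) until the masked value compares equal (func 3).
         */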
328         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
329                           SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
330                           SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
331         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE << 2);
332         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ << 2);
333         amdgpu_ring_write(ring, ref_and_mask); /* reference */
334         amdgpu_ring_write(ring, ref_and_mask); /* mask */
335         amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
336                           SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
337 }
338
339 /**
340  * sdma_v3_0_ring_emit_fence - emit a fence on the DMA ring
341  *
342  * @ring: amdgpu ring pointer
343  * @fence: amdgpu fence object
344  *
345  * Add a DMA fence packet to the ring to write
346  * the fence seq number, and a DMA trap packet to generate
347  * an interrupt if needed (VI).
348  */
349 static void sdma_v3_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
350                                       bool write64bits)
351 {
352         /* write the fence */
353         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
354         amdgpu_ring_write(ring, lower_32_bits(addr));
355         amdgpu_ring_write(ring, upper_32_bits(addr));
356         amdgpu_ring_write(ring, lower_32_bits(seq));
357
358         /* optionally write high bits as well */
359         if (write64bits) {
360                 addr += 4;
361                 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
362                 amdgpu_ring_write(ring, lower_32_bits(addr));
363                 amdgpu_ring_write(ring, upper_32_bits(addr));
364                 amdgpu_ring_write(ring, upper_32_bits(seq));
365         }
366
367         /* generate an interrupt */
368         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
369         amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
370 }
371
372
373 /**
374  * sdma_v3_0_ring_emit_semaphore - emit a semaphore on the dma ring
375  *
376  * @ring: amdgpu_ring structure holding ring information
377  * @semaphore: amdgpu semaphore object
378  * @emit_wait: wait or signal semaphore
379  *
380  * Add a DMA semaphore packet to the ring to wait on or signal
381  * other rings (VI).
382  */
383 static bool sdma_v3_0_ring_emit_semaphore(struct amdgpu_ring *ring,
384                                           struct amdgpu_semaphore *semaphore,
385                                           bool emit_wait)
386 {
387         u64 addr = semaphore->gpu_addr;
388         u32 sig = emit_wait ? 0 : 1;
389
390         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SEM) |
391                           SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(sig));
392         amdgpu_ring_write(ring, lower_32_bits(addr) & 0xfffffff8);
393         amdgpu_ring_write(ring, upper_32_bits(addr));
394
395         return true;
396 }
397
398 /**
399  * sdma_v3_0_gfx_stop - stop the gfx async dma engines
400  *
401  * @adev: amdgpu_device pointer
402  *
403  * Stop the gfx async dma ring buffers (VI).
404  */
405 static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev)
406 {
407         struct amdgpu_ring *sdma0 = &adev->sdma[0].ring;
408         struct amdgpu_ring *sdma1 = &adev->sdma[1].ring;
409         u32 rb_cntl, ib_cntl;
410         int i;
411
412         if ((adev->mman.buffer_funcs_ring == sdma0) ||
413             (adev->mman.buffer_funcs_ring == sdma1))
414                 amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
415
416         for (i = 0; i < SDMA_MAX_INSTANCE; i++) {
417                 rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
418                 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
419                 WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
420                 ib_cntl = RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]);
421                 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
422                 WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
423         }
424         sdma0->ready = false;
425         sdma1->ready = false;
426 }
427
428 /**
429  * sdma_v3_0_rlc_stop - stop the compute async dma engines
430  *
431  * @adev: amdgpu_device pointer
432  *
433  * Stop the compute async dma queues (VI).
434  */
435 static void sdma_v3_0_rlc_stop(struct amdgpu_device *adev)
436 {
437         /* XXX todo */
438 }
439
440 /**
441  * sdma_v3_0_enable - halt or unhalt the async dma engines
442  *
443  * @adev: amdgpu_device pointer
444  * @enable: enable/disable the DMA MEs.
445  *
446  * Halt or unhalt the async dma engines (VI).
447  */
448 static void sdma_v3_0_enable(struct amdgpu_device *adev, bool enable)
449 {
450         u32 f32_cntl;
451         int i;
452
453         if (!enable) {
454                 sdma_v3_0_gfx_stop(adev);
455                 sdma_v3_0_rlc_stop(adev);
456         }
457
458         for (i = 0; i < SDMA_MAX_INSTANCE; i++) {
459                 f32_cntl = RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i]);
460                 if (enable)
461                         f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 0);
462                 else
463                         f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
464                 WREG32(mmSDMA0_F32_CNTL + sdma_offsets[i], f32_cntl);
465         }
466 }
467
468 /**
469  * sdma_v3_0_gfx_resume - setup and start the async dma engines
470  *
471  * @adev: amdgpu_device pointer
472  *
473  * Set up the gfx DMA ring buffers and enable them (VI).
474  * Returns 0 for success, error for failure.
475  */
476 static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
477 {
478         struct amdgpu_ring *ring;
479         u32 rb_cntl, ib_cntl;
480         u32 rb_bufsz;
481         u32 wb_offset;
482         u32 doorbell;
483         int i, j, r;
484
485         for (i = 0; i < SDMA_MAX_INSTANCE; i++) {
486                 ring = &adev->sdma[i].ring;
487                 wb_offset = (ring->rptr_offs * 4);
488
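                /* Zero the SDMA gfx virtual address and APE1 range for all
                 * 16 VM contexts via the SRBM index.
                 */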
489                 mutex_lock(&adev->srbm_mutex);
490                 for (j = 0; j < 16; j++) {
491                         vi_srbm_select(adev, 0, 0, 0, j);
492                         /* SDMA GFX */
493                         WREG32(mmSDMA0_GFX_VIRTUAL_ADDR + sdma_offsets[i], 0);
494                         WREG32(mmSDMA0_GFX_APE1_CNTL + sdma_offsets[i], 0);
495                 }
496                 vi_srbm_select(adev, 0, 0, 0, 0);
497                 mutex_unlock(&adev->srbm_mutex);
498
499                 WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);
500
501                 /* Set ring buffer size in dwords */
502                 rb_bufsz = order_base_2(ring->ring_size / 4);
503                 rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
504                 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
505 #ifdef __BIG_ENDIAN
506                 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
507                 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
508                                         RPTR_WRITEBACK_SWAP_ENABLE, 1);
509 #endif
510                 WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
511
512                 /* Initialize the ring buffer's read and write pointers */
513                 WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0);
514                 WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);
515
516                 /* set the wb address whether it's enabled or not */
517                 WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
518                        upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
519                 WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i],
520                        lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
521
522                 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
523
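                /* The ring base is programmed in 256-byte units (>> 8); the
                 * HI register takes the remaining upper address bits (>> 40).
                 */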
524                 WREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);
525                 WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
526
527                 ring->wptr = 0;
528                 WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2);
529
530                 doorbell = RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i]);
531
532                 if (ring->use_doorbell) {
533                         doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL,
534                                                  OFFSET, ring->doorbell_index);
535                         doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
536                 } else {
537                         doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
538                 }
539                 WREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i], doorbell);
540
541                 /* enable DMA RB */
542                 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
543                 WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
544
545                 ib_cntl = RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]);
546                 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
547 #ifdef __BIG_ENDIAN
548                 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
549 #endif
550                 /* enable DMA IBs */
551                 WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
552
553                 ring->ready = true;
554
555                 r = amdgpu_ring_test_ring(ring);
556                 if (r) {
557                         ring->ready = false;
558                         return r;
559                 }
560
561                 if (adev->mman.buffer_funcs_ring == ring)
562                         amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size);
563         }
564
565         return 0;
566 }
567
568 /**
569  * sdma_v3_0_rlc_resume - setup and start the async dma engines
570  *
571  * @adev: amdgpu_device pointer
572  *
573  * Set up the compute DMA queues and enable them (VI).
574  * Returns 0 for success, error for failure.
575  */
576 static int sdma_v3_0_rlc_resume(struct amdgpu_device *adev)
577 {
578         /* XXX todo */
579         return 0;
580 }
581
582 /**
583  * sdma_v3_0_load_microcode - load the sDMA ME ucode
584  *
585  * @adev: amdgpu_device pointer
586  *
587  * Loads the sDMA0/1 ucode.
588  * Returns 0 for success, -EINVAL if the ucode is not available.
589  */
590 static int sdma_v3_0_load_microcode(struct amdgpu_device *adev)
591 {
592         const struct sdma_firmware_header_v1_0 *hdr;
593         const __le32 *fw_data;
594         u32 fw_size;
595         int i, j;
596
597         if (!adev->sdma[0].fw || !adev->sdma[1].fw)
598                 return -EINVAL;
599
600         /* halt the MEs */
601         sdma_v3_0_enable(adev, false);
602
603         for (i = 0; i < SDMA_MAX_INSTANCE; i++) {
604                 hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma[i].fw->data;
605                 amdgpu_ucode_print_sdma_hdr(&hdr->header);
606                 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
607                 adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
608
609                 fw_data = (const __le32 *)
610                         (adev->sdma[i].fw->data +
611                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
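                /* Writing 0 to UCODE_ADDR resets the auto-incrementing upload
                 * pointer; the image is then streamed one dword at a time
                 * through UCODE_DATA, and the fw version is written back to
                 * UCODE_ADDR once the upload is complete.
                 */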
612                 WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0);
613                 for (j = 0; j < fw_size; j++)
614                         WREG32(mmSDMA0_UCODE_DATA + sdma_offsets[i], le32_to_cpup(fw_data++));
615                 WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], adev->sdma[i].fw_version);
616         }
617
618         return 0;
619 }
620
621 /**
622  * sdma_v3_0_start - setup and start the async dma engines
623  *
624  * @adev: amdgpu_device pointer
625  *
626  * Set up the DMA engines and enable them (VI).
627  * Returns 0 for success, error for failure.
628  */
629 static int sdma_v3_0_start(struct amdgpu_device *adev)
630 {
631         int r;
632
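        /* With SMU-managed firmware loading, only check that the SMU finished
         * loading the SDMA ucode; otherwise write the ucode to the engines
         * directly.
         */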
633         if (!adev->firmware.smu_load) {
634                 r = sdma_v3_0_load_microcode(adev);
635                 if (r)
636                         return r;
637         } else {
638                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
639                                                 AMDGPU_UCODE_ID_SDMA0);
640                 if (r)
641                         return -EINVAL;
642                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
643                                                 AMDGPU_UCODE_ID_SDMA1);
644                 if (r)
645                         return -EINVAL;
646         }
647
648         /* unhalt the MEs */
649         sdma_v3_0_enable(adev, true);
650
651         /* start the gfx rings and rlc compute queues */
652         r = sdma_v3_0_gfx_resume(adev);
653         if (r)
654                 return r;
655         r = sdma_v3_0_rlc_resume(adev);
656         if (r)
657                 return r;
658
659         return 0;
660 }
661
662 /**
663  * sdma_v3_0_ring_test_ring - simple async dma engine test
664  *
665  * @ring: amdgpu_ring structure holding ring information
666  *
667  * Test the DMA engine by using it to write a
668  * value to memory (VI).
669  * Returns 0 for success, error for failure.
670  */
671 static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring)
672 {
673         struct amdgpu_device *adev = ring->adev;
674         unsigned i;
675         unsigned index;
676         int r;
677         u32 tmp;
678         u64 gpu_addr;
679
680         r = amdgpu_wb_get(adev, &index);
681         if (r) {
682                 dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
683                 return r;
684         }
685
686         gpu_addr = adev->wb.gpu_addr + (index * 4);
687         tmp = 0xCAFEDEAD;
688         adev->wb.wb[index] = cpu_to_le32(tmp);
689
690         r = amdgpu_ring_lock(ring, 5);
691         if (r) {
692                 DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
693                 amdgpu_wb_free(adev, index);
694                 return r;
695         }
696
697         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
698                           SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
699         amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
700         amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
701         amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
702         amdgpu_ring_write(ring, 0xDEADBEEF);
703         amdgpu_ring_unlock_commit(ring);
704
705         for (i = 0; i < adev->usec_timeout; i++) {
706                 tmp = le32_to_cpu(adev->wb.wb[index]);
707                 if (tmp == 0xDEADBEEF)
708                         break;
709                 DRM_UDELAY(1);
710         }
711
712         if (i < adev->usec_timeout) {
713                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
714         } else {
715                 DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
716                           ring->idx, tmp);
717                 r = -EINVAL;
718         }
719         amdgpu_wb_free(adev, index);
720
721         return r;
722 }
723
724 /**
725  * sdma_v3_0_ring_test_ib - test an IB on the DMA engine
726  *
727  * @ring: amdgpu_ring structure holding ring information
728  *
729  * Test a simple IB in the DMA ring (VI).
730  * Returns 0 on success, error on failure.
731  */
732 static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
733 {
734         struct amdgpu_device *adev = ring->adev;
735         struct amdgpu_ib ib;
736         unsigned i;
737         unsigned index;
738         int r;
739         u32 tmp = 0;
740         u64 gpu_addr;
741
742         r = amdgpu_wb_get(adev, &index);
743         if (r) {
744                 dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
745                 return r;
746         }
747
748         gpu_addr = adev->wb.gpu_addr + (index * 4);
749         tmp = 0xCAFEDEAD;
750         adev->wb.wb[index] = cpu_to_le32(tmp);
751
752         r = amdgpu_ib_get(ring, NULL, 256, &ib);
753         if (r) {
754                 amdgpu_wb_free(adev, index);
755                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
756                 return r;
757         }
758
759         ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
760                 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
761         ib.ptr[1] = lower_32_bits(gpu_addr);
762         ib.ptr[2] = upper_32_bits(gpu_addr);
763         ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1);
764         ib.ptr[4] = 0xDEADBEEF;
765         ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
766         ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
767         ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
768         ib.length_dw = 8;
769
770         r = amdgpu_ib_schedule(adev, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED);
771         if (r) {
772                 amdgpu_ib_free(adev, &ib);
773                 amdgpu_wb_free(adev, index);
774                 DRM_ERROR("amdgpu: failed to schedule ib (%d).\n", r);
775                 return r;
776         }
777         r = amdgpu_fence_wait(ib.fence, false);
778         if (r) {
779                 amdgpu_ib_free(adev, &ib);
780                 amdgpu_wb_free(adev, index);
781                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
782                 return r;
783         }
784         for (i = 0; i < adev->usec_timeout; i++) {
785                 tmp = le32_to_cpu(adev->wb.wb[index]);
786                 if (tmp == 0xDEADBEEF)
787                         break;
788                 DRM_UDELAY(1);
789         }
790         if (i < adev->usec_timeout) {
791                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
792                          ib.fence->ring->idx, i);
793         } else {
794                 DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
795                 r = -EINVAL;
796         }
797         amdgpu_ib_free(adev, &ib);
798         amdgpu_wb_free(adev, index);
799         return r;
800 }
801
802 /**
803  * sdma_v3_0_vm_copy_pte - update PTEs by copying them from the GART
804  *
805  * @ib: indirect buffer to fill with commands
806  * @pe: addr of the page entry
807  * @src: src addr to copy from
808  * @count: number of page entries to update
809  *
810  * Update PTEs by copying them from the GART using sDMA (VI).
811  */
812 static void sdma_v3_0_vm_copy_pte(struct amdgpu_ib *ib,
813                                   uint64_t pe, uint64_t src,
814                                   unsigned count)
815 {
816         while (count) {
817                 unsigned bytes = count * 8;
818                 if (bytes > 0x1FFFF8)
819                         bytes = 0x1FFFF8;
820
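                /* A single COPY_LINEAR packet moves at most ~2 MB; 0x1FFFF8
                 * keeps each chunk a whole number of 8-byte PTEs.
                 */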
821                 ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
822                         SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
823                 ib->ptr[ib->length_dw++] = bytes;
824                 ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
825                 ib->ptr[ib->length_dw++] = lower_32_bits(src);
826                 ib->ptr[ib->length_dw++] = upper_32_bits(src);
827                 ib->ptr[ib->length_dw++] = lower_32_bits(pe);
828                 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
829
830                 pe += bytes;
831                 src += bytes;
832                 count -= bytes / 8;
833         }
834 }
835
836 /**
837  * sdma_v3_0_vm_write_pte - update PTEs by writing them manually
838  *
839  * @ib: indirect buffer to fill with commands
840  * @pe: addr of the page entry
841  * @addr: dst addr to write into pe
842  * @count: number of page entries to update
843  * @incr: increase next addr by incr bytes
844  * @flags: access flags
845  *
846  * Update PTEs by writing them manually using sDMA (VI).
847  */
848 static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib,
849                                    uint64_t pe,
850                                    uint64_t addr, unsigned count,
851                                    uint32_t incr, uint32_t flags)
852 {
853         uint64_t value;
854         unsigned ndw;
855
856         while (count) {
857                 ndw = count * 2;
858                 if (ndw > 0xFFFFE)
859                         ndw = 0xFFFFE;
860
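                /* Each PTE takes two dwords of payload; one write packet
                 * carries at most 0xFFFFE data dwords.
                 */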
861                 /* for non-physically contiguous pages (system) */
862                 ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
863                         SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
864                 ib->ptr[ib->length_dw++] = pe;
865                 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
866                 ib->ptr[ib->length_dw++] = ndw;
867                 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
868                         if (flags & AMDGPU_PTE_SYSTEM) {
869                                 value = amdgpu_vm_map_gart(ib->ring->adev, addr);
870                                 value &= 0xFFFFFFFFFFFFF000ULL;
871                         } else if (flags & AMDGPU_PTE_VALID) {
872                                 value = addr;
873                         } else {
874                                 value = 0;
875                         }
876                         addr += incr;
877                         value |= flags;
878                         ib->ptr[ib->length_dw++] = value;
879                         ib->ptr[ib->length_dw++] = upper_32_bits(value);
880                 }
881         }
882 }
883
884 /**
885  * sdma_v3_0_vm_set_pte_pde - update the page tables using sDMA
886  *
887  * @ib: indirect buffer to fill with commands
888  * @pe: addr of the page entry
889  * @addr: dst addr to write into pe
890  * @count: number of page entries to update
891  * @incr: increase next addr by incr bytes
892  * @flags: access flags
893  *
894  * Update the page tables using sDMA (VI).
895  */
896 static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib,
897                                      uint64_t pe,
898                                      uint64_t addr, unsigned count,
899                                      uint32_t incr, uint32_t flags)
900 {
901         uint64_t value;
902         unsigned ndw;
903
904         while (count) {
905                 ndw = count;
906                 if (ndw > 0x7FFFF)
907                         ndw = 0x7FFFF;
908
909                 if (flags & AMDGPU_PTE_VALID)
910                         value = addr;
911                 else
912                         value = 0;
913
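                /* PTE_PDE generate packet: dst address, mask, initial value,
                 * per-entry increment and entry count; the engine emits ndw
                 * entries, adding incr to the value for each one.
                 */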
914                 /* for physically contiguous pages (vram) */
915                 ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
916                 ib->ptr[ib->length_dw++] = pe; /* dst addr */
917                 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
918                 ib->ptr[ib->length_dw++] = flags; /* mask */
919                 ib->ptr[ib->length_dw++] = 0;
920                 ib->ptr[ib->length_dw++] = value; /* value */
921                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
922                 ib->ptr[ib->length_dw++] = incr; /* increment size */
923                 ib->ptr[ib->length_dw++] = 0;
924                 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
925
926                 pe += ndw * 8;
927                 addr += ndw * incr;
928                 count -= ndw;
929         }
930 }
931
932 /**
933  * sdma_v3_0_vm_pad_ib - pad the IB with NOPs to a multiple of 8 dwords
934  *
935  * @ib: indirect buffer to fill with padding
936  *
937  */
938 static void sdma_v3_0_vm_pad_ib(struct amdgpu_ib *ib)
939 {
940         while (ib->length_dw & 0x7)
941                 ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
942 }
943
944 /**
945  * sdma_v3_0_ring_emit_vm_flush - vm flush using sDMA
946  *
947  * @ring: amdgpu_ring pointer
948  * @vm: amdgpu_vm pointer
949  *
950  * Update the page table base and flush the VM TLB
951  * using sDMA (VI).
952  */
953 static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
954                                          unsigned vm_id, uint64_t pd_addr)
955 {
956         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
957                           SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
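        /* VM contexts 0-7 and 8-15 have separate banks of page table base
         * address registers.
         */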
958         if (vm_id < 8) {
959                 amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
960         } else {
961                 amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
962         }
963         amdgpu_ring_write(ring, pd_addr >> 12);
964
965         /* flush TLB */
966         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
967                           SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
968         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
969         amdgpu_ring_write(ring, 1 << vm_id);
970
971         /* wait for flush */
972         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
973                           SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
974                           SDMA_PKT_POLL_REGMEM_HEADER_FUNC(0)); /* always */
975         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
976         amdgpu_ring_write(ring, 0);
977         amdgpu_ring_write(ring, 0); /* reference */
978         amdgpu_ring_write(ring, 0); /* mask */
979         amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
980                           SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
981 }
982
983 static int sdma_v3_0_early_init(void *handle)
984 {
985         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
986
987         sdma_v3_0_set_ring_funcs(adev);
988         sdma_v3_0_set_buffer_funcs(adev);
989         sdma_v3_0_set_vm_pte_funcs(adev);
990         sdma_v3_0_set_irq_funcs(adev);
991
992         return 0;
993 }
994
995 static int sdma_v3_0_sw_init(void *handle)
996 {
997         struct amdgpu_ring *ring;
998         int r;
999         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1000
1001         /* SDMA trap event */
1002         r = amdgpu_irq_add_id(adev, 224, &adev->sdma_trap_irq);
1003         if (r)
1004                 return r;
1005
1006         /* SDMA Privileged inst */
1007         r = amdgpu_irq_add_id(adev, 241, &adev->sdma_illegal_inst_irq);
1008         if (r)
1009                 return r;
1010
1011         /* SDMA Privileged inst */
1012         r = amdgpu_irq_add_id(adev, 247, &adev->sdma_illegal_inst_irq);
1013         if (r)
1014                 return r;
1015
1016         r = sdma_v3_0_init_microcode(adev);
1017         if (r) {
1018                 DRM_ERROR("Failed to load sdma firmware!\n");
1019                 return r;
1020         }
1021
1022         ring = &adev->sdma[0].ring;
1023         ring->ring_obj = NULL;
1024         ring->use_doorbell = true;
1025         ring->doorbell_index = AMDGPU_DOORBELL_sDMA_ENGINE0;
1026
1027         ring = &adev->sdma[1].ring;
1028         ring->ring_obj = NULL;
1029         ring->use_doorbell = true;
1030         ring->doorbell_index = AMDGPU_DOORBELL_sDMA_ENGINE1;
1031
1032         ring = &adev->sdma[0].ring;
1033         sprintf(ring->name, "sdma0");
1034         r = amdgpu_ring_init(adev, ring, 256 * 1024,
1035                              SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), 0xf,
1036                              &adev->sdma_trap_irq, AMDGPU_SDMA_IRQ_TRAP0,
1037                              AMDGPU_RING_TYPE_SDMA);
1038         if (r)
1039                 return r;
1040
1041         ring = &adev->sdma[1].ring;
1042         sprintf(ring->name, "sdma1");
1043         r = amdgpu_ring_init(adev, ring, 256 * 1024,
1044                              SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), 0xf,
1045                              &adev->sdma_trap_irq, AMDGPU_SDMA_IRQ_TRAP1,
1046                              AMDGPU_RING_TYPE_SDMA);
1047         if (r)
1048                 return r;
1049
1050         return r;
1051 }
1052
1053 static int sdma_v3_0_sw_fini(void *handle)
1054 {
1055         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1056
1057         amdgpu_ring_fini(&adev->sdma[0].ring);
1058         amdgpu_ring_fini(&adev->sdma[1].ring);
1059
1060         return 0;
1061 }
1062
1063 static int sdma_v3_0_hw_init(void *handle)
1064 {
1065         int r;
1066         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1067
1068         sdma_v3_0_init_golden_registers(adev);
1069
1070         r = sdma_v3_0_start(adev);
1071         if (r)
1072                 return r;
1073
1074         return r;
1075 }
1076
1077 static int sdma_v3_0_hw_fini(void *handle)
1078 {
1079         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1080
1081         sdma_v3_0_enable(adev, false);
1082
1083         return 0;
1084 }
1085
1086 static int sdma_v3_0_suspend(void *handle)
1087 {
1088         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1089
1090         return sdma_v3_0_hw_fini(adev);
1091 }
1092
1093 static int sdma_v3_0_resume(void *handle)
1094 {
1095         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1096
1097         return sdma_v3_0_hw_init(adev);
1098 }
1099
1100 static bool sdma_v3_0_is_idle(void *handle)
1101 {
1102         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1103         u32 tmp = RREG32(mmSRBM_STATUS2);
1104
1105         if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK |
1106                    SRBM_STATUS2__SDMA1_BUSY_MASK))
1107                 return false;
1108
1109         return true;
1110 }
1111
1112 static int sdma_v3_0_wait_for_idle(void *handle)
1113 {
1114         unsigned i;
1115         u32 tmp;
1116         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1117
1118         for (i = 0; i < adev->usec_timeout; i++) {
1119                 tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK |
1120                                 SRBM_STATUS2__SDMA1_BUSY_MASK);
1121
1122                 if (!tmp)
1123                         return 0;
1124                 udelay(1);
1125         }
1126         return -ETIMEDOUT;
1127 }
1128
1129 static void sdma_v3_0_print_status(void *handle)
1130 {
1131         int i, j;
1132         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1133
1134         dev_info(adev->dev, "VI SDMA registers\n");
1135         dev_info(adev->dev, "  SRBM_STATUS2=0x%08X\n",
1136                  RREG32(mmSRBM_STATUS2));
1137         for (i = 0; i < SDMA_MAX_INSTANCE; i++) {
1138                 dev_info(adev->dev, "  SDMA%d_STATUS_REG=0x%08X\n",
1139                          i, RREG32(mmSDMA0_STATUS_REG + sdma_offsets[i]));
1140                 dev_info(adev->dev, "  SDMA%d_F32_CNTL=0x%08X\n",
1141                          i, RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i]));
1142                 dev_info(adev->dev, "  SDMA%d_CNTL=0x%08X\n",
1143                          i, RREG32(mmSDMA0_CNTL + sdma_offsets[i]));
1144                 dev_info(adev->dev, "  SDMA%d_SEM_WAIT_FAIL_TIMER_CNTL=0x%08X\n",
1145                          i, RREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i]));
1146                 dev_info(adev->dev, "  SDMA%d_GFX_IB_CNTL=0x%08X\n",
1147                          i, RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]));
1148                 dev_info(adev->dev, "  SDMA%d_GFX_RB_CNTL=0x%08X\n",
1149                          i, RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]));
1150                 dev_info(adev->dev, "  SDMA%d_GFX_RB_RPTR=0x%08X\n",
1151                          i, RREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i]));
1152                 dev_info(adev->dev, "  SDMA%d_GFX_RB_WPTR=0x%08X\n",
1153                          i, RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i]));
1154                 dev_info(adev->dev, "  SDMA%d_GFX_RB_RPTR_ADDR_HI=0x%08X\n",
1155                          i, RREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i]));
1156                 dev_info(adev->dev, "  SDMA%d_GFX_RB_RPTR_ADDR_LO=0x%08X\n",
1157                          i, RREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i]));
1158                 dev_info(adev->dev, "  SDMA%d_GFX_RB_BASE=0x%08X\n",
1159                          i, RREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i]));
1160                 dev_info(adev->dev, "  SDMA%d_GFX_RB_BASE_HI=0x%08X\n",
1161                          i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i]));
1162                 dev_info(adev->dev, "  SDMA%d_GFX_DOORBELL=0x%08X\n",
1163                          i, RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i]));
1164                 mutex_lock(&adev->srbm_mutex);
1165                 for (j = 0; j < 16; j++) {
1166                         vi_srbm_select(adev, 0, 0, 0, j);
1167                         dev_info(adev->dev, "  VM %d:\n", j);
1168                         dev_info(adev->dev, "  SDMA%d_GFX_VIRTUAL_ADDR=0x%08X\n",
1169                                  i, RREG32(mmSDMA0_GFX_VIRTUAL_ADDR + sdma_offsets[i]));
1170                         dev_info(adev->dev, "  SDMA%d_GFX_APE1_CNTL=0x%08X\n",
1171                                  i, RREG32(mmSDMA0_GFX_APE1_CNTL + sdma_offsets[i]));
1172                 }
1173                 vi_srbm_select(adev, 0, 0, 0, 0);
1174                 mutex_unlock(&adev->srbm_mutex);
1175         }
1176 }
1177
1178 static int sdma_v3_0_soft_reset(void *handle)
1179 {
1180         u32 srbm_soft_reset = 0;
1181         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1182         u32 tmp = RREG32(mmSRBM_STATUS2);
1183
1184         if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) {
1185                 /* sdma0 */
1186                 tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
1187                 tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 0);
1188                 WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
1189                 srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK;
1190         }
1191         if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK) {
1192                 /* sdma1 */
1193                 tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
1194                 tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 0);
1195                 WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
1196                 srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK;
1197         }
1198
1199         if (srbm_soft_reset) {
1200                 sdma_v3_0_print_status((void *)adev);
1201
1202                 tmp = RREG32(mmSRBM_SOFT_RESET);
1203                 tmp |= srbm_soft_reset;
1204                 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
1205                 WREG32(mmSRBM_SOFT_RESET, tmp);
1206                 tmp = RREG32(mmSRBM_SOFT_RESET);
1207
1208                 udelay(50);
1209
1210                 tmp &= ~srbm_soft_reset;
1211                 WREG32(mmSRBM_SOFT_RESET, tmp);
1212                 tmp = RREG32(mmSRBM_SOFT_RESET);
1213
1214                 /* Wait a little for things to settle down */
1215                 udelay(50);
1216
1217                 sdma_v3_0_print_status((void *)adev);
1218         }
1219
1220         return 0;
1221 }
1222
1223 static int sdma_v3_0_set_trap_irq_state(struct amdgpu_device *adev,
1224                                         struct amdgpu_irq_src *source,
1225                                         unsigned type,
1226                                         enum amdgpu_interrupt_state state)
1227 {
1228         u32 sdma_cntl;
1229
1230         switch (type) {
1231         case AMDGPU_SDMA_IRQ_TRAP0:
1232                 switch (state) {
1233                 case AMDGPU_IRQ_STATE_DISABLE:
1234                         sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
1235                         sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 0);
1236                         WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl);
1237                         break;
1238                 case AMDGPU_IRQ_STATE_ENABLE:
1239                         sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
1240                         sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1);
1241                         WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl);
1242                         break;
1243                 default:
1244                         break;
1245                 }
1246                 break;
1247         case AMDGPU_SDMA_IRQ_TRAP1:
1248                 switch (state) {
1249                 case AMDGPU_IRQ_STATE_DISABLE:
1250                         sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
1251                         sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 0);
1252                         WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl);
1253                         break;
1254                 case AMDGPU_IRQ_STATE_ENABLE:
1255                         sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
1256                         sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1);
1257                         WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl);
1258                         break;
1259                 default:
1260                         break;
1261                 }
1262                 break;
1263         default:
1264                 break;
1265         }
1266         return 0;
1267 }
1268
1269 static int sdma_v3_0_process_trap_irq(struct amdgpu_device *adev,
1270                                       struct amdgpu_irq_src *source,
1271                                       struct amdgpu_iv_entry *entry)
1272 {
1273         u8 instance_id, queue_id;
1274
1275         instance_id = (entry->ring_id & 0x3) >> 0;
1276         queue_id = (entry->ring_id & 0xc) >> 2;
1277         DRM_DEBUG("IH: SDMA trap\n");
1278         switch (instance_id) {
1279         case 0:
1280                 switch (queue_id) {
1281                 case 0:
1282                         amdgpu_fence_process(&adev->sdma[0].ring);
1283                         break;
1284                 case 1:
1285                         /* XXX compute */
1286                         break;
1287                 case 2:
1288                         /* XXX compute */
1289                         break;
1290                 }
1291                 break;
1292         case 1:
1293                 switch (queue_id) {
1294                 case 0:
1295                         amdgpu_fence_process(&adev->sdma[1].ring);
1296                         break;
1297                 case 1:
1298                         /* XXX compute */
1299                         break;
1300                 case 2:
1301                         /* XXX compute */
1302                         break;
1303                 }
1304                 break;
1305         }
1306         return 0;
1307 }
1308
1309 static int sdma_v3_0_process_illegal_inst_irq(struct amdgpu_device *adev,
1310                                               struct amdgpu_irq_src *source,
1311                                               struct amdgpu_iv_entry *entry)
1312 {
1313         DRM_ERROR("Illegal instruction in SDMA command stream\n");
1314         schedule_work(&adev->reset_work);
1315         return 0;
1316 }
1317
1318 static int sdma_v3_0_set_clockgating_state(void *handle,
1319                                           enum amd_clockgating_state state)
1320 {
1321         return 0;
1322 }
1323
1324 static int sdma_v3_0_set_powergating_state(void *handle,
1325                                           enum amd_powergating_state state)
1326 {
1327         return 0;
1328 }
1329
1330 const struct amd_ip_funcs sdma_v3_0_ip_funcs = {
1331         .early_init = sdma_v3_0_early_init,
1332         .late_init = NULL,
1333         .sw_init = sdma_v3_0_sw_init,
1334         .sw_fini = sdma_v3_0_sw_fini,
1335         .hw_init = sdma_v3_0_hw_init,
1336         .hw_fini = sdma_v3_0_hw_fini,
1337         .suspend = sdma_v3_0_suspend,
1338         .resume = sdma_v3_0_resume,
1339         .is_idle = sdma_v3_0_is_idle,
1340         .wait_for_idle = sdma_v3_0_wait_for_idle,
1341         .soft_reset = sdma_v3_0_soft_reset,
1342         .print_status = sdma_v3_0_print_status,
1343         .set_clockgating_state = sdma_v3_0_set_clockgating_state,
1344         .set_powergating_state = sdma_v3_0_set_powergating_state,
1345 };
1346
1347 /**
1348  * sdma_v3_0_ring_is_lockup - Check if the DMA engine is locked up
1349  *
1350  * @ring: amdgpu_ring structure holding ring information
1351  *
1352  * Check if the async DMA engine is locked up (VI).
1353  * Returns true if the engine appears to be locked up, false if not.
1354  */
1355 static bool sdma_v3_0_ring_is_lockup(struct amdgpu_ring *ring)
1356 {
1357
1358         if (sdma_v3_0_is_idle(ring->adev)) {
1359                 amdgpu_ring_lockup_update(ring);
1360                 return false;
1361         }
1362         return amdgpu_ring_test_lockup(ring);
1363 }
1364
1365 static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
1366         .get_rptr = sdma_v3_0_ring_get_rptr,
1367         .get_wptr = sdma_v3_0_ring_get_wptr,
1368         .set_wptr = sdma_v3_0_ring_set_wptr,
1369         .parse_cs = NULL,
1370         .emit_ib = sdma_v3_0_ring_emit_ib,
1371         .emit_fence = sdma_v3_0_ring_emit_fence,
1372         .emit_semaphore = sdma_v3_0_ring_emit_semaphore,
1373         .emit_vm_flush = sdma_v3_0_ring_emit_vm_flush,
1374         .emit_hdp_flush = sdma_v3_0_ring_emit_hdp_flush,
1375         .test_ring = sdma_v3_0_ring_test_ring,
1376         .test_ib = sdma_v3_0_ring_test_ib,
1377         .is_lockup = sdma_v3_0_ring_is_lockup,
1378 };
1379
1380 static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev)
1381 {
1382         adev->sdma[0].ring.funcs = &sdma_v3_0_ring_funcs;
1383         adev->sdma[1].ring.funcs = &sdma_v3_0_ring_funcs;
1384 }
1385
1386 static const struct amdgpu_irq_src_funcs sdma_v3_0_trap_irq_funcs = {
1387         .set = sdma_v3_0_set_trap_irq_state,
1388         .process = sdma_v3_0_process_trap_irq,
1389 };
1390
1391 static const struct amdgpu_irq_src_funcs sdma_v3_0_illegal_inst_irq_funcs = {
1392         .process = sdma_v3_0_process_illegal_inst_irq,
1393 };
1394
1395 static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev)
1396 {
1397         adev->sdma_trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
1398         adev->sdma_trap_irq.funcs = &sdma_v3_0_trap_irq_funcs;
1399         adev->sdma_illegal_inst_irq.funcs = &sdma_v3_0_illegal_inst_irq_funcs;
1400 }
1401
1402 /**
1403  * sdma_v3_0_emit_copy_buffer - copy buffer using the sDMA engine
1404  *
1405  * @ring: amdgpu_ring structure holding ring information
1406  * @src_offset: src GPU address
1407  * @dst_offset: dst GPU address
1408  * @byte_count: number of bytes to xfer
1409  *
1410  * Copy GPU buffers using the DMA engine (VI).
1411  * Used by the amdgpu ttm implementation to move pages if
1412  * registered as the asic copy callback.
1413  */
1414 static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ring *ring,
1415                                        uint64_t src_offset,
1416                                        uint64_t dst_offset,
1417                                        uint32_t byte_count)
1418 {
1419         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
1420                           SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR));
1421         amdgpu_ring_write(ring, byte_count);
1422         amdgpu_ring_write(ring, 0); /* src/dst endian swap */
1423         amdgpu_ring_write(ring, lower_32_bits(src_offset));
1424         amdgpu_ring_write(ring, upper_32_bits(src_offset));
1425         amdgpu_ring_write(ring, lower_32_bits(dst_offset));
1426         amdgpu_ring_write(ring, upper_32_bits(dst_offset));
1427 }
1428
1429 /**
1430  * sdma_v3_0_emit_fill_buffer - fill buffer using the sDMA engine
1431  *
1432  * @ring: amdgpu_ring structure holding ring information
1433  * @src_data: value to write to buffer
1434  * @dst_offset: dst GPU address
1435  * @byte_count: number of bytes to xfer
1436  *
1437  * Fill GPU buffers using the DMA engine (VI).
1438  */
1439 static void sdma_v3_0_emit_fill_buffer(struct amdgpu_ring *ring,
1440                                        uint32_t src_data,
1441                                        uint64_t dst_offset,
1442                                        uint32_t byte_count)
1443 {
1444         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL));
1445         amdgpu_ring_write(ring, lower_32_bits(dst_offset));
1446         amdgpu_ring_write(ring, upper_32_bits(dst_offset));
1447         amdgpu_ring_write(ring, src_data);
1448         amdgpu_ring_write(ring, byte_count);
1449 }
1450
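/* copy_num_dw and fill_num_dw match the packet sizes emitted by the helpers
 * above; 0x1fffff is the largest transfer the driver describes with a single
 * packet.
 */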
1451 static const struct amdgpu_buffer_funcs sdma_v3_0_buffer_funcs = {
1452         .copy_max_bytes = 0x1fffff,
1453         .copy_num_dw = 7,
1454         .emit_copy_buffer = sdma_v3_0_emit_copy_buffer,
1455
1456         .fill_max_bytes = 0x1fffff,
1457         .fill_num_dw = 5,
1458         .emit_fill_buffer = sdma_v3_0_emit_fill_buffer,
1459 };
1460
1461 static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev)
1462 {
1463         if (adev->mman.buffer_funcs == NULL) {
1464                 adev->mman.buffer_funcs = &sdma_v3_0_buffer_funcs;
1465                 adev->mman.buffer_funcs_ring = &adev->sdma[0].ring;
1466         }
1467 }
1468
1469 static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
1470         .copy_pte = sdma_v3_0_vm_copy_pte,
1471         .write_pte = sdma_v3_0_vm_write_pte,
1472         .set_pte_pde = sdma_v3_0_vm_set_pte_pde,
1473         .pad_ib = sdma_v3_0_vm_pad_ib,
1474 };
1475
1476 static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev)
1477 {
1478         if (adev->vm_manager.vm_pte_funcs == NULL) {
1479                 adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs;
1480                 adev->vm_manager.vm_pte_funcs_ring = &adev->sdma[0].ring;
1481         }
1482 }