1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */
3
4
5 #include "msm_gem.h"
6 #include "msm_mmu.h"
7 #include "msm_gpu_trace.h"
8 #include "a6xx_gpu.h"
9 #include "a6xx_gmu.xml.h"
10
11 #include <linux/bitfield.h>
12 #include <linux/devfreq.h>
13 #include <linux/pm_domain.h>
14 #include <linux/soc/qcom/llcc-qcom.h>
15
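/*
 * Peripheral Authentication Service (PAS) id for the GPU, passed to
 * adreno_zap_shader_load() when bringing up the zap shader.
 */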
16 #define GPU_PAS_ID 13
17
18 static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
19 {
20         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
21         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
22
23         /* Check that the GMU is idle */
24         if (!a6xx_gmu_isidle(&a6xx_gpu->gmu))
25                 return false;
26
27         /* Check that the CX master is idle */
28         if (gpu_read(gpu, REG_A6XX_RBBM_STATUS) &
29                         ~A6XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
30                 return false;
31
32         return !(gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS) &
33                 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
34 }
35
36 static bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
37 {
38         /* wait for CP to drain ringbuffer: */
39         if (!adreno_idle(gpu, ring))
40                 return false;
41
42         if (spin_until(_a6xx_check_idle(gpu))) {
43                 DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
44                         gpu->name, __builtin_return_address(0),
45                         gpu_read(gpu, REG_A6XX_RBBM_STATUS),
46                         gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS),
47                         gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
48                         gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
49                 return false;
50         }
51
52         return true;
53 }
54
55 static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
56 {
57         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
58         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
59
60         /* Expanded APRIV doesn't need to issue the WHERE_AM_I opcode */
61         if (a6xx_gpu->has_whereami && !adreno_gpu->base.hw_apriv) {
62                 OUT_PKT7(ring, CP_WHERE_AM_I, 2);
63                 OUT_RING(ring, lower_32_bits(shadowptr(a6xx_gpu, ring)));
64                 OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring)));
65         }
66 }
67
68 static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
69 {
70         uint32_t wptr;
71         unsigned long flags;
72
73         update_shadow_rptr(gpu, ring);
74
75         spin_lock_irqsave(&ring->preempt_lock, flags);
76
77         /* Copy the shadow to the actual register */
78         ring->cur = ring->next;
79
80         /* Make sure to wrap wptr if we need to */
81         wptr = get_wptr(ring);
82
83         spin_unlock_irqrestore(&ring->preempt_lock, flags);
84
85         /* Make sure everything is posted before making a decision */
86         mb();
87
88         gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
89 }
90
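/*
 * Snapshot a 64-bit counter register pair into ringbuffer memory with
 * CP_REG_TO_MEM, so per-submit stats can be read back after the submit
 * completes.
 */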
91 static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter,
92                 u64 iova)
93 {
94         OUT_PKT7(ring, CP_REG_TO_MEM, 3);
95         OUT_RING(ring, CP_REG_TO_MEM_0_REG(counter) |
96                 CP_REG_TO_MEM_0_CNT(2) |
97                 CP_REG_TO_MEM_0_64B);
98         OUT_RING(ring, lower_32_bits(iova));
99         OUT_RING(ring, upper_32_bits(iova));
100 }
101
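/*
 * Switch to the page table of the submitting context via
 * CP_SMMU_TABLE_UPDATE, skipping the update if that context is already
 * current. When system profiling is not active, protected mode is dropped
 * around a perfcounter SRAM re-init that runs in parallel with the switch.
 */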
102 static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
103                 struct msm_ringbuffer *ring, struct msm_file_private *ctx)
104 {
105         bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;
106         phys_addr_t ttbr;
107         u32 asid;
108         u64 memptr = rbmemptr(ring, ttbr0);
109
110         if (ctx->seqno == a6xx_gpu->base.base.cur_ctx_seqno)
111                 return;
112
113         if (msm_iommu_pagetable_params(ctx->aspace->mmu, &ttbr, &asid))
114                 return;
115
116         if (!sysprof) {
117                 /* Turn off protected mode to write to special registers */
118                 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
119                 OUT_RING(ring, 0);
120
121                 OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
122                 OUT_RING(ring, 1);
123         }
124
125         /* Execute the table update */
126         OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4);
127         OUT_RING(ring, CP_SMMU_TABLE_UPDATE_0_TTBR0_LO(lower_32_bits(ttbr)));
128
129         OUT_RING(ring,
130                 CP_SMMU_TABLE_UPDATE_1_TTBR0_HI(upper_32_bits(ttbr)) |
131                 CP_SMMU_TABLE_UPDATE_1_ASID(asid));
132         OUT_RING(ring, CP_SMMU_TABLE_UPDATE_2_CONTEXTIDR(0));
133         OUT_RING(ring, CP_SMMU_TABLE_UPDATE_3_CONTEXTBANK(0));
134
135         /*
136          * Write the new TTBR0 to the memstore. This is good for debugging.
137          */
138         OUT_PKT7(ring, CP_MEM_WRITE, 4);
139         OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr)));
140         OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr)));
141         OUT_RING(ring, lower_32_bits(ttbr));
142         OUT_RING(ring, (asid << 16) | upper_32_bits(ttbr));
143
144         /*
145          * And finally, trigger a uche flush to be sure there isn't anything
146          * lingering in that part of the GPU
147          */
148
149         OUT_PKT7(ring, CP_EVENT_WRITE, 1);
150         OUT_RING(ring, CACHE_INVALIDATE);
151
152         if (!sysprof) {
153                 /*
154                  * Wait for SRAM clear after the pgtable update, so the
155                  * two can happen in parallel:
156                  */
157                 OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
158                 OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ));
159                 OUT_RING(ring, CP_WAIT_REG_MEM_1_POLL_ADDR_LO(
160                                 REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS));
161                 OUT_RING(ring, CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0));
162                 OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1));
163                 OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1));
164                 OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0));
165
166                 /* Re-enable protected mode: */
167                 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
168                 OUT_RING(ring, 1);
169         }
170 }
171
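/*
 * Write a submit into the ringbuffer: switch pagetables if needed, start the
 * stats counters, invalidate the CCU, emit the IBs, stop the counters and
 * finish with a CACHE_FLUSH_TS event that writes the fence and raises an IRQ.
 */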
172 static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
173 {
174         unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
175         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
176         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
177         struct msm_ringbuffer *ring = submit->ring;
178         unsigned int i, ibs = 0;
179
180         a6xx_set_pagetable(a6xx_gpu, ring, submit->queue->ctx);
181
182         get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
183                 rbmemptr_stats(ring, index, cpcycles_start));
184
185         /*
186          * For PM4 the GMU register offsets are calculated from the base of the
187          * GPU registers so we need to add 0x1a800 to the register value on A630
188          * to get the right value from PM4.
189          */
190         get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
191                 rbmemptr_stats(ring, index, alwayson_start));
192
193         /* Invalidate CCU depth and color */
194         OUT_PKT7(ring, CP_EVENT_WRITE, 1);
195         OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_DEPTH));
196
197         OUT_PKT7(ring, CP_EVENT_WRITE, 1);
198         OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_COLOR));
199
200         /* Submit the commands */
201         for (i = 0; i < submit->nr_cmds; i++) {
202                 switch (submit->cmd[i].type) {
203                 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
204                         break;
205                 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
206                         if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
207                                 break;
208                         fallthrough;
209                 case MSM_SUBMIT_CMD_BUF:
210                         OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
211                         OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
212                         OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
213                         OUT_RING(ring, submit->cmd[i].size);
214                         ibs++;
215                         break;
216                 }
217
218                 /*
219                  * Periodically update shadow-wptr if needed, so that we
220                  * can see partial progress of submits with large # of
221                  * cmds.. otherwise we could needlessly stall waiting for
222                  * ringbuffer state, simply due to looking at a shadow
223                  * rptr value that has not been updated
224                  */
225                 if ((ibs % 32) == 0)
226                         update_shadow_rptr(gpu, ring);
227         }
228
229         get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
230                 rbmemptr_stats(ring, index, cpcycles_end));
231         get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
232                 rbmemptr_stats(ring, index, alwayson_end));
233
234         /* Write the fence to the scratch register */
235         OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1);
236         OUT_RING(ring, submit->seqno);
237
238         /*
239          * Execute a CACHE_FLUSH_TS event. This ensures that the
240          * timestamp is written to memory and then triggers the interrupt
241          */
242         OUT_PKT7(ring, CP_EVENT_WRITE, 4);
243         OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
244                 CP_EVENT_WRITE_0_IRQ);
245         OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
246         OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
247         OUT_RING(ring, submit->seqno);
248
249         trace_msm_gpu_submit_flush(submit,
250                 gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER));
251
252         a6xx_flush(gpu, ring);
253 }
254
255 /* For a615 family (a615, a616, a618 and a619) */
256 const struct adreno_reglist a615_hwcg[] = {
257         {REG_A6XX_RBBM_CLOCK_CNTL_SP0,  0x02222222},
258         {REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
259         {REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
260         {REG_A6XX_RBBM_CLOCK_HYST_SP0,  0x0000F3CF},
261         {REG_A6XX_RBBM_CLOCK_CNTL_TP0,  0x02222222},
262         {REG_A6XX_RBBM_CLOCK_CNTL_TP1,  0x02222222},
263         {REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
264         {REG_A6XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
265         {REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
266         {REG_A6XX_RBBM_CLOCK_CNTL3_TP1, 0x22222222},
267         {REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
268         {REG_A6XX_RBBM_CLOCK_CNTL4_TP1, 0x00022222},
269         {REG_A6XX_RBBM_CLOCK_HYST_TP0,  0x77777777},
270         {REG_A6XX_RBBM_CLOCK_HYST_TP1,  0x77777777},
271         {REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
272         {REG_A6XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
273         {REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
274         {REG_A6XX_RBBM_CLOCK_HYST3_TP1, 0x77777777},
275         {REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
276         {REG_A6XX_RBBM_CLOCK_HYST4_TP1, 0x00077777},
277         {REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
278         {REG_A6XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
279         {REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
280         {REG_A6XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
281         {REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
282         {REG_A6XX_RBBM_CLOCK_DELAY3_TP1, 0x11111111},
283         {REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
284         {REG_A6XX_RBBM_CLOCK_DELAY4_TP1, 0x00011111},
285         {REG_A6XX_RBBM_CLOCK_CNTL_UCHE,  0x22222222},
286         {REG_A6XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
287         {REG_A6XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
288         {REG_A6XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
289         {REG_A6XX_RBBM_CLOCK_HYST_UCHE,  0x00000004},
290         {REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
291         {REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
292         {REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x00002222},
293         {REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002020},
294         {REG_A6XX_RBBM_CLOCK_CNTL_CCU1, 0x00002220},
295         {REG_A6XX_RBBM_CLOCK_CNTL_CCU2, 0x00002220},
296         {REG_A6XX_RBBM_CLOCK_CNTL_CCU3, 0x00002220},
297         {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
298         {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU1, 0x00040F00},
299         {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU2, 0x00040F00},
300         {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU3, 0x00040F00},
301         {REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05022022},
302         {REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
303         {REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
304         {REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
305         {REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
306         {REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
307         {REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
308         {REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
309         {REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
310         {REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
311         {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
312         {REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
313         {REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
314         {REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
315         {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
316         {REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
317         {REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
318         {REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
319         {REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
320         {},
321 };
322
323 const struct adreno_reglist a630_hwcg[] = {
324         {REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x22222222},
325         {REG_A6XX_RBBM_CLOCK_CNTL_SP1, 0x22222222},
326         {REG_A6XX_RBBM_CLOCK_CNTL_SP2, 0x22222222},
327         {REG_A6XX_RBBM_CLOCK_CNTL_SP3, 0x22222222},
328         {REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02022220},
329         {REG_A6XX_RBBM_CLOCK_CNTL2_SP1, 0x02022220},
330         {REG_A6XX_RBBM_CLOCK_CNTL2_SP2, 0x02022220},
331         {REG_A6XX_RBBM_CLOCK_CNTL2_SP3, 0x02022220},
332         {REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
333         {REG_A6XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
334         {REG_A6XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
335         {REG_A6XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
336         {REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000f3cf},
337         {REG_A6XX_RBBM_CLOCK_HYST_SP1, 0x0000f3cf},
338         {REG_A6XX_RBBM_CLOCK_HYST_SP2, 0x0000f3cf},
339         {REG_A6XX_RBBM_CLOCK_HYST_SP3, 0x0000f3cf},
340         {REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
341         {REG_A6XX_RBBM_CLOCK_CNTL_TP1, 0x02222222},
342         {REG_A6XX_RBBM_CLOCK_CNTL_TP2, 0x02222222},
343         {REG_A6XX_RBBM_CLOCK_CNTL_TP3, 0x02222222},
344         {REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
345         {REG_A6XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
346         {REG_A6XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
347         {REG_A6XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
348         {REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
349         {REG_A6XX_RBBM_CLOCK_CNTL3_TP1, 0x22222222},
350         {REG_A6XX_RBBM_CLOCK_CNTL3_TP2, 0x22222222},
351         {REG_A6XX_RBBM_CLOCK_CNTL3_TP3, 0x22222222},
352         {REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
353         {REG_A6XX_RBBM_CLOCK_CNTL4_TP1, 0x00022222},
354         {REG_A6XX_RBBM_CLOCK_CNTL4_TP2, 0x00022222},
355         {REG_A6XX_RBBM_CLOCK_CNTL4_TP3, 0x00022222},
356         {REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
357         {REG_A6XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
358         {REG_A6XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
359         {REG_A6XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
360         {REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
361         {REG_A6XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
362         {REG_A6XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
363         {REG_A6XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
364         {REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
365         {REG_A6XX_RBBM_CLOCK_HYST3_TP1, 0x77777777},
366         {REG_A6XX_RBBM_CLOCK_HYST3_TP2, 0x77777777},
367         {REG_A6XX_RBBM_CLOCK_HYST3_TP3, 0x77777777},
368         {REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
369         {REG_A6XX_RBBM_CLOCK_HYST4_TP1, 0x00077777},
370         {REG_A6XX_RBBM_CLOCK_HYST4_TP2, 0x00077777},
371         {REG_A6XX_RBBM_CLOCK_HYST4_TP3, 0x00077777},
372         {REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
373         {REG_A6XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
374         {REG_A6XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
375         {REG_A6XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
376         {REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
377         {REG_A6XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
378         {REG_A6XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
379         {REG_A6XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
380         {REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
381         {REG_A6XX_RBBM_CLOCK_DELAY3_TP1, 0x11111111},
382         {REG_A6XX_RBBM_CLOCK_DELAY3_TP2, 0x11111111},
383         {REG_A6XX_RBBM_CLOCK_DELAY3_TP3, 0x11111111},
384         {REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
385         {REG_A6XX_RBBM_CLOCK_DELAY4_TP1, 0x00011111},
386         {REG_A6XX_RBBM_CLOCK_DELAY4_TP2, 0x00011111},
387         {REG_A6XX_RBBM_CLOCK_DELAY4_TP3, 0x00011111},
388         {REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
389         {REG_A6XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
390         {REG_A6XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
391         {REG_A6XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
392         {REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
393         {REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
394         {REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
395         {REG_A6XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
396         {REG_A6XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
397         {REG_A6XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
398         {REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x00002222},
399         {REG_A6XX_RBBM_CLOCK_CNTL2_RB1, 0x00002222},
400         {REG_A6XX_RBBM_CLOCK_CNTL2_RB2, 0x00002222},
401         {REG_A6XX_RBBM_CLOCK_CNTL2_RB3, 0x00002222},
402         {REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
403         {REG_A6XX_RBBM_CLOCK_CNTL_CCU1, 0x00002220},
404         {REG_A6XX_RBBM_CLOCK_CNTL_CCU2, 0x00002220},
405         {REG_A6XX_RBBM_CLOCK_CNTL_CCU3, 0x00002220},
406         {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040f00},
407         {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU1, 0x00040f00},
408         {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU2, 0x00040f00},
409         {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU3, 0x00040f00},
410         {REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05022022},
411         {REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
412         {REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
413         {REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
414         {REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
415         {REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
416         {REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
417         {REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
418         {REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
419         {REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
420         {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
421         {REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
422         {REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
423         {REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
424         {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
425         {REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
426         {REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
427         {REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
428         {REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
429         {},
430 };
431
432 const struct adreno_reglist a640_hwcg[] = {
433         {REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
434         {REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
435         {REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
436         {REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
437         {REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
438         {REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
439         {REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
440         {REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
441         {REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
442         {REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
443         {REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
444         {REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
445         {REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
446         {REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
447         {REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
448         {REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
449         {REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
450         {REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
451         {REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
452         {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
453         {REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05222022},
454         {REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
455         {REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
456         {REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
457         {REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
458         {REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
459         {REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
460         {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
461         {REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
462         {REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
463         {REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
464         {REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
465         {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
466         {REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
467         {REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
468         {REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
469         {REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
470         {REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
471         {REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
472         {REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000000},
473         {REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
474         {REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
475         {REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
476         {REG_A6XX_RBBM_ISDB_CNT, 0x00000182},
477         {REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
478         {REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000},
479         {REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
480         {REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
481         {REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
482         {},
483 };
484
485 const struct adreno_reglist a650_hwcg[] = {
486         {REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
487         {REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
488         {REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
489         {REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
490         {REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
491         {REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
492         {REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
493         {REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
494         {REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
495         {REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
496         {REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
497         {REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
498         {REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
499         {REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
500         {REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
501         {REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
502         {REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
503         {REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
504         {REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
505         {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
506         {REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022},
507         {REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
508         {REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
509         {REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
510         {REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
511         {REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
512         {REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
513         {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
514         {REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
515         {REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
516         {REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
517         {REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
518         {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
519         {REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
520         {REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
521         {REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
522         {REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
523         {REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
524         {REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
525         {REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000777},
526         {REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
527         {REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
528         {REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
529         {REG_A6XX_RBBM_ISDB_CNT, 0x00000182},
530         {REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
531         {REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000},
532         {REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
533         {REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
534         {REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
535         {},
536 };
537
538 const struct adreno_reglist a660_hwcg[] = {
539         {REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
540         {REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
541         {REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
542         {REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
543         {REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
544         {REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
545         {REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
546         {REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
547         {REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
548         {REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
549         {REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
550         {REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
551         {REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
552         {REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
553         {REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
554         {REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
555         {REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
556         {REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
557         {REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
558         {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
559         {REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022},
560         {REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
561         {REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
562         {REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
563         {REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
564         {REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
565         {REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
566         {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
567         {REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
568         {REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
569         {REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
570         {REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
571         {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
572         {REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
573         {REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
574         {REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
575         {REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
576         {REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
577         {REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
578         {REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000000},
579         {REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
580         {REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
581         {REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
582         {REG_A6XX_RBBM_ISDB_CNT, 0x00000182},
583         {REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
584         {REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000},
585         {REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
586         {REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
587         {REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
588         {},
589 };
590
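/*
 * Enable or disable hardware clock gating: the SP clock is parked via the GMU
 * while the per-target HWCG register list is programmed, then RBBM_CLOCK_CNTL
 * is written to switch the feature on or off.
 */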
591 static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state)
592 {
593         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
594         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
595         struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
596         const struct adreno_reglist *reg;
597         unsigned int i;
598         u32 val, clock_cntl_on;
599
600         if (!adreno_gpu->info->hwcg)
601                 return;
602
603         if (adreno_is_a630(adreno_gpu))
604                 clock_cntl_on = 0x8aa8aa02;
605         else
606                 clock_cntl_on = 0x8aa8aa82;
607
608         val = gpu_read(gpu, REG_A6XX_RBBM_CLOCK_CNTL);
609
610         /* Don't re-program the registers if they are already correct */
611         if ((!state && !val) || (state && (val == clock_cntl_on)))
612                 return;
613
614         /* Disable SP clock before programming HWCG registers */
615         gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0);
616
617         for (i = 0; (reg = &adreno_gpu->info->hwcg[i], reg->offset); i++)
618                 gpu_write(gpu, reg->offset, state ? reg->value : 0);
619
620         /* Enable SP clock */
621         gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1);
622
623         gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? clock_cntl_on : 0);
624 }
625
626 /* For a615, a616, a618, a619, a630, a640 and a680 */
627 static const u32 a6xx_protect[] = {
628         A6XX_PROTECT_RDONLY(0x00000, 0x04ff),
629         A6XX_PROTECT_RDONLY(0x00501, 0x0005),
630         A6XX_PROTECT_RDONLY(0x0050b, 0x02f4),
631         A6XX_PROTECT_NORDWR(0x0050e, 0x0000),
632         A6XX_PROTECT_NORDWR(0x00510, 0x0000),
633         A6XX_PROTECT_NORDWR(0x00534, 0x0000),
634         A6XX_PROTECT_NORDWR(0x00800, 0x0082),
635         A6XX_PROTECT_NORDWR(0x008a0, 0x0008),
636         A6XX_PROTECT_NORDWR(0x008ab, 0x0024),
637         A6XX_PROTECT_RDONLY(0x008de, 0x00ae),
638         A6XX_PROTECT_NORDWR(0x00900, 0x004d),
639         A6XX_PROTECT_NORDWR(0x0098d, 0x0272),
640         A6XX_PROTECT_NORDWR(0x00e00, 0x0001),
641         A6XX_PROTECT_NORDWR(0x00e03, 0x000c),
642         A6XX_PROTECT_NORDWR(0x03c00, 0x00c3),
643         A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff),
644         A6XX_PROTECT_NORDWR(0x08630, 0x01cf),
645         A6XX_PROTECT_NORDWR(0x08e00, 0x0000),
646         A6XX_PROTECT_NORDWR(0x08e08, 0x0000),
647         A6XX_PROTECT_NORDWR(0x08e50, 0x001f),
648         A6XX_PROTECT_NORDWR(0x09624, 0x01db),
649         A6XX_PROTECT_NORDWR(0x09e70, 0x0001),
650         A6XX_PROTECT_NORDWR(0x09e78, 0x0187),
651         A6XX_PROTECT_NORDWR(0x0a630, 0x01cf),
652         A6XX_PROTECT_NORDWR(0x0ae02, 0x0000),
653         A6XX_PROTECT_NORDWR(0x0ae50, 0x032f),
654         A6XX_PROTECT_NORDWR(0x0b604, 0x0000),
655         A6XX_PROTECT_NORDWR(0x0be02, 0x0001),
656         A6XX_PROTECT_NORDWR(0x0be20, 0x17df),
657         A6XX_PROTECT_NORDWR(0x0f000, 0x0bff),
658         A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff),
659         A6XX_PROTECT_NORDWR(0x11c00, 0x0000), /* note: infinite range */
660 };
661
662 /* These are for a620 and a650 */
663 static const u32 a650_protect[] = {
664         A6XX_PROTECT_RDONLY(0x00000, 0x04ff),
665         A6XX_PROTECT_RDONLY(0x00501, 0x0005),
666         A6XX_PROTECT_RDONLY(0x0050b, 0x02f4),
667         A6XX_PROTECT_NORDWR(0x0050e, 0x0000),
668         A6XX_PROTECT_NORDWR(0x00510, 0x0000),
669         A6XX_PROTECT_NORDWR(0x00534, 0x0000),
670         A6XX_PROTECT_NORDWR(0x00800, 0x0082),
671         A6XX_PROTECT_NORDWR(0x008a0, 0x0008),
672         A6XX_PROTECT_NORDWR(0x008ab, 0x0024),
673         A6XX_PROTECT_RDONLY(0x008de, 0x00ae),
674         A6XX_PROTECT_NORDWR(0x00900, 0x004d),
675         A6XX_PROTECT_NORDWR(0x0098d, 0x0272),
676         A6XX_PROTECT_NORDWR(0x00e00, 0x0001),
677         A6XX_PROTECT_NORDWR(0x00e03, 0x000c),
678         A6XX_PROTECT_NORDWR(0x03c00, 0x00c3),
679         A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff),
680         A6XX_PROTECT_NORDWR(0x08630, 0x01cf),
681         A6XX_PROTECT_NORDWR(0x08e00, 0x0000),
682         A6XX_PROTECT_NORDWR(0x08e08, 0x0000),
683         A6XX_PROTECT_NORDWR(0x08e50, 0x001f),
684         A6XX_PROTECT_NORDWR(0x08e80, 0x027f),
685         A6XX_PROTECT_NORDWR(0x09624, 0x01db),
686         A6XX_PROTECT_NORDWR(0x09e60, 0x0011),
687         A6XX_PROTECT_NORDWR(0x09e78, 0x0187),
688         A6XX_PROTECT_NORDWR(0x0a630, 0x01cf),
689         A6XX_PROTECT_NORDWR(0x0ae02, 0x0000),
690         A6XX_PROTECT_NORDWR(0x0ae50, 0x032f),
691         A6XX_PROTECT_NORDWR(0x0b604, 0x0000),
692         A6XX_PROTECT_NORDWR(0x0b608, 0x0007),
693         A6XX_PROTECT_NORDWR(0x0be02, 0x0001),
694         A6XX_PROTECT_NORDWR(0x0be20, 0x17df),
695         A6XX_PROTECT_NORDWR(0x0f000, 0x0bff),
696         A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff),
697         A6XX_PROTECT_NORDWR(0x18400, 0x1fff),
698         A6XX_PROTECT_NORDWR(0x1a800, 0x1fff),
699         A6XX_PROTECT_NORDWR(0x1f400, 0x0443),
700         A6XX_PROTECT_RDONLY(0x1f844, 0x007b),
701         A6XX_PROTECT_NORDWR(0x1f887, 0x001b),
702         A6XX_PROTECT_NORDWR(0x1f8c0, 0x0000), /* note: infinite range */
703 };
704
705 /* These are for a635 and a660 */
706 static const u32 a660_protect[] = {
707         A6XX_PROTECT_RDONLY(0x00000, 0x04ff),
708         A6XX_PROTECT_RDONLY(0x00501, 0x0005),
709         A6XX_PROTECT_RDONLY(0x0050b, 0x02f4),
710         A6XX_PROTECT_NORDWR(0x0050e, 0x0000),
711         A6XX_PROTECT_NORDWR(0x00510, 0x0000),
712         A6XX_PROTECT_NORDWR(0x00534, 0x0000),
713         A6XX_PROTECT_NORDWR(0x00800, 0x0082),
714         A6XX_PROTECT_NORDWR(0x008a0, 0x0008),
715         A6XX_PROTECT_NORDWR(0x008ab, 0x0024),
716         A6XX_PROTECT_RDONLY(0x008de, 0x00ae),
717         A6XX_PROTECT_NORDWR(0x00900, 0x004d),
718         A6XX_PROTECT_NORDWR(0x0098d, 0x0272),
719         A6XX_PROTECT_NORDWR(0x00e00, 0x0001),
720         A6XX_PROTECT_NORDWR(0x00e03, 0x000c),
721         A6XX_PROTECT_NORDWR(0x03c00, 0x00c3),
722         A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff),
723         A6XX_PROTECT_NORDWR(0x08630, 0x01cf),
724         A6XX_PROTECT_NORDWR(0x08e00, 0x0000),
725         A6XX_PROTECT_NORDWR(0x08e08, 0x0000),
726         A6XX_PROTECT_NORDWR(0x08e50, 0x001f),
727         A6XX_PROTECT_NORDWR(0x08e80, 0x027f),
728         A6XX_PROTECT_NORDWR(0x09624, 0x01db),
729         A6XX_PROTECT_NORDWR(0x09e60, 0x0011),
730         A6XX_PROTECT_NORDWR(0x09e78, 0x0187),
731         A6XX_PROTECT_NORDWR(0x0a630, 0x01cf),
732         A6XX_PROTECT_NORDWR(0x0ae02, 0x0000),
733         A6XX_PROTECT_NORDWR(0x0ae50, 0x012f),
734         A6XX_PROTECT_NORDWR(0x0b604, 0x0000),
735         A6XX_PROTECT_NORDWR(0x0b608, 0x0006),
736         A6XX_PROTECT_NORDWR(0x0be02, 0x0001),
737         A6XX_PROTECT_NORDWR(0x0be20, 0x015f),
738         A6XX_PROTECT_NORDWR(0x0d000, 0x05ff),
739         A6XX_PROTECT_NORDWR(0x0f000, 0x0bff),
740         A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff),
741         A6XX_PROTECT_NORDWR(0x18400, 0x1fff),
742         A6XX_PROTECT_NORDWR(0x1a400, 0x1fff),
743         A6XX_PROTECT_NORDWR(0x1f400, 0x0443),
744         A6XX_PROTECT_RDONLY(0x1f844, 0x007b),
745         A6XX_PROTECT_NORDWR(0x1f860, 0x0000),
746         A6XX_PROTECT_NORDWR(0x1f887, 0x001b),
747         A6XX_PROTECT_NORDWR(0x1f8c0, 0x0000), /* note: infinite range */
748 };
749
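/*
 * Program the CP register protection list for this target. All but the last
 * entry are written in order; the final entry is placed in the last available
 * CP_PROTECT slot so that it covers an open-ended range.
 */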
750 static void a6xx_set_cp_protect(struct msm_gpu *gpu)
751 {
752         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
753         const u32 *regs = a6xx_protect;
754         unsigned i, count, count_max;
755
756         if (adreno_is_a650(adreno_gpu)) {
757                 regs = a650_protect;
758                 count = ARRAY_SIZE(a650_protect);
759                 count_max = 48;
760                 BUILD_BUG_ON(ARRAY_SIZE(a650_protect) > 48);
761         } else if (adreno_is_a660_family(adreno_gpu)) {
762                 regs = a660_protect;
763                 count = ARRAY_SIZE(a660_protect);
764                 count_max = 48;
765                 BUILD_BUG_ON(ARRAY_SIZE(a660_protect) > 48);
766         } else {
767                 regs = a6xx_protect;
768                 count = ARRAY_SIZE(a6xx_protect);
769                 count_max = 32;
770                 BUILD_BUG_ON(ARRAY_SIZE(a6xx_protect) > 32);
771         }
772
773         /*
774          * Enable access protection to privileged registers, fault on an access
775          * protect violation and select the last span to protect from the start
776          * address all the way to the end of the register address space
777          */
778         gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL, BIT(0) | BIT(1) | BIT(3));
779
780         for (i = 0; i < count - 1; i++)
781                 gpu_write(gpu, REG_A6XX_CP_PROTECT(i), regs[i]);
782         /* Write the last entry into the final CP_PROTECT slot so it gets "infinite" length */
783         gpu_write(gpu, REG_A6XX_CP_PROTECT(count_max - 1), regs[i]);
784 }
785
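/*
 * Program the per-target UBWC (universal bandwidth compression) settings into
 * the RB, TPL1, SP and UCHE mode control registers; a618 keeps the hardware
 * default values.
 */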
786 static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
787 {
788         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
789         u32 lower_bit = 2;
790         u32 amsbc = 0;
791         u32 rgb565_predicator = 0;
792         u32 uavflagprd_inv = 0;
793
794         /* a618 is using the hw default values */
795         if (adreno_is_a618(adreno_gpu))
796                 return;
797
798         if (adreno_is_a640_family(adreno_gpu))
799                 amsbc = 1;
800
801         if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu)) {
802                 /* TODO: get ddr type from bootloader and use 2 for LPDDR4 */
803                 lower_bit = 3;
804                 amsbc = 1;
805                 rgb565_predicator = 1;
806                 uavflagprd_inv = 2;
807         }
808
809         if (adreno_is_7c3(adreno_gpu)) {
810                 lower_bit = 1;
811                 amsbc = 1;
812                 rgb565_predicator = 1;
813                 uavflagprd_inv = 2;
814         }
815
816         gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL,
817                 rgb565_predicator << 11 | amsbc << 4 | lower_bit << 1);
818         gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, lower_bit << 1);
819         gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
820                 uavflagprd_inv << 4 | lower_bit << 1);
821         gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, lower_bit << 21);
822 }
823
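/* Send CP_ME_INIT to bring up the CP and wait for the GPU to go idle again */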
824 static int a6xx_cp_init(struct msm_gpu *gpu)
825 {
826         struct msm_ringbuffer *ring = gpu->rb[0];
827
828         OUT_PKT7(ring, CP_ME_INIT, 8);
829
830         OUT_RING(ring, 0x0000002f);
831
832         /* Enable multiple hardware contexts */
833         OUT_RING(ring, 0x00000003);
834
835         /* Enable error detection */
836         OUT_RING(ring, 0x20000000);
837
838         /* Don't enable header dump */
839         OUT_RING(ring, 0x00000000);
840         OUT_RING(ring, 0x00000000);
841
842         /* No workarounds enabled */
843         OUT_RING(ring, 0x00000000);
844
845         /* Pad rest of the cmds with 0's */
846         OUT_RING(ring, 0x00000000);
847         OUT_RING(ring, 0x00000000);
848
849         a6xx_flush(gpu, ring);
850         return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
851 }
852
853 /*
854  * Check that the microcode version is new enough to include several key
855  * security fixes. Return true if the ucode is safe.
856  */
857 static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu,
858                 struct drm_gem_object *obj)
859 {
860         struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
861         struct msm_gpu *gpu = &adreno_gpu->base;
862         const char *sqe_name = adreno_gpu->info->fw[ADRENO_FW_SQE];
863         u32 *buf = msm_gem_get_vaddr(obj);
864         bool ret = false;
865
866         if (IS_ERR(buf))
867                 return false;
868
869         /*
870          * Targets up to a640 (a618, a630 and a640) need to check for a
871          * microcode version that is patched to support the whereami opcode or
872          * one that is new enough to include it by default.
873          *
874          * a650 tier targets don't need whereami but still need to be
875          * equal to or newer than 0.95 for other security fixes
876          *
877          * a660 targets have all the critical security fixes from the start
878          */
879         if (!strcmp(sqe_name, "a630_sqe.fw")) {
880                 /*
881                  * If the lowest nibble is 0xa that is an indication that this
882                  * microcode has been patched. The actual version is in dword
883                  * [3] but we only care about the patchlevel which is the lowest
884                  * nibble of dword [3]
885                  *
886                  * Otherwise check that the firmware is greater than or equal
887                  * to 1.90 which was the first version that had this fix built
888                  * in
889                  */
890                 if ((((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1) ||
891                         (buf[0] & 0xfff) >= 0x190) {
892                         a6xx_gpu->has_whereami = true;
893                         ret = true;
894                         goto out;
895                 }
896
897                 DRM_DEV_ERROR(&gpu->pdev->dev,
898                         "a630 SQE ucode is too old. Have version %x need at least %x\n",
899                         buf[0] & 0xfff, 0x190);
900         } else if (!strcmp(sqe_name, "a650_sqe.fw")) {
901                 if ((buf[0] & 0xfff) >= 0x095) {
902                         ret = true;
903                         goto out;
904                 }
905
906                 DRM_DEV_ERROR(&gpu->pdev->dev,
907                         "a650 SQE ucode is too old. Have version %x need at least %x\n",
908                         buf[0] & 0xfff, 0x095);
909         } else if (!strcmp(sqe_name, "a660_sqe.fw")) {
910                 ret = true;
911         } else {
912                 DRM_DEV_ERROR(&gpu->pdev->dev,
913                         "unknown GPU, add it to a6xx_ucode_check_version()!!\n");
914         }
915 out:
916         msm_gem_put_vaddr(obj);
917         return ret;
918 }
919
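/*
 * On first load, pin the SQE microcode into a GPU buffer and verify its
 * version; targets that use expanded APRIV or the WHERE_AM_I opcode also get
 * a privileged buffer for the RPTR shadow.
 */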
920 static int a6xx_ucode_load(struct msm_gpu *gpu)
921 {
922         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
923         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
924
925         if (!a6xx_gpu->sqe_bo) {
926                 a6xx_gpu->sqe_bo = adreno_fw_create_bo(gpu,
927                         adreno_gpu->fw[ADRENO_FW_SQE], &a6xx_gpu->sqe_iova);
928
929                 if (IS_ERR(a6xx_gpu->sqe_bo)) {
930                         int ret = PTR_ERR(a6xx_gpu->sqe_bo);
931
932                         a6xx_gpu->sqe_bo = NULL;
933                         DRM_DEV_ERROR(&gpu->pdev->dev,
934                                 "Could not allocate SQE ucode: %d\n", ret);
935
936                         return ret;
937                 }
938
939                 msm_gem_object_set_name(a6xx_gpu->sqe_bo, "sqefw");
940                 if (!a6xx_ucode_check_version(a6xx_gpu, a6xx_gpu->sqe_bo)) {
941                         msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->aspace);
942                         drm_gem_object_put(a6xx_gpu->sqe_bo);
943
944                         a6xx_gpu->sqe_bo = NULL;
945                         return -EPERM;
946                 }
947         }
948
949         /*
950          * Expanded APRIV and targets that support WHERE_AM_I both need a
951          * privileged buffer to store the RPTR shadow
952          */
953         if ((adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) &&
954             !a6xx_gpu->shadow_bo) {
955                 a6xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
956                                                       sizeof(u32) * gpu->nr_rings,
957                                                       MSM_BO_WC | MSM_BO_MAP_PRIV,
958                                                       gpu->aspace, &a6xx_gpu->shadow_bo,
959                                                       &a6xx_gpu->shadow_iova);
960
961                 if (IS_ERR(a6xx_gpu->shadow))
962                         return PTR_ERR(a6xx_gpu->shadow);
963
964                 msm_gem_object_set_name(a6xx_gpu->shadow_bo, "shadow");
965         }
966
967         return 0;
968 }
969
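/* Load the zap shader into the secure world once per boot using GPU_PAS_ID */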
970 static int a6xx_zap_shader_init(struct msm_gpu *gpu)
971 {
972         static bool loaded;
973         int ret;
974
975         if (loaded)
976                 return 0;
977
978         ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
979
980         loaded = !ret;
981         return ret;
982 }
983
984 #define A6XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
985           A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
986           A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
987           A6XX_RBBM_INT_0_MASK_CP_IB2 | \
988           A6XX_RBBM_INT_0_MASK_CP_IB1 | \
989           A6XX_RBBM_INT_0_MASK_CP_RB | \
990           A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
991           A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
992           A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
993           A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
994           A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR)
995
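/*
 * Bring the GPU up with the GMU holding it on: program address modes, clock
 * gating, VBIF/GBIF and UCHE setup, CP protection and interrupt masks, then
 * load the SQE, start the CP and drop out of secure mode.
 */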
996 static int hw_init(struct msm_gpu *gpu)
997 {
998         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
999         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1000         int ret;
1001
1002         /* Make sure the GMU keeps the GPU on while we set it up */
1003         a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
1004
1005         /* Clear GBIF halt in case GX domain was not collapsed */
1006         if (a6xx_has_gbif(adreno_gpu))
1007                 gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 0);
1008
1009         gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);
1010
1011         /*
1012          * Disable the trusted memory range - we don't actually support secure
1013          * memory rendering at this point in time and we don't want to block off
1014          * part of the virtual memory space.
1015          */
1016         gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000);
1017         gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
1018
1019         /* Turn on 64 bit addressing for all blocks */
1020         gpu_write(gpu, REG_A6XX_CP_ADDR_MODE_CNTL, 0x1);
1021         gpu_write(gpu, REG_A6XX_VSC_ADDR_MODE_CNTL, 0x1);
1022         gpu_write(gpu, REG_A6XX_GRAS_ADDR_MODE_CNTL, 0x1);
1023         gpu_write(gpu, REG_A6XX_RB_ADDR_MODE_CNTL, 0x1);
1024         gpu_write(gpu, REG_A6XX_PC_ADDR_MODE_CNTL, 0x1);
1025         gpu_write(gpu, REG_A6XX_HLSQ_ADDR_MODE_CNTL, 0x1);
1026         gpu_write(gpu, REG_A6XX_VFD_ADDR_MODE_CNTL, 0x1);
1027         gpu_write(gpu, REG_A6XX_VPC_ADDR_MODE_CNTL, 0x1);
1028         gpu_write(gpu, REG_A6XX_UCHE_ADDR_MODE_CNTL, 0x1);
1029         gpu_write(gpu, REG_A6XX_SP_ADDR_MODE_CNTL, 0x1);
1030         gpu_write(gpu, REG_A6XX_TPL1_ADDR_MODE_CNTL, 0x1);
1031         gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
1032
1033         /* enable hardware clockgating */
1034         a6xx_set_hwcg(gpu, true);
1035
1036         /* VBIF/GBIF start */
1037         if (adreno_is_a640_family(adreno_gpu) ||
1038             adreno_is_a650_family(adreno_gpu)) {
1039                 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE0, 0x00071620);
1040                 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE1, 0x00071620);
1041                 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE2, 0x00071620);
1042                 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE3, 0x00071620);
1044                 gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x3);
1045         } else {
1046                 gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3);
1047         }
1048
1049         if (adreno_is_a630(adreno_gpu))
1050                 gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
1051
1052         /* Make all blocks contribute to the GPU BUSY perf counter */
1053         gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);
1054
1055         /* Disable L2 bypass in the UCHE */
1056         gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, 0x0001ffffffffffc0llu);
1057         gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, 0x0001fffffffff000llu);
1058         gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, 0x0001fffffffff000llu);
1059
1060         if (!adreno_is_a650_family(adreno_gpu)) {
1061                 /* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
1062                 gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN, 0x00100000);
1063
1064                 gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX,
1065                         0x00100000 + adreno_gpu->gmem - 1);
1066         }
1067
1068         gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804);
1069         gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4);
1070
1071         if (adreno_is_a640_family(adreno_gpu) ||
1072             adreno_is_a650_family(adreno_gpu))
1073                 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140);
1074         else
1075                 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0);
1076         gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
1077
1078         if (adreno_is_a660_family(adreno_gpu))
1079                 gpu_write(gpu, REG_A6XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020);
1080
1081         /* Setting the mem pool size */
1082         gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 128);
1083
1084         /* Set the default primFifo threshold values, and the
1085          * vccCacheSkipDis=1 bit (0x200) for A640 and newer
1086          */
1087         if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu))
1088                 gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00300200);
1089         else if (adreno_is_a640_family(adreno_gpu) || adreno_is_7c3(adreno_gpu))
1090                 gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00200200);
1093         else
1094                 gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00180000);
1095
1096         /* Set the AHB default slave response to "ERROR" */
1097         gpu_write(gpu, REG_A6XX_CP_AHB_CNTL, 0x1);
1098
1099         /* Turn on performance counters */
1100         gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_CNTL, 0x1);
1101
1102         /* Select CP0 to always count cycles */
1103         gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL(0), PERF_CP_ALWAYS_COUNT);
1104
1105         a6xx_set_ubwc_config(gpu);
1106
1107         /* Enable fault detection */
1108         gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL,
1109                 (1 << 30) | 0x1fffff);
1110
1111         gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, 1);
1112
1113         /* Set weights for bicubic filtering */
1114         if (adreno_is_a650_family(adreno_gpu)) {
1115                 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, 0);
1116                 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1,
1117                         0x3fe05ff4);
1118                 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2,
1119                         0x3fa0ebee);
1120                 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3,
1121                         0x3f5193ed);
1122                 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4,
1123                         0x3f0243f0);
1124         }
1125
1126         /* Protect registers from the CP */
1127         a6xx_set_cp_protect(gpu);
1128
1129         if (adreno_is_a660_family(adreno_gpu)) {
1130                 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1);
1131                 gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0);
1132         }
1133
1134         /* Set dualQ + disable afull for A660 GPU */
1135         if (adreno_is_a660(adreno_gpu))
1136                 gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906);
1137
1138         /* Enable expanded apriv for targets that support it */
1139         if (gpu->hw_apriv) {
1140                 gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL,
1141                         (1 << 6) | (1 << 5) | (1 << 3) | (1 << 2) | (1 << 1));
1142         }
1143
1144         /* Enable interrupts */
1145         gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, A6XX_INT_MASK);
1146
1147         ret = adreno_hw_init(gpu);
1148         if (ret)
1149                 goto out;
1150
1151         gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);
1152
1153         /* Set the ringbuffer address */
1154         gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);
1155
1156         /* Targets that support extended APRIV can use the RPTR shadow from
1157          * hardware but all the other ones need to disable the feature. Targets
1158          * that support the WHERE_AM_I opcode can use that instead
1159          */
1160         if (adreno_gpu->base.hw_apriv)
1161                 gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT);
1162         else
1163                 gpu_write(gpu, REG_A6XX_CP_RB_CNTL,
1164                         MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
1165
1166         /* Configure the RPTR shadow if needed: */
1167         if (a6xx_gpu->shadow_bo) {
1168                 gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR,
1169                         shadowptr(a6xx_gpu, gpu->rb[0]));
1170         }
1171
1172         /* Always come up on rb 0 */
1173         a6xx_gpu->cur_ring = gpu->rb[0];
1174
1175         gpu->cur_ctx_seqno = 0;
1176
1177         /* Enable the SQE to start the CP engine */
1178         gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1);
1179
1180         ret = a6xx_cp_init(gpu);
1181         if (ret)
1182                 goto out;
1183
1184         /*
1185          * Try to load a zap shader into the secure world. If successful
1186          * we can use the CP to switch out of secure mode. If not then we
1187          * have no recourse but to try to switch ourselves out manually. If we
1188          * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
1189          * be blocked and a permissions violation will soon follow.
1190          */
1191         ret = a6xx_zap_shader_init(gpu);
1192         if (!ret) {
1193                 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
1194                 OUT_RING(gpu->rb[0], 0x00000000);
1195
1196                 a6xx_flush(gpu, gpu->rb[0]);
1197                 if (!a6xx_idle(gpu, gpu->rb[0]))
1198                         return -EINVAL;
1199         } else if (ret == -ENODEV) {
1200                 /*
1201                  * This device does not use zap shader (but print a warning
1202                  * just in case someone got their dt wrong.. hopefully they
1203                  * have a debug UART to realize the error of their ways...
1204                  * if you mess this up you are about to crash horribly)
1205                  */
1206                 dev_warn_once(gpu->dev->dev,
1207                         "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
1208                 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
1209                 ret = 0;
1210         } else {
1211                 return ret;
1212         }
1213
1214 out:
1215         /*
1216          * Tell the GMU that we are done touching the GPU and it can start power
1217          * management
1218          */
1219         a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
1220
1221         if (a6xx_gpu->gmu.legacy) {
1222                 /* Take the GMU out of its special boot mode */
1223                 a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_BOOT_SLUMBER);
1224         }
1225
1226         return ret;
1227 }
1228
1229 static int a6xx_hw_init(struct msm_gpu *gpu)
1230 {
1231         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1232         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1233         int ret;
1234
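             /* Serialize hw init against other users of the GMU (OOB votes, freq changes) */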
1235         mutex_lock(&a6xx_gpu->gmu.lock);
1236         ret = hw_init(gpu);
1237         mutex_unlock(&a6xx_gpu->gmu.lock);
1238
1239         return ret;
1240 }
1241
1242 static void a6xx_dump(struct msm_gpu *gpu)
1243 {
1244         DRM_DEV_INFO(&gpu->pdev->dev, "status:   %08x\n",
1245                         gpu_read(gpu, REG_A6XX_RBBM_STATUS));
1246         adreno_dump(gpu);
1247 }
1248
1249 #define VBIF_RESET_ACK_TIMEOUT  100
1250 #define VBIF_RESET_ACK_MASK     0x00f0
1251
1252 static void a6xx_recover(struct msm_gpu *gpu)
1253 {
1254         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1255         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1256         struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1257         int i, active_submits;
1258
1259         adreno_dump_info(gpu);
1260
1261         for (i = 0; i < 8; i++)
1262                 DRM_DEV_INFO(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i,
1263                         gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i)));
1264
1265         if (hang_debug)
1266                 a6xx_dump(gpu);
1267
1268         /*
1269          * Set the hung flag so that recovery-specific sequences are handled
1270          * during the rpm suspend we are about to trigger
1271          */
1272         a6xx_gpu->hung = true;
1273
1274         /* Halt SQE first */
1275         gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3);
1276
1277         /*
1278          * Turn off keep alive that might have been enabled by the hang
1279          * interrupt
1280          */
1281         gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 0);
1282
1283         pm_runtime_dont_use_autosuspend(&gpu->pdev->dev);
1284
1285         /* active_submits won't change until we make a submission */
1286         mutex_lock(&gpu->active_lock);
1287         active_submits = gpu->active_submits;
1288
1289         /*
1290          * Temporarily clear active_submits count to silence a WARN() in the
1291          * runtime suspend cb
1292          */
1293         gpu->active_submits = 0;
1294
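             /*
              * Register the genpd notifier and request a synced poweroff so we
              * can wait below for the cx gdsc to actually collapse
              */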
1295         reinit_completion(&gmu->pd_gate);
1296         dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb);
1297         dev_pm_genpd_synced_poweroff(gmu->cxpd);
1298
1299         /* Drop the rpm refcount from active submits */
1300         if (active_submits)
1301                 pm_runtime_put(&gpu->pdev->dev);
1302
1303         /* And the final one from recover worker */
1304         pm_runtime_put_sync(&gpu->pdev->dev);
1305
1306         if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000)))
1307                 DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n");
1308
1309         dev_pm_genpd_remove_notifier(gmu->cxpd);
1310
1311         pm_runtime_use_autosuspend(&gpu->pdev->dev);
1312
1313         if (active_submits)
1314                 pm_runtime_get(&gpu->pdev->dev);
1315
1316         pm_runtime_get_sync(&gpu->pdev->dev);
1317
1318         gpu->active_submits = active_submits;
1319         mutex_unlock(&gpu->active_lock);
1320
1321         msm_gpu_hw_init(gpu);
1322         a6xx_gpu->hung = false;
1323 }
1324
1325 static const char *a6xx_uche_fault_block(struct msm_gpu *gpu, u32 mid)
1326 {
1327         static const char *uche_clients[7] = {
1328                 "VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ",
1329         };
1330         u32 val;
1331
1332         if (mid < 1 || mid > 3)
1333                 return "UNKNOWN";
1334
1335         /*
1336          * The source of the data depends on the mid read from FSYNR1 and
1337          * the client ID read from the UCHE block
1338          */
1339         val = gpu_read(gpu, REG_A6XX_UCHE_CLIENT_PF);
1340
1341         /* mid = 3 is most precise and refers to only one block per client */
1342         if (mid == 3)
1343                 return uche_clients[val & 7];
1344
1345         /* For mid=2 the source is TP|VFD, except when the client id is 0 (then it is TP) */
1346         if (mid == 2)
1347                 return ((val & 7) == 0) ? "TP" : "TP|VFD";
1348
1349         /* For mid=1 just return "UCHE" as a catchall for everything else */
1350         return "UCHE";
1351 }
1352
1353 static const char *a6xx_fault_block(struct msm_gpu *gpu, u32 id)
1354 {
1355         if (id == 0)
1356                 return "CP";
1357         else if (id == 4)
1358                 return "CCU";
1359         else if (id == 6)
1360                 return "CDP Prefetch";
1361
1362         return a6xx_uche_fault_block(gpu, id);
1363 }
1364
1365 static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
1366 {
1367         struct msm_gpu *gpu = arg;
1368         struct adreno_smmu_fault_info *info = data;
1369         const char *block = "unknown";
1370
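             /* Snapshot the CP scratch registers that get included in the fault report */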
1371         u32 scratch[] = {
1372                         gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)),
1373                         gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)),
1374                         gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)),
1375                         gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)),
1376         };
1377
1378         if (info)
1379                 block = a6xx_fault_block(gpu, info->fsynr1 & 0xff);
1380
1381         return adreno_fault_handler(gpu, iova, flags, info, block, scratch);
1382 }
1383
1384 static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu)
1385 {
1386         u32 status = gpu_read(gpu, REG_A6XX_CP_INTERRUPT_STATUS);
1387
1388         if (status & A6XX_CP_INT_CP_OPCODE_ERROR) {
1389                 u32 val;
1390
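                     /*
                      * The SQE stat interface is indexed; entry 1 reports what
                      * the SQE saw as the (possible) offending opcode
                      */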
1391                 gpu_write(gpu, REG_A6XX_CP_SQE_STAT_ADDR, 1);
1392                 val = gpu_read(gpu, REG_A6XX_CP_SQE_STAT_DATA);
1393                 dev_err_ratelimited(&gpu->pdev->dev,
1394                         "CP | opcode error | possible opcode=0x%8.8X\n",
1395                         val);
1396         }
1397
1398         if (status & A6XX_CP_INT_CP_UCODE_ERROR)
1399                 dev_err_ratelimited(&gpu->pdev->dev,
1400                         "CP ucode error interrupt\n");
1401
1402         if (status & A6XX_CP_INT_CP_HW_FAULT_ERROR)
1403                 dev_err_ratelimited(&gpu->pdev->dev, "CP | HW fault | status=0x%8.8X\n",
1404                         gpu_read(gpu, REG_A6XX_CP_HW_FAULT));
1405
1406         if (status & A6XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
1407                 u32 val = gpu_read(gpu, REG_A6XX_CP_PROTECT_STATUS);
1408
1409                 dev_err_ratelimited(&gpu->pdev->dev,
1410                         "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
1411                         val & (1 << 20) ? "READ" : "WRITE",
1412                         (val & 0x3ffff), val);
1413         }
1414
1415         if (status & A6XX_CP_INT_CP_AHB_ERROR)
1416                 dev_err_ratelimited(&gpu->pdev->dev, "CP AHB error interrupt\n");
1417
1418         if (status & A6XX_CP_INT_CP_VSD_PARITY_ERROR)
1419                 dev_err_ratelimited(&gpu->pdev->dev, "CP VSD decoder parity error\n");
1420
1421         if (status & A6XX_CP_INT_CP_ILLEGAL_INSTR_ERROR)
1422                 dev_err_ratelimited(&gpu->pdev->dev, "CP illegal instruction error\n");
1423
1424 }
1425
1426 static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
1427 {
1428         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1429         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1430         struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1431
1432         /*
1433          * If stalled on SMMU fault, we could trip the GPU's hang detection,
1434          * but the fault handler will trigger the devcore dump, and we want
1435          * to otherwise resume normally rather than killing the submit, so
1436          * just bail.
1437          */
1438         if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT)
1439                 return;
1440
1441         /*
1442          * Force the GPU to stay on until after we finish
1443          * collecting information
1444          */
1445         gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1);
1446
1447         DRM_DEV_ERROR(&gpu->pdev->dev,
1448                 "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
1449                 ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
1450                 gpu_read(gpu, REG_A6XX_RBBM_STATUS),
1451                 gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
1452                 gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
1453                 gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
1454                 gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
1455                 gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
1456                 gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));
1457
1458         /* Turn off the hangcheck timer to keep it from bothering us */
1459         del_timer(&gpu->hangcheck_timer);
1460
1461         kthread_queue_work(gpu->worker, &gpu->recover_work);
1462 }
1463
1464 static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
1465 {
1466         struct msm_drm_private *priv = gpu->dev->dev_private;
1467         u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS);
1468
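             /* Ack the interrupts we just read so new events can re-trigger the line */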
1469         gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status);
1470
1471         if (priv->disable_err_irq)
1472                 status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS;
1473
1474         if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
1475                 a6xx_fault_detect_irq(gpu);
1476
1477         if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR)
1478                 dev_err_ratelimited(&gpu->pdev->dev, "CP | AHB bus error\n");
1479
1480         if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1481                 a6xx_cp_hw_err_irq(gpu);
1482
1483         if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW)
1484                 dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n");
1485
1486         if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1487                 dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n");
1488
1489         if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1490                 dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n");
1491
1492         if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS)
1493                 msm_gpu_retire(gpu);
1494
1495         return IRQ_HANDLED;
1496 }
1497
1498 static void a6xx_llc_rmw(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 mask, u32 or)
1499 {
1500         return msm_rmw(a6xx_gpu->llc_mmio + (reg << 2), mask, or);
1501 }
1502
1503 static void a6xx_llc_write(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 value)
1504 {
1505         msm_writel(value, a6xx_gpu->llc_mmio + (reg << 2));
1506 }
1507
1508 static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu)
1509 {
1510         llcc_slice_deactivate(a6xx_gpu->llc_slice);
1511         llcc_slice_deactivate(a6xx_gpu->htw_llc_slice);
1512 }
1513
1514 static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
1515 {
1516         struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1517         struct msm_gpu *gpu = &adreno_gpu->base;
1518         u32 cntl1_regval = 0;
1519
1520         if (IS_ERR(a6xx_gpu->llc_mmio))
1521                 return;
1522
1523         if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
1524                 u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
1525
1526                 gpu_scid &= 0x1f;
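                     /*
                      * Replicate the 5-bit GPU SCID across the five fields of
                      * the cache CNTL1 value; the pagetable walker SCID (if
                      * used) is added into bits 29:25 below
                      */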
1527                 cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) |
1528                                (gpu_scid << 15) | (gpu_scid << 20);
1529
1530                 /* On A660, the SCID programming for UCHE traffic is done in
1531                  * A6XX_GBIF_SCACHE_CNTL0[14:10]
1532                  */
1533                 if (adreno_is_a660_family(adreno_gpu))
1534                         gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) |
1535                                 (1 << 8), (gpu_scid << 10) | (1 << 8));
1536         }
1537
1538         /*
1539          * For targets with an MMU500, activate the slice but don't program the
1540          * register.  The XBL will take care of that.
1541          */
1542         if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) {
1543                 if (!a6xx_gpu->have_mmu500) {
1544                         u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice);
1545
1546                         gpuhtw_scid &= 0x1f;
1547                         cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid);
1548                 }
1549         }
1550
1551         if (!cntl1_regval)
1552                 return;
1553
1554         /*
1555          * Program the slice IDs for the various GPU blocks and GPU MMU
1556          * pagetables
1557          */
1558         if (!a6xx_gpu->have_mmu500) {
1559                 a6xx_llc_write(a6xx_gpu,
1560                         REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval);
1561
1562                 /*
1563                  * Program cacheability overrides to not allocate cache
1564                  * lines on a write miss
1565                  */
1566                 a6xx_llc_rmw(a6xx_gpu,
1567                         REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03);
1568                 return;
1569         }
1570
1571         gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), cntl1_regval);
1572 }
1573
1574 static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu)
1575 {
1576         llcc_slice_putd(a6xx_gpu->llc_slice);
1577         llcc_slice_putd(a6xx_gpu->htw_llc_slice);
1578 }
1579
1580 static void a6xx_llc_slices_init(struct platform_device *pdev,
1581                 struct a6xx_gpu *a6xx_gpu)
1582 {
1583         struct device_node *phandle;
1584
1585         /*
1586          * There is a different programming path for targets with an mmu500
1587          * attached, so detect if that is the case
1588          */
1589         phandle = of_parse_phandle(pdev->dev.of_node, "iommus", 0);
1590         a6xx_gpu->have_mmu500 = (phandle &&
1591                 of_device_is_compatible(phandle, "arm,mmu-500"));
1592         of_node_put(phandle);
1593
1594         if (a6xx_gpu->have_mmu500)
1595                 a6xx_gpu->llc_mmio = NULL;
1596         else
1597                 a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem");
1598
1599         a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
1600         a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
1601
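             /*
              * If neither slice is available, poison llc_mmio so that
              * a6xx_llc_activate() bails out early
              */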
1602         if (IS_ERR_OR_NULL(a6xx_gpu->llc_slice) && IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice))
1603                 a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL);
1604 }
1605
1606 static int a6xx_pm_resume(struct msm_gpu *gpu)
1607 {
1608         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1609         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1610         int ret;
1611
1612         gpu->needs_hw_init = true;
1613
1614         trace_msm_gpu_resume(0);
1615
1616         mutex_lock(&a6xx_gpu->gmu.lock);
1617         ret = a6xx_gmu_resume(a6xx_gpu);
1618         mutex_unlock(&a6xx_gpu->gmu.lock);
1619         if (ret)
1620                 return ret;
1621
1622         msm_devfreq_resume(gpu);
1623
1624         a6xx_llc_activate(a6xx_gpu);
1625
1626         return 0;
1627 }
1628
1629 static int a6xx_pm_suspend(struct msm_gpu *gpu)
1630 {
1631         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1632         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1633         int i, ret;
1634
1635         trace_msm_gpu_suspend(0);
1636
1637         a6xx_llc_deactivate(a6xx_gpu);
1638
1639         msm_devfreq_suspend(gpu);
1640
1641         mutex_lock(&a6xx_gpu->gmu.lock);
1642         ret = a6xx_gmu_stop(a6xx_gpu);
1643         mutex_unlock(&a6xx_gpu->gmu.lock);
1644         if (ret)
1645                 return ret;
1646
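             /*
              * The in-memory rptr shadow is stale once the GPU powers down, so
              * zero it to match the freshly reset ring state on the next resume
              */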
1647         if (a6xx_gpu->shadow_bo)
1648                 for (i = 0; i < gpu->nr_rings; i++)
1649                         a6xx_gpu->shadow[i] = 0;
1650
1651         gpu->suspend_count++;
1652
1653         return 0;
1654 }
1655
1656 static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1657 {
1658         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1659         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1660
1661         mutex_lock(&a6xx_gpu->gmu.lock);
1662
1663         /* Force the GPU power on so we can read this register */
1664         a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
1665
1666         *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER);
1667
1668         a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
1669
1670         mutex_unlock(&a6xx_gpu->gmu.lock);
1671
1672         return 0;
1673 }
1674
1675 static struct msm_ringbuffer *a6xx_active_ring(struct msm_gpu *gpu)
1676 {
1677         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1678         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1679
1680         return a6xx_gpu->cur_ring;
1681 }
1682
1683 static void a6xx_destroy(struct msm_gpu *gpu)
1684 {
1685         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1686         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1687
1688         if (a6xx_gpu->sqe_bo) {
1689                 msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->aspace);
1690                 drm_gem_object_put(a6xx_gpu->sqe_bo);
1691         }
1692
1693         if (a6xx_gpu->shadow_bo) {
1694                 msm_gem_unpin_iova(a6xx_gpu->shadow_bo, gpu->aspace);
1695                 drm_gem_object_put(a6xx_gpu->shadow_bo);
1696         }
1697
1698         a6xx_llc_slices_destroy(a6xx_gpu);
1699
1700         mutex_lock(&a6xx_gpu->gmu.lock);
1701         a6xx_gmu_remove(a6xx_gpu);
1702         mutex_unlock(&a6xx_gpu->gmu.lock);
1703
1704         adreno_gpu_cleanup(adreno_gpu);
1705
1706         kfree(a6xx_gpu);
1707 }
1708
1709 static u64 a6xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
1710 {
1711         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1712         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1713         u64 busy_cycles;
1714
1715         /* 19.2MHz */
1716         *out_sample_rate = 19200000;
1717
1718         busy_cycles = gmu_read64(&a6xx_gpu->gmu,
1719                         REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
1720                         REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);
1721
1722         return busy_cycles;
1723 }
1724
1725 static void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp,
1726                               bool suspended)
1727 {
1728         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1729         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1730
1731         mutex_lock(&a6xx_gpu->gmu.lock);
1732         a6xx_gmu_set_freq(gpu, opp, suspended);
1733         mutex_unlock(&a6xx_gpu->gmu.lock);
1734 }
1735
1736 static struct msm_gem_address_space *
1737 a6xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev)
1738 {
1739         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1740         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1741         unsigned long quirks = 0;
1742
1743         /*
1744          * This allows the GPU to set the bus attributes required to use system
1745          * cache on behalf of the iommu page table walker.
1746          */
1747         if (!IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice))
1748                 quirks |= IO_PGTABLE_QUIRK_ARM_OUTER_WBWA;
1749
1750         return adreno_iommu_create_address_space(gpu, pdev, quirks);
1751 }
1752
1753 static struct msm_gem_address_space *
1754 a6xx_create_private_address_space(struct msm_gpu *gpu)
1755 {
1756         struct msm_mmu *mmu;
1757
1758         mmu = msm_iommu_pagetable_create(gpu->aspace->mmu);
1759
1760         if (IS_ERR(mmu))
1761                 return ERR_CAST(mmu);
1762
1763         return msm_gem_address_space_create(mmu,
1764                 "gpu", 0x100000000ULL,
1765                 adreno_private_address_space_size(gpu));
1766 }
1767
1768 static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
1769 {
1770         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1771         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1772
1773         if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami)
1774                 return a6xx_gpu->shadow[ring->id];
1775
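             /* No rptr shadow in use: read the register and cache the value in memptrs */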
1776         return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
1777 }
1778
1779 static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
1780 {
1781         struct msm_cp_state cp_state = {
1782                 .ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
1783                 .ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
1784                 .ib1_rem  = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
1785                 .ib2_rem  = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE),
1786         };
1787         bool progress;
1788
1789         /*
1790          * Adjust the remaining data to account for what has already been
1791          * fetched from memory, but not yet consumed by the SQE.
1792          *
1793          * This is not *technically* correct, the amount buffered could
1794          * exceed the IB size due to hw prefetching ahead, but:
1795          *
1796          * (1) We aren't trying to find the exact position, just whether
1797          *     progress has been made
1798          * (2) The CP_REG_TO_MEM at the end of a submit should be enough
1799          *     to prevent prefetching into an unrelated submit.  (And
1800          *     either way, at some point the ROQ will be full.)
1801          */
1802         cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB1) >> 16;
1803         cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB2) >> 16;
1804
1805         progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state));
1806
1807         ring->last_cp_state = cp_state;
1808
1809         return progress;
1810 }
1811
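     /*
      * The per-target helpers below map the raw speed-bin fuse value to the
      * bin index used by the opp-supported-hw tables; unknown fuse values
      * map to UINT_MAX
      */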
1812 static u32 a618_get_speed_bin(u32 fuse)
1813 {
1814         if (fuse == 0)
1815                 return 0;
1816         else if (fuse == 169)
1817                 return 1;
1818         else if (fuse == 174)
1819                 return 2;
1820
1821         return UINT_MAX;
1822 }
1823
1824 static u32 a619_get_speed_bin(u32 fuse)
1825 {
1826         if (fuse == 0)
1827                 return 0;
1828         else if (fuse == 120)
1829                 return 4;
1830         else if (fuse == 138)
1831                 return 3;
1832         else if (fuse == 169)
1833                 return 2;
1834         else if (fuse == 180)
1835                 return 1;
1836
1837         return UINT_MAX;
1838 }
1839
1840 static u32 a640_get_speed_bin(u32 fuse)
1841 {
1842         if (fuse == 0)
1843                 return 0;
1844         else if (fuse == 1)
1845                 return 1;
1846
1847         return UINT_MAX;
1848 }
1849
1850 static u32 a650_get_speed_bin(u32 fuse)
1851 {
1852         if (fuse == 0)
1853                 return 0;
1854         else if (fuse == 1)
1855                 return 1;
1856         /* Yep, 2 and 3 are swapped! :/ */
1857         else if (fuse == 2)
1858                 return 3;
1859         else if (fuse == 3)
1860                 return 2;
1861
1862         return UINT_MAX;
1863 }
1864
1865 static u32 adreno_7c3_get_speed_bin(u32 fuse)
1866 {
1867         if (fuse == 0)
1868                 return 0;
1869         else if (fuse == 117)
1870                 return 0;
1871         else if (fuse == 190)
1872                 return 1;
1873
1874         return UINT_MAX;
1875 }
1876
1877 static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse)
1878 {
1879         u32 val = UINT_MAX;
1880
1881         if (adreno_cmp_rev(ADRENO_REV(6, 1, 8, ANY_ID), rev))
1882                 val = a618_get_speed_bin(fuse);
1883
1884         if (adreno_cmp_rev(ADRENO_REV(6, 1, 9, ANY_ID), rev))
1885                 val = a619_get_speed_bin(fuse);
1886
1887         if (adreno_cmp_rev(ADRENO_REV(6, 3, 5, ANY_ID), rev))
1888                 val = adreno_7c3_get_speed_bin(fuse);
1889
1890         if (adreno_cmp_rev(ADRENO_REV(6, 4, 0, ANY_ID), rev))
1891                 val = a640_get_speed_bin(fuse);
1892
1893         if (adreno_cmp_rev(ADRENO_REV(6, 5, 0, ANY_ID), rev))
1894                 val = a650_get_speed_bin(fuse);
1895
1896         if (val == UINT_MAX) {
1897                 DRM_DEV_ERROR(dev,
1898                         "missing support for speed-bin: %u. Some OPPs may not be supported by hardware\n",
1899                         fuse);
1900                 return UINT_MAX;
1901         }
1902
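             /*
              * The OPP core matches opp-supported-hw as a bitmask, so report the
              * chosen bin as a single set bit
              */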
1903         return (1 << val);
1904 }
1905
1906 static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev)
1907 {
1908         u32 supp_hw;
1909         u32 speedbin;
1910         int ret;
1911
1912         ret = adreno_read_speedbin(dev, &speedbin);
1913         /*
1914          * -ENOENT means that the platform doesn't support speedbin, which
1915          * is fine
1916          */
1917         if (ret == -ENOENT) {
1918                 return 0;
1919         } else if (ret) {
1920                 dev_err_probe(dev, ret,
1921                               "failed to read speed-bin. Some OPPs may not be supported by hardware\n");
1922                 return ret;
1923         }
1924
1925         supp_hw = fuse_to_supp_hw(dev, rev, speedbin);
1926
1927         ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1);
1928         if (ret)
1929                 return ret;
1930
1931         return 0;
1932 }
1933
1934 static const struct adreno_gpu_funcs funcs = {
1935         .base = {
1936                 .get_param = adreno_get_param,
1937                 .set_param = adreno_set_param,
1938                 .hw_init = a6xx_hw_init,
1939                 .ucode_load = a6xx_ucode_load,
1940                 .pm_suspend = a6xx_pm_suspend,
1941                 .pm_resume = a6xx_pm_resume,
1942                 .recover = a6xx_recover,
1943                 .submit = a6xx_submit,
1944                 .active_ring = a6xx_active_ring,
1945                 .irq = a6xx_irq,
1946                 .destroy = a6xx_destroy,
1947 #if defined(CONFIG_DRM_MSM_GPU_STATE)
1948                 .show = a6xx_show,
1949 #endif
1950                 .gpu_busy = a6xx_gpu_busy,
1951                 .gpu_get_freq = a6xx_gmu_get_freq,
1952                 .gpu_set_freq = a6xx_gpu_set_freq,
1953 #if defined(CONFIG_DRM_MSM_GPU_STATE)
1954                 .gpu_state_get = a6xx_gpu_state_get,
1955                 .gpu_state_put = a6xx_gpu_state_put,
1956 #endif
1957                 .create_address_space = a6xx_create_address_space,
1958                 .create_private_address_space = a6xx_create_private_address_space,
1959                 .get_rptr = a6xx_get_rptr,
1960                 .progress = a6xx_progress,
1961         },
1962         .get_timestamp = a6xx_get_timestamp,
1963 };
1964
1965 struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
1966 {
1967         struct msm_drm_private *priv = dev->dev_private;
1968         struct platform_device *pdev = priv->gpu_pdev;
1969         struct adreno_platform_config *config = pdev->dev.platform_data;
1970         const struct adreno_info *info;
1971         struct device_node *node;
1972         struct a6xx_gpu *a6xx_gpu;
1973         struct adreno_gpu *adreno_gpu;
1974         struct msm_gpu *gpu;
1975         int ret;
1976
1977         a6xx_gpu = kzalloc(sizeof(*a6xx_gpu), GFP_KERNEL);
1978         if (!a6xx_gpu)
1979                 return ERR_PTR(-ENOMEM);
1980
1981         adreno_gpu = &a6xx_gpu->base;
1982         gpu = &adreno_gpu->base;
1983
1984         adreno_gpu->registers = NULL;
1985
1986         /*
1987          * We need to know the platform type before calling into adreno_gpu_init
1988          * so that the hw_apriv flag can be correctly set. Snoop into the info
1989          * and grab the revision number
1990          */
1991         info = adreno_info(config->rev);
1992
1993         if (info && (info->revn == 650 || info->revn == 660 ||
1994                         adreno_cmp_rev(ADRENO_REV(6, 3, 5, ANY_ID), info->rev)))
1995                 adreno_gpu->base.hw_apriv = true;
1996
1997         a6xx_llc_slices_init(pdev, a6xx_gpu);
1998
1999         ret = a6xx_set_supported_hw(&pdev->dev, config->rev);
2000         if (ret) {
2001                 a6xx_destroy(&(a6xx_gpu->base.base));
2002                 return ERR_PTR(ret);
2003         }
2004
2005         ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
2006         if (ret) {
2007                 a6xx_destroy(&(a6xx_gpu->base.base));
2008                 return ERR_PTR(ret);
2009         }
2010
2011         /*
2012          * For now only clamp to idle freq for devices where this is known not
2013          * to cause power supply issues:
2014          */
2015         if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu))
2016                 priv->gpu_clamp_to_idle = true;
2017
2018         /* Check if there is a GMU phandle and set it up */
2019         node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0);
2020
2021         /* FIXME: How do we gracefully handle this? */
2022         BUG_ON(!node);
2023
2024         ret = a6xx_gmu_init(a6xx_gpu, node);
2025         of_node_put(node);
2026         if (ret) {
2027                 a6xx_destroy(&(a6xx_gpu->base.base));
2028                 return ERR_PTR(ret);
2029         }
2030
2031         if (gpu->aspace)
2032                 msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu,
2033                                 a6xx_fault_handler);
2034
2035         return gpu;
2036 }